Skip to content

Commit 91bc482

Browse files
Alexei Starovoitov authored and davem330 committed
tc: bpf: add checksum helpers
Commit 608cd71 ("tc: bpf: generalize pedit action") has added the possibility for BPF programs in the tc pipeline to mangle packet data. This patch adds two helpers, bpf_l3_csum_replace() and bpf_l4_csum_replace(), for fixing up the protocol checksums after the packet mangling. It also adds a 'flags' argument to the bpf_skb_store_bytes() helper to avoid unnecessary checksum recomputations when BPF programs adjust the l3/l4 checksums themselves, and documents all three helpers in the uapi header. Moreover, a sample program is added to show how BPF programs can make use of the mangle and csum helpers. Signed-off-by: Alexei Starovoitov <[email protected]> Acked-by: Daniel Borkmann <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 5888b93 commit 91bc482

File tree

5 files changed

+220
-5
lines changed

5 files changed

+220
-5
lines changed

include/uapi/linux/bpf.h

+37-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,43 @@ enum bpf_func_id {
168168
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
169169
BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
170170
BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
171-
BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
171+
172+
/**
173+
* skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
174+
* @skb: pointer to skb
175+
* @offset: offset within packet from skb->data
176+
* @from: pointer where to copy bytes from
177+
* @len: number of bytes to store into packet
178+
* @flags: bit 0 - if true, recompute skb->csum
179+
* other bits - reserved
180+
* Return: 0 on success
181+
*/
182+
BPF_FUNC_skb_store_bytes,
183+
184+
/**
185+
* l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
186+
* @skb: pointer to skb
187+
* @offset: offset within packet where IP checksum is located
188+
* @from: old value of header field
189+
* @to: new value of header field
190+
* @flags: bits 0-3 - size of header field
191+
* other bits - reserved
192+
* Return: 0 on success
193+
*/
194+
BPF_FUNC_l3_csum_replace,
195+
196+
/**
197+
* l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
198+
* @skb: pointer to skb
199+
* @offset: offset within packet where TCP/UDP checksum is located
200+
* @from: old value of header field
201+
* @to: new value of header field
202+
* @flags: bits 0-3 - size of header field
203+
* bit 4 - is pseudo header
204+
* other bits - reserved
205+
* Return: 0 on success
206+
*/
207+
BPF_FUNC_l4_csum_replace,
172208
__BPF_FUNC_MAX_ID,
173209
};
174210

net/core/filter.c

+104-4
Original file line numberDiff line numberDiff line change
@@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
11751175
return 0;
11761176
}
11771177

1178-
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1178+
#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
1179+
1180+
static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
11791181
{
11801182
struct sk_buff *skb = (struct sk_buff *) (long) r1;
11811183
unsigned int offset = (unsigned int) r2;
@@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
11921194
*
11931195
* so check for invalid 'offset' and too large 'len'
11941196
*/
1195-
if (offset > 0xffff || len > sizeof(buf))
1197+
if (unlikely(offset > 0xffff || len > sizeof(buf)))
11961198
return -EFAULT;
11971199

11981200
if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
@@ -1202,15 +1204,16 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
12021204
if (unlikely(!ptr))
12031205
return -EFAULT;
12041206

1205-
skb_postpull_rcsum(skb, ptr, len);
1207+
if (BPF_RECOMPUTE_CSUM(flags))
1208+
skb_postpull_rcsum(skb, ptr, len);
12061209

12071210
memcpy(ptr, from, len);
12081211

12091212
if (ptr == buf)
12101213
/* skb_store_bits cannot return -EFAULT here */
12111214
skb_store_bits(skb, offset, ptr, len);
12121215

1213-
if (skb->ip_summed == CHECKSUM_COMPLETE)
1216+
if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
12141217
skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
12151218
return 0;
12161219
}
@@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
12231226
.arg2_type = ARG_ANYTHING,
12241227
.arg3_type = ARG_PTR_TO_STACK,
12251228
.arg4_type = ARG_CONST_STACK_SIZE,
1229+
.arg5_type = ARG_ANYTHING,
1230+
};
1231+
1232+
#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
1233+
#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
1234+
1235+
/**
 * bpf_l3_csum_replace - patch a 16-bit checksum after a header field change
 * @r1: the skb, passed as a 64-bit register value
 * @offset: byte offset of the checksum within the packet
 * @from: old value of the header field
 * @to: new value of the header field
 * @flags: bits 0-3 hold the size of the changed field (2 or 4 bytes);
 *         all other bits are reserved and must be zero
 *
 * Return: 0 on success; -EINVAL when a reserved flag bit is set or the
 * field size is neither 2 nor 4; -EFAULT when @offset exceeds 0xffff, when
 * the skb is cloned and not writable up to the end of the checksum, or when
 * skb_header_pointer() fails.
 */
static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	__sum16 sum, *ptr;

	/* Reject reserved bits so that they can be given a meaning later
	 * without silently changing the behaviour of existing programs.
	 */
	if (unlikely(BPF_HEADER_FIELD_SIZE(flags) != flags))
		return -EINVAL;

	if (unlikely(offset > 0xffff))
		return -EFAULT;

	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
		return -EFAULT;

	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
	if (unlikely(!ptr))
		return -EFAULT;

	switch (BPF_HEADER_FIELD_SIZE(flags)) {
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	/* ptr refers to the on-stack copy, so the updated checksum still has
	 * to be written back into the packet.
	 */
	if (ptr == &sum)
		/* skb_store_bits guaranteed to not return -EFAULT here */
		skb_store_bits(skb, offset, ptr, sizeof(sum));

	return 0;
}
1267+
1268+
const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1269+
.func = bpf_l3_csum_replace,
1270+
.gpl_only = false,
1271+
.ret_type = RET_INTEGER,
1272+
.arg1_type = ARG_PTR_TO_CTX,
1273+
.arg2_type = ARG_ANYTHING,
1274+
.arg3_type = ARG_ANYTHING,
1275+
.arg4_type = ARG_ANYTHING,
1276+
.arg5_type = ARG_ANYTHING,
1277+
};
1278+
1279+
/**
 * bpf_l4_csum_replace - patch a TCP/UDP style checksum after a field change
 * @r1: the skb, passed as a 64-bit register value
 * @offset: byte offset of the checksum within the packet
 * @from: old value of the header field
 * @to: new value of the header field
 * @flags: bits 0-3 hold the size of the changed field (2 or 4 bytes),
 *         bit 4 marks the field as belonging to the pseudo header;
 *         all other bits are reserved and must be zero
 *
 * Return: 0 on success; -EINVAL when a reserved flag bit is set or the
 * field size is neither 2 nor 4; -EFAULT when @offset exceeds 0xffff, when
 * the skb is cloned and not writable up to the end of the checksum, or when
 * skb_header_pointer() fails.
 */
static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
	__sum16 sum, *ptr;

	/* Only the size nibble and the pseudo-header bit are defined; reject
	 * anything else so reserved bits stay available for future use.
	 */
	if (unlikely((BPF_HEADER_FIELD_SIZE(flags) |
		      BPF_IS_PSEUDO_HEADER(flags)) != flags))
		return -EINVAL;

	if (unlikely(offset > 0xffff))
		return -EFAULT;

	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
		return -EFAULT;

	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
	if (unlikely(!ptr))
		return -EFAULT;

	switch (BPF_HEADER_FIELD_SIZE(flags)) {
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	/* ptr refers to the on-stack copy, so the updated checksum still has
	 * to be written back into the packet.
	 */
	if (ptr == &sum)
		/* skb_store_bits guaranteed to not return -EFAULT here */
		skb_store_bits(skb, offset, ptr, sizeof(sum));

	return 0;
}
1312+
1313+
const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1314+
.func = bpf_l4_csum_replace,
1315+
.gpl_only = false,
1316+
.ret_type = RET_INTEGER,
1317+
.arg1_type = ARG_PTR_TO_CTX,
1318+
.arg2_type = ARG_ANYTHING,
1319+
.arg3_type = ARG_ANYTHING,
1320+
.arg4_type = ARG_ANYTHING,
1321+
.arg5_type = ARG_ANYTHING,
12261322
};
12271323

12281324
static const struct bpf_func_proto *
@@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
12501346
switch (func_id) {
12511347
case BPF_FUNC_skb_store_bytes:
12521348
return &bpf_skb_store_bytes_proto;
1349+
case BPF_FUNC_l3_csum_replace:
1350+
return &bpf_l3_csum_replace_proto;
1351+
case BPF_FUNC_l4_csum_replace:
1352+
return &bpf_l4_csum_replace_proto;
12531353
default:
12541354
return sk_filter_func_proto(func_id);
12551355
}

samples/bpf/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
1717
always := $(hostprogs-y)
1818
always += sockex1_kern.o
1919
always += sockex2_kern.o
20+
always += tcbpf1_kern.o
2021

2122
HOSTCFLAGS += -I$(objtree)/usr/include
2223

samples/bpf/bpf_helpers.h

+7
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,11 @@ struct bpf_map_def {
3737
unsigned int max_entries;
3838
};
3939

40+
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
41+
(void *) BPF_FUNC_skb_store_bytes;
42+
static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
43+
(void *) BPF_FUNC_l3_csum_replace;
44+
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
45+
(void *) BPF_FUNC_l4_csum_replace;
46+
4047
#endif

samples/bpf/tcbpf1_kern.c

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#include <uapi/linux/bpf.h>
2+
#include <uapi/linux/if_ether.h>
3+
#include <uapi/linux/if_packet.h>
4+
#include <uapi/linux/ip.h>
5+
#include <uapi/linux/in.h>
6+
#include <uapi/linux/tcp.h>
7+
#include "bpf_helpers.h"
8+
9+
/* compiler workaround */
10+
#define _htonl __builtin_bswap32
11+
12+
static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
13+
{
14+
bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
15+
}
16+
17+
/* use 1 below for ingress qdisc and 0 for egress */
18+
#if 0
19+
#undef ETH_HLEN
20+
#define ETH_HLEN 0
21+
#endif
22+
23+
#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
24+
#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
25+
26+
/* Replace the byte at TOS_OFF with @new_tos. The checksum at IP_CSUM_OFF is
 * patched first as a 2-byte field change, then the byte itself is stored
 * with flags 0 (no skb->csum recomputation requested).
 */
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
	__u8 cur_tos = load_byte(skb, TOS_OFF);
	int from_field = htons(cur_tos);
	int to_field = htons(new_tos);

	bpf_l3_csum_replace(skb, IP_CSUM_OFF, from_field, to_field, 2);
	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
}
33+
34+
#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
35+
#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
36+
37+
#define IS_PSEUDO 0x10
38+
39+
/* Replace the 4 bytes at IP_SRC_OFF with @new_ip. The checksum at
 * TCP_CSUM_OFF is patched with the pseudo-header flag set, then the one at
 * IP_CSUM_OFF, and finally @new_ip is stored byte-for-byte into the packet.
 */
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
	__u32 prev_ip = _htonl(load_word(skb, IP_SRC_OFF));
	int l4_flags = IS_PSEUDO | sizeof(new_ip);
	int l3_flags = sizeof(new_ip);

	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, prev_ip, new_ip, l4_flags);
	bpf_l3_csum_replace(skb, IP_CSUM_OFF, prev_ip, new_ip, l3_flags);
	bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
}
47+
48+
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
49+
/* Replace the 2 bytes at TCP_DPORT_OFF with @new_port. The checksum at
 * TCP_CSUM_OFF is patched as a 2-byte field change (pseudo-header flag not
 * set), then @new_port is stored byte-for-byte into the packet.
 */
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
	__u16 prev_port = htons(load_half(skb, TCP_DPORT_OFF));
	int field_size = sizeof(new_port);

	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, prev_port, new_port, field_size);
	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
}
56+
57+
SEC("classifier")
58+
int bpf_prog1(struct __sk_buff *skb)
59+
{
60+
__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
61+
long *value;
62+
63+
if (proto == IPPROTO_TCP) {
64+
set_ip_tos(skb, 8);
65+
set_tcp_ip_src(skb, 0xA010101);
66+
set_tcp_dest_port(skb, 5001);
67+
}
68+
69+
return 0;
70+
}
71+
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)