Skip to content

Commit d3aa45c

Browse files
Alexei Starovoitov, davem330
Alexei Starovoitov
authored and committed
bpf: add helpers to access tunnel metadata
Introduce helpers to let eBPF programs attached to TC manipulate tunnel metadata:

bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
  skb: pointer to skb
  key: pointer to 'struct bpf_tunnel_key'
  size: size of 'struct bpf_tunnel_key'
  flags: room for future extensions

First eBPF program that uses these helpers will allocate per_cpu metadata_dst structures that will be used on TX. On RX metadata_dst is allocated by tunnel driver.

Typical usage for TX:
  struct bpf_tunnel_key tkey;
  ... populate tkey ...
  bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
  bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);

RX:
  struct bpf_tunnel_key tkey = {};
  bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
  ... lookup or redirect based on tkey ...

'struct bpf_tunnel_key' will be extended in the future by adding elements to the end and the 'size' argument will indicate which fields are populated, thereby keeping backwards compatibility. The 'flags' argument may be used as well when the 'size' is not enough or to indicate completely different layout of bpf_tunnel_key.

Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Thomas Graf <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 55d7de9 commit d3aa45c

File tree

4 files changed

+124
-6
lines changed

4 files changed

+124
-6
lines changed

include/net/dst_metadata.h

+1
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,6 @@ static inline bool skb_valid_dst(const struct sk_buff *skb)
5151
}
5252

5353
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
54+
/* Allocate and initialise one metadata_dst (with @optslen trailing option
 * bytes) for every possible CPU; returns NULL if the allocation fails.
 */
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);
5455

5556
#endif /* __NET_DST_METADATA_H */

include/uapi/linux/bpf.h

+17
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,18 @@ enum bpf_func_id {
258258
BPF_FUNC_get_cgroup_classid,
259259
BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
260260
BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */
261+
262+
/**
263+
* bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
264+
* retrieve or populate tunnel metadata
265+
* @skb: pointer to skb
266+
* @key: pointer to 'struct bpf_tunnel_key'
267+
* @size: size of 'struct bpf_tunnel_key'
268+
* @flags: room for future extensions
269+
* Return: 0 on success
270+
*/
271+
BPF_FUNC_skb_get_tunnel_key,
272+
BPF_FUNC_skb_set_tunnel_key,
261273
__BPF_FUNC_MAX_ID,
262274
};
263275

@@ -280,4 +292,9 @@ struct __sk_buff {
280292
__u32 cb[5];
281293
};
282294

295+
/* Tunnel metadata exchanged between an eBPF program and the
 * bpf_skb_[gs]et_tunnel_key() helpers. Both fields are in host byte
 * order; the helpers convert to/from the big-endian tunnel key.
 */
struct bpf_tunnel_key {
296+
__u32 tunnel_id;	/* tunnel ID; converted to/from the 64-bit key.tun_id */
297+
__u32 remote_ipv4;	/* IPv4 peer: read from key.ipv4_src on get, written to key.ipv4_dst on set */
298+
};
299+
283300
#endif /* _UAPI__LINUX_BPF_H__ */

net/core/dst.c

+29-6
Original file line numberDiff line numberDiff line change
@@ -362,15 +362,10 @@ static int dst_md_discard(struct sk_buff *skb)
362362
return 0;
363363
}
364364

365-
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
365+
static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
366366
{
367-
struct metadata_dst *md_dst;
368367
struct dst_entry *dst;
369368

370-
md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
371-
if (!md_dst)
372-
return ERR_PTR(-ENOMEM);
373-
374369
dst = &md_dst->dst;
375370
dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
376371
DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
@@ -380,11 +375,39 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
380375

381376
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
382377
md_dst->opts_len = optslen;
378+
}
379+
380+
/**
 * metadata_dst_alloc - allocate and initialise a single metadata dst
 * @optslen: number of option bytes reserved directly after the structure
 * @flags: allocation flags handed to kmalloc()
 *
 * Return: the initialised metadata_dst, or NULL if kmalloc() fails.
 */
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
{
	struct metadata_dst *md;

	md = kmalloc(sizeof(*md) + optslen, flags);
	if (md)
		__metadata_dst_init(md, optslen);

	/* NULL on allocation failure, otherwise the ready-to-use entry */
	return md;
}
386392
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
387393

394+
/**
 * metadata_dst_alloc_percpu - allocate a metadata dst for every possible CPU
 * @optslen: number of option bytes reserved after each per-CPU structure
 * @flags: allocation flags handed to __alloc_percpu_gfp()
 *
 * Every per-CPU instance is initialised with __metadata_dst_init().
 *
 * Return: the per-CPU pointer, or NULL if the allocation fails.
 */
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
{
	const size_t md_size = sizeof(struct metadata_dst) + optslen;
	struct metadata_dst __percpu *pcpu_md;
	int cpu;

	pcpu_md = __alloc_percpu_gfp(md_size, __alignof__(struct metadata_dst),
				     flags);
	if (!pcpu_md)
		return NULL;

	for_each_possible_cpu(cpu) {
		struct metadata_dst *md = per_cpu_ptr(pcpu_md, cpu);

		__metadata_dst_init(md, optslen);
	}

	return pcpu_md;
}
409+
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
410+
388411
/* Dirty hack. We did it in 2.2 (in __dst_free),
389412
* we have _very_ good reasons not to repeat
390413
* this mistake in 2.3, but we have no choice

net/core/filter.c

+77
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include <linux/bpf.h>
4949
#include <net/sch_generic.h>
5050
#include <net/cls_cgroup.h>
51+
#include <net/dst_metadata.h>
5152

5253
/**
5354
* sk_filter - run a packet through a socket filter
@@ -1483,6 +1484,78 @@ bool bpf_helper_changes_skb_data(void *func)
14831484
return false;
14841485
}
14851486

1487+
/* eBPF helper: copy the tunnel metadata attached to an skb into the
 * caller-supplied 'struct bpf_tunnel_key'.
 *
 * @r1:    pointer to the skb
 * @r2:    pointer to the 'struct bpf_tunnel_key' to fill in
 * @size:  must equal sizeof(struct bpf_tunnel_key)
 * @flags: must be 0
 * @r5:    unused
 *
 * Returns 0 on success, or -EINVAL when @size or @flags is not accepted or
 * the skb carries no tunnel info.
 */
static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	struct bpf_tunnel_key *key = (struct bpf_tunnel_key *) (long) r2;
	struct ip_tunnel_info *tun_info = skb_tunnel_info(skb, AF_INET);

	if (unlikely(flags || size != sizeof(*key) || !tun_info))
		return -EINVAL;

	/* The key is stored big-endian; the program sees host byte order.
	 * The 64-bit tunnel id is narrowed to the 32-bit tunnel_id field.
	 */
	key->remote_ipv4 = be32_to_cpu(tun_info->key.ipv4_src);
	key->tunnel_id = be64_to_cpu(tun_info->key.tun_id);

	return 0;
}
1501+
1502+
/* Helper descriptor for bpf_skb_get_tunnel_key(); returned by
 * tc_cls_act_func_proto() for BPF_FUNC_skb_get_tunnel_key.
 * The argument types line up with the helper's (skb, key, size, flags).
 */
const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1503+
.func = bpf_skb_get_tunnel_key,
1504+
.gpl_only = false,
1505+
.ret_type = RET_INTEGER,	/* 0 or -EINVAL */
1506+
.arg1_type = ARG_PTR_TO_CTX,	/* skb */
1507+
.arg2_type = ARG_PTR_TO_STACK,	/* key: 'struct bpf_tunnel_key' to fill */
1508+
.arg3_type = ARG_CONST_STACK_SIZE,	/* size of the key buffer */
1509+
.arg4_type = ARG_ANYTHING,	/* flags; the helper rejects non-zero values */
1510+
};
1511+
1512+
/* Per-CPU metadata dst attached to skbs by bpf_skb_set_tunnel_key().
 * Allocated on first use by bpf_get_skb_set_tunnel_key_proto(); NULL until then.
 */
static struct metadata_dst __percpu *md_dst;
1513+
1514+
/* eBPF helper: attach TX tunnel metadata, taken from the caller-supplied
 * 'struct bpf_tunnel_key', to an skb.
 *
 * @r1:    pointer to the skb
 * @r2:    pointer to the populated 'struct bpf_tunnel_key'
 * @size:  must equal sizeof(struct bpf_tunnel_key)
 * @flags: must be 0
 * @r5:    unused
 *
 * The skb's current dst is dropped and replaced by this CPU's shared
 * metadata dst, so a later call on the same CPU overwrites the key stored
 * in it.
 *
 * Returns 0 on success, or -EINVAL when @size or @flags is not accepted.
 */
static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *) (long) r1;
	struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
	struct metadata_dst *md = this_cpu_ptr(md_dst);
	struct ip_tunnel_info *info;

	if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
		return -EINVAL;

	/* Take a reference on the per-CPU dst before handing it to the skb. */
	skb_dst_drop(skb);
	dst_hold((struct dst_entry *) md);
	skb_dst_set(skb, (struct dst_entry *) md);

	info = &md->u.tun_info;
	info->mode = IP_TUNNEL_INFO_TX;
	/* Tell the tunnel driver that tun_id is valid; drivers that test
	 * TUNNEL_KEY (e.g. GRE) would otherwise ignore the tunnel id.
	 */
	info->key.tun_flags = TUNNEL_KEY;
	/* The program supplies host byte order; the key is big-endian. */
	info->key.tun_id = cpu_to_be64(from->tunnel_id);
	info->key.ipv4_dst = cpu_to_be32(from->remote_ipv4);

	return 0;
}
1535+
1536+
/* Helper descriptor for bpf_skb_set_tunnel_key(). Handed out through
 * bpf_get_skb_set_tunnel_key_proto(), which first makes sure the per-CPU
 * metadata dst exists.
 * The argument types line up with the helper's (skb, key, size, flags).
 */
const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1537+
.func = bpf_skb_set_tunnel_key,
1538+
.gpl_only = false,
1539+
.ret_type = RET_INTEGER,	/* 0 or -EINVAL */
1540+
.arg1_type = ARG_PTR_TO_CTX,	/* skb */
1541+
.arg2_type = ARG_PTR_TO_STACK,	/* key: populated 'struct bpf_tunnel_key' */
1542+
.arg3_type = ARG_CONST_STACK_SIZE,	/* size of the key buffer */
1543+
.arg4_type = ARG_ANYTHING,	/* flags; the helper rejects non-zero values */
1544+
};
1545+
1546+
static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
1547+
{
1548+
if (!md_dst) {
1549+
/* race is not possible, since it's called from
1550+
* verifier that is holding verifier mutex
1551+
*/
1552+
md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
1553+
if (!md_dst)
1554+
return NULL;
1555+
}
1556+
return &bpf_skb_set_tunnel_key_proto;
1557+
}
1558+
14861559
static const struct bpf_func_proto *
14871560
sk_filter_func_proto(enum bpf_func_id func_id)
14881561
{
@@ -1526,6 +1599,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
15261599
return &bpf_skb_vlan_push_proto;
15271600
case BPF_FUNC_skb_vlan_pop:
15281601
return &bpf_skb_vlan_pop_proto;
1602+
case BPF_FUNC_skb_get_tunnel_key:
1603+
return &bpf_skb_get_tunnel_key_proto;
1604+
case BPF_FUNC_skb_set_tunnel_key:
1605+
return bpf_get_skb_set_tunnel_key_proto();
15291606
default:
15301607
return sk_filter_func_proto(func_id);
15311608
}

0 commit comments

Comments
 (0)