Skip to content

Commit 174a79f

Browse files
jrfastab authored and davem330 committed
bpf: sockmap with sk redirect support
Recently we added a new map type called dev map, used to forward XDP packets between ports (6093ec2). This patch introduces a similar notion for sockets. A sockmap allows users to add participating sockets to a map. When sockets are added to the map, enough context is stored with the map entry to use the entry with a new helper, bpf_sk_redirect_map(map, key, flags). This helper (analogous to bpf_redirect_map in XDP) is given the map and an entry in the map. When called from a sockmap program, discussed below, the skb will be sent on the socket using skb_send_sock(). With the above, we need a bpf program to call the helper from, which will then implement the send logic. The initial site implemented in this series is the recv_sock hook. For this to work we implemented a map attach command to add attributes to a map. In sockmap we add two programs: a parse program and a verdict program. The parse program uses strparser to build messages and pass them to the verdict program. The parse programs use the normal strparser semantics. The verdict program is of type SK_SKB. The verdict program returns a verdict, SK_DROP or SK_REDIRECT for now. Additional actions may be added later. When SK_REDIRECT is returned, which is expected when the bpf program uses bpf_sk_redirect_map(), the sockmap logic will consult per-cpu variables set by the helper routine and pull the sock entry out of the sock map. This pattern follows the existing redirect logic in cls and xdp programs. This gives the flow: recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock, with an alternative branch ending in -> kfree_skb. As an example use case, a message-based load balancer may use specific logic in the verdict program to select the sock to send on. Sample programs are provided in future patches that hopefully illustrate the user interfaces. Selftests are also in follow-on patches. Signed-off-by: John Fastabend <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent a6f6df6 commit 174a79f

File tree

9 files changed

+940
-5
lines changed

9 files changed

+940
-5
lines changed

include/linux/bpf.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/rbtree_latch.h>
1717

1818
struct perf_event;
19+
struct bpf_prog;
1920
struct bpf_map;
2021

2122
/* map is generic key/value storage optionally accessible by eBPF programs */
@@ -37,6 +38,8 @@ struct bpf_map_ops {
3738
void (*map_fd_put_ptr)(void *ptr);
3839
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
3940
u32 (*map_fd_sys_lookup_elem)(void *ptr);
41+
int (*map_attach)(struct bpf_map *map,
42+
struct bpf_prog *p1, struct bpf_prog *p2);
4043
};
4144

4245
struct bpf_map {
@@ -138,8 +141,6 @@ enum bpf_reg_type {
138141
PTR_TO_PACKET_END, /* skb->data + headlen */
139142
};
140143

141-
struct bpf_prog;
142-
143144
/* The information passed from prog-specific *_is_valid_access
144145
* back to the verifier.
145146
*/
@@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
312313

313314
/* Map specifics */
314315
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
316+
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
315317
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
316318
void __dev_map_flush(struct bpf_map *map);
317319

@@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
391393
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
392394
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
393395
extern const struct bpf_func_proto bpf_get_stackid_proto;
396+
extern const struct bpf_func_proto bpf_sock_map_update_proto;
394397

395398
/* Shared helpers among cBPF and eBPF. */
396399
void bpf_user_rnd_init_once(void);

include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
3838
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
3939
#ifdef CONFIG_NET
4040
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
41+
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
4142
#endif

include/linux/filter.h

+2
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,8 @@ void xdp_do_flush_map(void);
727727
void bpf_warn_invalid_xdp_action(u32 act);
728728
void bpf_warn_invalid_xdp_redirect(u32 ifindex);
729729

730+
struct sock *do_sk_redirect_map(void);
731+
730732
#ifdef CONFIG_BPF_JIT
731733
extern int bpf_jit_enable;
732734
extern int bpf_jit_harden;

include/uapi/linux/bpf.h

+32-1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ enum bpf_map_type {
110110
BPF_MAP_TYPE_ARRAY_OF_MAPS,
111111
BPF_MAP_TYPE_HASH_OF_MAPS,
112112
BPF_MAP_TYPE_DEVMAP,
113+
BPF_MAP_TYPE_SOCKMAP,
113114
};
114115

115116
enum bpf_prog_type {
@@ -135,11 +136,15 @@ enum bpf_attach_type {
135136
BPF_CGROUP_INET_EGRESS,
136137
BPF_CGROUP_INET_SOCK_CREATE,
137138
BPF_CGROUP_SOCK_OPS,
139+
BPF_CGROUP_SMAP_INGRESS,
138140
__MAX_BPF_ATTACH_TYPE
139141
};
140142

141143
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
142144

145+
/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
146+
#define BPF_SOCKMAP_STRPARSER (1U << 0)
147+
143148
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
144149
* to the given target_fd cgroup the descendent cgroup will be able to
145150
* override effective bpf program that was inherited from this cgroup
@@ -211,6 +216,7 @@ union bpf_attr {
211216
__u32 attach_bpf_fd; /* eBPF program to attach */
212217
__u32 attach_type;
213218
__u32 attach_flags;
219+
__u32 attach_bpf_fd2;
214220
};
215221

216222
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -557,6 +563,23 @@ union bpf_attr {
557563
* @mode: operation mode (enum bpf_adj_room_mode)
558564
* @flags: reserved for future use
559565
* Return: 0 on success or negative error code
566+
*
567+
* int bpf_sk_redirect_map(map, key, flags)
568+
* Redirect skb to a sock in map using key as a lookup key for the
569+
* sock in map.
570+
* @map: pointer to sockmap
571+
* @key: key to lookup sock in map
572+
* @flags: reserved for future use
573+
* Return: SK_REDIRECT
574+
*
575+
* int bpf_sock_map_update(skops, map, key, flags, map_flags)
576+
* @skops: pointer to bpf_sock_ops
577+
* @map: pointer to sockmap to update
578+
* @key: key to insert/update sock in map
579+
* @flags: same flags as map update elem
580+
* @map_flags: sock map specific flags
581+
* bit 0: Enable strparser (BPF_SOCKMAP_STRPARSER, 1U << 0)
582+
* other bits: reserved
560583
*/
561584
#define __BPF_FUNC_MAPPER(FN) \
562585
FN(unspec), \
@@ -610,7 +633,9 @@ union bpf_attr {
610633
FN(set_hash), \
611634
FN(setsockopt), \
612635
FN(skb_adjust_room), \
613-
FN(redirect_map),
636+
FN(redirect_map), \
637+
FN(sk_redirect_map), \
638+
FN(sock_map_update), \
614639

615640
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
616641
* function eBPF program intends to call
@@ -747,6 +772,12 @@ struct xdp_md {
747772
__u32 data_end;
748773
};
749774

775+
enum sk_action {
776+
SK_ABORTED = 0,
777+
SK_DROP,
778+
SK_REDIRECT,
779+
};
780+
750781
#define BPF_TAG_SIZE 8
751782

752783
struct bpf_prog_info {

kernel/bpf/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ obj-y := core.o
33
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
44
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
55
ifeq ($(CONFIG_NET),y)
6-
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
6+
obj-$(CONFIG_BPF_SYSCALL) += devmap.o sockmap.o
77
endif
88
ifeq ($(CONFIG_PERF_EVENTS),y)
99
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o

0 commit comments

Comments
 (0)