Skip to content

Commit 583c1f4

Browse files
Byte-Lab authored and anakryiko committed
bpf: Define new BPF_MAP_TYPE_USER_RINGBUF map type
We want to support a ringbuf map type where samples are published from user-space, to be consumed by BPF programs. BPF currently supports a kernel -> user-space circular ring buffer via the BPF_MAP_TYPE_RINGBUF map type. We'll need to define a new map type for user-space -> kernel, as none of the helpers exported for BPF_MAP_TYPE_RINGBUF will apply to a user-space producer ring buffer, and we'll want to add one or more helper functions that would not apply for a kernel-producer ring buffer. This patch therefore adds a new BPF_MAP_TYPE_USER_RINGBUF map type definition. The map type is useless in its current form, as there is no way to access or use it for anything until we add one or more BPF helpers. A follow-on patch will therefore add a new helper function that allows BPF programs to run callbacks on samples that are published to the ring buffer. Signed-off-by: David Vernet <[email protected]> Signed-off-by: Andrii Nakryiko <[email protected]> Acked-by: Andrii Nakryiko <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 3a74904 commit 583c1f4

File tree

8 files changed

+65
-8
lines changed

8 files changed

+65
-8
lines changed

include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
126126
#endif
127127
BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
128128
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
129+
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
129130

130131
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
131132
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)

include/uapi/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@ enum bpf_map_type {
928928
BPF_MAP_TYPE_INODE_STORAGE,
929929
BPF_MAP_TYPE_TASK_STORAGE,
930930
BPF_MAP_TYPE_BLOOM_FILTER,
931+
BPF_MAP_TYPE_USER_RINGBUF,
931932
};
932933

933934
/* Note that tracing related programs such as

kernel/bpf/ringbuf.c

+56-6
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,27 @@ struct bpf_ringbuf {
3838
struct page **pages;
3939
int nr_pages;
4040
spinlock_t spinlock ____cacheline_aligned_in_smp;
41-
/* Consumer and producer counters are put into separate pages to allow
42-
* mapping consumer page as r/w, but restrict producer page to r/o.
43-
* This protects producer position from being modified by user-space
44-
* application and ruining in-kernel position tracking.
41+
/* Consumer and producer counters are put into separate pages to
42+
* allow each position to be mapped with different permissions.
43+
* This prevents a user-space application from modifying the
44+
* position and ruining in-kernel tracking. The permissions of the
45+
* pages depend on who is producing samples: user-space or the
46+
* kernel.
47+
*
48+
* Kernel-producer
49+
* ---------------
50+
* The producer position and data pages are mapped as r/o in
51+
* userspace. For this approach, bits in the header of samples are
52+
* used to signal to user-space, and to other producers, whether a
53+
* sample is currently being written.
54+
*
55+
* User-space producer
56+
* -------------------
57+
* Only the page containing the consumer position is mapped r/o in
58+
* user-space. User-space producers also use bits of the header to
59+
* communicate to the kernel, but the kernel must carefully check and
60+
* validate each sample to ensure that they're correctly formatted, and
61+
* fully contained within the ring buffer.
4562
*/
4663
unsigned long consumer_pos __aligned(PAGE_SIZE);
4764
unsigned long producer_pos __aligned(PAGE_SIZE);
@@ -224,7 +241,7 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
224241
return -ENOTSUPP;
225242
}
226243

227-
static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
244+
static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma)
228245
{
229246
struct bpf_ringbuf_map *rb_map;
230247

@@ -242,6 +259,26 @@ static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
242259
vma->vm_pgoff + RINGBUF_PGOFF);
243260
}
244261

262+
/* mmap handler installed as .map_mmap in user_ringbuf_map_ops.
 *
 * Returns -EPERM when a writable mapping (VM_WRITE) is requested at page
 * offset 0; writable mappings at any other offset are passed through.
 * For a non-writable mapping, VM_MAYWRITE is cleared as well. In every
 * non-rejected case the return value is whatever remap_vmalloc_range()
 * returns for rb_map->rb at page offset vma->vm_pgoff + RINGBUF_PGOFF.
 */
static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma)
263+
{
264+
struct bpf_ringbuf_map *rb_map;
265+
266+
/* Recover the enclosing ringbuf map from the generic bpf_map handle. */
rb_map = container_of(map, struct bpf_ringbuf_map, map);
267+
268+
if (vma->vm_flags & VM_WRITE) {
269+
if (vma->vm_pgoff == 0)
270+
/* Disallow writable mappings to the consumer pointer,
271+
* and allow writable mappings to both the producer
272+
* position, and the ring buffer data itself.
273+
*/
274+
return -EPERM;
275+
} else {
276+
/* Not writable now: also drop VM_MAYWRITE (presumably so the
 * mapping cannot be made writable later -- confirm against mm
 * semantics).
 */
vma->vm_flags &= ~VM_MAYWRITE;
277+
}
278+
/* remap_vmalloc_range() checks size and offset constraints */
279+
return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);
280+
}
281+
245282
static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
246283
{
247284
unsigned long cons_pos, prod_pos;
@@ -269,7 +306,7 @@ const struct bpf_map_ops ringbuf_map_ops = {
269306
.map_meta_equal = bpf_map_meta_equal,
270307
.map_alloc = ringbuf_map_alloc,
271308
.map_free = ringbuf_map_free,
272-
.map_mmap = ringbuf_map_mmap,
309+
.map_mmap = ringbuf_map_mmap_kern,
273310
.map_poll = ringbuf_map_poll,
274311
.map_lookup_elem = ringbuf_map_lookup_elem,
275312
.map_update_elem = ringbuf_map_update_elem,
@@ -278,6 +315,19 @@ const struct bpf_map_ops ringbuf_map_ops = {
278315
.map_btf_id = &ringbuf_map_btf_ids[0],
279316
};
280317

318+
BTF_ID_LIST_SINGLE(user_ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
319+
/* Operations table for BPF_MAP_TYPE_USER_RINGBUF (bound to that map type by
 * the BPF_MAP_TYPE() entry in include/linux/bpf_types.h).
 *
 * It shares the ringbuf_map_* alloc/free/lookup/update callbacks with
 * ringbuf_map_ops, but installs ringbuf_map_mmap_user as .map_mmap and
 * sets no .map_poll callback.
 */
const struct bpf_map_ops user_ringbuf_map_ops = {
320+
.map_meta_equal = bpf_map_meta_equal,
321+
.map_alloc = ringbuf_map_alloc,
322+
.map_free = ringbuf_map_free,
323+
/* mmap permission rules specific to the user-space producer variant */
.map_mmap = ringbuf_map_mmap_user,
324+
.map_lookup_elem = ringbuf_map_lookup_elem,
325+
.map_update_elem = ringbuf_map_update_elem,
326+
.map_delete_elem = ringbuf_map_delete_elem,
327+
.map_get_next_key = ringbuf_map_get_next_key,
328+
.map_btf_id = &user_ringbuf_map_btf_ids[0],
329+
};
330+
281331
/* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself,
282332
* calculate offset from record metadata to ring buffer in pages, rounded
283333
* down. This page offset is stored as part of record metadata and allows to

kernel/bpf/verifier.c

+3
Original file line numberDiff line numberDiff line change
@@ -6240,6 +6240,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
62406240
func_id != BPF_FUNC_ringbuf_discard_dynptr)
62416241
goto error;
62426242
break;
6243+
case BPF_MAP_TYPE_USER_RINGBUF:
6244+
goto error;
62436245
case BPF_MAP_TYPE_STACK_TRACE:
62446246
if (func_id != BPF_FUNC_get_stackid)
62456247
goto error;
@@ -12635,6 +12637,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
1263512637
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
1263612638
case BPF_MAP_TYPE_HASH_OF_MAPS:
1263712639
case BPF_MAP_TYPE_RINGBUF:
12640+
case BPF_MAP_TYPE_USER_RINGBUF:
1263812641
case BPF_MAP_TYPE_INODE_STORAGE:
1263912642
case BPF_MAP_TYPE_SK_STORAGE:
1264012643
case BPF_MAP_TYPE_TASK_STORAGE:

tools/bpf/bpftool/Documentation/bpftool-map.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ MAP COMMANDS
5555
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
5656
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
5757
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
58-
| | **task_storage** | **bloom_filter** }
58+
| | **task_storage** | **bloom_filter** | **user_ringbuf** }
5959
6060
DESCRIPTION
6161
===========

tools/bpf/bpftool/map.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1459,7 +1459,7 @@ static int do_help(int argc, char **argv)
14591459
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
14601460
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
14611461
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
1462-
" task_storage | bloom_filter }\n"
1462+
" task_storage | bloom_filter | user_ringbuf }\n"
14631463
" " HELP_SPEC_OPTIONS " |\n"
14641464
" {-f|--bpffs} | {-n|--nomount} }\n"
14651465
"",

tools/include/uapi/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@ enum bpf_map_type {
928928
BPF_MAP_TYPE_INODE_STORAGE,
929929
BPF_MAP_TYPE_TASK_STORAGE,
930930
BPF_MAP_TYPE_BLOOM_FILTER,
931+
BPF_MAP_TYPE_USER_RINGBUF,
931932
};
932933

933934
/* Note that tracing related programs such as

tools/lib/bpf/libbpf.c

+1
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ static const char * const map_type_name[] = {
163163
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
164164
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
165165
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
166+
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
166167
};
167168

168169
static const char * const prog_type_name[] = {

0 commit comments

Comments
 (0)