kernel/bpf/arraymap.c

@@ -17,11 +17,39 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 
+static void bpf_array_free_percpu(struct bpf_array *array)
+{
+        int i;
+
+        for (i = 0; i < array->map.max_entries; i++)
+                free_percpu(array->pptrs[i]);
+}
+
+static int bpf_array_alloc_percpu(struct bpf_array *array)
+{
+        void __percpu *ptr;
+        int i;
+
+        for (i = 0; i < array->map.max_entries; i++) {
+                ptr = __alloc_percpu_gfp(array->elem_size, 8,
+                                         GFP_USER | __GFP_NOWARN);
+                if (!ptr) {
+                        bpf_array_free_percpu(array);
+                        return -ENOMEM;
+                }
+                array->pptrs[i] = ptr;
+        }
+
+        return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
+        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
         struct bpf_array *array;
-        u32 elem_size, array_size;
+        u64 array_size;
+        u32 elem_size;
 
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
         elem_size = round_up(attr->value_size, 8);
 
-        /* check round_up into zero and u32 overflow */
-        if (elem_size == 0 ||
-            attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
+        array_size = sizeof(*array);
+        if (percpu)
+                array_size += (u64) attr->max_entries * sizeof(void *);
+        else
+                array_size += (u64) attr->max_entries * elem_size;
+
+        /* make sure there is no u32 overflow later in round_up() */
+        if (array_size >= U32_MAX - PAGE_SIZE)
                 return ERR_PTR(-ENOMEM);
 
-        array_size = sizeof(*array) + attr->max_entries * elem_size;
 
         /* allocate all map elements and zero-initialize them */
         array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         }
 
         /* copy mandatory map attributes */
+        array->map.map_type = attr->map_type;
         array->map.key_size = attr->key_size;
         array->map.value_size = attr->value_size;
         array->map.max_entries = attr->max_entries;
-        array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
         array->elem_size = elem_size;
 
+        if (!percpu)
+                goto out;
+
+        array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
+
+        if (array_size >= U32_MAX - PAGE_SIZE ||
+            elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+                kvfree(array);
+                return ERR_PTR(-ENOMEM);
+        }
+out:
+        array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
+
         return &array->map;
 }
 
@@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
         struct bpf_array *array = container_of(map, struct bpf_array, map);
         u32 index = *(u32 *)key;
 
-        if (index >= array->map.max_entries)
+        if (unlikely(index >= array->map.max_entries))
                 return NULL;
 
         return array->value + array->elem_size * index;
 }
 
+/* Called from eBPF program */
+static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+        struct bpf_array *array = container_of(map, struct bpf_array, map);
+        u32 index = *(u32 *)key;
+
+        if (unlikely(index >= array->map.max_entries))
+                return NULL;
+
+        return this_cpu_ptr(array->pptrs[index]);
+}
+
 /* Called from syscall */
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
         struct bpf_array *array = container_of(map, struct bpf_array, map);
         u32 index = *(u32 *)key;
 
-        if (map_flags > BPF_EXIST)
+        if (unlikely(map_flags > BPF_EXIST))
                 /* unknown flags */
                 return -EINVAL;
 
-        if (index >= array->map.max_entries)
+        if (unlikely(index >= array->map.max_entries))
                 /* all elements were pre-allocated, cannot insert a new one */
                 return -E2BIG;
 
-        if (map_flags == BPF_NOEXIST)
+        if (unlikely(map_flags == BPF_NOEXIST))
                 /* all elements already exist */
                 return -EEXIST;
 
-        memcpy(array->value + array->elem_size * index, value, map->value_size);
+        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+                memcpy(this_cpu_ptr(array->pptrs[index]),
+                       value, map->value_size);
+        else
+                memcpy(array->value + array->elem_size * index,
+                       value, map->value_size);
         return 0;
 }
 
@@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map)
          */
         synchronize_rcu();
 
+        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+                bpf_array_free_percpu(array);
+
         kvfree(array);
 }
 
@@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = {
         .type = BPF_MAP_TYPE_ARRAY,
 };
 
+static const struct bpf_map_ops percpu_array_ops = {
+        .map_alloc = array_map_alloc,
+        .map_free = array_map_free,
+        .map_get_next_key = array_map_get_next_key,
+        .map_lookup_elem = percpu_array_map_lookup_elem,
+        .map_update_elem = array_map_update_elem,
+        .map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list percpu_array_type __read_mostly = {
+        .ops = &percpu_array_ops,
+        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+};
+
 static int __init register_array_map(void)
 {
         bpf_register_map_type(&array_type);
+        bpf_register_map_type(&percpu_array_type);
         return 0;
 }
 late_initcall(register_array_map);
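
For context, here is a minimal user-space sketch of how the new map type could be exercised through the bpf(2) syscall once this patch is applied. The bpf_sys() wrapper and the chosen key/value sizes are illustrative assumptions, not part of the patch; only BPF_MAP_TYPE_PERCPU_ARRAY and the update semantics come from the diff above.

/* Sketch: create a BPF_MAP_TYPE_PERCPU_ARRAY and update element 0.
 * bpf_sys() and the sizes below are illustrative assumptions.
 */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

static int bpf_sys(int cmd, union bpf_attr *attr)
{
        return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
        union bpf_attr attr;
        uint32_t key = 0;
        uint64_t val = 42;
        int map_fd;

        /* One u64 value per possible CPU, indexed by a u32 key. */
        memset(&attr, 0, sizeof(attr));
        attr.map_type = BPF_MAP_TYPE_PERCPU_ARRAY;
        attr.key_size = sizeof(key);   /* must be 4, as array_map_alloc() checks */
        attr.value_size = sizeof(val);
        attr.max_entries = 1;

        map_fd = bpf_sys(BPF_MAP_CREATE, &attr);
        if (map_fd < 0) {
                perror("BPF_MAP_CREATE");
                return 1;
        }

        /* Update element 0. With this patch, array_map_update_elem() writes
         * into this_cpu_ptr(), i.e. the per-CPU copy of whichever CPU the
         * syscall runs on.
         */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = map_fd;
        attr.key = (uint64_t)(unsigned long)&key;
        attr.value = (uint64_t)(unsigned long)&val;
        attr.flags = BPF_ANY;

        if (bpf_sys(BPF_MAP_UPDATE_ELEM, &attr))
                perror("BPF_MAP_UPDATE_ELEM");

        return 0;
}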