Skip to content

Commit b00628b

Browse files
Alexei Starovoitov, borkmann
Alexei Starovoitov
authored and committed Jul 15, 2021
bpf: Introduce bpf timers.
Introduce 'struct bpf_timer { __u64 :64; __u64 :64; };' that can be embedded in hash/array/lru maps as a regular field and helpers to operate on it:

// Initialize the timer.
// First 4 bits of 'flags' specify clockid.
// Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags);

// Configure the timer to call 'callback_fn' static function.
long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);

// Arm the timer to expire 'nsec' nanoseconds from the current time.
long bpf_timer_start(struct bpf_timer *timer, u64 nsec, u64 flags);

// Cancel the timer and wait for callback_fn to finish if it was running.
long bpf_timer_cancel(struct bpf_timer *timer);

Here is how a BPF program might look:

struct map_elem {
    int counter;
    struct bpf_timer timer;
};

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 1000);
    __type(key, int);
    __type(value, struct map_elem);
} hmap SEC(".maps");

static int timer_cb(void *map, int *key, struct map_elem *val);
/* val points to particular map element that contains bpf_timer. */

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(test1, int a)
{
    struct map_elem *val;
    int key = 0;

    val = bpf_map_lookup_elem(&hmap, &key);
    if (val) {
        bpf_timer_init(&val->timer, &hmap, CLOCK_REALTIME);
        bpf_timer_set_callback(&val->timer, timer_cb);
        bpf_timer_start(&val->timer, 1000 /* call timer_cb in 1 usec */, 0);
    }
}

This patch adds helper implementations that rely on hrtimers to call bpf functions as timers expire. The following patches add necessary safety checks. Only programs with CAP_BPF are allowed to use bpf_timer. The number of timers used by the program is constrained by the memcg recorded at map creation time. The bpf_timer_init() helper needs an explicit 'map' argument because inner maps are dynamic and not known at load time. The bpf_timer_set_callback() helper, in contrast, receives a hidden 'aux->prog' argument supplied by the verifier.
The prog pointer is needed to do refcnting of the bpf program to make sure that the program doesn't get freed while the timer is armed. This approach relies on the "user refcnt" scheme used in prog_array that stores bpf programs for bpf_tail_call. The bpf_timer_set_callback() will increment the prog refcnt, which is paired with bpf_timer_cancel() that will drop the prog refcnt. The ops->map_release_uref is responsible for cancelling the timers and dropping the prog refcnt when the user space reference to a map reaches zero. This uref approach is done to make sure that Ctrl-C of a user space process will not leave timers running forever unless the user space explicitly pinned a map that contained timers in bpffs.

bpf_timer_init() and bpf_timer_set_callback() will return -EPERM if the map doesn't have user references (is not held by an open file descriptor from user space and is not pinned in bpffs).

The bpf_map_delete_elem() and bpf_map_update_elem() operations cancel and free the timer if the given map element had it allocated. The "bpftool map update" command can be used to cancel timers.

The 'struct bpf_timer' is explicitly __attribute__((aligned(8))) because a '__u64 :64' bit-field provides 8 bytes of padding but only 1-byte alignment.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
Acked-by: Toke Høiland-Jørgensen <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent c1b3fed commit b00628b

File tree

7 files changed

+585
-1
lines changed

7 files changed

+585
-1
lines changed
 

‎include/linux/bpf.h

+3
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ struct bpf_map {
168168
u32 max_entries;
169169
u32 map_flags;
170170
int spin_lock_off; /* >=0 valid offset, <0 error */
171+
int timer_off; /* >=0 valid offset, <0 error */
171172
u32 id;
172173
int numa_node;
173174
u32 btf_key_type_id;
@@ -221,6 +222,7 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
221222
}
222223
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
223224
bool lock_src);
225+
void bpf_timer_cancel_and_free(void *timer);
224226
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
225227

226228
struct bpf_offload_dev;
@@ -314,6 +316,7 @@ enum bpf_arg_type {
314316
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
315317
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
316318
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
319+
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
317320
__BPF_ARG_TYPE_MAX,
318321
};
319322

‎include/uapi/linux/bpf.h

+73
Original file line number | Diff line number | Diff line change
@@ -4777,6 +4777,70 @@ union bpf_attr {
47774777
* Execute close syscall for given FD.
47784778
* Return
47794779
* A syscall result.
4780+
*
4781+
* long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
4782+
* Description
4783+
* Initialize the timer.
4784+
* First 4 bits of *flags* specify clockid.
4785+
* Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
4786+
* All other bits of *flags* are reserved.
4787+
* The verifier will reject the program if *timer* is not from
4788+
* the same *map*.
4789+
* Return
4790+
* 0 on success.
4791+
* **-EBUSY** if *timer* is already initialized.
4792+
* **-EINVAL** if invalid *flags* are passed.
4793+
* **-EPERM** if *timer* is in a map that doesn't have any user references.
4794+
* The user space should either hold a file descriptor to a map with timers
4795+
* or pin such map in bpffs. When map is unpinned or file descriptor is
4796+
* closed all timers in the map will be cancelled and freed.
4797+
*
4798+
* long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
4799+
* Description
4800+
* Configure the timer to call *callback_fn* static function.
4801+
* Return
4802+
* 0 on success.
4803+
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
4804+
* **-EPERM** if *timer* is in a map that doesn't have any user references.
4805+
* The user space should either hold a file descriptor to a map with timers
4806+
* or pin such map in bpffs. When map is unpinned or file descriptor is
4807+
* closed all timers in the map will be cancelled and freed.
4808+
*
4809+
* long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
4810+
* Description
4811+
* Set timer expiration N nanoseconds from the current time. The
4812+
* configured callback will be invoked in soft irq context on some cpu
4813+
* and will not repeat unless another bpf_timer_start() is made.
4814+
* In such case the next invocation can migrate to a different cpu.
4815+
* Since struct bpf_timer is a field inside map element the map
4816+
* owns the timer. The bpf_timer_set_callback() will increment refcnt
4817+
* of BPF program to make sure that callback_fn code stays valid.
4818+
* When user space reference to a map reaches zero all timers
4819+
* in a map are cancelled and corresponding program's refcnts are
4820+
* decremented. This is done to make sure that Ctrl-C of a user
4821+
* process doesn't leave any timers running. If map is pinned in
4822+
* bpffs the callback_fn can re-arm itself indefinitely.
4823+
* bpf_map_update/delete_elem() helpers and user space sys_bpf commands
4824+
* cancel and free the timer in the given map element.
4825+
* The map can contain timers that invoke callback_fn-s from different
4826+
* programs. The same callback_fn can serve different timers from
4827+
* different maps if key/value layout matches across maps.
4828+
* Every bpf_timer_set_callback() can have different callback_fn.
4829+
*
4830+
* Return
4831+
* 0 on success.
4832+
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
4833+
* or invalid *flags* are passed.
4834+
*
4835+
* long bpf_timer_cancel(struct bpf_timer *timer)
4836+
* Description
4837+
* Cancel the timer and wait for callback_fn to finish if it was running.
4838+
* Return
4839+
* 0 if the timer was not active.
4840+
* 1 if the timer was active.
4841+
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
4842+
* **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
4843+
* own timer which would have led to a deadlock otherwise.
47804844
*/
47814845
#define __BPF_FUNC_MAPPER(FN) \
47824846
FN(unspec), \
@@ -4948,6 +5012,10 @@ union bpf_attr {
49485012
FN(sys_bpf), \
49495013
FN(btf_find_by_name_kind), \
49505014
FN(sys_close), \
5015+
FN(timer_init), \
5016+
FN(timer_set_callback), \
5017+
FN(timer_start), \
5018+
FN(timer_cancel), \
49515019
/* */
49525020

49535021
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6074,6 +6142,11 @@ struct bpf_spin_lock {
60746142
__u32 val;
60756143
};
60766144

6145+
struct bpf_timer {
6146+
__u64 :64;
6147+
__u64 :64;
6148+
} __attribute__((aligned(8)));
6149+
60776150
struct bpf_sysctl {
60786151
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
60796152
* Allows 1,2,4-byte read, but no write.

0 commit comments

Comments
 (0)
Please sign in to comment.