Skip to content

Commit 0c8493d

Browse files
Björn TöpelJeff Kirsher
Björn Töpel
authored and
Jeff Kirsher
committed
i40e: add XDP support for pass and drop actions
This commit adds basic XDP support for i40e derived NICs. XDP_PASS is honored; all other XDP actions will end up in XDP_DROP.
Signed-off-by: Björn Töpel <[email protected]>
Tested-by: Andrew Bowers <[email protected]>
Signed-off-by: Jeff Kirsher <[email protected]>
1 parent f5c3064 commit 0c8493d

File tree

4 files changed

+194
-31
lines changed

4 files changed

+194
-31
lines changed

drivers/net/ethernet/intel/i40e/i40e.h

+7
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,8 @@ struct i40e_vsi {
645645
u16 max_frame;
646646
u16 rx_buf_len;
647647

648+
struct bpf_prog *xdp_prog;
649+
648650
/* List of q_vectors allocated to this VSI */
649651
struct i40e_q_vector **q_vectors;
650652
int num_q_vectors;
@@ -972,4 +974,9 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
972974
i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
973975
i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
974976
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
977+
978+
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
979+
{
980+
return !!vsi->xdp_prog;
981+
}
975982
#endif /* _I40E_H_ */

drivers/net/ethernet/intel/i40e/i40e_main.c

+87
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <linux/etherdevice.h>
2828
#include <linux/of_net.h>
2929
#include <linux/pci.h>
30+
#include <linux/bpf.h>
3031

3132
/* Local includes */
3233
#include "i40e.h"
@@ -2395,6 +2396,18 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
23952396
}
23962397
}
23972398

2399+
/**
2400+
* i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
2401+
* @vsi: the vsi
2402+
**/
2403+
static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
2404+
{
2405+
if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
2406+
return I40E_RXBUFFER_2048;
2407+
else
2408+
return I40E_RXBUFFER_3072;
2409+
}
2410+
23982411
/**
23992412
* i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
24002413
* @netdev: network interface device structure
@@ -2408,6 +2421,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
24082421
struct i40e_vsi *vsi = np->vsi;
24092422
struct i40e_pf *pf = vsi->back;
24102423

2424+
if (i40e_enabled_xdp_vsi(vsi)) {
2425+
int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
2426+
2427+
if (frame_size > i40e_max_xdp_frame_size(vsi))
2428+
return -EINVAL;
2429+
}
2430+
24112431
netdev_info(netdev, "changing MTU from %d to %d\n",
24122432
netdev->mtu, new_mtu);
24132433
netdev->mtu = new_mtu;
@@ -9311,6 +9331,72 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
93119331
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
93129332
}
93139333

9334+
/**
9335+
* i40e_xdp_setup - add/remove an XDP program
9336+
* @vsi: VSI to changed
9337+
* @prog: XDP program
9338+
**/
9339+
static int i40e_xdp_setup(struct i40e_vsi *vsi,
9340+
struct bpf_prog *prog)
9341+
{
9342+
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
9343+
struct i40e_pf *pf = vsi->back;
9344+
struct bpf_prog *old_prog;
9345+
bool need_reset;
9346+
int i;
9347+
9348+
/* Don't allow frames that span over multiple buffers */
9349+
if (frame_size > vsi->rx_buf_len)
9350+
return -EINVAL;
9351+
9352+
if (!i40e_enabled_xdp_vsi(vsi) && !prog)
9353+
return 0;
9354+
9355+
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
9356+
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
9357+
9358+
if (need_reset)
9359+
i40e_prep_for_reset(pf, true);
9360+
9361+
old_prog = xchg(&vsi->xdp_prog, prog);
9362+
9363+
if (need_reset)
9364+
i40e_reset_and_rebuild(pf, true, true);
9365+
9366+
for (i = 0; i < vsi->num_queue_pairs; i++)
9367+
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
9368+
9369+
if (old_prog)
9370+
bpf_prog_put(old_prog);
9371+
9372+
return 0;
9373+
}
9374+
9375+
/**
9376+
* i40e_xdp - implements ndo_xdp for i40e
9377+
* @dev: netdevice
9378+
* @xdp: XDP command
9379+
**/
9380+
static int i40e_xdp(struct net_device *dev,
9381+
struct netdev_xdp *xdp)
9382+
{
9383+
struct i40e_netdev_priv *np = netdev_priv(dev);
9384+
struct i40e_vsi *vsi = np->vsi;
9385+
9386+
if (vsi->type != I40E_VSI_MAIN)
9387+
return -EINVAL;
9388+
9389+
switch (xdp->command) {
9390+
case XDP_SETUP_PROG:
9391+
return i40e_xdp_setup(vsi, xdp->prog);
9392+
case XDP_QUERY_PROG:
9393+
xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
9394+
return 0;
9395+
default:
9396+
return -EINVAL;
9397+
}
9398+
}
9399+
93149400
static const struct net_device_ops i40e_netdev_ops = {
93159401
.ndo_open = i40e_open,
93169402
.ndo_stop = i40e_close,
@@ -9343,6 +9429,7 @@ static const struct net_device_ops i40e_netdev_ops = {
93439429
.ndo_features_check = i40e_features_check,
93449430
.ndo_bridge_getlink = i40e_ndo_bridge_getlink,
93459431
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
9432+
.ndo_xdp = i40e_xdp,
93469433
};
93479434

93489435
/**

drivers/net/ethernet/intel/i40e/i40e_txrx.c

+99-31
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <linux/prefetch.h>
2828
#include <net/busy_poll.h>
29+
#include <linux/bpf_trace.h>
2930
#include "i40e.h"
3031
#include "i40e_trace.h"
3132
#include "i40e_prototype.h"
@@ -1195,6 +1196,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
11951196
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
11961197
{
11971198
i40e_clean_rx_ring(rx_ring);
1199+
rx_ring->xdp_prog = NULL;
11981200
kfree(rx_ring->rx_bi);
11991201
rx_ring->rx_bi = NULL;
12001202

@@ -1241,6 +1243,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
12411243
rx_ring->next_to_clean = 0;
12421244
rx_ring->next_to_use = 0;
12431245

1246+
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
1247+
12441248
return 0;
12451249
err:
12461250
kfree(rx_ring->rx_bi);
@@ -1593,6 +1597,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
15931597
* i40e_cleanup_headers - Correct empty headers
15941598
* @rx_ring: rx descriptor ring packet is being transacted on
15951599
* @skb: pointer to current skb being fixed
1600+
* @rx_desc: pointer to the EOP Rx descriptor
15961601
*
15971602
* Also address the case where we are pulling data in on pages only
15981603
* and as such no data is present in the skb header.
@@ -1602,8 +1607,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
16021607
*
16031608
* Returns true if an error was encountered and skb was freed.
16041609
**/
1605-
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
1610+
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
1611+
union i40e_rx_desc *rx_desc)
1612+
16061613
{
1614+
/* XDP packets use error pointer so abort at this point */
1615+
if (IS_ERR(skb))
1616+
return true;
1617+
1618+
/* ERR_MASK will only have valid bits if EOP set, and
1619+
* what we are doing here is actually checking
1620+
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
1621+
* the error field
1622+
*/
1623+
if (unlikely(i40e_test_staterr(rx_desc,
1624+
BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
1625+
dev_kfree_skb_any(skb);
1626+
return true;
1627+
}
1628+
16071629
/* if eth_skb_pad returns an error the skb was freed */
16081630
if (eth_skb_pad(skb))
16091631
return true;
@@ -1776,17 +1798,17 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
17761798
* i40e_construct_skb - Allocate skb and populate it
17771799
* @rx_ring: rx descriptor ring to transact packets on
17781800
* @rx_buffer: rx buffer to pull data from
1779-
* @size: size of buffer to add to skb
1801+
* @xdp: xdp_buff pointing to the data
17801802
*
17811803
* This function allocates an skb. It then populates it with the page
17821804
* data from the current receive descriptor, taking care to set up the
17831805
* skb correctly.
17841806
*/
17851807
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
17861808
struct i40e_rx_buffer *rx_buffer,
1787-
unsigned int size)
1809+
struct xdp_buff *xdp)
17881810
{
1789-
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1811+
unsigned int size = xdp->data_end - xdp->data;
17901812
#if (PAGE_SIZE < 8192)
17911813
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
17921814
#else
@@ -1796,9 +1818,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
17961818
struct sk_buff *skb;
17971819

17981820
/* prefetch first cache line of first page */
1799-
prefetch(va);
1821+
prefetch(xdp->data);
18001822
#if L1_CACHE_BYTES < 128
1801-
prefetch(va + L1_CACHE_BYTES);
1823+
prefetch(xdp->data + L1_CACHE_BYTES);
18021824
#endif
18031825

18041826
/* allocate a skb to store the frags */
@@ -1811,10 +1833,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
18111833
/* Determine available headroom for copy */
18121834
headlen = size;
18131835
if (headlen > I40E_RX_HDR_SIZE)
1814-
headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
1836+
headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
18151837

18161838
/* align pull length to size of long to optimize memcpy performance */
1817-
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
1839+
memcpy(__skb_put(skb, headlen), xdp->data,
1840+
ALIGN(headlen, sizeof(long)));
18181841

18191842
/* update all of the pointers */
18201843
size -= headlen;
@@ -1841,16 +1864,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
18411864
* i40e_build_skb - Build skb around an existing buffer
18421865
* @rx_ring: Rx descriptor ring to transact packets on
18431866
* @rx_buffer: Rx buffer to pull data from
1844-
* @size: size of buffer to add to skb
1867+
* @xdp: xdp_buff pointing to the data
18451868
*
18461869
* This function builds an skb around an existing Rx buffer, taking care
18471870
* to set up the skb correctly and avoid any memcpy overhead.
18481871
*/
18491872
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
18501873
struct i40e_rx_buffer *rx_buffer,
1851-
unsigned int size)
1874+
struct xdp_buff *xdp)
18521875
{
1853-
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1876+
unsigned int size = xdp->data_end - xdp->data;
18541877
#if (PAGE_SIZE < 8192)
18551878
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
18561879
#else
@@ -1860,12 +1883,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
18601883
struct sk_buff *skb;
18611884

18621885
/* prefetch first cache line of first page */
1863-
prefetch(va);
1886+
prefetch(xdp->data);
18641887
#if L1_CACHE_BYTES < 128
1865-
prefetch(va + L1_CACHE_BYTES);
1888+
prefetch(xdp->data + L1_CACHE_BYTES);
18661889
#endif
18671890
/* build an skb around the page buffer */
1868-
skb = build_skb(va - I40E_SKB_PAD, truesize);
1891+
skb = build_skb(xdp->data_hard_start, truesize);
18691892
if (unlikely(!skb))
18701893
return NULL;
18711894

@@ -1944,6 +1967,46 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
19441967
return true;
19451968
}
19461969

1970+
#define I40E_XDP_PASS 0
1971+
#define I40E_XDP_CONSUMED 1
1972+
1973+
/**
1974+
* i40e_run_xdp - run an XDP program
1975+
* @rx_ring: Rx ring being processed
1976+
* @xdp: XDP buffer containing the frame
1977+
**/
1978+
static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
1979+
struct xdp_buff *xdp)
1980+
{
1981+
int result = I40E_XDP_PASS;
1982+
struct bpf_prog *xdp_prog;
1983+
u32 act;
1984+
1985+
rcu_read_lock();
1986+
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
1987+
1988+
if (!xdp_prog)
1989+
goto xdp_out;
1990+
1991+
act = bpf_prog_run_xdp(xdp_prog, xdp);
1992+
switch (act) {
1993+
case XDP_PASS:
1994+
break;
1995+
default:
1996+
bpf_warn_invalid_xdp_action(act);
1997+
case XDP_TX:
1998+
case XDP_ABORTED:
1999+
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
2000+
/* fallthrough -- handle aborts by dropping packet */
2001+
case XDP_DROP:
2002+
result = I40E_XDP_CONSUMED;
2003+
break;
2004+
}
2005+
xdp_out:
2006+
rcu_read_unlock();
2007+
return ERR_PTR(-result);
2008+
}
2009+
19472010
/**
19482011
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
19492012
* @rx_ring: rx descriptor ring to transact packets on
@@ -1966,6 +2029,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
19662029
while (likely(total_rx_packets < budget)) {
19672030
struct i40e_rx_buffer *rx_buffer;
19682031
union i40e_rx_desc *rx_desc;
2032+
struct xdp_buff xdp;
19692033
unsigned int size;
19702034
u16 vlan_tag;
19712035
u8 rx_ptype;
@@ -2006,12 +2070,27 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
20062070
rx_buffer = i40e_get_rx_buffer(rx_ring, size);
20072071

20082072
/* retrieve a buffer from the ring */
2009-
if (skb)
2073+
if (!skb) {
2074+
xdp.data = page_address(rx_buffer->page) +
2075+
rx_buffer->page_offset;
2076+
xdp.data_hard_start = xdp.data -
2077+
i40e_rx_offset(rx_ring);
2078+
xdp.data_end = xdp.data + size;
2079+
2080+
skb = i40e_run_xdp(rx_ring, &xdp);
2081+
}
2082+
2083+
if (IS_ERR(skb)) {
2084+
total_rx_bytes += size;
2085+
total_rx_packets++;
2086+
rx_buffer->pagecnt_bias++;
2087+
} else if (skb) {
20102088
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
2011-
else if (ring_uses_build_skb(rx_ring))
2012-
skb = i40e_build_skb(rx_ring, rx_buffer, size);
2013-
else
2014-
skb = i40e_construct_skb(rx_ring, rx_buffer, size);
2089+
} else if (ring_uses_build_skb(rx_ring)) {
2090+
skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
2091+
} else {
2092+
skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
2093+
}
20152094

20162095
/* exit if we failed to retrieve a buffer */
20172096
if (!skb) {
@@ -2026,18 +2105,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
20262105
if (i40e_is_non_eop(rx_ring, rx_desc, skb))
20272106
continue;
20282107

2029-
/* ERR_MASK will only have valid bits if EOP set, and
2030-
* what we are doing here is actually checking
2031-
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
2032-
* the error field
2033-
*/
2034-
if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
2035-
dev_kfree_skb_any(skb);
2036-
skb = NULL;
2037-
continue;
2038-
}
2039-
2040-
if (i40e_cleanup_headers(rx_ring, skb)) {
2108+
if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
20412109
skb = NULL;
20422110
continue;
20432111
}

drivers/net/ethernet/intel/i40e/i40e_txrx.h

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ struct i40e_ring {
360360
void *desc; /* Descriptor ring memory */
361361
struct device *dev; /* Used for DMA mapping */
362362
struct net_device *netdev; /* netdev ring maps to */
363+
struct bpf_prog *xdp_prog;
363364
union {
364365
struct i40e_tx_buffer *tx_bi;
365366
struct i40e_rx_buffer *rx_bi;

0 commit comments

Comments
 (0)