Skip to content

Commit f9c4bb0

Browse files
Roopa Prabhu, davem330
Roopa Prabhu
authored and committed
vxlan: vni filtering support on collect metadata device
This patch adds vnifiltering support to collect metadata device.

Motivation:
You can only use a single vxlan collect metadata device for a given
vxlan udp port in the system today. The vxlan collect metadata device
terminates all received vxlan packets. As shown in the below diagram,
there are use-cases where you need to support multiple such vxlan devices
in independent bridge domains. Each vxlan device must terminate the vnis
it is configured for.

Example usecase: In a service provider network a service provider
typically supports multiple bridge domains with overlapping vlans.
One bridge domain per customer. Vlans in each bridge domain are
mapped to globally unique vxlan ranges assigned to each customer.

vnifiltering support in collect metadata devices terminates only
configured vnis. This is similar to vlan filtering in bridge driver.
The vni filtering capability is provided by a new flag on collect
metadata device.

In the below pic:
  - customer1 is mapped to br1 bridge domain
  - customer2 is mapped to br2 bridge domain
  - customer1 vlan 10-11 is mapped to vni 1001-1002
  - customer2 vlan 10-11 is mapped to vni 2001-2002
  - br1 and br2 are vlan filtering bridges
  - vxlan1 and vxlan2 are collect metadata devices with
    vnifiltering enabled

┌──────────────────────────────────────────────────────────────────┐
│  switch                                                          │
│                                                                  │
│         ┌───────────┐                 ┌───────────┐              │
│         │           │                 │           │              │
│         │   br1     │                 │   br2     │              │
│         └┬─────────┬┘                 └──┬───────┬┘              │
│     vlans│         │               vlans │       │               │
│     10,11│         │                10,11│       │               │
│          │     vlanvnimap:               │    vlanvnimap:        │
│          │       10-1001,11-1002         │      10-2001,11-2002  │
│          │         │                     │       │               │
│   ┌──────┴┐     ┌──┴─────────┐       ┌───┴────┐  │               │
│   │ swp1  │     │vxlan1      │       │ swp2   │ ┌┴─────────────┐ │
│   │       │     │  vnifilter:│       │        │ │vxlan2        │ │
│   └───┬───┘     │   1001,1002│       └───┬────┘ │ vnifilter:   │ │
│       │         └────────────┘           │      │  2001,2002   │ │
│       │                                  │      └──────────────┘ │
│       │                                  │                       │
└───────┼──────────────────────────────────┼───────────────────────┘
        │                                  │
        │                                  │
  ┌─────┴───────┐                          │
  │  customer1  │                    ┌─────┴──────┐
  │ host/VM     │                    │customer2   │
  └─────────────┘                    │ host/VM    │
                                     └────────────┘

With this implementation, vxlan dst metadata device can
be associated with a range of vnis.
struct vxlan_vni_node is introduced to represent
a configured vni. We start with vni and its
associated remote_ip in this structure. This
structure can be extended to bring in other
per vni attributes if there are usecases for it.
A vni inherits an attribute from the base vxlan device
if there are no per vni attributes defined.

struct vxlan_dev gets a new rhashtable for
vnis called vxlan_vni_group. vxlan_vnifilter.c
implements the necessary netlink api, notifications
and helper functions to process and manage lifecycle
of vxlan_vni_node.

This patch also adds new helper functions in vxlan_multicast.c
to handle per vni remote_ip multicast groups which are part
of vxlan_vni_group.

Fix build problems:
Reported-by: kernel test robot <[email protected]>
Signed-off-by: Roopa Prabhu <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent a498c59 commit f9c4bb0

File tree

6 files changed

+1147
-32
lines changed

6 files changed

+1147
-32
lines changed

drivers/net/vxlan/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
obj-$(CONFIG_VXLAN) += vxlan.o
66

7-
vxlan-objs := vxlan_core.o vxlan_multicast.o
7+
vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o

drivers/net/vxlan/vxlan_core.c

+75-21
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,19 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
145145
struct vxlan_dev_node *node;
146146

147147
/* For flow based devices, map all packets to VNI 0 */
148-
if (vs->flags & VXLAN_F_COLLECT_METADATA)
148+
if (vs->flags & VXLAN_F_COLLECT_METADATA &&
149+
!(vs->flags & VXLAN_F_VNIFILTER))
149150
vni = 0;
150151

151152
hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
152-
if (node->vxlan->default_dst.remote_vni != vni)
153+
if (!node->vxlan)
153154
continue;
155+
if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
156+
if (!vxlan_vnifilter_lookup(node->vxlan, vni))
157+
continue;
158+
} else if (node->vxlan->default_dst.remote_vni != vni) {
159+
continue;
160+
}
154161

155162
if (IS_ENABLED(CONFIG_IPV6)) {
156163
const struct vxlan_config *cfg = &node->vxlan->cfg;
@@ -1478,7 +1485,10 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
14781485
RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
14791486
synchronize_net();
14801487

1481-
vxlan_vs_del_dev(vxlan);
1488+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
1489+
vxlan_vs_del_vnigrp(vxlan);
1490+
else
1491+
vxlan_vs_del_dev(vxlan);
14821492

14831493
if (__vxlan_sock_release_prep(sock4)) {
14841494
udp_tunnel_sock_release(sock4->sock);
@@ -2857,6 +2867,9 @@ static int vxlan_init(struct net_device *dev)
28572867
struct vxlan_dev *vxlan = netdev_priv(dev);
28582868
int err;
28592869

2870+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
2871+
vxlan_vnigroup_init(vxlan);
2872+
28602873
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
28612874
if (!dev->tstats)
28622875
return -ENOMEM;
@@ -2886,6 +2899,9 @@ static void vxlan_uninit(struct net_device *dev)
28862899
{
28872900
struct vxlan_dev *vxlan = netdev_priv(dev);
28882901

2902+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
2903+
vxlan_vnigroup_uninit(vxlan);
2904+
28892905
gro_cells_destroy(&vxlan->gro_cells);
28902906

28912907
vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
@@ -2903,15 +2919,10 @@ static int vxlan_open(struct net_device *dev)
29032919
if (ret < 0)
29042920
return ret;
29052921

2906-
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
2907-
ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip,
2908-
vxlan->default_dst.remote_ifindex);
2909-
if (ret == -EADDRINUSE)
2910-
ret = 0;
2911-
if (ret) {
2912-
vxlan_sock_release(vxlan);
2913-
return ret;
2914-
}
2922+
ret = vxlan_multicast_join(vxlan);
2923+
if (ret) {
2924+
vxlan_sock_release(vxlan);
2925+
return ret;
29152926
}
29162927

29172928
if (vxlan->cfg.age_interval)
@@ -2948,13 +2959,9 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
29482959
static int vxlan_stop(struct net_device *dev)
29492960
{
29502961
struct vxlan_dev *vxlan = netdev_priv(dev);
2951-
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
29522962
int ret = 0;
29532963

2954-
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
2955-
!vxlan_group_used(vn, vxlan, NULL, 0))
2956-
ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip,
2957-
vxlan->default_dst.remote_ifindex);
2964+
vxlan_multicast_leave(vxlan);
29582965

29592966
del_timer_sync(&vxlan->age_timer);
29602967

@@ -3184,6 +3191,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
31843191
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
31853192
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
31863193
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
3194+
[IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
31873195
};
31883196

31893197
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -3369,6 +3377,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
33693377
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
33703378
{
33713379
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
3380+
bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
33723381
struct vxlan_sock *vs = NULL;
33733382
struct vxlan_dev_node *node;
33743383
int l3mdev_index = 0;
@@ -3404,7 +3413,12 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
34043413
rcu_assign_pointer(vxlan->vn4_sock, vs);
34053414
node = &vxlan->hlist4;
34063415
}
3407-
vxlan_vs_add_dev(vs, vxlan, node);
3416+
3417+
if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER))
3418+
vxlan_vs_add_vnigrp(vxlan, vs, ipv6);
3419+
else
3420+
vxlan_vs_add_dev(vs, vxlan, node);
3421+
34083422
return 0;
34093423
}
34103424

@@ -3431,17 +3445,21 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
34313445
return ret;
34323446
}
34333447

3434-
static int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
3435-
struct vxlan_config *conf, __be32 vni)
3448+
int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
3449+
struct vxlan_config *conf, __be32 vni)
34363450
{
34373451
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
34383452
struct vxlan_dev *tmp;
34393453

34403454
list_for_each_entry(tmp, &vn->vxlan_list, next) {
34413455
if (tmp == vxlan)
34423456
continue;
3443-
if (tmp->cfg.vni != vni)
3457+
if (tmp->cfg.flags & VXLAN_F_VNIFILTER) {
3458+
if (!vxlan_vnifilter_lookup(tmp, vni))
3459+
continue;
3460+
} else if (tmp->cfg.vni != vni) {
34443461
continue;
3462+
}
34453463
if (tmp->cfg.dst_port != conf->dst_port)
34463464
continue;
34473465
if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
@@ -4051,6 +4069,21 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
40514069
if (data[IFLA_VXLAN_DF])
40524070
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
40534071

4072+
if (data[IFLA_VXLAN_VNIFILTER]) {
4073+
err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER,
4074+
VXLAN_F_VNIFILTER, changelink, false,
4075+
extack);
4076+
if (err)
4077+
return err;
4078+
4079+
if ((conf->flags & VXLAN_F_VNIFILTER) &&
4080+
!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
4081+
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER],
4082+
"vxlan vnifilter only valid in collect metadata mode");
4083+
return -EINVAL;
4084+
}
4085+
}
4086+
40544087
return 0;
40554088
}
40564089

@@ -4126,6 +4159,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
41264159
dst->remote_ifindex,
41274160
true);
41284161
spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4162+
4163+
/* If vni filtering device, also update fdb entries of
4164+
* all vnis that were using default remote ip
4165+
*/
4166+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
4167+
err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip,
4168+
&conf.remote_ip, extack);
4169+
if (err) {
4170+
netdev_adjacent_change_abort(dst->remote_dev,
4171+
lowerdev, dev);
4172+
return err;
4173+
}
4174+
}
41294175
}
41304176

41314177
if (conf.age_interval != vxlan->cfg.age_interval)
@@ -4271,6 +4317,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
42714317
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
42724318
goto nla_put_failure;
42734319

4320+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER &&
4321+
nla_put_u8(skb, IFLA_VXLAN_VNIFILTER,
4322+
!!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
4323+
goto nla_put_failure;
4324+
42744325
return 0;
42754326

42764327
nla_put_failure:
@@ -4630,6 +4681,8 @@ static int __init vxlan_init_module(void)
46304681
if (rc)
46314682
goto out4;
46324683

4684+
vxlan_vnifilter_init();
4685+
46334686
return 0;
46344687
out4:
46354688
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
@@ -4644,6 +4697,7 @@ late_initcall(vxlan_init_module);
46444697

46454698
static void __exit vxlan_cleanup_module(void)
46464699
{
4700+
vxlan_vnifilter_uninit();
46474701
rtnl_link_unregister(&vxlan_link_ops);
46484702
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
46494703
unregister_netdevice_notifier(&vxlan_notifier_block);

drivers/net/vxlan/vxlan_multicast.c

+144-6
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,48 @@ int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
8282
return ret;
8383
}
8484

85+
/* Report whether the candidate destination (@rip, @rifindex) refers to the
 * multicast group (@ip, @ifindex) that is being looked for.
 *
 * Returns true only when @rip is a multicast address, @rip equals @ip and
 * both interface indexes are the same; false otherwise.
 */
static bool vxlan_group_used_match(union vxlan_addr *ip, int ifindex,
				   union vxlan_addr *rip, int rifindex)
{
	return vxlan_addr_multicast(rip) &&
	       vxlan_addr_equal(rip, ip) &&
	       rifindex == ifindex;
}
99+
100+
static bool vxlan_group_used_by_vnifilter(struct vxlan_dev *vxlan,
101+
union vxlan_addr *ip, int ifindex)
102+
{
103+
struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
104+
struct vxlan_vni_node *v, *tmp;
105+
106+
if (vxlan_group_used_match(ip, ifindex,
107+
&vxlan->default_dst.remote_ip,
108+
vxlan->default_dst.remote_ifindex))
109+
return true;
110+
111+
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
112+
if (!vxlan_addr_multicast(&v->remote_ip))
113+
continue;
114+
115+
if (vxlan_group_used_match(ip, ifindex,
116+
&v->remote_ip,
117+
vxlan->default_dst.remote_ifindex))
118+
return true;
119+
}
120+
121+
return false;
122+
}
123+
85124
/* See if multicast group is already in use by other ID */
86125
bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev,
87-
union vxlan_addr *rip, int rifindex)
126+
__be32 vni, union vxlan_addr *rip, int rifindex)
88127
{
89128
union vxlan_addr *ip = (rip ? : &dev->default_dst.remote_ip);
90129
int ifindex = (rifindex ? : dev->default_dst.remote_ifindex);
@@ -121,14 +160,113 @@ bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev,
121160
rtnl_dereference(vxlan->vn6_sock) != sock6)
122161
continue;
123162
#endif
124-
if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip, ip))
125-
continue;
126-
127-
if (vxlan->default_dst.remote_ifindex != ifindex)
128-
continue;
163+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
164+
if (!vxlan_group_used_by_vnifilter(vxlan, ip, ifindex))
165+
continue;
166+
} else {
167+
if (!vxlan_group_used_match(ip, ifindex,
168+
&vxlan->default_dst.remote_ip,
169+
vxlan->default_dst.remote_ifindex))
170+
continue;
171+
}
129172

130173
return true;
131174
}
132175

133176
return false;
134177
}
178+
179+
static int vxlan_multicast_join_vnigrp(struct vxlan_dev *vxlan)
180+
{
181+
struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
182+
struct vxlan_vni_node *v, *tmp, *vgood = NULL;
183+
int ret = 0;
184+
185+
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
186+
if (!vxlan_addr_multicast(&v->remote_ip))
187+
continue;
188+
/* skip if address is same as default address */
189+
if (vxlan_addr_equal(&v->remote_ip,
190+
&vxlan->default_dst.remote_ip))
191+
continue;
192+
ret = vxlan_igmp_join(vxlan, &v->remote_ip, 0);
193+
if (ret == -EADDRINUSE)
194+
ret = 0;
195+
if (ret)
196+
goto out;
197+
vgood = v;
198+
}
199+
out:
200+
if (ret) {
201+
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
202+
if (!vxlan_addr_multicast(&v->remote_ip))
203+
continue;
204+
if (vxlan_addr_equal(&v->remote_ip,
205+
&vxlan->default_dst.remote_ip))
206+
continue;
207+
vxlan_igmp_leave(vxlan, &v->remote_ip, 0);
208+
if (v == vgood)
209+
break;
210+
}
211+
}
212+
213+
return ret;
214+
}
215+
216+
static int vxlan_multicast_leave_vnigrp(struct vxlan_dev *vxlan)
217+
{
218+
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
219+
struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
220+
struct vxlan_vni_node *v, *tmp;
221+
int last_err = 0, ret;
222+
223+
list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
224+
if (vxlan_addr_multicast(&v->remote_ip) &&
225+
!vxlan_group_used(vn, vxlan, v->vni, &v->remote_ip,
226+
0)) {
227+
ret = vxlan_igmp_leave(vxlan, &v->remote_ip, 0);
228+
if (ret)
229+
last_err = ret;
230+
}
231+
}
232+
233+
return last_err;
234+
}
235+
236+
int vxlan_multicast_join(struct vxlan_dev *vxlan)
237+
{
238+
int ret = 0;
239+
240+
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
241+
ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip,
242+
vxlan->default_dst.remote_ifindex);
243+
if (ret == -EADDRINUSE)
244+
ret = 0;
245+
if (ret)
246+
return ret;
247+
}
248+
249+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
250+
return vxlan_multicast_join_vnigrp(vxlan);
251+
252+
return 0;
253+
}
254+
255+
int vxlan_multicast_leave(struct vxlan_dev *vxlan)
256+
{
257+
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
258+
int ret = 0;
259+
260+
if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
261+
!vxlan_group_used(vn, vxlan, 0, NULL, 0)) {
262+
ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip,
263+
vxlan->default_dst.remote_ifindex);
264+
if (ret)
265+
return ret;
266+
}
267+
268+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
269+
return vxlan_multicast_leave_vnigrp(vxlan);
270+
271+
return 0;
272+
}

0 commit comments

Comments
 (0)