switchdev: introduce switchdev deferred ops infrastructure
[linux-2.6-block.git] / net / switchdev / switchdev.c
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <linux/list.h>
20 #include <linux/workqueue.h>
21 #include <net/ip_fib.h>
22 #include <net/switchdev.h>
23
24 /**
25  *      switchdev_trans_item_enqueue - Enqueue data item to transaction queue
26  *
27  *      @trans: transaction
28  *      @data: pointer to data being queued
29  *      @destructor: data destructor
30  *      @tritem: transaction item being queued
31  *
32  *      Enqeueue data item to transaction queue. tritem is typically placed in
33  *      cointainter pointed at by data pointer. Destructor is called on
34  *      transaction abort and after successful commit phase in case
35  *      the caller did not dequeue the item before.
36  */
37 void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
38                                   void *data, void (*destructor)(void const *),
39                                   struct switchdev_trans_item *tritem)
40 {
41         tritem->data = data;
42         tritem->destructor = destructor;
43         list_add_tail(&tritem->list, &trans->item_list);
44 }
45 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
46
47 static struct switchdev_trans_item *
48 __switchdev_trans_item_dequeue(struct switchdev_trans *trans)
49 {
50         struct switchdev_trans_item *tritem;
51
52         if (list_empty(&trans->item_list))
53                 return NULL;
54         tritem = list_first_entry(&trans->item_list,
55                                   struct switchdev_trans_item, list);
56         list_del(&tritem->list);
57         return tritem;
58 }
59
60 /**
61  *      switchdev_trans_item_dequeue - Dequeue data item from transaction queue
62  *
63  *      @trans: transaction
64  */
65 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
66 {
67         struct switchdev_trans_item *tritem;
68
69         tritem = __switchdev_trans_item_dequeue(trans);
70         BUG_ON(!tritem);
71         return tritem->data;
72 }
73 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
74
75 static void switchdev_trans_init(struct switchdev_trans *trans)
76 {
77         INIT_LIST_HEAD(&trans->item_list);
78 }
79
80 static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
81 {
82         struct switchdev_trans_item *tritem;
83
84         while ((tritem = __switchdev_trans_item_dequeue(trans)))
85                 tritem->destructor(tritem->data);
86 }
87
88 static void switchdev_trans_items_warn_destroy(struct net_device *dev,
89                                                struct switchdev_trans *trans)
90 {
91         WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
92              dev->name);
93         switchdev_trans_items_destroy(trans);
94 }
95
96 static LIST_HEAD(deferred);
97 static DEFINE_SPINLOCK(deferred_lock);
98
99 typedef void switchdev_deferred_func_t(struct net_device *dev,
100                                        const void *data);
101
102 struct switchdev_deferred_item {
103         struct list_head list;
104         struct net_device *dev;
105         switchdev_deferred_func_t *func;
106         unsigned long data[0];
107 };
108
109 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
110 {
111         struct switchdev_deferred_item *dfitem;
112
113         spin_lock_bh(&deferred_lock);
114         if (list_empty(&deferred)) {
115                 dfitem = NULL;
116                 goto unlock;
117         }
118         dfitem = list_first_entry(&deferred,
119                                   struct switchdev_deferred_item, list);
120         list_del(&dfitem->list);
121 unlock:
122         spin_unlock_bh(&deferred_lock);
123         return dfitem;
124 }
125
126 /**
127  *      switchdev_deferred_process - Process ops in deferred queue
128  *
129  *      Called to flush the ops currently queued in deferred ops queue.
130  *      rtnl_lock must be held.
131  */
132 void switchdev_deferred_process(void)
133 {
134         struct switchdev_deferred_item *dfitem;
135
136         ASSERT_RTNL();
137
138         while ((dfitem = switchdev_deferred_dequeue())) {
139                 dfitem->func(dfitem->dev, dfitem->data);
140                 dev_put(dfitem->dev);
141                 kfree(dfitem);
142         }
143 }
144 EXPORT_SYMBOL_GPL(switchdev_deferred_process);
145
/* Work handler: flush the deferred ops queue with rtnl_lock held, as
 * switchdev_deferred_process() requires. The @work argument is unused.
 */
static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

/* Single work item shared by all deferred ops; scheduled on every enqueue. */
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
154
155 static int switchdev_deferred_enqueue(struct net_device *dev,
156                                       const void *data, size_t data_len,
157                                       switchdev_deferred_func_t *func)
158 {
159         struct switchdev_deferred_item *dfitem;
160
161         dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
162         if (!dfitem)
163                 return -ENOMEM;
164         dfitem->dev = dev;
165         dfitem->func = func;
166         memcpy(dfitem->data, data, data_len);
167         dev_hold(dev);
168         spin_lock_bh(&deferred_lock);
169         list_add_tail(&dfitem->list, &deferred);
170         spin_unlock_bh(&deferred_lock);
171         schedule_work(&deferred_process_work);
172         return 0;
173 }
174
175 /**
176  *      switchdev_port_attr_get - Get port attribute
177  *
178  *      @dev: port device
179  *      @attr: attribute to get
180  */
181 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
182 {
183         const struct switchdev_ops *ops = dev->switchdev_ops;
184         struct net_device *lower_dev;
185         struct list_head *iter;
186         struct switchdev_attr first = {
187                 .id = SWITCHDEV_ATTR_ID_UNDEFINED
188         };
189         int err = -EOPNOTSUPP;
190
191         if (ops && ops->switchdev_port_attr_get)
192                 return ops->switchdev_port_attr_get(dev, attr);
193
194         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
195                 return err;
196
197         /* Switch device port(s) may be stacked under
198          * bond/team/vlan dev, so recurse down to get attr on
199          * each port.  Return -ENODATA if attr values don't
200          * compare across ports.
201          */
202
203         netdev_for_each_lower_dev(dev, lower_dev, iter) {
204                 err = switchdev_port_attr_get(lower_dev, attr);
205                 if (err)
206                         break;
207                 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
208                         first = *attr;
209                 else if (memcmp(&first, attr, sizeof(*attr)))
210                         return -ENODATA;
211         }
212
213         return err;
214 }
215 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
216
217 static int __switchdev_port_attr_set(struct net_device *dev,
218                                      struct switchdev_attr *attr,
219                                      struct switchdev_trans *trans)
220 {
221         const struct switchdev_ops *ops = dev->switchdev_ops;
222         struct net_device *lower_dev;
223         struct list_head *iter;
224         int err = -EOPNOTSUPP;
225
226         if (ops && ops->switchdev_port_attr_set)
227                 return ops->switchdev_port_attr_set(dev, attr, trans);
228
229         if (attr->flags & SWITCHDEV_F_NO_RECURSE)
230                 goto done;
231
232         /* Switch device port(s) may be stacked under
233          * bond/team/vlan dev, so recurse down to set attr on
234          * each port.
235          */
236
237         netdev_for_each_lower_dev(dev, lower_dev, iter) {
238                 err = __switchdev_port_attr_set(lower_dev, attr, trans);
239                 if (err == -EOPNOTSUPP &&
240                     attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
241                         continue;
242                 if (err)
243                         break;
244         }
245
246 done:
247         if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
248                 err = 0;
249
250         return err;
251 }
252
253 struct switchdev_attr_set_work {
254         struct work_struct work;
255         struct net_device *dev;
256         struct switchdev_attr attr;
257 };
258
259 static void switchdev_port_attr_set_work(struct work_struct *work)
260 {
261         struct switchdev_attr_set_work *asw =
262                 container_of(work, struct switchdev_attr_set_work, work);
263         int err;
264
265         rtnl_lock();
266         err = switchdev_port_attr_set(asw->dev, &asw->attr);
267         if (err && err != -EOPNOTSUPP)
268                 netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
269                            err, asw->attr.id);
270         rtnl_unlock();
271
272         dev_put(asw->dev);
273         kfree(work);
274 }
275
276 static int switchdev_port_attr_set_defer(struct net_device *dev,
277                                          struct switchdev_attr *attr)
278 {
279         struct switchdev_attr_set_work *asw;
280
281         asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
282         if (!asw)
283                 return -ENOMEM;
284
285         INIT_WORK(&asw->work, switchdev_port_attr_set_work);
286
287         dev_hold(dev);
288         asw->dev = dev;
289         memcpy(&asw->attr, attr, sizeof(asw->attr));
290
291         schedule_work(&asw->work);
292
293         return 0;
294 }
295
296 /**
297  *      switchdev_port_attr_set - Set port attribute
298  *
299  *      @dev: port device
300  *      @attr: attribute to set
301  *
302  *      Use a 2-phase prepare-commit transaction model to ensure
303  *      system is not left in a partially updated state due to
304  *      failure from driver/device.
305  */
306 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
307 {
308         struct switchdev_trans trans;
309         int err;
310
311         if (!rtnl_is_locked()) {
312                 /* Running prepare-commit transaction across stacked
313                  * devices requires nothing moves, so if rtnl_lock is
314                  * not held, schedule a worker thread to hold rtnl_lock
315                  * while setting attr.
316                  */
317
318                 return switchdev_port_attr_set_defer(dev, attr);
319         }
320
321         switchdev_trans_init(&trans);
322
323         /* Phase I: prepare for attr set. Driver/device should fail
324          * here if there are going to be issues in the commit phase,
325          * such as lack of resources or support.  The driver/device
326          * should reserve resources needed for the commit phase here,
327          * but should not commit the attr.
328          */
329
330         trans.ph_prepare = true;
331         err = __switchdev_port_attr_set(dev, attr, &trans);
332         if (err) {
333                 /* Prepare phase failed: abort the transaction.  Any
334                  * resources reserved in the prepare phase are
335                  * released.
336                  */
337
338                 if (err != -EOPNOTSUPP)
339                         switchdev_trans_items_destroy(&trans);
340
341                 return err;
342         }
343
344         /* Phase II: commit attr set.  This cannot fail as a fault
345          * of driver/device.  If it does, it's a bug in the driver/device
346          * because the driver said everythings was OK in phase I.
347          */
348
349         trans.ph_prepare = false;
350         err = __switchdev_port_attr_set(dev, attr, &trans);
351         WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
352              dev->name, attr->id);
353         switchdev_trans_items_warn_destroy(dev, &trans);
354
355         return err;
356 }
357 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
358
359 static int __switchdev_port_obj_add(struct net_device *dev,
360                                     const struct switchdev_obj *obj,
361                                     struct switchdev_trans *trans)
362 {
363         const struct switchdev_ops *ops = dev->switchdev_ops;
364         struct net_device *lower_dev;
365         struct list_head *iter;
366         int err = -EOPNOTSUPP;
367
368         if (ops && ops->switchdev_port_obj_add)
369                 return ops->switchdev_port_obj_add(dev, obj, trans);
370
371         /* Switch device port(s) may be stacked under
372          * bond/team/vlan dev, so recurse down to add object on
373          * each port.
374          */
375
376         netdev_for_each_lower_dev(dev, lower_dev, iter) {
377                 err = __switchdev_port_obj_add(lower_dev, obj, trans);
378                 if (err)
379                         break;
380         }
381
382         return err;
383 }
384
385 /**
386  *      switchdev_port_obj_add - Add port object
387  *
388  *      @dev: port device
389  *      @id: object ID
390  *      @obj: object to add
391  *
392  *      Use a 2-phase prepare-commit transaction model to ensure
393  *      system is not left in a partially updated state due to
394  *      failure from driver/device.
395  *
396  *      rtnl_lock must be held.
397  */
398 int switchdev_port_obj_add(struct net_device *dev,
399                            const struct switchdev_obj *obj)
400 {
401         struct switchdev_trans trans;
402         int err;
403
404         ASSERT_RTNL();
405
406         switchdev_trans_init(&trans);
407
408         /* Phase I: prepare for obj add. Driver/device should fail
409          * here if there are going to be issues in the commit phase,
410          * such as lack of resources or support.  The driver/device
411          * should reserve resources needed for the commit phase here,
412          * but should not commit the obj.
413          */
414
415         trans.ph_prepare = true;
416         err = __switchdev_port_obj_add(dev, obj, &trans);
417         if (err) {
418                 /* Prepare phase failed: abort the transaction.  Any
419                  * resources reserved in the prepare phase are
420                  * released.
421                  */
422
423                 if (err != -EOPNOTSUPP)
424                         switchdev_trans_items_destroy(&trans);
425
426                 return err;
427         }
428
429         /* Phase II: commit obj add.  This cannot fail as a fault
430          * of driver/device.  If it does, it's a bug in the driver/device
431          * because the driver said everythings was OK in phase I.
432          */
433
434         trans.ph_prepare = false;
435         err = __switchdev_port_obj_add(dev, obj, &trans);
436         WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
437         switchdev_trans_items_warn_destroy(dev, &trans);
438
439         return err;
440 }
441 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
442
443 /**
444  *      switchdev_port_obj_del - Delete port object
445  *
446  *      @dev: port device
447  *      @id: object ID
448  *      @obj: object to delete
449  */
450 int switchdev_port_obj_del(struct net_device *dev,
451                            const struct switchdev_obj *obj)
452 {
453         const struct switchdev_ops *ops = dev->switchdev_ops;
454         struct net_device *lower_dev;
455         struct list_head *iter;
456         int err = -EOPNOTSUPP;
457
458         if (ops && ops->switchdev_port_obj_del)
459                 return ops->switchdev_port_obj_del(dev, obj);
460
461         /* Switch device port(s) may be stacked under
462          * bond/team/vlan dev, so recurse down to delete object on
463          * each port.
464          */
465
466         netdev_for_each_lower_dev(dev, lower_dev, iter) {
467                 err = switchdev_port_obj_del(lower_dev, obj);
468                 if (err)
469                         break;
470         }
471
472         return err;
473 }
474 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
475
476 /**
477  *      switchdev_port_obj_dump - Dump port objects
478  *
479  *      @dev: port device
480  *      @id: object ID
481  *      @obj: object to dump
482  *      @cb: function to call with a filled object
483  */
484 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
485                             switchdev_obj_dump_cb_t *cb)
486 {
487         const struct switchdev_ops *ops = dev->switchdev_ops;
488         struct net_device *lower_dev;
489         struct list_head *iter;
490         int err = -EOPNOTSUPP;
491
492         if (ops && ops->switchdev_port_obj_dump)
493                 return ops->switchdev_port_obj_dump(dev, obj, cb);
494
495         /* Switch device port(s) may be stacked under
496          * bond/team/vlan dev, so recurse down to dump objects on
497          * first port at bottom of stack.
498          */
499
500         netdev_for_each_lower_dev(dev, lower_dev, iter) {
501                 err = switchdev_port_obj_dump(lower_dev, obj, cb);
502                 break;
503         }
504
505         return err;
506 }
507 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
508
509 static DEFINE_MUTEX(switchdev_mutex);
510 static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
511
512 /**
513  *      register_switchdev_notifier - Register notifier
514  *      @nb: notifier_block
515  *
516  *      Register switch device notifier. This should be used by code
517  *      which needs to monitor events happening in particular device.
518  *      Return values are same as for atomic_notifier_chain_register().
519  */
520 int register_switchdev_notifier(struct notifier_block *nb)
521 {
522         int err;
523
524         mutex_lock(&switchdev_mutex);
525         err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
526         mutex_unlock(&switchdev_mutex);
527         return err;
528 }
529 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
530
531 /**
532  *      unregister_switchdev_notifier - Unregister notifier
533  *      @nb: notifier_block
534  *
535  *      Unregister switch device notifier.
536  *      Return values are same as for atomic_notifier_chain_unregister().
537  */
538 int unregister_switchdev_notifier(struct notifier_block *nb)
539 {
540         int err;
541
542         mutex_lock(&switchdev_mutex);
543         err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
544         mutex_unlock(&switchdev_mutex);
545         return err;
546 }
547 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
548
549 /**
550  *      call_switchdev_notifiers - Call notifiers
551  *      @val: value passed unmodified to notifier function
552  *      @dev: port device
553  *      @info: notifier information data
554  *
555  *      Call all network notifier blocks. This should be called by driver
556  *      when it needs to propagate hardware event.
557  *      Return values are same as for atomic_notifier_call_chain().
558  */
559 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
560                              struct switchdev_notifier_info *info)
561 {
562         int err;
563
564         info->dev = dev;
565         mutex_lock(&switchdev_mutex);
566         err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
567         mutex_unlock(&switchdev_mutex);
568         return err;
569 }
570 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
571
572 struct switchdev_vlan_dump {
573         struct switchdev_obj_port_vlan vlan;
574         struct sk_buff *skb;
575         u32 filter_mask;
576         u16 flags;
577         u16 begin;
578         u16 end;
579 };
580
581 static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
582 {
583         struct bridge_vlan_info vinfo;
584
585         vinfo.flags = dump->flags;
586
587         if (dump->begin == 0 && dump->end == 0) {
588                 return 0;
589         } else if (dump->begin == dump->end) {
590                 vinfo.vid = dump->begin;
591                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
592                             sizeof(vinfo), &vinfo))
593                         return -EMSGSIZE;
594         } else {
595                 vinfo.vid = dump->begin;
596                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
597                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
598                             sizeof(vinfo), &vinfo))
599                         return -EMSGSIZE;
600                 vinfo.vid = dump->end;
601                 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
602                 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
603                 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
604                             sizeof(vinfo), &vinfo))
605                         return -EMSGSIZE;
606         }
607
608         return 0;
609 }
610
611 static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
612 {
613         struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
614         struct switchdev_vlan_dump *dump =
615                 container_of(vlan, struct switchdev_vlan_dump, vlan);
616         int err = 0;
617
618         if (vlan->vid_begin > vlan->vid_end)
619                 return -EINVAL;
620
621         if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
622                 dump->flags = vlan->flags;
623                 for (dump->begin = dump->end = vlan->vid_begin;
624                      dump->begin <= vlan->vid_end;
625                      dump->begin++, dump->end++) {
626                         err = switchdev_port_vlan_dump_put(dump);
627                         if (err)
628                                 return err;
629                 }
630         } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
631                 if (dump->begin > vlan->vid_begin &&
632                     dump->begin >= vlan->vid_end) {
633                         if ((dump->begin - 1) == vlan->vid_end &&
634                             dump->flags == vlan->flags) {
635                                 /* prepend */
636                                 dump->begin = vlan->vid_begin;
637                         } else {
638                                 err = switchdev_port_vlan_dump_put(dump);
639                                 dump->flags = vlan->flags;
640                                 dump->begin = vlan->vid_begin;
641                                 dump->end = vlan->vid_end;
642                         }
643                 } else if (dump->end <= vlan->vid_begin &&
644                            dump->end < vlan->vid_end) {
645                         if ((dump->end  + 1) == vlan->vid_begin &&
646                             dump->flags == vlan->flags) {
647                                 /* append */
648                                 dump->end = vlan->vid_end;
649                         } else {
650                                 err = switchdev_port_vlan_dump_put(dump);
651                                 dump->flags = vlan->flags;
652                                 dump->begin = vlan->vid_begin;
653                                 dump->end = vlan->vid_end;
654                         }
655                 } else {
656                         err = -EINVAL;
657                 }
658         }
659
660         return err;
661 }
662
663 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
664                                     u32 filter_mask)
665 {
666         struct switchdev_vlan_dump dump = {
667                 .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
668                 .skb = skb,
669                 .filter_mask = filter_mask,
670         };
671         int err = 0;
672
673         if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
674             (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
675                 err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
676                                               switchdev_port_vlan_dump_cb);
677                 if (err)
678                         goto err_out;
679                 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
680                         /* last one */
681                         err = switchdev_port_vlan_dump_put(&dump);
682         }
683
684 err_out:
685         return err == -EOPNOTSUPP ? 0 : err;
686 }
687
688 /**
689  *      switchdev_port_bridge_getlink - Get bridge port attributes
690  *
691  *      @dev: port device
692  *
693  *      Called for SELF on rtnl_bridge_getlink to get bridge port
694  *      attributes.
695  */
696 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
697                                   struct net_device *dev, u32 filter_mask,
698                                   int nlflags)
699 {
700         struct switchdev_attr attr = {
701                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
702         };
703         u16 mode = BRIDGE_MODE_UNDEF;
704         u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
705         int err;
706
707         err = switchdev_port_attr_get(dev, &attr);
708         if (err && err != -EOPNOTSUPP)
709                 return err;
710
711         return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
712                                        attr.u.brport_flags, mask, nlflags,
713                                        filter_mask, switchdev_port_vlan_fill);
714 }
715 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
716
717 static int switchdev_port_br_setflag(struct net_device *dev,
718                                      struct nlattr *nlattr,
719                                      unsigned long brport_flag)
720 {
721         struct switchdev_attr attr = {
722                 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
723         };
724         u8 flag = nla_get_u8(nlattr);
725         int err;
726
727         err = switchdev_port_attr_get(dev, &attr);
728         if (err)
729                 return err;
730
731         if (flag)
732                 attr.u.brport_flags |= brport_flag;
733         else
734                 attr.u.brport_flags &= ~brport_flag;
735
736         return switchdev_port_attr_set(dev, &attr);
737 }
738
739 static const struct nla_policy
740 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
741         [IFLA_BRPORT_STATE]             = { .type = NLA_U8 },
742         [IFLA_BRPORT_COST]              = { .type = NLA_U32 },
743         [IFLA_BRPORT_PRIORITY]          = { .type = NLA_U16 },
744         [IFLA_BRPORT_MODE]              = { .type = NLA_U8 },
745         [IFLA_BRPORT_GUARD]             = { .type = NLA_U8 },
746         [IFLA_BRPORT_PROTECT]           = { .type = NLA_U8 },
747         [IFLA_BRPORT_FAST_LEAVE]        = { .type = NLA_U8 },
748         [IFLA_BRPORT_LEARNING]          = { .type = NLA_U8 },
749         [IFLA_BRPORT_LEARNING_SYNC]     = { .type = NLA_U8 },
750         [IFLA_BRPORT_UNICAST_FLOOD]     = { .type = NLA_U8 },
751 };
752
753 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
754                                               struct nlattr *protinfo)
755 {
756         struct nlattr *attr;
757         int rem;
758         int err;
759
760         err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
761                                   switchdev_port_bridge_policy);
762         if (err)
763                 return err;
764
765         nla_for_each_nested(attr, protinfo, rem) {
766                 switch (nla_type(attr)) {
767                 case IFLA_BRPORT_LEARNING:
768                         err = switchdev_port_br_setflag(dev, attr,
769                                                         BR_LEARNING);
770                         break;
771                 case IFLA_BRPORT_LEARNING_SYNC:
772                         err = switchdev_port_br_setflag(dev, attr,
773                                                         BR_LEARNING_SYNC);
774                         break;
775                 default:
776                         err = -EOPNOTSUPP;
777                         break;
778                 }
779                 if (err)
780                         return err;
781         }
782
783         return 0;
784 }
785
786 static int switchdev_port_br_afspec(struct net_device *dev,
787                                     struct nlattr *afspec,
788                                     int (*f)(struct net_device *dev,
789                                              const struct switchdev_obj *obj))
790 {
791         struct nlattr *attr;
792         struct bridge_vlan_info *vinfo;
793         struct switchdev_obj_port_vlan vlan = {
794                 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
795         };
796         int rem;
797         int err;
798
799         nla_for_each_nested(attr, afspec, rem) {
800                 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
801                         continue;
802                 if (nla_len(attr) != sizeof(struct bridge_vlan_info))
803                         return -EINVAL;
804                 vinfo = nla_data(attr);
805                 vlan.flags = vinfo->flags;
806                 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
807                         if (vlan.vid_begin)
808                                 return -EINVAL;
809                         vlan.vid_begin = vinfo->vid;
810                         /* don't allow range of pvids */
811                         if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
812                                 return -EINVAL;
813                 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
814                         if (!vlan.vid_begin)
815                                 return -EINVAL;
816                         vlan.vid_end = vinfo->vid;
817                         if (vlan.vid_end <= vlan.vid_begin)
818                                 return -EINVAL;
819                         err = f(dev, &vlan.obj);
820                         if (err)
821                                 return err;
822                         memset(&vlan, 0, sizeof(vlan));
823                 } else {
824                         if (vlan.vid_begin)
825                                 return -EINVAL;
826                         vlan.vid_begin = vinfo->vid;
827                         vlan.vid_end = vinfo->vid;
828                         err = f(dev, &vlan.obj);
829                         if (err)
830                                 return err;
831                         memset(&vlan, 0, sizeof(vlan));
832                 }
833         }
834
835         return 0;
836 }
837
838 /**
839  *      switchdev_port_bridge_setlink - Set bridge port attributes
840  *
841  *      @dev: port device
842  *      @nlh: netlink header
843  *      @flags: netlink flags
844  *
845  *      Called for SELF on rtnl_bridge_setlink to set bridge port
846  *      attributes.
847  */
848 int switchdev_port_bridge_setlink(struct net_device *dev,
849                                   struct nlmsghdr *nlh, u16 flags)
850 {
851         struct nlattr *protinfo;
852         struct nlattr *afspec;
853         int err = 0;
854
855         protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
856                                    IFLA_PROTINFO);
857         if (protinfo) {
858                 err = switchdev_port_br_setlink_protinfo(dev, protinfo);
859                 if (err)
860                         return err;
861         }
862
863         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
864                                  IFLA_AF_SPEC);
865         if (afspec)
866                 err = switchdev_port_br_afspec(dev, afspec,
867                                                switchdev_port_obj_add);
868
869         return err;
870 }
871 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
872
873 /**
874  *      switchdev_port_bridge_dellink - Set bridge port attributes
875  *
876  *      @dev: port device
877  *      @nlh: netlink header
878  *      @flags: netlink flags
879  *
880  *      Called for SELF on rtnl_bridge_dellink to set bridge port
881  *      attributes.
882  */
883 int switchdev_port_bridge_dellink(struct net_device *dev,
884                                   struct nlmsghdr *nlh, u16 flags)
885 {
886         struct nlattr *afspec;
887
888         afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
889                                  IFLA_AF_SPEC);
890         if (afspec)
891                 return switchdev_port_br_afspec(dev, afspec,
892                                                 switchdev_port_obj_del);
893
894         return 0;
895 }
896 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
897
898 /**
899  *      switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
900  *
901  *      @ndmsg: netlink hdr
902  *      @nlattr: netlink attributes
903  *      @dev: port device
904  *      @addr: MAC address to add
905  *      @vid: VLAN to add
906  *
907  *      Add FDB entry to switch device.
908  */
909 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
910                            struct net_device *dev, const unsigned char *addr,
911                            u16 vid, u16 nlm_flags)
912 {
913         struct switchdev_obj_port_fdb fdb = {
914                 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
915                 .addr = addr,
916                 .vid = vid,
917         };
918
919         return switchdev_port_obj_add(dev, &fdb.obj);
920 }
921 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
922
923 /**
924  *      switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
925  *
926  *      @ndmsg: netlink hdr
927  *      @nlattr: netlink attributes
928  *      @dev: port device
929  *      @addr: MAC address to delete
930  *      @vid: VLAN to delete
931  *
932  *      Delete FDB entry from switch device.
933  */
934 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
935                            struct net_device *dev, const unsigned char *addr,
936                            u16 vid)
937 {
938         struct switchdev_obj_port_fdb fdb = {
939                 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
940                 .addr = addr,
941                 .vid = vid,
942         };
943
944         return switchdev_port_obj_del(dev, &fdb.obj);
945 }
946 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
947
948 struct switchdev_fdb_dump {
949         struct switchdev_obj_port_fdb fdb;
950         struct net_device *dev;
951         struct sk_buff *skb;
952         struct netlink_callback *cb;
953         int idx;
954 };
955
956 static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
957 {
958         struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
959         struct switchdev_fdb_dump *dump =
960                 container_of(fdb, struct switchdev_fdb_dump, fdb);
961         u32 portid = NETLINK_CB(dump->cb->skb).portid;
962         u32 seq = dump->cb->nlh->nlmsg_seq;
963         struct nlmsghdr *nlh;
964         struct ndmsg *ndm;
965
966         if (dump->idx < dump->cb->args[0])
967                 goto skip;
968
969         nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
970                         sizeof(*ndm), NLM_F_MULTI);
971         if (!nlh)
972                 return -EMSGSIZE;
973
974         ndm = nlmsg_data(nlh);
975         ndm->ndm_family  = AF_BRIDGE;
976         ndm->ndm_pad1    = 0;
977         ndm->ndm_pad2    = 0;
978         ndm->ndm_flags   = NTF_SELF;
979         ndm->ndm_type    = 0;
980         ndm->ndm_ifindex = dump->dev->ifindex;
981         ndm->ndm_state   = fdb->ndm_state;
982
983         if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
984                 goto nla_put_failure;
985
986         if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
987                 goto nla_put_failure;
988
989         nlmsg_end(dump->skb, nlh);
990
991 skip:
992         dump->idx++;
993         return 0;
994
995 nla_put_failure:
996         nlmsg_cancel(dump->skb, nlh);
997         return -EMSGSIZE;
998 }
999
1000 /**
1001  *      switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
1002  *
1003  *      @skb: netlink skb
1004  *      @cb: netlink callback
1005  *      @dev: port device
1006  *      @filter_dev: filter device
1007  *      @idx:
1008  *
1009  *      Delete FDB entry from switch device.
1010  */
1011 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
1012                             struct net_device *dev,
1013                             struct net_device *filter_dev, int idx)
1014 {
1015         struct switchdev_fdb_dump dump = {
1016                 .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
1017                 .dev = dev,
1018                 .skb = skb,
1019                 .cb = cb,
1020                 .idx = idx,
1021         };
1022
1023         switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
1024         return dump.idx;
1025 }
1026 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
1027
1028 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
1029 {
1030         const struct switchdev_ops *ops = dev->switchdev_ops;
1031         struct net_device *lower_dev;
1032         struct net_device *port_dev;
1033         struct list_head *iter;
1034
1035         /* Recusively search down until we find a sw port dev.
1036          * (A sw port dev supports switchdev_port_attr_get).
1037          */
1038
1039         if (ops && ops->switchdev_port_attr_get)
1040                 return dev;
1041
1042         netdev_for_each_lower_dev(dev, lower_dev, iter) {
1043                 port_dev = switchdev_get_lowest_dev(lower_dev);
1044                 if (port_dev)
1045                         return port_dev;
1046         }
1047
1048         return NULL;
1049 }
1050
1051 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
1052 {
1053         struct switchdev_attr attr = {
1054                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1055         };
1056         struct switchdev_attr prev_attr;
1057         struct net_device *dev = NULL;
1058         int nhsel;
1059
1060         /* For this route, all nexthop devs must be on the same switch. */
1061
1062         for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1063                 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1064
1065                 if (!nh->nh_dev)
1066                         return NULL;
1067
1068                 dev = switchdev_get_lowest_dev(nh->nh_dev);
1069                 if (!dev)
1070                         return NULL;
1071
1072                 if (switchdev_port_attr_get(dev, &attr))
1073                         return NULL;
1074
1075                 if (nhsel > 0 &&
1076                     !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
1077                                 return NULL;
1078
1079                 prev_attr = attr;
1080         }
1081
1082         return dev;
1083 }
1084
1085 /**
1086  *      switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
1087  *
1088  *      @dst: route's IPv4 destination address
1089  *      @dst_len: destination address length (prefix length)
1090  *      @fi: route FIB info structure
1091  *      @tos: route TOS
1092  *      @type: route type
1093  *      @nlflags: netlink flags passed in (NLM_F_*)
1094  *      @tb_id: route table ID
1095  *
1096  *      Add/modify switch IPv4 route entry.
1097  */
1098 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
1099                            u8 tos, u8 type, u32 nlflags, u32 tb_id)
1100 {
1101         struct switchdev_obj_ipv4_fib ipv4_fib = {
1102                 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
1103                 .dst = dst,
1104                 .dst_len = dst_len,
1105                 .fi = fi,
1106                 .tos = tos,
1107                 .type = type,
1108                 .nlflags = nlflags,
1109                 .tb_id = tb_id,
1110         };
1111         struct net_device *dev;
1112         int err = 0;
1113
1114         /* Don't offload route if using custom ip rules or if
1115          * IPv4 FIB offloading has been disabled completely.
1116          */
1117
1118 #ifdef CONFIG_IP_MULTIPLE_TABLES
1119         if (fi->fib_net->ipv4.fib_has_custom_rules)
1120                 return 0;
1121 #endif
1122
1123         if (fi->fib_net->ipv4.fib_offload_disabled)
1124                 return 0;
1125
1126         dev = switchdev_get_dev_by_nhs(fi);
1127         if (!dev)
1128                 return 0;
1129
1130         err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
1131         if (!err)
1132                 fi->fib_flags |= RTNH_F_OFFLOAD;
1133
1134         return err == -EOPNOTSUPP ? 0 : err;
1135 }
1136 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
1137
1138 /**
1139  *      switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
1140  *
1141  *      @dst: route's IPv4 destination address
1142  *      @dst_len: destination address length (prefix length)
1143  *      @fi: route FIB info structure
1144  *      @tos: route TOS
1145  *      @type: route type
1146  *      @tb_id: route table ID
1147  *
1148  *      Delete IPv4 route entry from switch device.
1149  */
1150 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
1151                            u8 tos, u8 type, u32 tb_id)
1152 {
1153         struct switchdev_obj_ipv4_fib ipv4_fib = {
1154                 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
1155                 .dst = dst,
1156                 .dst_len = dst_len,
1157                 .fi = fi,
1158                 .tos = tos,
1159                 .type = type,
1160                 .nlflags = 0,
1161                 .tb_id = tb_id,
1162         };
1163         struct net_device *dev;
1164         int err = 0;
1165
1166         if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1167                 return 0;
1168
1169         dev = switchdev_get_dev_by_nhs(fi);
1170         if (!dev)
1171                 return 0;
1172
1173         err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
1174         if (!err)
1175                 fi->fib_flags &= ~RTNH_F_OFFLOAD;
1176
1177         return err == -EOPNOTSUPP ? 0 : err;
1178 }
1179 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1180
1181 /**
1182  *      switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1183  *
1184  *      @fi: route FIB info structure
1185  */
1186 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1187 {
1188         /* There was a problem installing this route to the offload
1189          * device.  For now, until we come up with more refined
1190          * policy handling, abruptly end IPv4 fib offloading for
1191          * for entire net by flushing offload device(s) of all
1192          * IPv4 routes, and mark IPv4 fib offloading broken from
1193          * this point forward.
1194          */
1195
1196         fib_flush_external(fi->fib_net);
1197         fi->fib_net->ipv4.fib_offload_disabled = true;
1198 }
1199 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1200
1201 static bool switchdev_port_same_parent_id(struct net_device *a,
1202                                           struct net_device *b)
1203 {
1204         struct switchdev_attr a_attr = {
1205                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1206                 .flags = SWITCHDEV_F_NO_RECURSE,
1207         };
1208         struct switchdev_attr b_attr = {
1209                 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
1210                 .flags = SWITCHDEV_F_NO_RECURSE,
1211         };
1212
1213         if (switchdev_port_attr_get(a, &a_attr) ||
1214             switchdev_port_attr_get(b, &b_attr))
1215                 return false;
1216
1217         return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1218 }
1219
1220 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1221                                        struct net_device *group_dev)
1222 {
1223         struct net_device *lower_dev;
1224         struct list_head *iter;
1225
1226         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1227                 if (lower_dev == dev)
1228                         continue;
1229                 if (switchdev_port_same_parent_id(dev, lower_dev))
1230                         return lower_dev->offload_fwd_mark;
1231                 return switchdev_port_fwd_mark_get(dev, lower_dev);
1232         }
1233
1234         return dev->ifindex;
1235 }
1236
1237 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1238                                           u32 old_mark, u32 *reset_mark)
1239 {
1240         struct net_device *lower_dev;
1241         struct list_head *iter;
1242
1243         netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1244                 if (lower_dev->offload_fwd_mark == old_mark) {
1245                         if (!*reset_mark)
1246                                 *reset_mark = lower_dev->ifindex;
1247                         lower_dev->offload_fwd_mark = *reset_mark;
1248                 }
1249                 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1250         }
1251 }
1252
1253 /**
1254  *      switchdev_port_fwd_mark_set - Set port offload forwarding mark
1255  *
1256  *      @dev: port device
1257  *      @group_dev: containing device
1258  *      @joining: true if dev is joining group; false if leaving group
1259  *
1260  *      An ungrouped port's offload mark is just its ifindex.  A grouped
1261  *      port's (member of a bridge, for example) offload mark is the ifindex
1262  *      of one of the ports in the group with the same parent (switch) ID.
1263  *      Ports on the same device in the same group will have the same mark.
1264  *
1265  *      Example:
1266  *
1267  *              br0             ifindex=9
1268  *                sw1p1         ifindex=2       mark=2
1269  *                sw1p2         ifindex=3       mark=2
1270  *                sw2p1         ifindex=4       mark=5
1271  *                sw2p2         ifindex=5       mark=5
1272  *
1273  *      If sw2p2 leaves the bridge, we'll have:
1274  *
1275  *              br0             ifindex=9
1276  *                sw1p1         ifindex=2       mark=2
1277  *                sw1p2         ifindex=3       mark=2
1278  *                sw2p1         ifindex=4       mark=4
1279  *              sw2p2           ifindex=5       mark=5
1280  */
1281 void switchdev_port_fwd_mark_set(struct net_device *dev,
1282                                  struct net_device *group_dev,
1283                                  bool joining)
1284 {
1285         u32 mark = dev->ifindex;
1286         u32 reset_mark = 0;
1287
1288         if (group_dev && joining) {
1289                 mark = switchdev_port_fwd_mark_get(dev, group_dev);
1290         } else if (group_dev && !joining) {
1291                 if (dev->offload_fwd_mark == mark)
1292                         /* Ohoh, this port was the mark reference port,
1293                          * but it's leaving the group, so reset the
1294                          * mark for the remaining ports in the group.
1295                          */
1296                         switchdev_port_fwd_mark_reset(group_dev, mark,
1297                                                       &reset_mark);
1298         }
1299
1300         dev->offload_fwd_mark = mark;
1301 }
1302 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);