Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 26 Nov 2019 23:42:43 +0000 (15:42 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 26 Nov 2019 23:42:43 +0000 (15:42 -0800)
Pull RCU updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Dynamic tick (nohz) updates, perhaps most notably changes to force
     the tick on when needed due to lengthy in-kernel execution on CPUs
     on which RCU is waiting.

   - Linux-kernel memory consistency model updates.

   - Replace rcu_swap_protected() with rcu_replace_pointer() (a brief
     before/after sketch follows this summary).

   - Torture-test updates.

   - Documentation updates.

   - Miscellaneous fixes"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
  security/safesetid: Replace rcu_swap_protected() with rcu_replace_pointer()
  net/sched: Replace rcu_swap_protected() with rcu_replace_pointer()
  net/netfilter: Replace rcu_swap_protected() with rcu_replace_pointer()
  net/core: Replace rcu_swap_protected() with rcu_replace_pointer()
  bpf/cgroup: Replace rcu_swap_protected() with rcu_replace_pointer()
  fs/afs: Replace rcu_swap_protected() with rcu_replace_pointer()
  drivers/scsi: Replace rcu_swap_protected() with rcu_replace_pointer()
  drm/i915: Replace rcu_swap_protected() with rcu_replace_pointer()
  x86/kvm/pmu: Replace rcu_swap_protected() with rcu_replace_pointer()
  rcu: Upgrade rcu_swap_protected() to rcu_replace_pointer()
  rcu: Suppress levelspread uninitialized messages
  rcu: Fix uninitialized variable in nocb_gp_wait()
  rcu: Update descriptions for rcu_future_grace_period tracepoint
  rcu: Update descriptions for rcu_nocb_wake tracepoint
  rcu: Remove obsolete descriptions for rcu_barrier tracepoint
  rcu: Ensure that ->rcu_urgent_qs is set before resched IPI
  workqueue: Convert for_each_wq to use built-in list check
  rcu: Several rcu_segcblist functions can be static
  rcu: Remove unused function hlist_bl_del_init_rcu()
  Documentation: Rename rcu_node_context_switch() to rcu_note_context_switch()
  ...

20 files changed:
arch/x86/kvm/pmu.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
include/linux/tick.h
kernel/bpf/cgroup.c
kernel/time/tick-sched.c
kernel/workqueue.c
net/core/dev.c
net/netfilter/nf_tables_api.c
net/sched/act_api.c
net/sched/act_csum.c
net/sched/act_ct.c
net/sched/act_ctinfo.c
net/sched/act_ife.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_skbedit.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c

diff --combined arch/x86/kvm/pmu.c
index d5e6d5b3f06fd77af13d69192ecf6c5d4128d78f,5ddb05a26a1bd7293f81d6d5afd78998acdedc6c..bcc6a73d6628781809f781abfc0b372450af7106
@@@ -62,7 -62,8 +62,7 @@@ static void kvm_perf_overflow(struct pe
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
  
 -      if (!test_and_set_bit(pmc->idx,
 -                            (unsigned long *)&pmu->reprogram_pmi)) {
 +      if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
        }
@@@ -75,7 -76,8 +75,7 @@@ static void kvm_perf_overflow_intr(stru
        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
  
 -      if (!test_and_set_bit(pmc->idx,
 -                            (unsigned long *)&pmu->reprogram_pmi)) {
 +      if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
                __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
  
@@@ -135,37 -137,7 +135,37 @@@ static void pmc_reprogram_counter(struc
        }
  
        pmc->perf_event = event;
 -      clear_bit(pmc->idx, (unsigned long*)&pmc_to_pmu(pmc)->reprogram_pmi);
 +      pmc_to_pmu(pmc)->event_count++;
 +      clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 +}
 +
 +static void pmc_pause_counter(struct kvm_pmc *pmc)
 +{
 +      u64 counter = pmc->counter;
 +
 +      if (!pmc->perf_event)
 +              return;
 +
 +      /* update counter, reset event value to avoid redundant accumulation */
 +      counter += perf_event_pause(pmc->perf_event, true);
 +      pmc->counter = counter & pmc_bitmask(pmc);
 +}
 +
 +static bool pmc_resume_counter(struct kvm_pmc *pmc)
 +{
 +      if (!pmc->perf_event)
 +              return false;
 +
 +      /* recalibrate sample period and check if it's accepted by perf core */
 +      if (perf_event_period(pmc->perf_event,
 +                      (-pmc->counter) & pmc_bitmask(pmc)))
 +              return false;
 +
  +      /* reuse perf_event to serve as pmc_reprogram_counter() does */
 +      perf_event_enable(pmc->perf_event);
 +
 +      clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 +      return true;
  }
  
  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
  
        pmc->eventsel = eventsel;
  
 -      pmc_stop_counter(pmc);
 +      pmc_pause_counter(pmc);
  
        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
                return;
        if (type == PERF_TYPE_RAW)
                config = eventsel & X86_RAW_EVENT_MASK;
  
 +      if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
 +              return;
 +
 +      pmc_release_perf_event(pmc);
 +
 +      pmc->current_config = eventsel;
        pmc_reprogram_counter(pmc, type, config,
                              !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
                              !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
@@@ -243,7 -209,7 +243,7 @@@ void reprogram_fixed_counter(struct kvm
        struct kvm_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;
  
 -      pmc_stop_counter(pmc);
 +      pmc_pause_counter(pmc);
  
        if (!en_field || !pmc_is_enabled(pmc))
                return;
                        return;
        }
  
 +      if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
 +              return;
 +
 +      pmc_release_perf_event(pmc);
 +
 +      pmc->current_config = (u64)ctrl;
        pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
                              kvm_x86_ops->pmu_ops->find_fixed_event(idx),
                              !(en_field & 0x2), /* exclude user */
@@@ -293,32 -253,27 +293,32 @@@ EXPORT_SYMBOL_GPL(reprogram_counter)
  void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
  {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 -      u64 bitmask;
        int bit;
  
 -      bitmask = pmu->reprogram_pmi;
 -
 -      for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
 +      for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
                struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);
  
                if (unlikely(!pmc || !pmc->perf_event)) {
 -                      clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
 +                      clear_bit(bit, pmu->reprogram_pmi);
                        continue;
                }
  
                reprogram_counter(pmu, bit);
        }
 +
 +      /*
 +       * Unused perf_events are only released if the corresponding MSRs
 +       * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
 +       * triggers KVM_REQ_PMU if cleanup is needed.
 +       */
 +      if (unlikely(pmu->need_cleanup))
 +              kvm_pmu_cleanup(vcpu);
  }
  
  /* check if idx is a valid index to access PMU */
 -int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
 +int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
  {
 -      return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
 +      return kvm_x86_ops->pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
  }
  
  bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@@ -368,7 -323,7 +368,7 @@@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu
        if (is_vmware_backdoor_pmc(idx))
                return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
  
 -      pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
 +      pmc = kvm_x86_ops->pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
        if (!pmc)
                return 1;
  
@@@ -384,17 -339,7 +384,17 @@@ void kvm_pmu_deliver_pmi(struct kvm_vcp
  
  bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
  {
 -      return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
 +      return kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
 +              kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
 +}
 +
 +static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
 +{
 +      struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 +      struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, msr);
 +
 +      if (pmc)
 +              __set_bit(pmc->idx, pmu->pmc_in_use);
  }
  
  int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
  
  int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
 +      kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
        return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
  }
  
@@@ -432,45 -376,9 +432,45 @@@ void kvm_pmu_init(struct kvm_vcpu *vcpu
        memset(pmu, 0, sizeof(*pmu));
        kvm_x86_ops->pmu_ops->init(vcpu);
        init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
 +      pmu->event_count = 0;
 +      pmu->need_cleanup = false;
        kvm_pmu_refresh(vcpu);
  }
  
 +static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
 +{
 +      struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 +
 +      if (pmc_is_fixed(pmc))
 +              return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
 +                      pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
 +
 +      return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
 +}
 +
 +/* Release perf_events for vPMCs that have been unused for a full time slice.  */
 +void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 +      struct kvm_pmc *pmc = NULL;
 +      DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
 +      int i;
 +
 +      pmu->need_cleanup = false;
 +
 +      bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
 +                    pmu->pmc_in_use, X86_PMC_IDX_MAX);
 +
 +      for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
 +              pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, i);
 +
 +              if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
 +                      pmc_stop_counter(pmc);
 +      }
 +
 +      bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
 +}
 +
  void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
  {
        kvm_pmu_reset(vcpu);
@@@ -508,8 -416,8 +508,8 @@@ int kvm_vm_ioctl_set_pmu_event_filter(s
        *filter = tmp;
  
        mutex_lock(&kvm->lock);
-       rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
-                          mutex_is_locked(&kvm->lock));
+       filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
+                                    mutex_is_locked(&kvm->lock));
        mutex_unlock(&kvm->lock);
  
        synchronize_srcu_expedited(&kvm->srcu);
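
The kvm/pmu.c hunks above add a pause/resume fast path so a vPMC can keep its
host perf_event across reprogramming when the guest asks for the same event
configuration again. A condensed sketch of that fast path, built from the
helpers visible in the diff (error handling and the slow path elided):

    /* Pause: fold the accumulated count into the vPMC... */
    pmc->counter = (pmc->counter +
                    perf_event_pause(pmc->perf_event, true)) & pmc_bitmask(pmc);

    /* ...resume: recalibrate the sample period from the current counter
     * value and re-enable the very same perf_event, skipping the costly
     * release/recreate cycle.
     */
    if (pmc->current_config == eventsel &&
        !perf_event_period(pmc->perf_event, (-pmc->counter) & pmc_bitmask(pmc)))
            perf_event_enable(pmc->perf_event);
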
diff --combined drivers/gpu/drm/i915/gem/i915_gem_context.c
index e41fd94ae5a9d98c76bc84fe3de471408316ef54,3f3e803dfd5a3fa12430c76863c8425e038bf3a0..755c4542629f11f61dc30ded3cf5288d64eda3e6
@@@ -319,8 -319,6 +319,8 @@@ static void i915_gem_context_free(struc
        free_engines(rcu_access_pointer(ctx->engines));
        mutex_destroy(&ctx->engines_mutex);
  
 +      kfree(ctx->jump_whitelist);
 +
        if (ctx->timeline)
                intel_timeline_put(ctx->timeline);
  
@@@ -443,9 -441,6 +443,9 @@@ __create_context(struct drm_i915_privat
        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
                ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
  
 +      ctx->jump_whitelist = NULL;
 +      ctx->jump_whitelist_cmds = 0;
 +
        return ctx;
  
  err_free:
@@@ -1634,7 -1629,7 +1634,7 @@@ replace
                i915_gem_context_set_user_engines(ctx);
        else
                i915_gem_context_clear_user_engines(ctx);
-       rcu_swap_protected(ctx->engines, set.engines, 1);
+       set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
        mutex_unlock(&ctx->engines_mutex);
  
        call_rcu(&set.engines->rcu, free_engines_rcu);
diff --combined include/linux/tick.h
index 7e050a356cc51a3ddcdd0fc753bf3d81d47c3b24,39eb44564058bbb90dc86c0aecbfbf96edba7b24..7896f792d3b0b78151b2e06b3003bbd4f2d7dbfe
@@@ -108,7 -108,8 +108,8 @@@ enum tick_dep_bits 
        TICK_DEP_BIT_POSIX_TIMER        = 0,
        TICK_DEP_BIT_PERF_EVENTS        = 1,
        TICK_DEP_BIT_SCHED              = 2,
-       TICK_DEP_BIT_CLOCK_UNSTABLE     = 3
+       TICK_DEP_BIT_CLOCK_UNSTABLE     = 3,
+       TICK_DEP_BIT_RCU                = 4
  };
  
  #define TICK_DEP_MASK_NONE            0
  #define TICK_DEP_MASK_PERF_EVENTS     (1 << TICK_DEP_BIT_PERF_EVENTS)
  #define TICK_DEP_MASK_SCHED           (1 << TICK_DEP_BIT_SCHED)
  #define TICK_DEP_MASK_CLOCK_UNSTABLE  (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+ #define TICK_DEP_MASK_RCU             (1 << TICK_DEP_BIT_RCU)
  
  #ifdef CONFIG_NO_HZ_COMMON
  extern bool tick_nohz_enabled;
@@@ -174,7 -176,7 +176,7 @@@ extern cpumask_var_t tick_nohz_full_mas
  
  static inline bool tick_nohz_full_enabled(void)
  {
 -      if (!context_tracking_is_enabled())
 +      if (!context_tracking_enabled())
                return false;
  
        return tick_nohz_full_running;
@@@ -268,6 -270,9 +270,9 @@@ static inline bool tick_nohz_full_enabl
  static inline bool tick_nohz_full_cpu(int cpu) { return false; }
  static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
  
+ static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+ static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
  static inline void tick_dep_set(enum tick_dep_bits bit) { }
  static inline void tick_dep_clear(enum tick_dep_bits bit) { }
  static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
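
The new TICK_DEP_BIT_RCU above, paired with the tick_nohz_dep_*() exports in
kernel/time/tick-sched.c below, is what lets RCU force the scheduler tick back
on for a nohz_full CPU that executes in the kernel for a long time without
reporting a quiescent state. The calling pattern, sketched with RCU's
bookkeeping simplified away:

    /* RCU is still waiting on this CPU: keep the tick running... */
    tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU);

    /* ...and drop the dependency once a quiescent state is reported,
     * letting the CPU go tickless again.
     */
    tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU);
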
diff --combined kernel/bpf/cgroup.c
index a3eaf08e7dd3a843e697583e03e474aec7199ac1,c684cf424849bddcb93089b8aa834359b98e3576..9f90d3c92bdaca011f5645aa326fab4b09166d70
@@@ -180,8 -180,8 +180,8 @@@ static void activate_effective_progs(st
                                     enum bpf_attach_type type,
                                     struct bpf_prog_array *old_array)
  {
-       rcu_swap_protected(cgrp->bpf.effective[type], old_array,
-                          lockdep_is_held(&cgroup_mutex));
+       old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+                                       lockdep_is_held(&cgroup_mutex));
        /* free prog array after grace period, since __cgroup_bpf_run_*()
         * might be still walking the array
         */
@@@ -1311,12 -1311,12 +1311,12 @@@ static bool sysctl_is_valid_access(int 
                return false;
  
        switch (off) {
 -      case offsetof(struct bpf_sysctl, write):
 +      case bpf_ctx_range(struct bpf_sysctl, write):
                if (type != BPF_READ)
                        return false;
                bpf_ctx_record_field_size(info, size_default);
                return bpf_ctx_narrow_access_ok(off, size, size_default);
 -      case offsetof(struct bpf_sysctl, file_pos):
 +      case bpf_ctx_range(struct bpf_sysctl, file_pos):
                if (type == BPF_READ) {
                        bpf_ctx_record_field_size(info, size_default);
                        return bpf_ctx_narrow_access_ok(off, size, size_default);
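
The switch cases above widen from a single offsetof() to bpf_ctx_range(),
which expands (roughly) to the GCC case range
offsetof(T, M) ... offsetofend(T, M) - 1. The practical effect, sketched:

    switch (off) {
    case bpf_ctx_range(struct bpf_sysctl, file_pos):
            /* Now reached for a narrow (1/2/4-byte) load anywhere
             * inside file_pos, not only at its first byte.
             */
            return bpf_ctx_narrow_access_ok(off, size, size_default);
    }
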
diff --combined kernel/time/tick-sched.c
index c2748232f607b6fb100ece623798af4553246f50,1ffdb4ba1ded17911615182052622d8ef083ae70..8b192e67aabc9d16d8a7b08c0356641b5462c783
@@@ -172,6 -172,7 +172,7 @@@ static void tick_sched_handle(struct ti
  #ifdef CONFIG_NO_HZ_FULL
  cpumask_var_t tick_nohz_full_mask;
  bool tick_nohz_full_running;
+ EXPORT_SYMBOL_GPL(tick_nohz_full_running);
  static atomic_t tick_dep_mask;
  
  static bool check_tick_dependency(atomic_t *dep)
                return true;
        }
  
+       if (val & TICK_DEP_MASK_RCU) {
+               trace_tick_stop(0, TICK_DEP_MASK_RCU);
+               return true;
+       }
        return false;
  }
  
@@@ -324,6 -330,7 +330,7 @@@ void tick_nohz_dep_set_cpu(int cpu, enu
                preempt_enable();
        }
  }
+ EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
  
  void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
  {
  
        atomic_andnot(BIT(bit), &ts->tick_dep_mask);
  }
+ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
  
  /*
   * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
@@@ -344,11 -352,13 +352,13 @@@ void tick_nohz_dep_set_task(struct task
         */
        tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
  }
+ EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
  
  void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
  {
        atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
  }
+ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
  
  /*
   * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
@@@ -397,6 -407,7 +407,7 @@@ void __init tick_nohz_full_setup(cpumas
        cpumask_copy(tick_nohz_full_mask, cpumask);
        tick_nohz_full_running = true;
  }
+ EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
  
  static int tick_nohz_cpu_down(unsigned int cpu)
  {
@@@ -1119,7 -1130,7 +1130,7 @@@ static void tick_nohz_account_idle_tick
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        unsigned long ticks;
  
 -      if (vtime_accounting_cpu_enabled())
 +      if (vtime_accounting_enabled_this_cpu())
                return;
        /*
         * We stopped the tick in idle. Update process times would miss the
diff --combined kernel/workqueue.c
index 914b845ad4ff10e2d89ba34db43ed68d1d2367d2,e501c79e283adc5cd5df4ab7d87696a0e74acdde..bc88fd939f4e72c43de15ad96c42aa2fdce4a996
@@@ -248,7 -248,7 +248,7 @@@ struct workqueue_struct 
        struct list_head        flusher_overflow; /* WQ: flush overflow list */
  
        struct list_head        maydays;        /* MD: pwqs requesting rescue */
 -      struct worker           *rescuer;       /* I: rescue worker */
 +      struct worker           *rescuer;       /* MD: rescue worker */
  
        int                     nr_drainers;    /* WQ: drain in progress */
        int                     saved_max_active; /* WQ: saved pwq max_active */
@@@ -355,7 -355,6 +355,7 @@@ EXPORT_SYMBOL_GPL(system_freezable_powe
  
  static int worker_thread(void *__worker);
  static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 +static void show_pwq(struct pool_workqueue *pwq);
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/workqueue.h>
                         !lockdep_is_held(&wq_pool_mutex),              \
                         "RCU or wq_pool_mutex should be held")
  
- #define assert_rcu_or_wq_mutex(wq)                                    \
-       RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
-                        !lockdep_is_held(&wq->mutex),                  \
-                        "RCU or wq->mutex should be held")
  #define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                      \
        RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq->mutex) &&                \
   */
  #define for_each_pwq(pwq, wq)                                         \
        list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,          \
-                               lockdep_is_held(&wq->mutex))            \
-               if (({ assert_rcu_or_wq_mutex(wq); false; })) { }       \
-               else
+                                lockdep_is_held(&(wq->mutex)))
  
  #ifdef CONFIG_DEBUG_OBJECTS_WORK
  
@@@ -2534,14 -2526,8 +2527,14 @@@ repeat
                         */
                        if (need_to_create_worker(pool)) {
                                spin_lock(&wq_mayday_lock);
 -                              get_pwq(pwq);
 -                              list_move_tail(&pwq->mayday_node, &wq->maydays);
 +                              /*
 +                               * Queue iff we aren't racing destruction
 +                               * and somebody else hasn't queued it already.
 +                               */
 +                              if (wq->rescuer && list_empty(&pwq->mayday_node)) {
 +                                      get_pwq(pwq);
 +                                      list_add_tail(&pwq->mayday_node, &wq->maydays);
 +                              }
                                spin_unlock(&wq_mayday_lock);
                        }
                }
@@@ -4322,22 -4308,6 +4315,22 @@@ err_destroy
  }
  EXPORT_SYMBOL_GPL(alloc_workqueue);
  
 +static bool pwq_busy(struct pool_workqueue *pwq)
 +{
 +      int i;
 +
 +      for (i = 0; i < WORK_NR_COLORS; i++)
 +              if (pwq->nr_in_flight[i])
 +                      return true;
 +
 +      if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
 +              return true;
 +      if (pwq->nr_active || !list_empty(&pwq->delayed_works))
 +              return true;
 +
 +      return false;
 +}
 +
  /**
   * destroy_workqueue - safely terminate a workqueue
   * @wq: target workqueue
@@@ -4349,51 -4319,31 +4342,51 @@@ void destroy_workqueue(struct workqueue
        struct pool_workqueue *pwq;
        int node;
  
 +      /*
 +       * Remove it from sysfs first so that sanity check failure doesn't
 +       * lead to sysfs name conflicts.
 +       */
 +      workqueue_sysfs_unregister(wq);
 +
        /* drain it before proceeding with destruction */
        drain_workqueue(wq);
  
 -      /* sanity checks */
 -      mutex_lock(&wq->mutex);
 -      for_each_pwq(pwq, wq) {
 -              int i;
 +      /* kill rescuer, if sanity checks fail, leave it w/o rescuer */
 +      if (wq->rescuer) {
 +              struct worker *rescuer = wq->rescuer;
  
 -              for (i = 0; i < WORK_NR_COLORS; i++) {
 -                      if (WARN_ON(pwq->nr_in_flight[i])) {
 -                              mutex_unlock(&wq->mutex);
 -                              show_workqueue_state();
 -                              return;
 -                      }
 -              }
 +              /* this prevents new queueing */
 +              spin_lock_irq(&wq_mayday_lock);
 +              wq->rescuer = NULL;
 +              spin_unlock_irq(&wq_mayday_lock);
  
 -              if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
 -                  WARN_ON(pwq->nr_active) ||
 -                  WARN_ON(!list_empty(&pwq->delayed_works))) {
 +              /* rescuer will empty maydays list before exiting */
 +              kthread_stop(rescuer->task);
 +              kfree(rescuer);
 +      }
 +
 +      /*
 +       * Sanity checks - grab all the locks so that we wait for all
 +       * in-flight operations which may do put_pwq().
 +       */
 +      mutex_lock(&wq_pool_mutex);
 +      mutex_lock(&wq->mutex);
 +      for_each_pwq(pwq, wq) {
 +              spin_lock_irq(&pwq->pool->lock);
 +              if (WARN_ON(pwq_busy(pwq))) {
 +                      pr_warning("%s: %s has the following busy pwq\n",
 +                                 __func__, wq->name);
 +                      show_pwq(pwq);
 +                      spin_unlock_irq(&pwq->pool->lock);
                        mutex_unlock(&wq->mutex);
 +                      mutex_unlock(&wq_pool_mutex);
                        show_workqueue_state();
                        return;
                }
 +              spin_unlock_irq(&pwq->pool->lock);
        }
        mutex_unlock(&wq->mutex);
 +      mutex_unlock(&wq_pool_mutex);
  
        /*
         * wq list is used to freeze wq, remove from list after
        list_del_rcu(&wq->list);
        mutex_unlock(&wq_pool_mutex);
  
 -      workqueue_sysfs_unregister(wq);
 -
 -      if (wq->rescuer)
 -              kthread_stop(wq->rescuer->task);
 -
        if (!(wq->flags & WQ_UNBOUND)) {
                wq_unregister_lockdep(wq);
                /*
@@@ -4677,8 -4632,7 +4670,8 @@@ static void show_pwq(struct pool_workqu
        pr_info("  pwq %d:", pool->id);
        pr_cont_pool_info(pool);
  
 -      pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
 +      pr_cont(" active=%d/%d refcnt=%d%s\n",
 +              pwq->nr_active, pwq->max_active, pwq->refcnt,
                !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
  
        hash_for_each(pool->busy_hash, bkt, worker, hentry) {
  
                        pr_cont("%s %d%s:%ps", comma ? "," : "",
                                task_pid_nr(worker->task),
 -                              worker == pwq->wq->rescuer ? "(RESCUER)" : "",
 +                              worker->rescue_wq ? "(RESCUER)" : "",
                                worker->current_func);
                        list_for_each_entry(work, &worker->scheduled, entry)
                                pr_cont_work(false, work);
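
The for_each_pwq() rewrite above relies on list_for_each_entry_rcu() having
grown an optional lockdep expression, which replaces the open-coded
assert_rcu_or_wq_mutex() macro deleted here. In sketch form:

    /* Legal either under rcu_read_lock() or with wq->mutex held;
     * lockdep checks the stated condition directly.
     */
    list_for_each_entry_rcu(pwq, &wq->pwqs, pwqs_node,
                            lockdep_is_held(&wq->mutex))
            show_pwq(pwq);
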
diff --combined net/core/dev.c
index c7fc902ccbdc5895a373c441f304b5e868a60dd3,c5d8882d100facd6f96a70186727f97f94a31ad7..46580b290450edfd2aa8a434ae2c14245bd336bb
  #include "net-sysfs.h"
  
  #define MAX_GRO_SKBS 8
 +#define MAX_NEST_DEV 8
  
  /* This should be increased if a protocol with a bigger head is added. */
  #define GRO_MAX_HEAD (MAX_HEADER + 128)
@@@ -229,122 -228,6 +229,122 @@@ static inline void rps_unlock(struct so
  #endif
  }
  
 +static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
 +                                                     const char *name)
 +{
 +      struct netdev_name_node *name_node;
 +
 +      name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
 +      if (!name_node)
 +              return NULL;
 +      INIT_HLIST_NODE(&name_node->hlist);
 +      name_node->dev = dev;
 +      name_node->name = name;
 +      return name_node;
 +}
 +
 +static struct netdev_name_node *
 +netdev_name_node_head_alloc(struct net_device *dev)
 +{
 +      struct netdev_name_node *name_node;
 +
 +      name_node = netdev_name_node_alloc(dev, dev->name);
 +      if (!name_node)
 +              return NULL;
 +      INIT_LIST_HEAD(&name_node->list);
 +      return name_node;
 +}
 +
 +static void netdev_name_node_free(struct netdev_name_node *name_node)
 +{
 +      kfree(name_node);
 +}
 +
 +static void netdev_name_node_add(struct net *net,
 +                               struct netdev_name_node *name_node)
 +{
 +      hlist_add_head_rcu(&name_node->hlist,
 +                         dev_name_hash(net, name_node->name));
 +}
 +
 +static void netdev_name_node_del(struct netdev_name_node *name_node)
 +{
 +      hlist_del_rcu(&name_node->hlist);
 +}
 +
 +static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
 +                                                      const char *name)
 +{
 +      struct hlist_head *head = dev_name_hash(net, name);
 +      struct netdev_name_node *name_node;
 +
 +      hlist_for_each_entry(name_node, head, hlist)
 +              if (!strcmp(name_node->name, name))
 +                      return name_node;
 +      return NULL;
 +}
 +
 +static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
 +                                                          const char *name)
 +{
 +      struct hlist_head *head = dev_name_hash(net, name);
 +      struct netdev_name_node *name_node;
 +
 +      hlist_for_each_entry_rcu(name_node, head, hlist)
 +              if (!strcmp(name_node->name, name))
 +                      return name_node;
 +      return NULL;
 +}
 +
 +int netdev_name_node_alt_create(struct net_device *dev, const char *name)
 +{
 +      struct netdev_name_node *name_node;
 +      struct net *net = dev_net(dev);
 +
 +      name_node = netdev_name_node_lookup(net, name);
 +      if (name_node)
 +              return -EEXIST;
 +      name_node = netdev_name_node_alloc(dev, name);
 +      if (!name_node)
 +              return -ENOMEM;
 +      netdev_name_node_add(net, name_node);
 +      /* The node that holds dev->name acts as a head of per-device list. */
 +      list_add_tail(&name_node->list, &dev->name_node->list);
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL(netdev_name_node_alt_create);
 +
 +static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
 +{
 +      list_del(&name_node->list);
 +      netdev_name_node_del(name_node);
 +      kfree(name_node->name);
 +      netdev_name_node_free(name_node);
 +}
 +
 +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
 +{
 +      struct netdev_name_node *name_node;
 +      struct net *net = dev_net(dev);
 +
 +      name_node = netdev_name_node_lookup(net, name);
 +      if (!name_node)
 +              return -ENOENT;
 +      __netdev_name_node_alt_destroy(name_node);
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL(netdev_name_node_alt_destroy);
 +
 +static void netdev_name_node_alt_flush(struct net_device *dev)
 +{
 +      struct netdev_name_node *name_node, *tmp;
 +
 +      list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
 +              __netdev_name_node_alt_destroy(name_node);
 +}
 +
  /* Device list insertion */
  static void list_netdevice(struct net_device *dev)
  {
  
        write_lock_bh(&dev_base_lock);
        list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
 -      hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 +      netdev_name_node_add(net, dev->name_node);
        hlist_add_head_rcu(&dev->index_hlist,
                           dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
@@@ -372,7 -255,7 +372,7 @@@ static void unlist_netdevice(struct net
        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
        list_del_rcu(&dev->dev_list);
 -      hlist_del_rcu(&dev->name_hlist);
 +      netdev_name_node_del(dev->name_node);
        hlist_del_rcu(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
  
@@@ -393,6 -276,88 +393,6 @@@ static RAW_NOTIFIER_HEAD(netdev_chain)
  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
  EXPORT_PER_CPU_SYMBOL(softnet_data);
  
 -#ifdef CONFIG_LOCKDEP
 -/*
 - * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 - * according to dev->type
 - */
 -static const unsigned short netdev_lock_type[] = {
 -       ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 -       ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 -       ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 -       ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 -       ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 -       ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 -       ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 -       ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 -       ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 -       ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 -       ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 -       ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 -       ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
 -       ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
 -       ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
 -
 -static const char *const netdev_lock_name[] = {
 -      "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 -      "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 -      "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 -      "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 -      "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 -      "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 -      "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 -      "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 -      "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 -      "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 -      "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 -      "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 -      "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
 -      "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
 -      "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
 -
 -static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 -static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 -
 -static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 -{
 -      int i;
 -
 -      for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 -              if (netdev_lock_type[i] == dev_type)
 -                      return i;
 -      /* the last key is used by default */
 -      return ARRAY_SIZE(netdev_lock_type) - 1;
 -}
 -
 -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 -                                               unsigned short dev_type)
 -{
 -      int i;
 -
 -      i = netdev_lock_pos(dev_type);
 -      lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 -                                 netdev_lock_name[i]);
 -}
 -
 -static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 -{
 -      int i;
 -
 -      i = netdev_lock_pos(dev->type);
 -      lockdep_set_class_and_name(&dev->addr_list_lock,
 -                                 &netdev_addr_lock_key[i],
 -                                 netdev_lock_name[i]);
 -}
 -#else
 -static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
 -                                               unsigned short dev_type)
 -{
 -}
 -static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 -{
 -}
 -#endif
 -
  /*******************************************************************************
   *
   *            Protocol management and registration routines
@@@ -768,10 -733,14 +768,10 @@@ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst
  
  struct net_device *__dev_get_by_name(struct net *net, const char *name)
  {
 -      struct net_device *dev;
 -      struct hlist_head *head = dev_name_hash(net, name);
 -
 -      hlist_for_each_entry(dev, head, name_hlist)
 -              if (!strncmp(dev->name, name, IFNAMSIZ))
 -                      return dev;
 +      struct netdev_name_node *node_name;
  
 -      return NULL;
 +      node_name = netdev_name_node_lookup(net, name);
 +      return node_name ? node_name->dev : NULL;
  }
  EXPORT_SYMBOL(__dev_get_by_name);
  
  
  struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
  {
 -      struct net_device *dev;
 -      struct hlist_head *head = dev_name_hash(net, name);
 +      struct netdev_name_node *node_name;
  
 -      hlist_for_each_entry_rcu(dev, head, name_hlist)
 -              if (!strncmp(dev->name, name, IFNAMSIZ))
 -                      return dev;
 -
 -      return NULL;
 +      node_name = netdev_name_node_lookup_rcu(net, name);
 +      return node_name ? node_name->dev : NULL;
  }
  EXPORT_SYMBOL(dev_get_by_name_rcu);
  
@@@ -1168,8 -1141,8 +1168,8 @@@ int dev_alloc_name(struct net_device *d
  }
  EXPORT_SYMBOL(dev_alloc_name);
  
 -int dev_get_valid_name(struct net *net, struct net_device *dev,
 -                     const char *name)
 +static int dev_get_valid_name(struct net *net, struct net_device *dev,
 +                            const char *name)
  {
        BUG_ON(!net);
  
  
        return 0;
  }
 -EXPORT_SYMBOL(dev_get_valid_name);
  
  /**
   *    dev_change_name - change name of a device
@@@ -1258,13 -1232,13 +1258,13 @@@ rollback
        netdev_adjacent_rename_links(dev, oldname);
  
        write_lock_bh(&dev_base_lock);
 -      hlist_del_rcu(&dev->name_hlist);
 +      netdev_name_node_del(dev->name_node);
        write_unlock_bh(&dev_base_lock);
  
        synchronize_rcu();
  
        write_lock_bh(&dev_base_lock);
 -      hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
 +      netdev_name_node_add(net, dev->name_node);
        write_unlock_bh(&dev_base_lock);
  
        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
@@@ -1314,8 -1288,8 +1314,8 @@@ int dev_set_alias(struct net_device *de
        }
  
        mutex_lock(&ifalias_mutex);
-       rcu_swap_protected(dev->ifalias, new_alias,
-                          mutex_is_locked(&ifalias_mutex));
+       new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
+                                       mutex_is_locked(&ifalias_mutex));
        mutex_unlock(&ifalias_mutex);
  
        if (new_alias)
@@@ -1643,62 -1617,6 +1643,62 @@@ static int call_netdevice_notifier(stru
        return nb->notifier_call(nb, val, &info);
  }
  
 +static int call_netdevice_register_notifiers(struct notifier_block *nb,
 +                                           struct net_device *dev)
 +{
 +      int err;
 +
 +      err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
 +      err = notifier_to_errno(err);
 +      if (err)
 +              return err;
 +
 +      if (!(dev->flags & IFF_UP))
 +              return 0;
 +
 +      call_netdevice_notifier(nb, NETDEV_UP, dev);
 +      return 0;
 +}
 +
 +static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
 +                                              struct net_device *dev)
 +{
 +      if (dev->flags & IFF_UP) {
 +              call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
 +                                      dev);
 +              call_netdevice_notifier(nb, NETDEV_DOWN, dev);
 +      }
 +      call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 +}
 +
 +static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
 +                                               struct net *net)
 +{
 +      struct net_device *dev;
 +      int err;
 +
 +      for_each_netdev(net, dev) {
 +              err = call_netdevice_register_notifiers(nb, dev);
 +              if (err)
 +                      goto rollback;
 +      }
 +      return 0;
 +
 +rollback:
 +      for_each_netdev_continue_reverse(net, dev)
 +              call_netdevice_unregister_notifiers(nb, dev);
 +      return err;
 +}
 +
 +static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
 +                                                  struct net *net)
 +{
 +      struct net_device *dev;
 +
 +      for_each_netdev(net, dev)
 +              call_netdevice_unregister_notifiers(nb, dev);
 +}
 +
  static int dev_boot_phase = 1;
  
  /**
  
  int register_netdevice_notifier(struct notifier_block *nb)
  {
 -      struct net_device *dev;
 -      struct net_device *last;
        struct net *net;
        int err;
  
        if (dev_boot_phase)
                goto unlock;
        for_each_net(net) {
 -              for_each_netdev(net, dev) {
 -                      err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
 -                      err = notifier_to_errno(err);
 -                      if (err)
 -                              goto rollback;
 -
 -                      if (!(dev->flags & IFF_UP))
 -                              continue;
 -
 -                      call_netdevice_notifier(nb, NETDEV_UP, dev);
 -              }
 +              err = call_netdevice_register_net_notifiers(nb, net);
 +              if (err)
 +                      goto rollback;
        }
  
  unlock:
        return err;
  
  rollback:
 -      last = dev;
 -      for_each_net(net) {
 -              for_each_netdev(net, dev) {
 -                      if (dev == last)
 -                              goto outroll;
 -
 -                      if (dev->flags & IFF_UP) {
 -                              call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
 -                                                      dev);
 -                              call_netdevice_notifier(nb, NETDEV_DOWN, dev);
 -                      }
 -                      call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 -              }
 -      }
 +      for_each_net_continue_reverse(net)
 +              call_netdevice_unregister_net_notifiers(nb, net);
  
 -outroll:
        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
  }
@@@ -1792,80 -1733,6 +1792,80 @@@ unlock
  }
  EXPORT_SYMBOL(unregister_netdevice_notifier);
  
 +/**
 + * register_netdevice_notifier_net - register a per-netns network notifier block
 + * @net: network namespace
 + * @nb: notifier
 + *
 + * Register a notifier to be called when network device events occur.
 + * The notifier passed is linked into the kernel structures and must
 + * not be reused until it has been unregistered. A negative errno code
 + * is returned on a failure.
 + *
  + * When registered, all registration and up events are replayed
  + * to the new notifier so that it has a race-free view of the
  + * network device list.
 + */
 +
 +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
 +{
 +      int err;
 +
 +      rtnl_lock();
 +      err = raw_notifier_chain_register(&net->netdev_chain, nb);
 +      if (err)
 +              goto unlock;
 +      if (dev_boot_phase)
 +              goto unlock;
 +
 +      err = call_netdevice_register_net_notifiers(nb, net);
 +      if (err)
 +              goto chain_unregister;
 +
 +unlock:
 +      rtnl_unlock();
 +      return err;
 +
 +chain_unregister:
  +      raw_notifier_chain_unregister(&net->netdev_chain, nb);
 +      goto unlock;
 +}
 +EXPORT_SYMBOL(register_netdevice_notifier_net);
 +
 +/**
 + * unregister_netdevice_notifier_net - unregister a per-netns
 + *                                     network notifier block
 + * @net: network namespace
 + * @nb: notifier
 + *
  + * Unregister a notifier previously registered by
  + * register_netdevice_notifier_net(). The notifier is unlinked from the
  + * kernel structures and may then be reused. A negative errno code
  + * is returned on a failure.
 + *
  + * After unregistering, unregister and down device events are synthesized
 + * for all devices on the device list to the removed notifier to remove
 + * the need for special case cleanup code.
 + */
 +
 +int unregister_netdevice_notifier_net(struct net *net,
 +                                    struct notifier_block *nb)
 +{
 +      int err;
 +
 +      rtnl_lock();
 +      err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
 +      if (err)
 +              goto unlock;
 +
 +      call_netdevice_unregister_net_notifiers(nb, net);
 +
 +unlock:
 +      rtnl_unlock();
 +      return err;
 +}
 +EXPORT_SYMBOL(unregister_netdevice_notifier_net);
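
A usage sketch for the per-netns notifier API above (hypothetical subsystem
code, not from this merge):

    static int my_netdev_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);

            /* Called only for devices in the netns passed at registration. */
            if (event == NETDEV_UP)
                    pr_debug("%s is up\n", dev->name);
            return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = {
            .notifier_call = my_netdev_event,
    };

    /* register_netdevice_notifier_net(net, &my_nb) on setup,
     * unregister_netdevice_notifier_net(net, &my_nb) on teardown.
     */
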
 +
  /**
   *    call_netdevice_notifiers_info - call all network notifier blocks
   *    @val: value passed unmodified to notifier function
  static int call_netdevice_notifiers_info(unsigned long val,
                                         struct netdev_notifier_info *info)
  {
 +      struct net *net = dev_net(info->dev);
 +      int ret;
 +
        ASSERT_RTNL();
 +
 +      /* Run per-netns notifier block chain first, then run the global one.
 +       * Hopefully, one day, the global one is going to be removed after
 +       * all notifier block registrators get converted to be per-netns.
 +       */
 +      ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
 +      if (ret & NOTIFY_STOP_MASK)
 +              return ret;
        return raw_notifier_call_chain(&netdev_chain, val, info);
  }
  
@@@ -2915,7 -2771,7 +2915,7 @@@ static struct dev_kfree_skb_cb *get_kfr
  void netif_schedule_queue(struct netdev_queue *txq)
  {
        rcu_read_lock();
 -      if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
 +      if (!netif_xmit_stopped(txq)) {
                struct Qdisc *q = rcu_dereference(txq->qdisc);
  
                __netif_schedule(q);
@@@ -3083,9 -2939,12 +3083,9 @@@ int skb_checksum_help(struct sk_buff *s
        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
  
 -      if (skb_cloned(skb) &&
 -          !skb_clone_writable(skb, offset + sizeof(__sum16))) {
 -              ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 -              if (ret)
 -                      goto out;
 -      }
 +      ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
 +      if (ret)
 +              goto out;
  
        *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
  out_set_summed:
@@@ -3120,11 -2979,12 +3120,11 @@@ int skb_crc32c_csum_help(struct sk_buf
                ret = -EINVAL;
                goto out;
        }
 -      if (skb_cloned(skb) &&
 -          !skb_clone_writable(skb, offset + sizeof(__le32))) {
 -              ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 -              if (ret)
 -                      goto out;
 -      }
 +
 +      ret = skb_ensure_writable(skb, offset + sizeof(__le32));
 +      if (ret)
 +              goto out;
 +
        crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
                                                  skb->len - start, ~(__u32)0,
                                                  crc32c_csum_stub));
@@@ -3607,7 -3467,7 +3607,7 @@@ static inline int __dev_xmit_skb(struc
        qdisc_calculate_pkt_len(skb, q);
  
        if (q->flags & TCQ_F_NOLOCK) {
 -              if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
 +              if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
                    qdisc_run_begin(q)) {
                        if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
                                              &q->state))) {
@@@ -5586,7 -5446,7 +5586,7 @@@ static struct list_head *gro_list_prepa
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
                diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
                if (skb_vlan_tag_present(p))
 -                      diffs |= p->vlan_tci ^ skb->vlan_tci;
 +                      diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
                diffs |= skb_metadata_dst_cmp(p, skb);
                diffs |= skb_metadata_differs(p, skb);
                if (maclen == ETH_HLEN)
@@@ -5611,7 -5471,8 +5611,7 @@@ static void skb_gro_reset_offset(struc
        NAPI_GRO_CB(skb)->frag0 = NULL;
        NAPI_GRO_CB(skb)->frag0_len = 0;
  
 -      if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
 -          pinfo->nr_frags &&
 +      if (!skb_headlen(skb) && pinfo->nr_frags &&
            !PageHighMem(skb_frag_page(frag0))) {
                NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
                NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
@@@ -5802,26 -5663,6 +5802,26 @@@ struct packet_offload *gro_find_complet
  }
  EXPORT_SYMBOL(gro_find_complete_by_type);
  
 +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
 +static void gro_normal_list(struct napi_struct *napi)
 +{
 +      if (!napi->rx_count)
 +              return;
 +      netif_receive_skb_list_internal(&napi->rx_list);
 +      INIT_LIST_HEAD(&napi->rx_list);
 +      napi->rx_count = 0;
 +}
 +
 +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
 + * pass the whole batch up to the stack.
 + */
 +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
 +{
 +      list_add_tail(&skb->list, &napi->rx_list);
 +      if (++napi->rx_count >= gro_normal_batch)
 +              gro_normal_list(napi);
 +}
 +
  static void napi_skb_free_stolen_head(struct sk_buff *skb)
  {
        skb_dst_drop(skb);
        kmem_cache_free(skbuff_head_cache, skb);
  }
  
 -static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 +static gro_result_t napi_skb_finish(struct napi_struct *napi,
 +                                  struct sk_buff *skb,
 +                                  gro_result_t ret)
  {
        switch (ret) {
        case GRO_NORMAL:
 -              if (netif_receive_skb_internal(skb))
 -                      ret = GRO_DROP;
 +              gro_normal_one(napi, skb);
                break;
  
        case GRO_DROP:
@@@ -5867,7 -5707,7 +5867,7 @@@ gro_result_t napi_gro_receive(struct na
  
        skb_gro_reset_offset(skb);
  
 -      ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
 +      ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
        trace_napi_gro_receive_exit(ret);
  
        return ret;
@@@ -5913,6 -5753,26 +5913,6 @@@ struct sk_buff *napi_get_frags(struct n
  }
  EXPORT_SYMBOL(napi_get_frags);
  
 -/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
 -static void gro_normal_list(struct napi_struct *napi)
 -{
 -      if (!napi->rx_count)
 -              return;
 -      netif_receive_skb_list_internal(&napi->rx_list);
 -      INIT_LIST_HEAD(&napi->rx_list);
 -      napi->rx_count = 0;
 -}
 -
 -/* Queue one GRO_NORMAL SKB up for list processing.  If batch size exceeded,
 - * pass the whole batch up to the stack.
 - */
 -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
 -{
 -      list_add_tail(&skb->list, &napi->rx_list);
 -      if (++napi->rx_count >= gro_normal_batch)
 -              gro_normal_list(napi);
 -}
 -
  static gro_result_t napi_frags_finish(struct napi_struct *napi,
                                      struct sk_buff *skb,
                                      gro_result_t ret)
@@@ -6629,9 -6489,6 +6629,9 @@@ struct netdev_adjacent 
        /* upper master flag, there can only be one master device per list */
        bool master;
  
 +      /* lookup ignore flag */
 +      bool ignore;
 +
        /* counter for the number of times this device was added to us */
        u16 ref_nr;
  
@@@ -6654,7 -6511,7 +6654,7 @@@ static struct netdev_adjacent *__netdev
        return NULL;
  }
  
 -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
 +static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
  {
        struct net_device *dev = data;
  
@@@ -6675,7 -6532,7 +6675,7 @@@ bool netdev_has_upper_dev(struct net_de
  {
        ASSERT_RTNL();
  
 -      return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
 +      return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
                                             upper_dev);
  }
  EXPORT_SYMBOL(netdev_has_upper_dev);
  bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
                                  struct net_device *upper_dev)
  {
 -      return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
 +      return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
                                               upper_dev);
  }
  EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@@ -6737,22 -6594,6 +6737,22 @@@ struct net_device *netdev_master_upper_
  }
  EXPORT_SYMBOL(netdev_master_upper_dev_get);
  
 +static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
 +{
 +      struct netdev_adjacent *upper;
 +
 +      ASSERT_RTNL();
 +
 +      if (list_empty(&dev->adj_list.upper))
 +              return NULL;
 +
 +      upper = list_first_entry(&dev->adj_list.upper,
 +                               struct netdev_adjacent, list);
 +      if (likely(upper->master) && !upper->ignore)
 +              return upper->dev;
 +      return NULL;
 +}
 +
  /**
   * netdev_has_any_lower_dev - Check if device is linked to some device
   * @dev: device
@@@ -6803,23 -6644,6 +6803,23 @@@ struct net_device *netdev_upper_get_nex
  }
  EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
  
 +static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
 +                                                struct list_head **iter,
 +                                                bool *ignore)
 +{
 +      struct netdev_adjacent *upper;
 +
 +      upper = list_entry((*iter)->next, struct netdev_adjacent, list);
 +
 +      if (&upper->list == &dev->adj_list.upper)
 +              return NULL;
 +
 +      *iter = &upper->list;
 +      *ignore = upper->ignore;
 +
 +      return upper->dev;
 +}
 +
  static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
                                                    struct list_head **iter)
  {
        return upper->dev;
  }
  
 +static int __netdev_walk_all_upper_dev(struct net_device *dev,
 +                                     int (*fn)(struct net_device *dev,
 +                                               void *data),
 +                                     void *data)
 +{
 +      struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 +      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
 +      int ret, cur = 0;
 +      bool ignore;
 +
 +      now = dev;
 +      iter = &dev->adj_list.upper;
 +
 +      while (1) {
 +              if (now != dev) {
 +                      ret = fn(now, data);
 +                      if (ret)
 +                              return ret;
 +              }
 +
 +              next = NULL;
 +              while (1) {
 +                      udev = __netdev_next_upper_dev(now, &iter, &ignore);
 +                      if (!udev)
 +                              break;
 +                      if (ignore)
 +                              continue;
 +
 +                      next = udev;
 +                      niter = &udev->adj_list.upper;
 +                      dev_stack[cur] = now;
 +                      iter_stack[cur++] = iter;
 +                      break;
 +              }
 +
 +              if (!next) {
 +                      if (!cur)
 +                              return 0;
 +                      next = dev_stack[--cur];
 +                      niter = iter_stack[cur];
 +              }
 +
 +              now = next;
 +              iter = niter;
 +      }
 +
 +      return 0;
 +}
 +
  int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *dev,
                                            void *data),
                                  void *data)
  {
 -      struct net_device *udev;
 -      struct list_head *iter;
 -      int ret;
 +      struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 +      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
 +      int ret, cur = 0;
  
 -      for (iter = &dev->adj_list.upper,
 -           udev = netdev_next_upper_dev_rcu(dev, &iter);
 -           udev;
 -           udev = netdev_next_upper_dev_rcu(dev, &iter)) {
 -              /* first is the upper device itself */
 -              ret = fn(udev, data);
 -              if (ret)
 -                      return ret;
 +      now = dev;
 +      iter = &dev->adj_list.upper;
  
 -              /* then look at all of its upper devices */
 -              ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
 -              if (ret)
 -                      return ret;
 +      while (1) {
 +              if (now != dev) {
 +                      ret = fn(now, data);
 +                      if (ret)
 +                              return ret;
 +              }
 +
 +              next = NULL;
 +              while (1) {
 +                      udev = netdev_next_upper_dev_rcu(now, &iter);
 +                      if (!udev)
 +                              break;
 +
 +                      next = udev;
 +                      niter = &udev->adj_list.upper;
 +                      dev_stack[cur] = now;
 +                      iter_stack[cur++] = iter;
 +                      break;
 +              }
 +
 +              if (!next) {
 +                      if (!cur)
 +                              return 0;
 +                      next = dev_stack[--cur];
 +                      niter = iter_stack[cur];
 +              }
 +
 +              now = next;
 +              iter = niter;
        }
  
        return 0;
  }
  EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
  
 +static bool __netdev_has_upper_dev(struct net_device *dev,
 +                                 struct net_device *upper_dev)
 +{
 +      ASSERT_RTNL();
 +
 +      return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
 +                                         upper_dev);
 +}
 +
  /**
   * netdev_lower_get_next_private - Get the next ->private from the
   *                               lower neighbour list
@@@ -7038,119 -6785,34 +7038,119 @@@ static struct net_device *netdev_next_l
        return lower->dev;
  }
  
 +static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
 +                                                struct list_head **iter,
 +                                                bool *ignore)
 +{
 +      struct netdev_adjacent *lower;
 +
 +      lower = list_entry((*iter)->next, struct netdev_adjacent, list);
 +
 +      if (&lower->list == &dev->adj_list.lower)
 +              return NULL;
 +
 +      *iter = &lower->list;
 +      *ignore = lower->ignore;
 +
 +      return lower->dev;
 +}
 +
  int netdev_walk_all_lower_dev(struct net_device *dev,
                              int (*fn)(struct net_device *dev,
                                        void *data),
                              void *data)
  {
 -      struct net_device *ldev;
 -      struct list_head *iter;
 -      int ret;
 +      struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 +      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
 +      int ret, cur = 0;
  
 -      for (iter = &dev->adj_list.lower,
 -           ldev = netdev_next_lower_dev(dev, &iter);
 -           ldev;
 -           ldev = netdev_next_lower_dev(dev, &iter)) {
 -              /* first is the lower device itself */
 -              ret = fn(ldev, data);
 -              if (ret)
 -                      return ret;
 +      now = dev;
 +      iter = &dev->adj_list.lower;
  
 -              /* then look at all of its lower devices */
 -              ret = netdev_walk_all_lower_dev(ldev, fn, data);
 -              if (ret)
 -                      return ret;
 +      while (1) {
 +              if (now != dev) {
 +                      ret = fn(now, data);
 +                      if (ret)
 +                              return ret;
 +              }
 +
 +              next = NULL;
 +              while (1) {
 +                      ldev = netdev_next_lower_dev(now, &iter);
 +                      if (!ldev)
 +                              break;
 +
 +                      next = ldev;
 +                      niter = &ldev->adj_list.lower;
 +                      dev_stack[cur] = now;
 +                      iter_stack[cur++] = iter;
 +                      break;
 +              }
 +
 +              if (!next) {
 +                      if (!cur)
 +                              return 0;
 +                      next = dev_stack[--cur];
 +                      niter = iter_stack[cur];
 +              }
 +
 +              now = next;
 +              iter = niter;
        }
  
        return 0;
  }
  EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
  
 +static int __netdev_walk_all_lower_dev(struct net_device *dev,
 +                                     int (*fn)(struct net_device *dev,
 +                                               void *data),
 +                                     void *data)
 +{
 +      struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 +      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
 +      int ret, cur = 0;
 +      bool ignore;
 +
 +      now = dev;
 +      iter = &dev->adj_list.lower;
 +
 +      while (1) {
 +              if (now != dev) {
 +                      ret = fn(now, data);
 +                      if (ret)
 +                              return ret;
 +              }
 +
 +              next = NULL;
 +              while (1) {
 +                      ldev = __netdev_next_lower_dev(now, &iter, &ignore);
 +                      if (!ldev)
 +                              break;
 +                      if (ignore)
 +                              continue;
 +
 +                      next = ldev;
 +                      niter = &ldev->adj_list.lower;
 +                      dev_stack[cur] = now;
 +                      iter_stack[cur++] = iter;
 +                      break;
 +              }
 +
 +              if (!next) {
 +                      if (!cur)
 +                              return 0;
 +                      next = dev_stack[--cur];
 +                      niter = iter_stack[cur];
 +              }
 +
 +              now = next;
 +              iter = niter;
 +      }
 +
 +      return 0;
 +}
 +
  static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
                                                    struct list_head **iter)
  {
        return lower->dev;
  }
  
 -int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 -                                int (*fn)(struct net_device *dev,
 -                                          void *data),
 -                                void *data)
 +static u8 __netdev_upper_depth(struct net_device *dev)
 +{
 +      struct net_device *udev;
 +      struct list_head *iter;
 +      u8 max_depth = 0;
 +      bool ignore;
 +
 +      for (iter = &dev->adj_list.upper,
 +           udev = __netdev_next_upper_dev(dev, &iter, &ignore);
 +           udev;
 +           udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
 +              if (ignore)
 +                      continue;
 +              if (max_depth < udev->upper_level)
 +                      max_depth = udev->upper_level;
 +      }
 +
 +      return max_depth;
 +}
 +
 +static u8 __netdev_lower_depth(struct net_device *dev)
  {
        struct net_device *ldev;
        struct list_head *iter;
 -      int ret;
 +      u8 max_depth = 0;
 +      bool ignore;
  
        for (iter = &dev->adj_list.lower,
 -           ldev = netdev_next_lower_dev_rcu(dev, &iter);
 +           ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
             ldev;
 -           ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
 -              /* first is the lower device itself */
 -              ret = fn(ldev, data);
 -              if (ret)
 -                      return ret;
 +           ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
 +              if (ignore)
 +                      continue;
 +              if (max_depth < ldev->lower_level)
 +                      max_depth = ldev->lower_level;
 +      }
  
 -              /* then look at all of its lower devices */
 -              ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
 -              if (ret)
 -                      return ret;
 +      return max_depth;
 +}
 +
 +static int __netdev_update_upper_level(struct net_device *dev, void *data)
 +{
 +      dev->upper_level = __netdev_upper_depth(dev) + 1;
 +      return 0;
 +}
 +
 +static int __netdev_update_lower_level(struct net_device *dev, void *data)
 +{
 +      dev->lower_level = __netdev_lower_depth(dev) + 1;
 +      return 0;
 +}
 +
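
The two helpers above maintain the invariant behind the new O(1) depth check: a device's upper_level/lower_level is 1 plus the maximum level of its direct uppers/lowers, and every link or unlink re-walks the affected side to refresh it. A hedged arithmetic illustration, assuming MAX_NEST_DEV is 8 and using an invented eth0/bond0/vlan0 stack (freshly allocated devices start at level 1, per alloc_netdev_mqs() below):

	#include <assert.h>

	struct dev { int upper_level, lower_level; };

	int main(void)
	{
		struct dev eth0 = {1, 1}, bond0 = {1, 1}, vlan0 = {1, 1};

		/* link eth0 under bond0; check 1 + 1 <= MAX_NEST_DEV first */
		eth0.upper_level  = bond0.upper_level + 1;	/* 2 */
		bond0.lower_level = eth0.lower_level + 1;	/* 2 */

		/* link bond0 under vlan0; check 2 + 1 <= MAX_NEST_DEV */
		bond0.upper_level = vlan0.upper_level + 1;	/* 2 */
		eth0.upper_level  = bond0.upper_level + 1;	/* 3, via the lower walk */
		vlan0.lower_level = bond0.lower_level + 1;	/* 3 */

		assert(eth0.upper_level == 3 && vlan0.lower_level == 3);
		return 0;
	}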
 +int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 +                                int (*fn)(struct net_device *dev,
 +                                          void *data),
 +                                void *data)
 +{
 +      struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 +      struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
 +      int ret, cur = 0;
 +
 +      now = dev;
 +      iter = &dev->adj_list.lower;
 +
 +      while (1) {
 +              if (now != dev) {
 +                      ret = fn(now, data);
 +                      if (ret)
 +                              return ret;
 +              }
 +
 +              next = NULL;
 +              while (1) {
 +                      ldev = netdev_next_lower_dev_rcu(now, &iter);
 +                      if (!ldev)
 +                              break;
 +
 +                      next = ldev;
 +                      niter = &ldev->adj_list.lower;
 +                      dev_stack[cur] = now;
 +                      iter_stack[cur++] = iter;
 +                      break;
 +              }
 +
 +              if (!next) {
 +                      if (!cur)
 +                              return 0;
 +                      next = dev_stack[--cur];
 +                      niter = iter_stack[cur];
 +              }
 +
 +              now = next;
 +              iter = niter;
        }
  
        return 0;
@@@ -7361,7 -6952,6 +7361,7 @@@ static int __netdev_adjacent_dev_insert
        adj->master = master;
        adj->ref_nr = 1;
        adj->private = private;
 +      adj->ignore = false;
        dev_hold(adj_dev);
  
        pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
@@@ -7512,17 -7102,14 +7512,17 @@@ static int __netdev_upper_dev_link(stru
                return -EBUSY;
  
        /* To prevent loops, check if dev is not upper device to upper_dev. */
 -      if (netdev_has_upper_dev(upper_dev, dev))
 +      if (__netdev_has_upper_dev(upper_dev, dev))
                return -EBUSY;
  
 +      if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
 +              return -EMLINK;
 +
        if (!master) {
 -              if (netdev_has_upper_dev(dev, upper_dev))
 +              if (__netdev_has_upper_dev(dev, upper_dev))
                        return -EEXIST;
        } else {
 -              master_dev = netdev_master_upper_dev_get(dev);
 +              master_dev = __netdev_master_upper_dev_get(dev);
                if (master_dev)
                        return master_dev == upper_dev ? -EEXIST : -EBUSY;
        }
        if (ret)
                goto rollback;
  
 +      __netdev_update_upper_level(dev, NULL);
 +      __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
 +
 +      __netdev_update_lower_level(upper_dev, NULL);
 +      __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
 +                                  NULL);
 +
        return 0;
  
  rollback:
@@@ -7633,96 -7213,9 +7633,96 @@@ void netdev_upper_dev_unlink(struct net
  
        call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
                                      &changeupper_info.info);
 +
 +      __netdev_update_upper_level(dev, NULL);
 +      __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
 +
 +      __netdev_update_lower_level(upper_dev, NULL);
 +      __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
 +                                  NULL);
  }
  EXPORT_SYMBOL(netdev_upper_dev_unlink);
  
 +static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
 +                                    struct net_device *lower_dev,
 +                                    bool val)
 +{
 +      struct netdev_adjacent *adj;
 +
 +      adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
 +      if (adj)
 +              adj->ignore = val;
 +
 +      adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
 +      if (adj)
 +              adj->ignore = val;
 +}
 +
 +static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
 +                                      struct net_device *lower_dev)
 +{
 +      __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
 +}
 +
 +static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
 +                                     struct net_device *lower_dev)
 +{
 +      __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
 +}
 +
 +int netdev_adjacent_change_prepare(struct net_device *old_dev,
 +                                 struct net_device *new_dev,
 +                                 struct net_device *dev,
 +                                 struct netlink_ext_ack *extack)
 +{
 +      int err;
 +
 +      if (!new_dev)
 +              return 0;
 +
 +      if (old_dev && new_dev != old_dev)
 +              netdev_adjacent_dev_disable(dev, old_dev);
 +
 +      err = netdev_upper_dev_link(new_dev, dev, extack);
 +      if (err) {
 +              if (old_dev && new_dev != old_dev)
 +                      netdev_adjacent_dev_enable(dev, old_dev);
 +              return err;
 +      }
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL(netdev_adjacent_change_prepare);
 +
 +void netdev_adjacent_change_commit(struct net_device *old_dev,
 +                                 struct net_device *new_dev,
 +                                 struct net_device *dev)
 +{
 +      if (!new_dev || !old_dev)
 +              return;
 +
 +      if (new_dev == old_dev)
 +              return;
 +
 +      netdev_adjacent_dev_enable(dev, old_dev);
 +      netdev_upper_dev_unlink(old_dev, dev);
 +}
 +EXPORT_SYMBOL(netdev_adjacent_change_commit);
 +
 +void netdev_adjacent_change_abort(struct net_device *old_dev,
 +                                struct net_device *new_dev,
 +                                struct net_device *dev)
 +{
 +      if (!new_dev)
 +              return;
 +
 +      if (old_dev && new_dev != old_dev)
 +              netdev_adjacent_dev_enable(dev, old_dev);
 +
 +      netdev_upper_dev_unlink(new_dev, dev);
 +}
 +EXPORT_SYMBOL(netdev_adjacent_change_abort);
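
Together these three exports form a prepare/commit/abort protocol for swapping one upper device for another without tripping the adjacency checks on the old link: prepare marks the old adjacency ignored and links the new device, commit drops the old link, abort re-enables it. A hedged sketch of the calling sequence under rtnl_lock(); example_apply_config() and the function itself are invented, not an in-tree user:

	static int example_swap_upper(struct net_device *dev,
				      struct net_device *old_dev,
				      struct net_device *new_dev,
				      struct netlink_ext_ack *extack)
	{
		int err;

		err = netdev_adjacent_change_prepare(old_dev, new_dev, dev, extack);
		if (err)
			return err;	/* prepare already unwound itself */

		err = example_apply_config(dev, new_dev);	/* hypothetical step */
		if (err) {
			netdev_adjacent_change_abort(old_dev, new_dev, dev);
			return err;
		}

		netdev_adjacent_change_commit(old_dev, new_dev, dev);
		return 0;
	}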
 +
  /**
   * netdev_bonding_info_change - Dispatch event about slave change
   * @dev: device
@@@ -7836,6 -7329,25 +7836,6 @@@ void *netdev_lower_dev_get_private(stru
  EXPORT_SYMBOL(netdev_lower_dev_get_private);
  
  
 -int dev_get_nest_level(struct net_device *dev)
 -{
 -      struct net_device *lower = NULL;
 -      struct list_head *iter;
 -      int max_nest = -1;
 -      int nest;
 -
 -      ASSERT_RTNL();
 -
 -      netdev_for_each_lower_dev(dev, lower, iter) {
 -              nest = dev_get_nest_level(lower);
 -              if (max_nest < nest)
 -                      max_nest = nest;
 -      }
 -
 -      return max_nest + 1;
 -}
 -EXPORT_SYMBOL(dev_get_nest_level);
 -
  /**
   * netdev_lower_change - Dispatch event about lower device state change
   * @lower_dev: device
@@@ -8642,8 -8154,7 +8642,8 @@@ int dev_change_xdp_fd(struct net_devic
                        return -EINVAL;
                }
  
 -              if (prog->aux->id == prog_id) {
 +              /* prog->aux->id may be 0 for orphaned device-bound progs */
 +              if (prog->aux->id && prog->aux->id == prog_id) {
                        bpf_prog_put(prog);
                        return 0;
                }
@@@ -8753,9 -8264,6 +8753,9 @@@ static void rollback_registered_many(st
                dev_uc_flush(dev);
                dev_mc_flush(dev);
  
 +              netdev_name_node_alt_flush(dev);
 +              netdev_name_node_free(dev->name_node);
 +
                if (dev->netdev_ops->ndo_uninit)
                        dev->netdev_ops->ndo_uninit(dev);
  
@@@ -9111,7 -8619,7 +9111,7 @@@ static void netdev_init_one_queue(struc
  {
        /* Initialize queue lock */
        spin_lock_init(&queue->_xmit_lock);
 -      netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
 +      lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
        queue->xmit_lock_owner = -1;
        netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
        queue->dev = dev;
@@@ -9158,43 -8666,6 +9158,43 @@@ void netif_tx_stop_all_queues(struct ne
  }
  EXPORT_SYMBOL(netif_tx_stop_all_queues);
  
 +static void netdev_register_lockdep_key(struct net_device *dev)
 +{
 +      lockdep_register_key(&dev->qdisc_tx_busylock_key);
 +      lockdep_register_key(&dev->qdisc_running_key);
 +      lockdep_register_key(&dev->qdisc_xmit_lock_key);
 +      lockdep_register_key(&dev->addr_list_lock_key);
 +}
 +
 +static void netdev_unregister_lockdep_key(struct net_device *dev)
 +{
 +      lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
 +      lockdep_unregister_key(&dev->qdisc_running_key);
 +      lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
 +      lockdep_unregister_key(&dev->addr_list_lock_key);
 +}
 +
 +void netdev_update_lockdep_key(struct net_device *dev)
 +{
 +      struct netdev_queue *queue;
 +      int i;
 +
 +      lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
 +      lockdep_unregister_key(&dev->addr_list_lock_key);
 +
 +      lockdep_register_key(&dev->qdisc_xmit_lock_key);
 +      lockdep_register_key(&dev->addr_list_lock_key);
 +
 +      lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
 +      for (i = 0; i < dev->num_tx_queues; i++) {
 +              queue = netdev_get_tx_queue(dev, i);
 +
 +              lockdep_set_class(&queue->_xmit_lock,
 +                                &dev->qdisc_xmit_lock_key);
 +      }
 +}
 +EXPORT_SYMBOL(netdev_update_lockdep_key);
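
The pattern here is dynamic lockdep keys owned by the object instead of static per-type key arrays: register a key with the instance, bind the lock's class to it, and unregister the key only once no lock uses it. A minimal kernel-context sketch with an invented struct foo:

	#include <linux/lockdep.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {
		spinlock_t lock;
		struct lock_class_key lock_key;	/* one lock class per instance */
	};

	static struct foo *foo_alloc(void)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return NULL;
		lockdep_register_key(&f->lock_key);
		spin_lock_init(&f->lock);
		lockdep_set_class(&f->lock, &f->lock_key);
		return f;
	}

	static void foo_free(struct foo *f)
	{
		lockdep_unregister_key(&f->lock_key);	/* key must outlive the lock */
		kfree(f);
	}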
 +
  /**
   *    register_netdevice      - register a network device
   *    @dev: device to register
@@@ -9229,17 -8700,12 +9229,17 @@@ int register_netdevice(struct net_devic
        BUG_ON(!net);
  
        spin_lock_init(&dev->addr_list_lock);
 -      netdev_set_addr_lockdep_class(dev);
 +      lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
  
        ret = dev_get_valid_name(net, dev, dev->name);
        if (ret < 0)
                goto out;
  
 +      ret = -ENOMEM;
 +      dev->name_node = netdev_name_node_head_alloc(dev);
 +      if (!dev->name_node)
 +              goto out;
 +
        /* Init, if this function is available */
        if (dev->netdev_ops->ndo_init) {
                ret = dev->netdev_ops->ndo_init(dev);
@@@ -9361,8 -8827,6 +9361,8 @@@ out
        return ret;
  
  err_uninit:
 +      if (dev->name_node)
 +              netdev_name_node_free(dev->name_node);
        if (dev->netdev_ops->ndo_uninit)
                dev->netdev_ops->ndo_uninit(dev);
        if (dev->priv_destructor)
@@@ -9746,12 -9210,8 +9746,12 @@@ struct net_device *alloc_netdev_mqs(in
  
        dev_net_set(dev, &init_net);
  
 +      netdev_register_lockdep_key(dev);
 +
        dev->gso_max_size = GSO_MAX_SIZE;
        dev->gso_max_segs = GSO_MAX_SEGS;
 +      dev->upper_level = 1;
 +      dev->lower_level = 1;
  
        INIT_LIST_HEAD(&dev->napi_list);
        INIT_LIST_HEAD(&dev->unreg_list);
@@@ -9832,8 -9292,6 +9832,8 @@@ void free_netdev(struct net_device *dev
        free_percpu(dev->pcpu_refcnt);
        dev->pcpu_refcnt = NULL;
  
 +      netdev_unregister_lockdep_key(dev);
 +
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                netdev_freemem(dev);
@@@ -10002,7 -9460,7 +10002,7 @@@ int dev_change_net_namespace(struct net
        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
        rcu_barrier();
  
 -      new_nsid = peernet2id_alloc(dev_net(dev), net);
 +      new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
        /* If there is an ifindex conflict assign a new one */
        if (__dev_get_by_index(net, dev->ifindex))
                new_ifindex = dev_new_index(net);
@@@ -10177,8 -9635,6 +10177,8 @@@ static int __net_init netdev_init(struc
        if (net->dev_index_head == NULL)
                goto err_idx;
  
 +      RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
 +
        return 0;
  
  err_idx:
diff --combined net/netfilter/nf_tables_api.c
index ff04cdc87f7604777ea30cf5595912420cdc5889,337997464240e9568773897c8647b95d8bfbfc7e..062b73a83af0efd2537669ec515312f50815d041
@@@ -151,64 -151,11 +151,64 @@@ static void nft_set_trans_bind(const st
        }
  }
  
 +static int nft_netdev_register_hooks(struct net *net,
 +                                   struct list_head *hook_list)
 +{
 +      struct nft_hook *hook;
 +      int err, j;
 +
 +      j = 0;
 +      list_for_each_entry(hook, hook_list, list) {
 +              err = nf_register_net_hook(net, &hook->ops);
 +              if (err < 0)
 +                      goto err_register;
 +
 +              j++;
 +      }
 +      return 0;
 +
 +err_register:
 +      list_for_each_entry(hook, hook_list, list) {
 +              if (j-- <= 0)
 +                      break;
 +
 +              nf_unregister_net_hook(net, &hook->ops);
 +      }
 +      return err;
 +}
 +
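
The register function above uses the classic partial-failure unwind: count successful registrations in j, and on error walk the same list again releasing exactly the first j entries. A self-contained user-space sketch of the idiom, with acquire()/release() standing in for nf_register_net_hook() and its counterpart (the failure at index 2 is staged):

	#include <stdio.h>

	static int acquire(int i) { return i == 2 ? -1 : 0; }
	static void release(int i) { printf("released %d\n", i); }

	static int register_all(int n)
	{
		int err, i, j = 0;

		for (i = 0; i < n; i++) {
			err = acquire(i);
			if (err < 0)
				goto err_register;
			j++;
		}
		return 0;

	err_register:
		for (i = 0; i < n; i++) {
			if (j-- <= 0)	/* only undo what actually succeeded */
				break;
			release(i);
		}
		return err;
	}

	int main(void) { return register_all(4) ? 1 : 0; }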
 +static void nft_netdev_unregister_hooks(struct net *net,
 +                                      struct list_head *hook_list)
 +{
 +      struct nft_hook *hook;
 +
 +      list_for_each_entry(hook, hook_list, list)
 +              nf_unregister_net_hook(net, &hook->ops);
 +}
 +
 +static int nft_register_basechain_hooks(struct net *net, int family,
 +                                      struct nft_base_chain *basechain)
 +{
 +      if (family == NFPROTO_NETDEV)
 +              return nft_netdev_register_hooks(net, &basechain->hook_list);
 +
 +      return nf_register_net_hook(net, &basechain->ops);
 +}
 +
 +static void nft_unregister_basechain_hooks(struct net *net, int family,
 +                                         struct nft_base_chain *basechain)
 +{
 +      if (family == NFPROTO_NETDEV)
 +              nft_netdev_unregister_hooks(net, &basechain->hook_list);
 +      else
 +              nf_unregister_net_hook(net, &basechain->ops);
 +}
 +
  static int nf_tables_register_hook(struct net *net,
                                   const struct nft_table *table,
                                   struct nft_chain *chain)
  {
 -      const struct nft_base_chain *basechain;
 +      struct nft_base_chain *basechain;
        const struct nf_hook_ops *ops;
  
        if (table->flags & NFT_TABLE_F_DORMANT ||
        if (basechain->type->ops_register)
                return basechain->type->ops_register(net, ops);
  
 -      return nf_register_net_hook(net, ops);
 +      return nft_register_basechain_hooks(net, table->family, basechain);
  }
  
  static void nf_tables_unregister_hook(struct net *net,
                                      const struct nft_table *table,
                                      struct nft_chain *chain)
  {
 -      const struct nft_base_chain *basechain;
 +      struct nft_base_chain *basechain;
        const struct nf_hook_ops *ops;
  
        if (table->flags & NFT_TABLE_F_DORMANT ||
        if (basechain->type->ops_unregister)
                return basechain->type->ops_unregister(net, ops);
  
 -      nf_unregister_net_hook(net, ops);
 +      nft_unregister_basechain_hooks(net, table->family, basechain);
  }
  
  static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
@@@ -361,7 -308,6 +361,7 @@@ static struct nft_trans *nft_trans_rule
  
  static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
  {
 +      struct nft_flow_rule *flow;
        struct nft_trans *trans;
        int err;
  
        if (trans == NULL)
                return -ENOMEM;
  
 +      if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
 +              flow = nft_flow_rule_create(ctx->net, rule);
 +              if (IS_ERR(flow)) {
 +                      nft_trans_destroy(trans);
 +                      return PTR_ERR(flow);
 +              }
 +
 +              nft_trans_flow_rule(trans) = flow;
 +      }
 +
        err = nf_tables_delrule_deactivate(ctx, rule);
        if (err < 0) {
                nft_trans_destroy(trans);
@@@ -806,8 -742,7 +806,8 @@@ static void nft_table_disable(struct ne
                if (cnt && i++ == cnt)
                        break;
  
 -              nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
 +              nft_unregister_basechain_hooks(net, table->family,
 +                                             nft_base_chain(chain));
        }
  }
  
@@@ -822,16 -757,14 +822,16 @@@ static int nf_tables_table_enable(struc
                if (!nft_is_base_chain(chain))
                        continue;
  
 -              err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
 +              err = nft_register_basechain_hooks(net, table->family,
 +                                                 nft_base_chain(chain));
                if (err < 0)
 -                      goto err;
 +                      goto err_register_hooks;
  
                i++;
        }
        return 0;
 -err:
 +
 +err_register_hooks:
        if (i)
                nft_table_disable(net, table, i);
        return err;
@@@ -1292,46 -1225,6 +1292,46 @@@ nla_put_failure
        return -ENOSPC;
  }
  
 +static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
 +                                 const struct nft_base_chain *basechain)
 +{
 +      const struct nf_hook_ops *ops = &basechain->ops;
 +      struct nft_hook *hook, *first = NULL;
 +      struct nlattr *nest, *nest_devs;
 +      int n = 0;
 +
 +      nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
 +      if (nest == NULL)
 +              goto nla_put_failure;
 +      if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
 +              goto nla_put_failure;
 +      if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
 +              goto nla_put_failure;
 +
 +      if (family == NFPROTO_NETDEV) {
 +              nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
 +              list_for_each_entry(hook, &basechain->hook_list, list) {
 +                      if (!first)
 +                              first = hook;
 +
 +                      if (nla_put_string(skb, NFTA_DEVICE_NAME,
 +                                         hook->ops.dev->name))
 +                              goto nla_put_failure;
 +                      n++;
 +              }
 +              nla_nest_end(skb, nest_devs);
 +
 +              if (n == 1 &&
 +                  nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
 +                      goto nla_put_failure;
 +      }
 +      nla_nest_end(skb, nest);
 +
 +      return 0;
 +nla_put_failure:
 +      return -1;
 +}
 +
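
The dump helper above is a standard netlink nest pairing: everything written between nla_nest_start_noflag() and nla_nest_end() lands inside NFTA_CHAIN_HOOK, with a second nest (NFTA_HOOK_DEVS) holding one NFTA_DEVICE_NAME per hook and the single-device NFTA_HOOK_DEV kept for old userspace. A kernel-context sketch of the pairing itself; the EXAMPLE_ATTR_* ids are invented:

	static int example_dump_nested(struct sk_buff *skb)
	{
		struct nlattr *nest;

		nest = nla_nest_start_noflag(skb, EXAMPLE_ATTR_NEST);
		if (!nest)
			return -1;
		if (nla_put_be32(skb, EXAMPLE_ATTR_VALUE, htonl(42)))
			goto nla_put_failure;
		nla_nest_end(skb, nest);	/* patches the nest length in place */
		return 0;

	nla_put_failure:
		nla_nest_cancel(skb, nest);	/* discard the partial nest */
		return -1;
	}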
  static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
                                     u32 portid, u32 seq, int event, u32 flags,
                                     int family, const struct nft_table *table,
  
        if (nft_is_base_chain(chain)) {
                const struct nft_base_chain *basechain = nft_base_chain(chain);
 -              const struct nf_hook_ops *ops = &basechain->ops;
                struct nft_stats __percpu *stats;
 -              struct nlattr *nest;
  
 -              nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
 -              if (nest == NULL)
 -                      goto nla_put_failure;
 -              if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
 -                      goto nla_put_failure;
 -              if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
 -                      goto nla_put_failure;
 -              if (basechain->dev_name[0] &&
 -                  nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
 +              if (nft_dump_basechain_hook(skb, family, basechain))
                        goto nla_put_failure;
 -              nla_nest_end(skb, nest);
  
                if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
                                 htonl(basechain->policy)))
@@@ -1557,8 -1461,9 +1557,9 @@@ static void nft_chain_stats_replace(str
        if (!nft_trans_chain_stats(trans))
                return;
  
-       rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
-                          lockdep_commit_lock_is_held(trans->ctx.net));
+       nft_trans_chain_stats(trans) =
+               rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
+                                   lockdep_commit_lock_is_held(trans->ctx.net));
  
        if (!nft_trans_chain_stats(trans))
                static_branch_inc(&nft_counters_enabled);
@@@ -1581,7 -1486,6 +1582,7 @@@ static void nf_tables_chain_free_chain_
  static void nf_tables_chain_destroy(struct nft_ctx *ctx)
  {
        struct nft_chain *chain = ctx->chain;
 +      struct nft_hook *hook, *next;
  
        if (WARN_ON(chain->use > 0))
                return;
        if (nft_is_base_chain(chain)) {
                struct nft_base_chain *basechain = nft_base_chain(chain);
  
 +              if (ctx->family == NFPROTO_NETDEV) {
 +                      list_for_each_entry_safe(hook, next,
 +                                               &basechain->hook_list, list) {
 +                              list_del_rcu(&hook->list);
 +                              kfree_rcu(hook, rcu);
 +                      }
 +              }
                module_put(basechain->type->owner);
                if (rcu_access_pointer(basechain->stats)) {
                        static_branch_dec(&nft_counters_enabled);
        }
  }
  
 +static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
 +                                            const struct nlattr *attr)
 +{
 +      struct net_device *dev;
 +      char ifname[IFNAMSIZ];
 +      struct nft_hook *hook;
 +      int err;
 +
 +      hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
 +      if (!hook) {
 +              err = -ENOMEM;
 +              goto err_hook_alloc;
 +      }
 +
 +      nla_strlcpy(ifname, attr, IFNAMSIZ);
 +      dev = __dev_get_by_name(net, ifname);
 +      if (!dev) {
 +              err = -ENOENT;
 +              goto err_hook_dev;
 +      }
 +      hook->ops.dev = dev;
 +
 +      return hook;
 +
 +err_hook_dev:
 +      kfree(hook);
 +err_hook_alloc:
 +      return ERR_PTR(err);
 +}
 +
 +static bool nft_hook_list_find(struct list_head *hook_list,
 +                             const struct nft_hook *this)
 +{
 +      struct nft_hook *hook;
 +
 +      list_for_each_entry(hook, hook_list, list) {
 +              if (this->ops.dev == hook->ops.dev)
 +                      return true;
 +      }
 +
 +      return false;
 +}
 +
 +static int nf_tables_parse_netdev_hooks(struct net *net,
 +                                      const struct nlattr *attr,
 +                                      struct list_head *hook_list)
 +{
 +      struct nft_hook *hook, *next;
 +      const struct nlattr *tmp;
 +      int rem, n = 0, err;
 +
 +      nla_for_each_nested(tmp, attr, rem) {
 +              if (nla_type(tmp) != NFTA_DEVICE_NAME) {
 +                      err = -EINVAL;
 +                      goto err_hook;
 +              }
 +
 +              hook = nft_netdev_hook_alloc(net, tmp);
 +              if (IS_ERR(hook)) {
 +                      err = PTR_ERR(hook);
 +                      goto err_hook;
 +              }
 +              if (nft_hook_list_find(hook_list, hook)) {
 +                      err = -EEXIST;
 +                      goto err_hook;
 +              }
 +              list_add_tail(&hook->list, hook_list);
 +              n++;
 +
 +              if (n == NFT_NETDEVICE_MAX) {
 +                      err = -EFBIG;
 +                      goto err_hook;
 +              }
 +      }
 +      if (!n)
 +              return -EINVAL;
 +
 +      return 0;
 +
 +err_hook:
 +      list_for_each_entry_safe(hook, next, hook_list, list) {
 +              list_del(&hook->list);
 +              kfree(hook);
 +      }
 +      return err;
 +}
 +
  struct nft_chain_hook {
        u32                             num;
        s32                             priority;
        const struct nft_chain_type     *type;
 -      struct net_device               *dev;
 +      struct list_head                list;
  };
  
 +static int nft_chain_parse_netdev(struct net *net,
 +                                struct nlattr *tb[],
 +                                struct list_head *hook_list)
 +{
 +      struct nft_hook *hook;
 +      int err;
 +
 +      if (tb[NFTA_HOOK_DEV]) {
 +              hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
 +              if (IS_ERR(hook))
 +                      return PTR_ERR(hook);
 +
 +              list_add_tail(&hook->list, hook_list);
 +      } else if (tb[NFTA_HOOK_DEVS]) {
 +              err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
 +                                                 hook_list);
 +              if (err < 0)
 +                      return err;
 +      } else {
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
  static int nft_chain_parse_hook(struct net *net,
                                const struct nlattr * const nla[],
                                struct nft_chain_hook *hook, u8 family,
  {
        struct nlattr *ha[NFTA_HOOK_MAX + 1];
        const struct nft_chain_type *type;
 -      struct net_device *dev;
        int err;
  
        lockdep_assert_held(&net->nft.commit_mutex);
  
        hook->type = type;
  
 -      hook->dev = NULL;
 +      INIT_LIST_HEAD(&hook->list);
        if (family == NFPROTO_NETDEV) {
 -              char ifname[IFNAMSIZ];
 -
 -              if (!ha[NFTA_HOOK_DEV]) {
 -                      module_put(type->owner);
 -                      return -EOPNOTSUPP;
 -              }
 -
 -              nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
 -              dev = __dev_get_by_name(net, ifname);
 -              if (!dev) {
 +              err = nft_chain_parse_netdev(net, ha, &hook->list);
 +              if (err < 0) {
                        module_put(type->owner);
 -                      return -ENOENT;
 +                      return err;
                }
 -              hook->dev = dev;
 -      } else if (ha[NFTA_HOOK_DEV]) {
 +      } else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
                module_put(type->owner);
                return -EOPNOTSUPP;
        }
  
  static void nft_chain_release_hook(struct nft_chain_hook *hook)
  {
 +      struct nft_hook *h, *next;
 +
 +      list_for_each_entry_safe(h, next, &hook->list, list) {
 +              list_del(&h->list);
 +              kfree(h);
 +      }
        module_put(hook->type->owner);
  }
  
@@@ -1822,49 -1611,6 +1823,49 @@@ static struct nft_rule **nf_tables_chai
        return kvmalloc(alloc, GFP_KERNEL);
  }
  
 +static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
 +                                  const struct nft_chain_hook *hook,
 +                                  struct nft_chain *chain)
 +{
 +      ops->pf         = family;
 +      ops->hooknum    = hook->num;
 +      ops->priority   = hook->priority;
 +      ops->priv       = chain;
 +      ops->hook       = hook->type->hooks[ops->hooknum];
 +}
 +
 +static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
 +                            struct nft_chain_hook *hook, u32 flags)
 +{
 +      struct nft_chain *chain;
 +      struct nft_hook *h;
 +
 +      basechain->type = hook->type;
 +      INIT_LIST_HEAD(&basechain->hook_list);
 +      chain = &basechain->chain;
 +
 +      if (family == NFPROTO_NETDEV) {
 +              list_splice_init(&hook->list, &basechain->hook_list);
 +              list_for_each_entry(h, &basechain->hook_list, list)
 +                      nft_basechain_hook_init(&h->ops, family, hook, chain);
 +
 +              basechain->ops.hooknum  = hook->num;
 +              basechain->ops.priority = hook->priority;
 +      } else {
 +              nft_basechain_hook_init(&basechain->ops, family, hook, chain);
 +      }
 +
 +      chain->flags |= NFT_BASE_CHAIN | flags;
 +      basechain->policy = NF_ACCEPT;
 +      if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
 +          nft_chain_offload_priority(basechain) < 0)
 +              return -EOPNOTSUPP;
 +
 +      flow_block_init(&basechain->flow_block);
 +
 +      return 0;
 +}
 +
  static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                              u8 policy, u32 flags)
  {
  
        if (nla[NFTA_CHAIN_HOOK]) {
                struct nft_chain_hook hook;
 -              struct nf_hook_ops *ops;
  
                err = nft_chain_parse_hook(net, nla, &hook, family, true);
                if (err < 0)
                        nft_chain_release_hook(&hook);
                        return -ENOMEM;
                }
 -
 -              if (hook.dev != NULL)
 -                      strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
 +              chain = &basechain->chain;
  
                if (nla[NFTA_CHAIN_COUNTERS]) {
                        stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
                        static_branch_inc(&nft_counters_enabled);
                }
  
 -              basechain->type = hook.type;
 -              chain = &basechain->chain;
 -
 -              ops             = &basechain->ops;
 -              ops->pf         = family;
 -              ops->hooknum    = hook.num;
 -              ops->priority   = hook.priority;
 -              ops->priv       = chain;
 -              ops->hook       = hook.type->hooks[ops->hooknum];
 -              ops->dev        = hook.dev;
 -
 -              chain->flags |= NFT_BASE_CHAIN | flags;
 -              basechain->policy = NF_ACCEPT;
 -              if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
 -                  nft_chain_offload_priority(basechain) < 0)
 -                      return -EOPNOTSUPP;
 -
 -              flow_block_init(&basechain->flow_block);
 +              err = nft_basechain_init(basechain, family, &hook, flags);
 +              if (err < 0) {
 +                      nft_chain_release_hook(&hook);
 +                      kfree(basechain);
 +                      return err;
 +              }
        } else {
                chain = kzalloc(sizeof(*chain), GFP_KERNEL);
                if (chain == NULL)
@@@ -1971,25 -1732,6 +1972,25 @@@ err1
        return err;
  }
  
 +static bool nft_hook_list_equal(struct list_head *hook_list1,
 +                              struct list_head *hook_list2)
 +{
 +      struct nft_hook *hook;
 +      int n = 0, m = 0;
 +
 +      list_for_each_entry(hook, hook_list2, list) {
 +              if (!nft_hook_list_find(hook_list1, hook))
 +                      return false;
 +
 +              n++;
 +      }
 +      list_for_each_entry(hook, hook_list1, list)
 +              m++;
 +
 +      return n == m;
 +}
 +
  static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
                              u32 flags)
  {
                        return -EBUSY;
                }
  
 -              ops = &basechain->ops;
 -              if (ops->hooknum != hook.num ||
 -                  ops->priority != hook.priority ||
 -                  ops->dev != hook.dev) {
 -                      nft_chain_release_hook(&hook);
 -                      return -EBUSY;
 +              if (ctx->family == NFPROTO_NETDEV) {
 +                      if (!nft_hook_list_equal(&basechain->hook_list,
 +                                               &hook.list)) {
 +                              nft_chain_release_hook(&hook);
 +                              return -EBUSY;
 +                      }
 +              } else {
 +                      ops = &basechain->ops;
 +                      if (ops->hooknum != hook.num ||
 +                          ops->priority != hook.priority) {
 +                              nft_chain_release_hook(&hook);
 +                              return -EBUSY;
 +                      }
                }
                nft_chain_release_hook(&hook);
        }
@@@ -2188,7 -1923,6 +2189,7 @@@ static int nf_tables_newchain(struct ne
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        return -EOPNOTSUPP;
  
 +              flags |= chain->flags & NFT_BASE_CHAIN;
                return nf_tables_updchain(&ctx, genmask, policy, flags);
        }
  
@@@ -5410,6 -5144,9 +5411,6 @@@ static int nf_tables_updobj(const struc
        struct nft_trans *trans;
        int err;
  
 -      if (!obj->ops->update)
 -              return -EOPNOTSUPP;
 -
        trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
                                sizeof(struct nft_trans_obj));
        if (!trans)
@@@ -5846,7 -5583,6 +5847,7 @@@ static const struct nla_policy nft_flow
                                            .len = NFT_NAME_MAXLEN - 1 },
        [NFTA_FLOWTABLE_HOOK]           = { .type = NLA_NESTED },
        [NFTA_FLOWTABLE_HANDLE]         = { .type = NLA_U64 },
 +      [NFTA_FLOWTABLE_FLAGS]          = { .type = NLA_U32 },
  };
  
  struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
@@@ -5893,6 -5629,43 +5894,6 @@@ nft_flowtable_lookup_byhandle(const str
         return ERR_PTR(-ENOENT);
  }
  
 -static int nf_tables_parse_devices(const struct nft_ctx *ctx,
 -                                 const struct nlattr *attr,
 -                                 struct net_device *dev_array[], int *len)
 -{
 -      const struct nlattr *tmp;
 -      struct net_device *dev;
 -      char ifname[IFNAMSIZ];
 -      int rem, n = 0, err;
 -
 -      nla_for_each_nested(tmp, attr, rem) {
 -              if (nla_type(tmp) != NFTA_DEVICE_NAME) {
 -                      err = -EINVAL;
 -                      goto err1;
 -              }
 -
 -              nla_strlcpy(ifname, tmp, IFNAMSIZ);
 -              dev = __dev_get_by_name(ctx->net, ifname);
 -              if (!dev) {
 -                      err = -ENOENT;
 -                      goto err1;
 -              }
 -
 -              dev_array[n++] = dev;
 -              if (n == NFT_FLOWTABLE_DEVICE_MAX) {
 -                      err = -EFBIG;
 -                      goto err1;
 -              }
 -      }
 -      if (!len)
 -              return -EINVAL;
 -
 -      err = 0;
 -err1:
 -      *len = n;
 -      return err;
 -}
 -
  static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
        [NFTA_FLOWTABLE_HOOK_NUM]       = { .type = NLA_U32 },
        [NFTA_FLOWTABLE_HOOK_PRIORITY]  = { .type = NLA_U32 },
@@@ -5903,10 -5676,11 +5904,10 @@@ static int nf_tables_flowtable_parse_ho
                                          const struct nlattr *attr,
                                          struct nft_flowtable *flowtable)
  {
 -      struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
        struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
 -      struct nf_hook_ops *ops;
 +      struct nft_hook *hook;
        int hooknum, priority;
 -      int err, n = 0, i;
 +      int err;
  
        err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
                                          nft_flowtable_hook_policy, NULL);
  
        priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
  
 -      err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
 -                                    dev_array, &n);
 +      err = nf_tables_parse_netdev_hooks(ctx->net,
 +                                         tb[NFTA_FLOWTABLE_HOOK_DEVS],
 +                                         &flowtable->hook_list);
        if (err < 0)
                return err;
  
 -      ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
 -      if (!ops)
 -              return -ENOMEM;
 -
 -      flowtable->hooknum      = hooknum;
 -      flowtable->priority     = priority;
 -      flowtable->ops          = ops;
 -      flowtable->ops_len      = n;
 +      flowtable->hooknum              = hooknum;
 +      flowtable->data.priority        = priority;
  
 -      for (i = 0; i < n; i++) {
 -              flowtable->ops[i].pf            = NFPROTO_NETDEV;
 -              flowtable->ops[i].hooknum       = hooknum;
 -              flowtable->ops[i].priority      = priority;
 -              flowtable->ops[i].priv          = &flowtable->data;
 -              flowtable->ops[i].hook          = flowtable->data.type->hook;
 -              flowtable->ops[i].dev           = dev_array[i];
 +      list_for_each_entry(hook, &flowtable->hook_list, list) {
 +              hook->ops.pf            = NFPROTO_NETDEV;
 +              hook->ops.hooknum       = hooknum;
 +              hook->ops.priority      = priority;
 +              hook->ops.priv          = &flowtable->data;
 +              hook->ops.hook          = flowtable->data.type->hook;
        }
  
        return err;
@@@ -5975,73 -5755,17 +5976,73 @@@ nft_flowtable_type_get(struct net *net
        return ERR_PTR(-ENOENT);
  }
  
 +static void nft_unregister_flowtable_hook(struct net *net,
 +                                        struct nft_flowtable *flowtable,
 +                                        struct nft_hook *hook)
 +{
 +      nf_unregister_net_hook(net, &hook->ops);
 +      flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
 +                                  FLOW_BLOCK_UNBIND);
 +}
 +
  static void nft_unregister_flowtable_net_hooks(struct net *net,
                                               struct nft_flowtable *flowtable)
  {
 -      int i;
 +      struct nft_hook *hook;
  
 -      for (i = 0; i < flowtable->ops_len; i++) {
 -              if (!flowtable->ops[i].dev)
 -                      continue;
 +      list_for_each_entry(hook, &flowtable->hook_list, list)
 +              nft_unregister_flowtable_hook(net, flowtable, hook);
 +}
 +
 +static int nft_register_flowtable_net_hooks(struct net *net,
 +                                          struct nft_table *table,
 +                                          struct nft_flowtable *flowtable)
 +{
 +      struct nft_hook *hook, *hook2, *next;
 +      struct nft_flowtable *ft;
 +      int err, i = 0;
 +
 +      list_for_each_entry(hook, &flowtable->hook_list, list) {
 +              list_for_each_entry(ft, &table->flowtables, list) {
 +                      list_for_each_entry(hook2, &ft->hook_list, list) {
 +                              if (hook->ops.dev == hook2->ops.dev &&
 +                                  hook->ops.pf == hook2->ops.pf) {
 +                                      err = -EBUSY;
 +                                      goto err_unregister_net_hooks;
 +                              }
 +                      }
 +              }
 +
 +              err = flowtable->data.type->setup(&flowtable->data,
 +                                                hook->ops.dev,
 +                                                FLOW_BLOCK_BIND);
 +              if (err < 0)
 +                      goto err_unregister_net_hooks;
 +
 +              err = nf_register_net_hook(net, &hook->ops);
 +              if (err < 0) {
 +                      flowtable->data.type->setup(&flowtable->data,
 +                                                  hook->ops.dev,
 +                                                  FLOW_BLOCK_UNBIND);
 +                      goto err_unregister_net_hooks;
 +              }
 +
 +              i++;
 +      }
 +
 +      return 0;
  
 -              nf_unregister_net_hook(net, &flowtable->ops[i]);
 +err_unregister_net_hooks:
 +      list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
 +              if (i-- <= 0)
 +                      break;
 +
 +              nft_unregister_flowtable_hook(net, flowtable, hook);
 +              list_del_rcu(&hook->list);
 +              kfree_rcu(hook, rcu);
        }
 +
 +      return err;
  }
  
  static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
  {
        const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
        const struct nf_flowtable_type *type;
 -      struct nft_flowtable *flowtable, *ft;
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
 +      struct nft_flowtable *flowtable;
 +      struct nft_hook *hook, *next;
        struct nft_table *table;
        struct nft_ctx ctx;
 -      int err, i, k;
 +      int err;
  
        if (!nla[NFTA_FLOWTABLE_TABLE] ||
            !nla[NFTA_FLOWTABLE_NAME] ||
  
        flowtable->table = table;
        flowtable->handle = nf_tables_alloc_handle(table);
 +      INIT_LIST_HEAD(&flowtable->hook_list);
  
        flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
        if (!flowtable->name) {
                goto err2;
        }
  
 +      if (nla[NFTA_FLOWTABLE_FLAGS]) {
 +              flowtable->data.flags =
 +                      ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
 +              if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD)
 +                      goto err3;
 +      }
 +
 +      write_pnet(&flowtable->data.net, net);
        flowtable->data.type = type;
        err = type->init(&flowtable->data);
        if (err < 0)
        if (err < 0)
                goto err4;
  
 -      for (i = 0; i < flowtable->ops_len; i++) {
 -              if (!flowtable->ops[i].dev)
 -                      continue;
 -
 -              list_for_each_entry(ft, &table->flowtables, list) {
 -                      for (k = 0; k < ft->ops_len; k++) {
 -                              if (!ft->ops[k].dev)
 -                                      continue;
 -
 -                              if (flowtable->ops[i].dev == ft->ops[k].dev &&
 -                                  flowtable->ops[i].pf == ft->ops[k].pf) {
 -                                      err = -EBUSY;
 -                                      goto err5;
 -                              }
 -                      }
 -              }
 -
 -              err = nf_register_net_hook(net, &flowtable->ops[i]);
 -              if (err < 0)
 -                      goto err5;
 -      }
 +      err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
 +      if (err < 0)
 +              goto err4;
  
        err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
        if (err < 0)
 -              goto err6;
 +              goto err5;
  
        list_add_tail_rcu(&flowtable->list, &table->flowtables);
        table->use++;
  
        return 0;
 -err6:
 -      i = flowtable->ops_len;
  err5:
 -      for (k = i - 1; k >= 0; k--)
 -              nf_unregister_net_hook(net, &flowtable->ops[k]);
 -
 -      kfree(flowtable->ops);
 +      list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
 +              nft_unregister_flowtable_hook(net, flowtable, hook);
 +              list_del_rcu(&hook->list);
 +              kfree_rcu(hook, rcu);
 +      }
  err4:
        flowtable->data.type->free(&flowtable->data);
  err3:
@@@ -6213,8 -5946,8 +6214,8 @@@ static int nf_tables_fill_flowtable_inf
  {
        struct nlattr *nest, *nest_devs;
        struct nfgenmsg *nfmsg;
 +      struct nft_hook *hook;
        struct nlmsghdr *nlh;
 -      int i;
  
        event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
            nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
            nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
            nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
 -                       NFTA_FLOWTABLE_PAD))
 +                       NFTA_FLOWTABLE_PAD) ||
 +          nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
                goto nla_put_failure;
  
        nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
        if (!nest)
                goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
 -          nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
 +          nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->data.priority)))
                goto nla_put_failure;
  
        nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
        if (!nest_devs)
                goto nla_put_failure;
  
 -      for (i = 0; i < flowtable->ops_len; i++) {
 -              const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
 -
 -              if (dev &&
 -                  nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
 +      list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
 +              if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
                        goto nla_put_failure;
        }
        nla_nest_end(skb, nest_devs);
  
  static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
  {
 -      kfree(flowtable->ops);
 +      struct nft_hook *hook, *next;
 +
 +      list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
 +              list_del_rcu(&hook->list);
 +              kfree(hook);
 +      }
        kfree(flowtable->name);
        flowtable->data.type->free(&flowtable->data);
        module_put(flowtable->data.type->owner);
@@@ -6482,15 -6212,14 +6483,15 @@@ nla_put_failure
  static void nft_flowtable_event(unsigned long event, struct net_device *dev,
                                struct nft_flowtable *flowtable)
  {
 -      int i;
 +      struct nft_hook *hook;
  
 -      for (i = 0; i < flowtable->ops_len; i++) {
 -              if (flowtable->ops[i].dev != dev)
 +      list_for_each_entry(hook, &flowtable->hook_list, list) {
 +              if (hook->ops.dev != dev)
                        continue;
  
 -              nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
 -              flowtable->ops[i].dev = NULL;
 +              nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
 +              list_del_rcu(&hook->list);
 +              kfree_rcu(hook, rcu);
                break;
        }
  }
@@@ -6771,8 -6500,7 +6772,8 @@@ static void nft_obj_commit_update(struc
        obj = nft_trans_obj(trans);
        newobj = nft_trans_obj_newobj(trans);
  
 -      obj->ops->update(obj, newobj);
 +      if (obj->ops->update)
 +              obj->ops->update(obj, newobj);
  
        kfree(newobj);
  }
diff --combined net/sched/act_api.c
index 7fc1e2c1b65634afd4f3f44075a98c0e82e6ee7e,685abe277a1c965b62a9b249004d173b698fcc51..90a31b15585f61a5a3c406eb3e3985679164e044
@@@ -88,7 -88,7 +88,7 @@@ struct tcf_chain *tcf_action_set_ctrlac
                                         struct tcf_chain *goto_chain)
  {
        a->tcfa_action = action;
-       rcu_swap_protected(a->goto_chain, goto_chain, 1);
+       goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1);
        return goto_chain;
  }
  EXPORT_SYMBOL(tcf_action_set_ctrlact);
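
This hunk shows the conversion pattern repeated across the series: rcu_replace_pointer(p, new, c) publishes new with rcu_assign_pointer() semantics and hands back the old pointer read under the lockdep condition c, so the caller decides whether to return, free, or requeue it. A hedged kernel-context sketch of the usual locked update; struct cfg and struct obj are invented:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct cfg { int value; struct rcu_head rcu; };

	struct obj {
		struct cfg __rcu *cfg;
		spinlock_t lock;
	};

	static void obj_set_cfg(struct obj *o, struct cfg *new_cfg)
	{
		struct cfg *old;

		spin_lock(&o->lock);
		old = rcu_replace_pointer(o->cfg, new_cfg,
					  lockdep_is_held(&o->lock));
		spin_unlock(&o->lock);

		if (old)
			kfree_rcu(old, rcu);	/* reclaim after a grace period */
	}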
@@@ -188,8 -188,6 +188,8 @@@ static size_t tcf_action_shared_attrs_s
                + nla_total_size(0) /* TCA_ACT_STATS nested */
                /* TCA_STATS_BASIC */
                + nla_total_size_64bit(sizeof(struct gnet_stats_basic))
 +              /* TCA_STATS_PKT64 */
 +              + nla_total_size_64bit(sizeof(u64))
                /* TCA_STATS_QUEUE */
                + nla_total_size_64bit(sizeof(struct gnet_stats_queue))
                + nla_total_size(0) /* TCA_OPTIONS nested */
@@@ -401,7 -399,7 +401,7 @@@ static int tcf_idr_delete_index(struct 
  
  int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
                   struct tc_action **a, const struct tc_action_ops *ops,
 -                 int bind, bool cpustats)
 +                 int bind, bool cpustats, u32 flags)
  {
        struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
        p->tcfa_tm.firstuse = 0;
 +      p->tcfa_flags = flags;
        if (est) {
                err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                        &p->tcfa_rate_est,
@@@ -454,17 -451,6 +454,17 @@@ err1
  }
  EXPORT_SYMBOL(tcf_idr_create);
  
 +int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
 +                            struct nlattr *est, struct tc_action **a,
 +                            const struct tc_action_ops *ops, int bind,
 +                            u32 flags)
 +{
 +      /* Set cpustats according to the action's flags. */
 +      return tcf_idr_create(tn, index, est, a, ops, bind,
 +                            !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags);
 +}
 +EXPORT_SYMBOL(tcf_idr_create_from_flags);
 +
  void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
  {
        struct tcf_idrinfo *idrinfo = tn->idrinfo;
@@@ -787,14 -773,6 +787,14 @@@ tcf_action_dump_1(struct sk_buff *skb, 
        }
        rcu_read_unlock();
  
 +      if (a->tcfa_flags) {
 +              struct nla_bitfield32 flags = { a->tcfa_flags,
 +                                              a->tcfa_flags, };
 +
 +              if (nla_put(skb, TCA_ACT_FLAGS, sizeof(flags), &flags))
 +                      goto nla_put_failure;
 +      }
 +
        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
@@@ -853,15 -831,12 +853,15 @@@ static struct tc_cookie *nla_memdup_coo
        return c;
  }
  
 +static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
  static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
        [TCA_ACT_KIND]          = { .type = NLA_STRING },
        [TCA_ACT_INDEX]         = { .type = NLA_U32 },
        [TCA_ACT_COOKIE]        = { .type = NLA_BINARY,
                                    .len = TC_COOKIE_MAX_SIZE },
        [TCA_ACT_OPTIONS]       = { .type = NLA_NESTED },
 +      [TCA_ACT_FLAGS]         = { .type = NLA_BITFIELD32,
 +                                  .validation_data = &tca_act_flags_allowed },
  };
  
  struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    bool rtnl_held,
                                    struct netlink_ext_ack *extack)
  {
 +      struct nla_bitfield32 flags = { 0, 0 };
        struct tc_action *a;
        struct tc_action_ops *a_o;
        struct tc_cookie *cookie = NULL;
                                goto err_out;
                        }
                }
 +              if (tb[TCA_ACT_FLAGS])
 +                      flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
        } else {
                if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
        /* backward compatibility for policer */
        if (name == NULL)
                err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
 -                              rtnl_held, tp, extack);
 +                              rtnl_held, tp, flags.value, extack);
        else
                err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
 -                              tp, extack);
 +                              tp, flags.value, extack);
        if (err < 0)
                goto err_mod;
  
@@@ -1003,6 -975,7 +1003,6 @@@ int tcf_action_init(struct net *net, st
                        err = PTR_ERR(act);
                        goto err;
                }
 -              act->order = i;
                sz += tcf_action_fill_size(act);
                /* Start from index 0 */
                actions[i - 1] = act;
@@@ -1016,29 -989,6 +1016,29 @@@ err
        return err;
  }
  
 +void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets,
 +                           bool drop, bool hw)
 +{
 +      if (a->cpu_bstats) {
 +              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 +
 +              if (drop)
 +                      this_cpu_ptr(a->cpu_qstats)->drops += packets;
 +
 +              if (hw)
 +                      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 +                                         bytes, packets);
 +              return;
 +      }
 +
 +      _bstats_update(&a->tcfa_bstats, bytes, packets);
 +      if (drop)
 +              a->tcfa_qstats.drops += packets;
 +      if (hw)
 +              _bstats_update(&a->tcfa_bstats_hw, bytes, packets);
 +}
 +EXPORT_SYMBOL(tcf_action_update_stats);
 +
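
tcf_action_update_stats() lets ->stats_update() callbacks stop caring
whether the action was created with per-CPU counters. A minimal sketch
of a driver-offload stats callback using it, mirroring the act_ct,
act_mirred, act_police and act_vlan conversions further down (to_foo()
and the foo action type are placeholders):

    static void foo_stats_update(struct tc_action *a, u64 bytes, u32 packets,
                                 u64 lastuse, bool hw)
    {
            struct tcf_t *tm = &to_foo(a)->tcf_tm;  /* hypothetical accessor */

            /* drop=false: these are forwarded packets, not drops */
            tcf_action_update_stats(a, bytes, packets, false, hw);
            tm->lastuse = max_t(u64, tm->lastuse, lastuse);
    }
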
  int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
                          int compat_mode)
  {
diff --combined net/sched/act_csum.c
index 16e67e1c1db1981768bea8f2b8bef2451fd0c71f,87dddbaa203135842269235bd713ee10b09c6b2a..cb8608f0a77a2a88671da430c79399c1fab0d77d
@@@ -43,7 -43,7 +43,7 @@@ static struct tc_action_ops act_csum_op
  static int tcf_csum_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a, int ovr,
                         int bind, bool rtnl_held, struct tcf_proto *tp,
 -                       struct netlink_ext_ack *extack)
 +                       u32 flags, struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
        struct tcf_csum_params *params_new;
@@@ -68,8 -68,8 +68,8 @@@
        index = parm->index;
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
 -              ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_csum_ops, bind, true);
 +              ret = tcf_idr_create_from_flags(tn, index, est, a,
 +                                              &act_csum_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
  
        spin_lock_bh(&p->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(p->params, params_new,
-                          lockdep_is_held(&p->tcf_lock));
+       params_new = rcu_replace_pointer(p->params, params_new,
+                                        lockdep_is_held(&p->tcf_lock));
        spin_unlock_bh(&p->tcf_lock);
  
        if (goto_ch)
@@@ -580,7 -580,7 +580,7 @@@ static int tcf_csum_act(struct sk_buff 
        params = rcu_dereference_bh(p->params);
  
        tcf_lastuse_update(&p->tcf_tm);
 -      bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
 +      tcf_action_update_bstats(&p->common, skb);
  
        action = READ_ONCE(p->tcf_action);
        if (unlikely(action == TC_ACT_SHOT))
@@@ -624,7 -624,7 +624,7 @@@ out
        return action;
  
  drop:
 -      qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
 +      tcf_action_inc_drop_qstats(&p->common);
        action = TC_ACT_SHOT;
        goto out;
  }
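
Every rcu_swap_protected() -> rcu_replace_pointer() conversion in this
series follows the same shape: publish the new parameters under the
action lock, take ownership of the old pointer from the return value,
and free it only after a grace period. Condensed, with placeholder
names:

    spin_lock_bh(&act->tcf_lock);
    /* Returns the previous act->params; the lockdep expression both
     * documents and checks the lock that protects the pointer.
     */
    old = rcu_replace_pointer(act->params, new,
                              lockdep_is_held(&act->tcf_lock));
    spin_unlock_bh(&act->tcf_lock);
    if (old)
            kfree_rcu(old, rcu);    /* readers may still hold 'old' */
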
diff --combined net/sched/act_ct.c
index c13638aeef46bef95f2bf5cd09530fd0bb8b271a,2d5ab233349eafd8486540f9a27b67db0b1d17b7..ae0de372b1c8f003caafdf354e68a87676bd7ab7
@@@ -465,15 -465,16 +465,15 @@@ out_push
        skb_push_rcsum(skb, nh_ofs);
  
  out:
 -      bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
 +      tcf_action_update_bstats(&c->common, skb);
        return retval;
  
  drop:
 -      qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
 +      tcf_action_inc_drop_qstats(&c->common);
        return TC_ACT_SHOT;
  }
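
The tcf_action_update_bstats()/tcf_action_inc_drop_qstats() wrappers
used in these fast paths are assumed to branch on whether per-CPU
counters were allocated, along these lines (a sketch, not this patch's
literal implementation; the shared-counter fallback needs the
per-action lock):

    static inline void tcf_action_update_bstats(struct tc_action *a,
                                                struct sk_buff *skb)
    {
            if (likely(a->cpu_bstats)) {
                    bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
                    return;
            }
            spin_lock(&a->tcfa_lock);
            bstats_update(&a->tcfa_bstats, skb);
            spin_unlock(&a->tcfa_lock);
    }
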
  
  static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
 -      [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
        [TCA_CT_ACTION] = { .type = NLA_U16 },
        [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
        [TCA_CT_ZONE] = { .type = NLA_U16 },
@@@ -655,7 -656,7 +655,7 @@@ static int tcf_ct_fill_params(struct ne
  static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
                       int replace, int bind, bool rtnl_held,
 -                     struct tcf_proto *tp,
 +                     struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, ct_net_id);
                return err;
  
        if (!err) {
 -              err = tcf_idr_create(tn, index, est, a,
 -                                   &act_ct_ops, bind, true);
 +              err = tcf_idr_create_from_flags(tn, index, est, a,
 +                                              &act_ct_ops, bind, flags);
                if (err) {
                        tcf_idr_cleanup(tn, index);
                        return err;
  
        spin_lock_bh(&c->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock));
+       params = rcu_replace_pointer(c->params, params,
+                                    lockdep_is_held(&c->tcf_lock));
        spin_unlock_bh(&c->tcf_lock);
  
        if (goto_ch)
@@@ -904,7 -906,11 +905,7 @@@ static void tcf_stats_update(struct tc_
  {
        struct tcf_ct *c = to_ct(a);
  
 -      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 -
 -      if (hw)
 -              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 -                                 bytes, packets);
 +      tcf_action_update_stats(a, bytes, packets, false, hw);
        c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
  }
  
diff --combined net/sched/act_ctinfo.c
index b1e6010072427b512750fc1b1da388c630f0ed31,c599818978461111ad81fc8b8aefde87cac87ff8..40038c321b4a970dc940714ccda4b39f0d261d6a
@@@ -153,7 -153,7 +153,7 @@@ static const struct nla_policy ctinfo_p
  static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
 -                         struct tcf_proto *tp,
 +                         struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (!err) {
                ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_ctinfo_ops, bind, false);
 +                                   &act_ctinfo_ops, bind, false, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
  
        spin_lock_bh(&ci->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
-       rcu_swap_protected(ci->params, cp_new,
-                          lockdep_is_held(&ci->tcf_lock));
+       cp_new = rcu_replace_pointer(ci->params, cp_new,
+                                    lockdep_is_held(&ci->tcf_lock));
        spin_unlock_bh(&ci->tcf_lock);
  
        if (goto_ch)
diff --combined net/sched/act_ife.c
index d562c88cccbe35dc9608ec3dd0787cf0ae539173,2ea2e164e3bd110e05d0ae34e514b928c5cb29cd..5e6379028fc392031f4b84599f666a2c61f071d2
@@@ -465,8 -465,7 +465,8 @@@ static int populate_metalist(struct tcf
  static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
                        int ovr, int bind, bool rtnl_held,
 -                      struct tcf_proto *tp, struct netlink_ext_ack *extack)
 +                      struct tcf_proto *tp, u32 flags,
 +                      struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
        struct nlattr *tb[TCA_IFE_MAX + 1];
  
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a, &act_ife_ops,
 -                                   bind, true);
 +                                   bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        kfree(p);
                spin_lock_bh(&ife->tcf_lock);
        /* protected by tcf_lock when modifying existing action */
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(ife->params, p, 1);
+       p = rcu_replace_pointer(ife->params, p, 1);
  
        if (exists)
                spin_unlock_bh(&ife->tcf_lock);
diff --combined net/sched/act_mirred.c
index b6e1b5bbb4da0c6316fdc4b936064342e7a857f9,6e82e7ab1491afaeaeb1103dc92c0c4b4b55d966..1e3eb3a975324a2630f1b66f08655ea6de8d331c
@@@ -93,7 -93,7 +93,7 @@@ static int tcf_mirred_init(struct net *
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp,
 -                         struct netlink_ext_ack *extack)
 +                         u32 flags, struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
                        NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
                        return -EINVAL;
                }
 -              ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_mirred_ops, bind, true);
 +              ret = tcf_idr_create_from_flags(tn, index, est, a,
 +                                              &act_mirred_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
                        goto put_chain;
                }
                mac_header_xmit = dev_is_mac_header_xmit(dev);
-               rcu_swap_protected(m->tcfm_dev, dev,
-                                  lockdep_is_held(&m->tcf_lock));
+               dev = rcu_replace_pointer(m->tcfm_dev, dev,
+                                         lockdep_is_held(&m->tcf_lock));
                if (dev)
                        dev_put(dev);
                m->tcfm_mac_header_xmit = mac_header_xmit;
@@@ -231,7 -231,7 +231,7 @@@ static int tcf_mirred_act(struct sk_buf
        }
  
        tcf_lastuse_update(&m->tcf_tm);
 -      bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 +      tcf_action_update_bstats(&m->common, skb);
  
        m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
        m_eaction = READ_ONCE(m->tcfm_eaction);
                /* lets the caller reinsert the packet, if possible */
                if (use_reinsert) {
                        res->ingress = want_ingress;
 -                      res->qstats = this_cpu_ptr(m->common.cpu_qstats);
 -                      skb_tc_reinsert(skb, res);
 +                      if (skb_tc_reinsert(skb, res))
 +                              tcf_action_inc_overlimit_qstats(&m->common);
                        __this_cpu_dec(mirred_rec_level);
                        return TC_ACT_CONSUMED;
                }
  
        if (err) {
  out:
 -              qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
 +              tcf_action_inc_overlimit_qstats(&m->common);
                if (tcf_mirred_is_act_redirect(m_eaction))
                        retval = TC_ACT_SHOT;
        }
@@@ -318,7 -318,10 +318,7 @@@ static void tcf_stats_update(struct tc_
        struct tcf_mirred *m = to_mirred(a);
        struct tcf_t *tm = &m->tcf_tm;
  
 -      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 -      if (hw)
 -              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 -                                 bytes, packets);
 +      tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
  }
  
diff --combined net/sched/act_mpls.c
index c7d5e12ee9197e065fec6ee7d42c32bfbae57f13,bb9edac94e1be54471bbf7ab1489df2de3640f66..325eddcc6621e37ec03e4b9c929c0a2cc86603a4
@@@ -119,6 -119,7 +119,6 @@@ static int valid_label(const struct nla
  }
  
  static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
 -      [TCA_MPLS_UNSPEC]       = { .strict_start_type = TCA_MPLS_UNSPEC + 1 },
        [TCA_MPLS_PARMS]        = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
        [TCA_MPLS_PROTO]        = { .type = NLA_U16 },
        [TCA_MPLS_LABEL]        = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
  static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
 -                       struct tcf_proto *tp, struct netlink_ext_ack *extack)
 +                       struct tcf_proto *tp, u32 flags,
 +                       struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, mpls_net_id);
        struct nlattr *tb[TCA_MPLS_MAX + 1];
  
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_mpls_ops, bind, true);
 +                                   &act_mpls_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
  
        spin_lock_bh(&m->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(m->mpls_p, p, lockdep_is_held(&m->tcf_lock));
+       p = rcu_replace_pointer(m->mpls_p, p, lockdep_is_held(&m->tcf_lock));
        spin_unlock_bh(&m->tcf_lock);
  
        if (goto_ch)
diff --combined net/sched/act_police.c
index d96271590268403c19c66d62ad1a01336be2a6d8,caa91cf8791ba4e75af20a128f76f99c6522b1c3..8b7a0ac96c5169e833c6f87898c6503a49c726af
@@@ -47,7 -47,7 +47,7 @@@ static const struct nla_policy police_p
  static int tcf_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
                               int ovr, int bind, bool rtnl_held,
 -                             struct tcf_proto *tp,
 +                             struct tcf_proto *tp, u32 flags,
                               struct netlink_ext_ack *extack)
  {
        int ret = 0, tcfp_result = TC_ACT_OK, err, size;
@@@ -87,7 -87,7 +87,7 @@@
  
        if (!exists) {
                ret = tcf_idr_create(tn, index, NULL, a,
 -                                   &act_police_ops, bind, true);
 +                                   &act_police_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
                police->tcfp_ptoks = new->tcfp_mtu_ptoks;
        spin_unlock_bh(&police->tcfp_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(police->params,
-                          new,
-                          lockdep_is_held(&police->tcf_lock));
+       new = rcu_replace_pointer(police->params,
+                                 new,
+                                 lockdep_is_held(&police->tcf_lock));
        spin_unlock_bh(&police->tcf_lock);
  
        if (goto_ch)
@@@ -294,7 -294,10 +294,7 @@@ static void tcf_police_stats_update(str
        struct tcf_police *police = to_police(a);
        struct tcf_t *tm = &police->tcf_tm;
  
 -      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 -      if (hw)
 -              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 -                                 bytes, packets);
 +      tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
  }
  
@@@ -342,7 -345,10 +342,7 @@@ static int tcf_police_dump(struct sk_bu
            nla_put_u32(skb, TCA_POLICE_AVRATE, p->tcfp_ewma_rate))
                goto nla_put_failure;
  
 -      t.install = jiffies_to_clock_t(jiffies - police->tcf_tm.install);
 -      t.lastuse = jiffies_to_clock_t(jiffies - police->tcf_tm.lastuse);
 -      t.firstuse = jiffies_to_clock_t(jiffies - police->tcf_tm.firstuse);
 -      t.expires = jiffies_to_clock_t(police->tcf_tm.expires);
 +      tcf_tm_dump(&t, &police->tcf_tm);
        if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
                goto nla_put_failure;
        spin_unlock_bh(&police->tcf_lock);
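
tcf_police_dump() now delegates the timestamp conversion to
tcf_tm_dump(). Assuming the helper mirrors the open-coded lines it
replaces, plus a guard so a never-used action reports firstuse as 0, it
behaves like:

    static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
    {
            dtm->install = jiffies_to_clock_t(jiffies - stm->install);
            dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse);
            dtm->firstuse = stm->firstuse ?
                    jiffies_to_clock_t(jiffies - stm->firstuse) : 0;
            dtm->expires = jiffies_to_clock_t(stm->expires);
    }
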
diff --combined net/sched/act_sample.c
index 29b23bfaf10ddc1f27dc6391997e0e63cd98152d,4deeaf268693c9981f1a489e3ba612072e04eab0..ce948c1e24dc543506c1cb1c0d42352f79a5c551
@@@ -36,7 -36,7 +36,7 @@@ static const struct nla_policy sample_p
  static int tcf_sample_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
                           int bind, bool rtnl_held, struct tcf_proto *tp,
 -                         struct netlink_ext_ack *extack)
 +                         u32 flags, struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@@ -69,7 -69,7 +69,7 @@@
  
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_sample_ops, bind, true);
 +                                   &act_sample_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
        s->rate = rate;
        s->psample_group_num = psample_group_num;
-       rcu_swap_protected(s->psample_group, psample_group,
-                          lockdep_is_held(&s->tcf_lock));
+       psample_group = rcu_replace_pointer(s->psample_group, psample_group,
+                                           lockdep_is_held(&s->tcf_lock));
  
        if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
                s->truncate = true;
diff --combined net/sched/act_skbedit.c
index 5f7ca7f89ca2d393de6c6e59e4d49d59057cf2c1,c38cc394576324fc363f9f184b72a58c9e863a2f..e857424c387ce4333757059c5913b172d23873b4
@@@ -86,7 -86,7 +86,7 @@@ static const struct nla_policy skbedit_
  static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
                            int ovr, int bind, bool rtnl_held,
 -                          struct tcf_proto *tp,
 +                          struct tcf_proto *tp, u32 act_flags,
                            struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
  
        if (!exists) {
                ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_skbedit_ops, bind, true);
 +                                   &act_skbedit_ops, bind, true, 0);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
  
        spin_lock_bh(&d->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(d->params, params_new,
-                          lockdep_is_held(&d->tcf_lock));
+       params_new = rcu_replace_pointer(d->params, params_new,
+                                        lockdep_is_held(&d->tcf_lock));
        spin_unlock_bh(&d->tcf_lock);
        if (params_new)
                kfree_rcu(params_new, rcu);
diff --combined net/sched/act_tunnel_key.c
index 6379f9568ab816e2014311fb69b3f7e7bab03ff5,20d7ca49f7cb828b233ca550a9744ca6e2952df7..536c4bc31be60c35d0881fd3380aaebb42ab2d69
@@@ -10,8 -10,6 +10,8 @@@
  #include <linux/skbuff.h>
  #include <linux/rtnetlink.h>
  #include <net/geneve.h>
 +#include <net/vxlan.h>
 +#include <net/erspan.h>
  #include <net/netlink.h>
  #include <net/pkt_sched.h>
  #include <net/dst.h>
@@@ -33,7 -31,7 +33,7 @@@ static int tunnel_key_act(struct sk_buf
        params = rcu_dereference_bh(t->params);
  
        tcf_lastuse_update(&t->tcf_tm);
 -      bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
 +      tcf_action_update_bstats(&t->common, skb);
        action = READ_ONCE(t->tcf_action);
  
        switch (params->tcft_action) {
  
  static const struct nla_policy
  enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
 +      [TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC]        = {
 +              .strict_start_type = TCA_TUNNEL_KEY_ENC_OPTS_VXLAN },
        [TCA_TUNNEL_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
 +      [TCA_TUNNEL_KEY_ENC_OPTS_VXLAN]         = { .type = NLA_NESTED },
 +      [TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN]        = { .type = NLA_NESTED },
  };
  
  static const struct nla_policy
@@@ -70,19 -64,6 +70,19 @@@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OP
                                                       .len = 128 },
  };
  
 +static const struct nla_policy
 +vxlan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1] = {
 +      [TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]         = { .type = NLA_U32 },
 +};
 +
 +static const struct nla_policy
 +erspan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
 +      [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]        = { .type = NLA_U8 },
 +      [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]      = { .type = NLA_U32 },
 +      [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR]        = { .type = NLA_U8 },
 +      [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]       = { .type = NLA_U8 },
 +};
 +
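
For reference, the metadata structures these policies and the copy
helpers below ultimately fill, abbreviated from include/net/vxlan.h and
include/net/erspan.h:

    struct vxlan_metadata {
            u32 gbp;                        /* VXLAN group based policy */
    };

    struct erspan_metadata {
            int version;                    /* 1 or 2 */
            union {
                    __be32 index;           /* version 1: session index */
                    struct erspan_md2 md2;  /* version 2: dir, hwid, ... */
            } u;
    };
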
  static int
  tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
                           struct netlink_ext_ack *extack)
        return opt_len;
  }
  
 +static int
 +tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len,
 +                        struct netlink_ext_ack *extack)
 +{
 +      struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1];
 +      int err;
 +
 +      err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX, nla,
 +                             vxlan_opt_policy, extack);
 +      if (err < 0)
 +              return err;
 +
 +      if (!tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]) {
 +              NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp");
 +              return -EINVAL;
 +      }
 +
 +      if (dst) {
 +              struct vxlan_metadata *md = dst;
 +
 +              md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]);
 +      }
 +
 +      return sizeof(struct vxlan_metadata);
 +}
 +
 +static int
 +tunnel_key_copy_erspan_opt(const struct nlattr *nla, void *dst, int dst_len,
 +                         struct netlink_ext_ack *extack)
 +{
 +      struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1];
 +      int err;
 +      u8 ver;
 +
 +      err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX, nla,
 +                             erspan_opt_policy, extack);
 +      if (err < 0)
 +              return err;
 +
 +      if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]) {
 +              NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver");
 +              return -EINVAL;
 +      }
 +
 +      ver = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]);
 +      if (ver == 1) {
 +              if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]) {
 +                      NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
 +                      return -EINVAL;
 +              }
 +      } else if (ver == 2) {
 +              if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] ||
 +                  !tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]) {
 +                      NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
 +                      return -EINVAL;
 +              }
 +      } else {
 +              NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect");
 +              return -EINVAL;
 +      }
 +
 +      if (dst) {
 +              struct erspan_metadata *md = dst;
 +
 +              md->version = ver;
 +              if (ver == 1) {
 +                      nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX];
 +                      md->u.index = nla_get_be32(nla);
 +              } else {
 +                      nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR];
 +                      md->u.md2.dir = nla_get_u8(nla);
 +                      nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID];
 +                      set_hwid(&md->u.md2, nla_get_u8(nla));
 +              }
 +      }
 +
 +      return sizeof(struct erspan_metadata);
 +}
 +
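
The dispatch in tunnel_key_copy_opts() below enforces one option type
per action: geneve entries may repeat, while vxlan and erspan must
appear exactly once and cannot be mixed with anything else. The
resulting attribute nesting, sketched:

    /*
     * TCA_TUNNEL_KEY_ENC_OPTS (nested)
     *   TCA_TUNNEL_KEY_ENC_OPTS_VXLAN (nested)
     *     TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP        (u32)
     * or
     *   TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN (nested)
     *     TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER       (u8, 1 or 2)
     *     TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX     (be32, ver 1 only)
     *     TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR       (u8, ver 2 only)
     *     TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID      (u8, ver 2 only)
     */
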
  static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
                                int dst_len, struct netlink_ext_ack *extack)
  {
 -      int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
 +      int err, rem, opt_len, len = nla_len(nla), opts_len = 0, type = 0;
        const struct nlattr *attr, *head = nla_data(nla);
  
        err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
        nla_for_each_attr(attr, head, len, rem) {
                switch (nla_type(attr)) {
                case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
 +                      if (type && type != TUNNEL_GENEVE_OPT) {
 +                              NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
 +                              return -EINVAL;
 +                      }
                        opt_len = tunnel_key_copy_geneve_opt(attr, dst,
                                                             dst_len, extack);
                        if (opt_len < 0)
                                return opt_len;
                        opts_len += opt_len;
 +                      if (opts_len > IP_TUNNEL_OPTS_MAX) {
 +                      NL_SET_ERR_MSG(extack, "Tunnel options exceed max size");
 +                              return -EINVAL;
 +                      }
                        if (dst) {
                                dst_len -= opt_len;
                                dst += opt_len;
                        }
 +                      type = TUNNEL_GENEVE_OPT;
 +                      break;
 +              case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
 +                      if (type) {
 +                              NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
 +                              return -EINVAL;
 +                      }
 +                      opt_len = tunnel_key_copy_vxlan_opt(attr, dst,
 +                                                          dst_len, extack);
 +                      if (opt_len < 0)
 +                              return opt_len;
 +                      opts_len += opt_len;
 +                      type = TUNNEL_VXLAN_OPT;
 +                      break;
 +              case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
 +                      if (type) {
 +                              NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
 +                              return -EINVAL;
 +                      }
 +                      opt_len = tunnel_key_copy_erspan_opt(attr, dst,
 +                                                           dst_len, extack);
 +                      if (opt_len < 0)
 +                              return opt_len;
 +                      opts_len += opt_len;
 +                      type = TUNNEL_ERSPAN_OPT;
                        break;
                }
        }
@@@ -305,22 -174,6 +305,22 @@@ static int tunnel_key_opts_set(struct n
                                            opts_len, extack);
  #else
                return -EAFNOSUPPORT;
 +#endif
 +      case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
 +#if IS_ENABLED(CONFIG_INET)
 +              info->key.tun_flags |= TUNNEL_VXLAN_OPT;
 +              return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
 +                                          opts_len, extack);
 +#else
 +              return -EAFNOSUPPORT;
 +#endif
 +      case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
 +#if IS_ENABLED(CONFIG_INET)
 +              info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
 +              return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
 +                                          opts_len, extack);
 +#else
 +              return -EAFNOSUPPORT;
  #endif
        default:
                NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
@@@ -355,7 -208,7 +355,7 @@@ static void tunnel_key_release_params(s
  static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
                           int ovr, int bind, bool rtnl_held,
 -                         struct tcf_proto *tp,
 +                         struct tcf_proto *tp, u32 act_flags,
                           struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
        }
  
        if (!exists) {
 -              ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_tunnel_key_ops, bind, true);
 +              ret = tcf_idr_create_from_flags(tn, index, est, a,
 +                                              &act_tunnel_key_ops, bind,
 +                                              act_flags);
                if (ret) {
                        NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
                        goto release_tun_meta;
  
        spin_lock_bh(&t->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(t->params, params_new,
-                          lockdep_is_held(&t->tcf_lock));
+       params_new = rcu_replace_pointer(t->params, params_new,
+                                        lockdep_is_held(&t->tcf_lock));
        spin_unlock_bh(&t->tcf_lock);
        tunnel_key_release_params(params_new);
        if (goto_ch)
@@@ -598,56 -450,6 +598,56 @@@ static int tunnel_key_geneve_opts_dump(
        return 0;
  }
  
 +static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
 +                                    const struct ip_tunnel_info *info)
 +{
 +      struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1);
 +      struct nlattr *start;
 +
 +      start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN);
 +      if (!start)
 +              return -EMSGSIZE;
 +
 +      if (nla_put_u32(skb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) {
 +              nla_nest_cancel(skb, start);
 +              return -EMSGSIZE;
 +      }
 +
 +      nla_nest_end(skb, start);
 +      return 0;
 +}
 +
 +static int tunnel_key_erspan_opts_dump(struct sk_buff *skb,
 +                                     const struct ip_tunnel_info *info)
 +{
 +      struct erspan_metadata *md = (struct erspan_metadata *)(info + 1);
 +      struct nlattr *start;
 +
 +      start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN);
 +      if (!start)
 +              return -EMSGSIZE;
 +
 +      if (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER, md->version))
 +              goto err;
 +
 +      if (md->version == 1 &&
 +          nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index))
 +              goto err;
 +
 +      if (md->version == 2 &&
 +          (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR,
 +                      md->u.md2.dir) ||
 +           nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID,
 +                      get_hwid(&md->u.md2))))
 +              goto err;
 +
 +      nla_nest_end(skb, start);
 +      return 0;
 +err:
 +      nla_nest_cancel(skb, start);
 +      return -EMSGSIZE;
 +}
 +
  static int tunnel_key_opts_dump(struct sk_buff *skb,
                                const struct ip_tunnel_info *info)
  {
                err = tunnel_key_geneve_opts_dump(skb, info);
                if (err)
                        goto err_out;
 +      } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
 +              err = tunnel_key_vxlan_opts_dump(skb, info);
 +              if (err)
 +                      goto err_out;
 +      } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
 +              err = tunnel_key_erspan_opts_dump(skb, info);
 +              if (err)
 +                      goto err_out;
        } else {
  err_out:
                nla_nest_cancel(skb, start);
diff --combined net/sched/act_vlan.c
index b6939abc61ebef96e4eda5991854363125a30b16,7aca1f0ecc21e57291a9e4b1158de38d97f4fa2a..c91d3958fcbb806ed05af62a140892fdf8ec726f
@@@ -29,7 -29,7 +29,7 @@@ static int tcf_vlan_act(struct sk_buff 
        u16 tci;
  
        tcf_lastuse_update(&v->tcf_tm);
 -      bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
 +      tcf_action_update_bstats(&v->common, skb);
  
        /* Ensure 'data' points at mac_header prior to calling vlan manipulating
         * functions.
@@@ -88,7 -88,7 +88,7 @@@ out
        return action;
  
  drop:
 -      qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
 +      tcf_action_inc_drop_qstats(&v->common);
        return TC_ACT_SHOT;
  }
  
@@@ -102,8 -102,7 +102,8 @@@ static const struct nla_policy vlan_pol
  static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
                         int ovr, int bind, bool rtnl_held,
 -                       struct tcf_proto *tp, struct netlink_ext_ack *extack)
 +                       struct tcf_proto *tp, u32 flags,
 +                       struct netlink_ext_ack *extack)
  {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
        struct nlattr *tb[TCA_VLAN_MAX + 1];
        action = parm->v_action;
  
        if (!exists) {
 -              ret = tcf_idr_create(tn, index, est, a,
 -                                   &act_vlan_ops, bind, true);
 +              ret = tcf_idr_create_from_flags(tn, index, est, a,
 +                                              &act_vlan_ops, bind, flags);
                if (ret) {
                        tcf_idr_cleanup(tn, index);
                        return ret;
  
        spin_lock_bh(&v->tcf_lock);
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
-       rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
+       p = rcu_replace_pointer(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
        spin_unlock_bh(&v->tcf_lock);
  
        if (goto_ch)
@@@ -308,7 -307,10 +308,7 @@@ static void tcf_vlan_stats_update(struc
        struct tcf_vlan *v = to_vlan(a);
        struct tcf_t *tm = &v->tcf_tm;
  
 -      _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 -      if (hw)
 -              _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 -                                 bytes, packets);
 +      tcf_action_update_stats(a, bytes, packets, false, hw);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
  }