Merge branch 'asoc-4.19' into asoc-linus
[linux-2.6-block.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <linux/audit.h>
23 #include <linux/uaccess.h>
24 #include <linux/ktime.h>
25 #include <linux/slab.h>
26 #include <linux/interrupt.h>
27 #include <linux/kernel.h>
28
29 #include "xfrm_hash.h"
30
31 #define xfrm_state_deref_prot(table, net) \
32         rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
33
34 static void xfrm_state_gc_task(struct work_struct *work);
35
/* Each xfrm_state may be linked to three tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by (daddr,saddr,family) to find SA by address pair when
      no SPI is used for the lookup.
 */
42
43 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
44 static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
45 static struct kmem_cache *xfrm_state_cache __ro_after_init;
46
47 static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
48 static HLIST_HEAD(xfrm_state_gc_list);
49
50 static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
51 {
52         return refcount_inc_not_zero(&x->refcnt);
53 }
54
55 static inline unsigned int xfrm_dst_hash(struct net *net,
56                                          const xfrm_address_t *daddr,
57                                          const xfrm_address_t *saddr,
58                                          u32 reqid,
59                                          unsigned short family)
60 {
61         return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
62 }
63
64 static inline unsigned int xfrm_src_hash(struct net *net,
65                                          const xfrm_address_t *daddr,
66                                          const xfrm_address_t *saddr,
67                                          unsigned short family)
68 {
69         return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
70 }
71
72 static inline unsigned int
73 xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
74               __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head_rcu(&x->bydst, ndsttable + h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head_rcu(&x->bysrc, nsrctable + h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head_rcu(&x->byspi, nspitable + h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
111 {
112         return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
113 }
114
115 static void xfrm_hash_resize(struct work_struct *work)
116 {
117         struct net *net = container_of(work, struct net, xfrm.state_hash_work);
118         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
119         unsigned long nsize, osize;
120         unsigned int nhashmask, ohashmask;
121         int i;
122
123         nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
124         ndst = xfrm_hash_alloc(nsize);
125         if (!ndst)
126                 return;
127         nsrc = xfrm_hash_alloc(nsize);
128         if (!nsrc) {
129                 xfrm_hash_free(ndst, nsize);
130                 return;
131         }
132         nspi = xfrm_hash_alloc(nsize);
133         if (!nspi) {
134                 xfrm_hash_free(ndst, nsize);
135                 xfrm_hash_free(nsrc, nsize);
136                 return;
137         }
138
139         spin_lock_bh(&net->xfrm.xfrm_state_lock);
140         write_seqcount_begin(&xfrm_state_hash_generation);
141
142         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
143         odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
144         for (i = net->xfrm.state_hmask; i >= 0; i--)
145                 xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
146
147         osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
148         ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
149         ohashmask = net->xfrm.state_hmask;
150
151         rcu_assign_pointer(net->xfrm.state_bydst, ndst);
152         rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
153         rcu_assign_pointer(net->xfrm.state_byspi, nspi);
154         net->xfrm.state_hmask = nhashmask;
155
156         write_seqcount_end(&xfrm_state_hash_generation);
157         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
158
159         osize = (ohashmask + 1) * sizeof(struct hlist_head);
160
161         synchronize_rcu();
162
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166 }
167
168 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
169 static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
170
171 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
172
173 int __xfrm_state_delete(struct xfrm_state *x);
174
175 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
176 bool km_is_alive(const struct km_event *c);
177 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
178
179 static DEFINE_SPINLOCK(xfrm_type_lock);
180 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
181 {
182         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
183         const struct xfrm_type **typemap;
184         int err = 0;
185
186         if (unlikely(afinfo == NULL))
187                 return -EAFNOSUPPORT;
188         typemap = afinfo->type_map;
189         spin_lock_bh(&xfrm_type_lock);
190
191         if (likely(typemap[type->proto] == NULL))
192                 typemap[type->proto] = type;
193         else
194                 err = -EEXIST;
195         spin_unlock_bh(&xfrm_type_lock);
196         rcu_read_unlock();
197         return err;
198 }
199 EXPORT_SYMBOL(xfrm_register_type);
200
201 int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
202 {
203         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
204         const struct xfrm_type **typemap;
205         int err = 0;
206
207         if (unlikely(afinfo == NULL))
208                 return -EAFNOSUPPORT;
209         typemap = afinfo->type_map;
210         spin_lock_bh(&xfrm_type_lock);
211
212         if (unlikely(typemap[type->proto] != type))
213                 err = -ENOENT;
214         else
215                 typemap[type->proto] = NULL;
216         spin_unlock_bh(&xfrm_type_lock);
217         rcu_read_unlock();
218         return err;
219 }
220 EXPORT_SYMBOL(xfrm_unregister_type);
221
222 static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
223 {
224         struct xfrm_state_afinfo *afinfo;
225         const struct xfrm_type **typemap;
226         const struct xfrm_type *type;
227         int modload_attempted = 0;
228
229 retry:
230         afinfo = xfrm_state_get_afinfo(family);
231         if (unlikely(afinfo == NULL))
232                 return NULL;
233         typemap = afinfo->type_map;
234
235         type = READ_ONCE(typemap[proto]);
236         if (unlikely(type && !try_module_get(type->owner)))
237                 type = NULL;
238
239         rcu_read_unlock();
240
241         if (!type && !modload_attempted) {
242                 request_module("xfrm-type-%d-%d", family, proto);
243                 modload_attempted = 1;
244                 goto retry;
245         }
246
247         return type;
248 }
249
250 static void xfrm_put_type(const struct xfrm_type *type)
251 {
252         module_put(type->owner);
253 }
254
255 static DEFINE_SPINLOCK(xfrm_type_offload_lock);
256 int xfrm_register_type_offload(const struct xfrm_type_offload *type,
257                                unsigned short family)
258 {
259         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
260         const struct xfrm_type_offload **typemap;
261         int err = 0;
262
263         if (unlikely(afinfo == NULL))
264                 return -EAFNOSUPPORT;
265         typemap = afinfo->type_offload_map;
266         spin_lock_bh(&xfrm_type_offload_lock);
267
268         if (likely(typemap[type->proto] == NULL))
269                 typemap[type->proto] = type;
270         else
271                 err = -EEXIST;
272         spin_unlock_bh(&xfrm_type_offload_lock);
273         rcu_read_unlock();
274         return err;
275 }
276 EXPORT_SYMBOL(xfrm_register_type_offload);
277
278 int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
279                                  unsigned short family)
280 {
281         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
282         const struct xfrm_type_offload **typemap;
283         int err = 0;
284
285         if (unlikely(afinfo == NULL))
286                 return -EAFNOSUPPORT;
287         typemap = afinfo->type_offload_map;
288         spin_lock_bh(&xfrm_type_offload_lock);
289
290         if (unlikely(typemap[type->proto] != type))
291                 err = -ENOENT;
292         else
293                 typemap[type->proto] = NULL;
294         spin_unlock_bh(&xfrm_type_offload_lock);
295         rcu_read_unlock();
296         return err;
297 }
298 EXPORT_SYMBOL(xfrm_unregister_type_offload);
299
300 static const struct xfrm_type_offload *
301 xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
302 {
303         struct xfrm_state_afinfo *afinfo;
304         const struct xfrm_type_offload **typemap;
305         const struct xfrm_type_offload *type;
306
307 retry:
308         afinfo = xfrm_state_get_afinfo(family);
309         if (unlikely(afinfo == NULL))
310                 return NULL;
311         typemap = afinfo->type_offload_map;
312
313         type = typemap[proto];
314         if ((type && !try_module_get(type->owner)))
315                 type = NULL;
316
317         rcu_read_unlock();
318
319         if (!type && try_load) {
320                 request_module("xfrm-offload-%d-%d", family, proto);
321                 try_load = false;
322                 goto retry;
323         }
324
325         return type;
326 }
327
328 static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
329 {
330         module_put(type->owner);
331 }
332
333 static DEFINE_SPINLOCK(xfrm_mode_lock);
334 int xfrm_register_mode(struct xfrm_mode *mode, int family)
335 {
336         struct xfrm_state_afinfo *afinfo;
337         struct xfrm_mode **modemap;
338         int err;
339
340         if (unlikely(mode->encap >= XFRM_MODE_MAX))
341                 return -EINVAL;
342
343         afinfo = xfrm_state_get_afinfo(family);
344         if (unlikely(afinfo == NULL))
345                 return -EAFNOSUPPORT;
346
347         err = -EEXIST;
348         modemap = afinfo->mode_map;
349         spin_lock_bh(&xfrm_mode_lock);
350         if (modemap[mode->encap])
351                 goto out;
352
353         err = -ENOENT;
354         if (!try_module_get(afinfo->owner))
355                 goto out;
356
357         mode->afinfo = afinfo;
358         modemap[mode->encap] = mode;
359         err = 0;
360
361 out:
362         spin_unlock_bh(&xfrm_mode_lock);
363         rcu_read_unlock();
364         return err;
365 }
366 EXPORT_SYMBOL(xfrm_register_mode);
367
368 int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
369 {
370         struct xfrm_state_afinfo *afinfo;
371         struct xfrm_mode **modemap;
372         int err;
373
374         if (unlikely(mode->encap >= XFRM_MODE_MAX))
375                 return -EINVAL;
376
377         afinfo = xfrm_state_get_afinfo(family);
378         if (unlikely(afinfo == NULL))
379                 return -EAFNOSUPPORT;
380
381         err = -ENOENT;
382         modemap = afinfo->mode_map;
383         spin_lock_bh(&xfrm_mode_lock);
384         if (likely(modemap[mode->encap] == mode)) {
385                 modemap[mode->encap] = NULL;
386                 module_put(mode->afinfo->owner);
387                 err = 0;
388         }
389
390         spin_unlock_bh(&xfrm_mode_lock);
391         rcu_read_unlock();
392         return err;
393 }
394 EXPORT_SYMBOL(xfrm_unregister_mode);
395
396 static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
397 {
398         struct xfrm_state_afinfo *afinfo;
399         struct xfrm_mode *mode;
400         int modload_attempted = 0;
401
402         if (unlikely(encap >= XFRM_MODE_MAX))
403                 return NULL;
404
405 retry:
406         afinfo = xfrm_state_get_afinfo(family);
407         if (unlikely(afinfo == NULL))
408                 return NULL;
409
410         mode = READ_ONCE(afinfo->mode_map[encap]);
411         if (unlikely(mode && !try_module_get(mode->owner)))
412                 mode = NULL;
413
414         rcu_read_unlock();
415         if (!mode && !modload_attempted) {
416                 request_module("xfrm-mode-%d-%d", family, encap);
417                 modload_attempted = 1;
418                 goto retry;
419         }
420
421         return mode;
422 }
423
424 static void xfrm_put_mode(struct xfrm_mode *mode)
425 {
426         module_put(mode->owner);
427 }
428
429 static void xfrm_state_gc_destroy(struct xfrm_state *x)
430 {
431         tasklet_hrtimer_cancel(&x->mtimer);
432         del_timer_sync(&x->rtimer);
433         kfree(x->aead);
434         kfree(x->aalg);
435         kfree(x->ealg);
436         kfree(x->calg);
437         kfree(x->encap);
438         kfree(x->coaddr);
439         kfree(x->replay_esn);
440         kfree(x->preplay_esn);
441         if (x->inner_mode)
442                 xfrm_put_mode(x->inner_mode);
443         if (x->inner_mode_iaf)
444                 xfrm_put_mode(x->inner_mode_iaf);
445         if (x->outer_mode)
446                 xfrm_put_mode(x->outer_mode);
447         if (x->type_offload)
448                 xfrm_put_type_offload(x->type_offload);
449         if (x->type) {
450                 x->type->destructor(x);
451                 xfrm_put_type(x->type);
452         }
453         xfrm_dev_state_free(x);
454         security_xfrm_state_free(x);
455         kmem_cache_free(xfrm_state_cache, x);
456 }
457
458 static void xfrm_state_gc_task(struct work_struct *work)
459 {
460         struct xfrm_state *x;
461         struct hlist_node *tmp;
462         struct hlist_head gc_list;
463
464         spin_lock_bh(&xfrm_state_gc_lock);
465         hlist_move_list(&xfrm_state_gc_list, &gc_list);
466         spin_unlock_bh(&xfrm_state_gc_lock);
467
468         synchronize_rcu();
469
470         hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
471                 xfrm_state_gc_destroy(x);
472 }
473
474 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
475 {
476         struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
477         struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
478         time64_t now = ktime_get_real_seconds();
479         time64_t next = TIME64_MAX;
480         int warn = 0;
481         int err = 0;
482
483         spin_lock(&x->lock);
484         if (x->km.state == XFRM_STATE_DEAD)
485                 goto out;
486         if (x->km.state == XFRM_STATE_EXPIRED)
487                 goto expired;
488         if (x->lft.hard_add_expires_seconds) {
489                 long tmo = x->lft.hard_add_expires_seconds +
490                         x->curlft.add_time - now;
491                 if (tmo <= 0) {
492                         if (x->xflags & XFRM_SOFT_EXPIRE) {
493                                 /* enter hard expire without soft expire first?!
494                                  * setting a new date could trigger this.
495                                  * workaround: fix x->curflt.add_time by below:
496                                  */
497                                 x->curlft.add_time = now - x->saved_tmo - 1;
498                                 tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
499                         } else
500                                 goto expired;
501                 }
502                 if (tmo < next)
503                         next = tmo;
504         }
505         if (x->lft.hard_use_expires_seconds) {
506                 long tmo = x->lft.hard_use_expires_seconds +
507                         (x->curlft.use_time ? : now) - now;
508                 if (tmo <= 0)
509                         goto expired;
510                 if (tmo < next)
511                         next = tmo;
512         }
513         if (x->km.dying)
514                 goto resched;
515         if (x->lft.soft_add_expires_seconds) {
516                 long tmo = x->lft.soft_add_expires_seconds +
517                         x->curlft.add_time - now;
518                 if (tmo <= 0) {
519                         warn = 1;
520                         x->xflags &= ~XFRM_SOFT_EXPIRE;
521                 } else if (tmo < next) {
522                         next = tmo;
523                         x->xflags |= XFRM_SOFT_EXPIRE;
524                         x->saved_tmo = tmo;
525                 }
526         }
527         if (x->lft.soft_use_expires_seconds) {
528                 long tmo = x->lft.soft_use_expires_seconds +
529                         (x->curlft.use_time ? : now) - now;
530                 if (tmo <= 0)
531                         warn = 1;
532                 else if (tmo < next)
533                         next = tmo;
534         }
535
536         x->km.dying = warn;
537         if (warn)
538                 km_state_expired(x, 0, 0);
539 resched:
540         if (next != TIME64_MAX) {
541                 tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
542         }
543
544         goto out;
545
546 expired:
547         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
548                 x->km.state = XFRM_STATE_EXPIRED;
549
550         err = __xfrm_state_delete(x);
551         if (!err)
552                 km_state_expired(x, 1, 0);
553
554         xfrm_audit_state_delete(x, err ? 0 : 1, true);
555
556 out:
557         spin_unlock(&x->lock);
558         return HRTIMER_NORESTART;
559 }
560
561 static void xfrm_replay_timer_handler(struct timer_list *t);
562
563 struct xfrm_state *xfrm_state_alloc(struct net *net)
564 {
565         struct xfrm_state *x;
566
567         x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
568
569         if (x) {
570                 write_pnet(&x->xs_net, net);
571                 refcount_set(&x->refcnt, 1);
572                 atomic_set(&x->tunnel_users, 0);
573                 INIT_LIST_HEAD(&x->km.all);
574                 INIT_HLIST_NODE(&x->bydst);
575                 INIT_HLIST_NODE(&x->bysrc);
576                 INIT_HLIST_NODE(&x->byspi);
577                 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
578                                         CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
579                 timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
580                 x->curlft.add_time = ktime_get_real_seconds();
581                 x->lft.soft_byte_limit = XFRM_INF;
582                 x->lft.soft_packet_limit = XFRM_INF;
583                 x->lft.hard_byte_limit = XFRM_INF;
584                 x->lft.hard_packet_limit = XFRM_INF;
585                 x->replay_maxage = 0;
586                 x->replay_maxdiff = 0;
587                 x->inner_mode = NULL;
588                 x->inner_mode_iaf = NULL;
589                 spin_lock_init(&x->lock);
590         }
591         return x;
592 }
593 EXPORT_SYMBOL(xfrm_state_alloc);
594
595 void __xfrm_state_destroy(struct xfrm_state *x)
596 {
597         WARN_ON(x->km.state != XFRM_STATE_DEAD);
598
599         spin_lock_bh(&xfrm_state_gc_lock);
600         hlist_add_head(&x->gclist, &xfrm_state_gc_list);
601         spin_unlock_bh(&xfrm_state_gc_lock);
602         schedule_work(&xfrm_state_gc_work);
603 }
604 EXPORT_SYMBOL(__xfrm_state_destroy);
605
606 int __xfrm_state_delete(struct xfrm_state *x)
607 {
608         struct net *net = xs_net(x);
609         int err = -ESRCH;
610
611         if (x->km.state != XFRM_STATE_DEAD) {
612                 x->km.state = XFRM_STATE_DEAD;
613                 spin_lock(&net->xfrm.xfrm_state_lock);
614                 list_del(&x->km.all);
615                 hlist_del_rcu(&x->bydst);
616                 hlist_del_rcu(&x->bysrc);
617                 if (x->id.spi)
618                         hlist_del_rcu(&x->byspi);
619                 net->xfrm.state_num--;
620                 spin_unlock(&net->xfrm.xfrm_state_lock);
621
622                 xfrm_dev_state_delete(x);
623
624                 /* All xfrm_state objects are created by xfrm_state_alloc.
625                  * The xfrm_state_alloc call gives a reference, and that
626                  * is what we are dropping here.
627                  */
628                 xfrm_state_put(x);
629                 err = 0;
630         }
631
632         return err;
633 }
634 EXPORT_SYMBOL(__xfrm_state_delete);
635
636 int xfrm_state_delete(struct xfrm_state *x)
637 {
638         int err;
639
640         spin_lock_bh(&x->lock);
641         err = __xfrm_state_delete(x);
642         spin_unlock_bh(&x->lock);
643
644         return err;
645 }
646 EXPORT_SYMBOL(xfrm_state_delete);
647
648 #ifdef CONFIG_SECURITY_NETWORK_XFRM
649 static inline int
650 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
651 {
652         int i, err = 0;
653
654         for (i = 0; i <= net->xfrm.state_hmask; i++) {
655                 struct xfrm_state *x;
656
657                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
658                         if (xfrm_id_proto_match(x->id.proto, proto) &&
659                            (err = security_xfrm_state_delete(x)) != 0) {
660                                 xfrm_audit_state_delete(x, 0, task_valid);
661                                 return err;
662                         }
663                 }
664         }
665
666         return err;
667 }
668
669 static inline int
670 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
671 {
672         int i, err = 0;
673
674         for (i = 0; i <= net->xfrm.state_hmask; i++) {
675                 struct xfrm_state *x;
676                 struct xfrm_state_offload *xso;
677
678                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
679                         xso = &x->xso;
680
681                         if (xso->dev == dev &&
682                            (err = security_xfrm_state_delete(x)) != 0) {
683                                 xfrm_audit_state_delete(x, 0, task_valid);
684                                 return err;
685                         }
686                 }
687         }
688
689         return err;
690 }
691 #else
692 static inline int
693 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
694 {
695         return 0;
696 }
697
698 static inline int
699 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
700 {
701         return 0;
702 }
703 #endif
704
705 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
706 {
707         int i, err = 0, cnt = 0;
708
709         spin_lock_bh(&net->xfrm.xfrm_state_lock);
710         err = xfrm_state_flush_secctx_check(net, proto, task_valid);
711         if (err)
712                 goto out;
713
714         err = -ESRCH;
715         for (i = 0; i <= net->xfrm.state_hmask; i++) {
716                 struct xfrm_state *x;
717 restart:
718                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
719                         if (!xfrm_state_kern(x) &&
720                             xfrm_id_proto_match(x->id.proto, proto)) {
721                                 xfrm_state_hold(x);
722                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
723
724                                 err = xfrm_state_delete(x);
725                                 xfrm_audit_state_delete(x, err ? 0 : 1,
726                                                         task_valid);
727                                 xfrm_state_put(x);
728                                 if (!err)
729                                         cnt++;
730
731                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
732                                 goto restart;
733                         }
734                 }
735         }
736 out:
737         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
738         if (cnt)
739                 err = 0;
740
741         return err;
742 }
743 EXPORT_SYMBOL(xfrm_state_flush);
744
745 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
746 {
747         int i, err = 0, cnt = 0;
748
749         spin_lock_bh(&net->xfrm.xfrm_state_lock);
750         err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
751         if (err)
752                 goto out;
753
754         err = -ESRCH;
755         for (i = 0; i <= net->xfrm.state_hmask; i++) {
756                 struct xfrm_state *x;
757                 struct xfrm_state_offload *xso;
758 restart:
759                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
760                         xso = &x->xso;
761
762                         if (!xfrm_state_kern(x) && xso->dev == dev) {
763                                 xfrm_state_hold(x);
764                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
765
766                                 err = xfrm_state_delete(x);
767                                 xfrm_audit_state_delete(x, err ? 0 : 1,
768                                                         task_valid);
769                                 xfrm_state_put(x);
770                                 if (!err)
771                                         cnt++;
772
773                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
774                                 goto restart;
775                         }
776                 }
777         }
778         if (cnt)
779                 err = 0;
780
781 out:
782         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
783         return err;
784 }
785 EXPORT_SYMBOL(xfrm_dev_state_flush);
786
787 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
788 {
789         spin_lock_bh(&net->xfrm.xfrm_state_lock);
790         si->sadcnt = net->xfrm.state_num;
791         si->sadhcnt = net->xfrm.state_hmask;
792         si->sadhmcnt = xfrm_state_hashmax;
793         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
794 }
795 EXPORT_SYMBOL(xfrm_sad_getinfo);
796
797 static void
798 xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
799                     const struct xfrm_tmpl *tmpl,
800                     const xfrm_address_t *daddr, const xfrm_address_t *saddr,
801                     unsigned short family)
802 {
803         struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
804
805         if (!afinfo)
806                 return;
807
808         afinfo->init_tempsel(&x->sel, fl);
809
810         if (family != tmpl->encap_family) {
811                 afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
812                 if (!afinfo)
813                         return;
814         }
815         afinfo->init_temprop(x, tmpl, daddr, saddr);
816 }
817
818 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
819                                               const xfrm_address_t *daddr,
820                                               __be32 spi, u8 proto,
821                                               unsigned short family)
822 {
823         unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
824         struct xfrm_state *x;
825
826         hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
827                 if (x->props.family != family ||
828                     x->id.spi       != spi ||
829                     x->id.proto     != proto ||
830                     !xfrm_addr_equal(&x->id.daddr, daddr, family))
831                         continue;
832
833                 if ((mark & x->mark.m) != x->mark.v)
834                         continue;
835                 if (!xfrm_state_hold_rcu(x))
836                         continue;
837                 return x;
838         }
839
840         return NULL;
841 }
842
843 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
844                                                      const xfrm_address_t *daddr,
845                                                      const xfrm_address_t *saddr,
846                                                      u8 proto, unsigned short family)
847 {
848         unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
849         struct xfrm_state *x;
850
851         hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
852                 if (x->props.family != family ||
853                     x->id.proto     != proto ||
854                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
855                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
856                         continue;
857
858                 if ((mark & x->mark.m) != x->mark.v)
859                         continue;
860                 if (!xfrm_state_hold_rcu(x))
861                         continue;
862                 return x;
863         }
864
865         return NULL;
866 }
867
868 static inline struct xfrm_state *
869 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
870 {
871         struct net *net = xs_net(x);
872         u32 mark = x->mark.v & x->mark.m;
873
874         if (use_spi)
875                 return __xfrm_state_lookup(net, mark, &x->id.daddr,
876                                            x->id.spi, x->id.proto, family);
877         else
878                 return __xfrm_state_lookup_byaddr(net, mark,
879                                                   &x->id.daddr,
880                                                   &x->props.saddr,
881                                                   x->id.proto, family);
882 }
883
884 static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
885 {
886         if (have_hash_collision &&
887             (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
888             net->xfrm.state_num > net->xfrm.state_hmask)
889                 schedule_work(&net->xfrm.state_hash_work);
890 }
891
892 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
893                                const struct flowi *fl, unsigned short family,
894                                struct xfrm_state **best, int *acq_in_progress,
895                                int *error)
896 {
897         /* Resolution logic:
898          * 1. There is a valid state with matching selector. Done.
899          * 2. Valid state with inappropriate selector. Skip.
900          *
901          * Entering area of "sysdeps".
902          *
903          * 3. If state is not valid, selector is temporary, it selects
904          *    only session which triggered previous resolution. Key
905          *    manager will do something to install a state with proper
906          *    selector.
907          */
908         if (x->km.state == XFRM_STATE_VALID) {
909                 if ((x->sel.family &&
910                      !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
911                     !security_xfrm_state_pol_flow_match(x, pol, fl))
912                         return;
913
914                 if (!*best ||
915                     (*best)->km.dying > x->km.dying ||
916                     ((*best)->km.dying == x->km.dying &&
917                      (*best)->curlft.add_time < x->curlft.add_time))
918                         *best = x;
919         } else if (x->km.state == XFRM_STATE_ACQ) {
920                 *acq_in_progress = 1;
921         } else if (x->km.state == XFRM_STATE_ERROR ||
922                    x->km.state == XFRM_STATE_EXPIRED) {
923                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
924                     security_xfrm_state_pol_flow_match(x, pol, fl))
925                         *error = -ESRCH;
926         }
927 }
928
/*
 * xfrm_state_find - find the state to use for a policy template, or start
 * an acquire for one.
 *
 * The lookup runs in two passes over the by-destination hash: first the
 * bucket keyed on (daddr, saddr, reqid, encap_family), then, if nothing
 * usable and no acquire in progress was seen, the bucket keyed on the
 * same tuple with an all-zero source address.  Each candidate is judged
 * by xfrm_state_look_at().
 *
 * When no state, no error and no pending acquire was found, a temporary
 * state is allocated, handed to the key managers via km_query() and, on
 * success, inserted into the hash tables in XFRM_STATE_ACQ.
 *
 * Returns a state with a reference held, or NULL.  When NULL is returned
 * *err is set (-EAGAIN, -EEXIST, -ESRCH, -ENOMEM, or the value returned
 * by security_xfrm_state_alloc_acquire()).  On the successful path *err
 * is not written.
 */
struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                const struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family, u32 if_id)
{
        /* All-zero address used as the source key for the second pass. */
        static xfrm_address_t saddr_wildcard = { };
        struct net *net = xp_net(pol);
        unsigned int h, h_wildcard;
        struct xfrm_state *x, *x0, *to_put;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;
        u32 mark = pol->mark.v & pol->mark.m;
        unsigned short encap_family = tmpl->encap_family;
        unsigned int sequence;
        struct km_event c;

        /* State whose reference is dropped after leaving the RCU section. */
        to_put = NULL;

        /*
         * Sample the hash generation; it is re-checked at the end so that
         * a result obtained across a generation change is discarded.
         */
        sequence = read_seqcount_begin(&xfrm_state_hash_generation);

        rcu_read_lock();
        /* Pass 1: bucket keyed on the real source address. */
        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
                    x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
                        xfrm_state_look_at(pol, x, fl, encap_family,
                                           &best, &acquire_in_progress, &error);
        }
        if (best || acquire_in_progress)
                goto found;

        /*
         * Pass 2: bucket keyed on the wildcard source address; only the
         * destination address is compared here.
         */
        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
        hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
                    x->if_id == if_id &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
                        xfrm_state_look_at(pol, x, fl, encap_family,
                                           &best, &acquire_in_progress, &error);
        }

found:
        x = best;
        if (!x && !error && !acquire_in_progress) {
                /*
                 * The template names a specific SPI and a state with that
                 * SPI already exists even though it did not qualify above:
                 * report a conflict instead of acquiring.
                 */
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
                                              tmpl->id.proto, encap_family)) != NULL) {
                        to_put = x0;
                        error = -EEXIST;
                        goto out;
                }

                c.net = net;
                /* If the KMs have no listeners (yet...), avoid allocating an SA
                 * for each and every packet - garbage collection might not
                 * handle the flood.
                 */
                if (!km_is_alive(&c)) {
                        error = -ESRCH;
                        goto out;
                }

                x = xfrm_state_alloc(net);
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary state matching only
                 * to current session. */
                xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
                memcpy(&x->mark, &pol->mark, sizeof(x->mark));
                x->if_id = if_id;

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
                if (error) {
                        /* Mark the never-inserted state dead before dropping it. */
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        /*
                         * A key manager accepted the request: publish the
                         * temporary state as ACQ in the global list and the
                         * hash tables, and arm its timer with the acquire
                         * expiry sysctl value.
                         */
                        spin_lock_bh(&net->xfrm.xfrm_state_lock);
                        x->km.state = XFRM_STATE_ACQ;
                        list_add(&x->km.all, &net->xfrm.state_all);
                        /* h still holds the pass-1 by-destination bucket. */
                        hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
                        hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
                                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                        }
                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                        net->xfrm.state_num++;
                        /* A non-NULL next pointer means the bucket was not empty. */
                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x) {
                /* The hold fails if the refcount already reached zero. */
                if (!xfrm_state_hold_rcu(x)) {
                        *err = -EAGAIN;
                        x = NULL;
                }
        } else {
                /* A pending acquire takes precedence over any other error. */
                *err = acquire_in_progress ? -EAGAIN : error;
        }
        rcu_read_unlock();
        if (to_put)
                xfrm_state_put(to_put);

        /*
         * The hash generation changed while we were looking: drop whatever
         * was found and ask the caller to retry.
         */
        if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
                *err = -EAGAIN;
                if (x) {
                        xfrm_state_put(x);
                        x = NULL;
                }
        }

        return x;
}
1070
1071 struct xfrm_state *
1072 xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
1073                     xfrm_address_t *daddr, xfrm_address_t *saddr,
1074                     unsigned short family, u8 mode, u8 proto, u32 reqid)
1075 {
1076         unsigned int h;
1077         struct xfrm_state *rx = NULL, *x = NULL;
1078
1079         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1080         h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1081         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1082                 if (x->props.family == family &&
1083                     x->props.reqid == reqid &&
1084                     (mark & x->mark.m) == x->mark.v &&
1085                     x->if_id == if_id &&
1086                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
1087                     xfrm_state_addr_check(x, daddr, saddr, family) &&
1088                     mode == x->props.mode &&
1089                     proto == x->id.proto &&
1090                     x->km.state == XFRM_STATE_VALID) {
1091                         rx = x;
1092                         break;
1093                 }
1094         }
1095
1096         if (rx)
1097                 xfrm_state_hold(rx);
1098         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1099
1100
1101         return rx;
1102 }
1103 EXPORT_SYMBOL(xfrm_stateonly_find);
1104
1105 struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1106                                               unsigned short family)
1107 {
1108         struct xfrm_state *x;
1109         struct xfrm_state_walk *w;
1110
1111         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1112         list_for_each_entry(w, &net->xfrm.state_all, all) {
1113                 x = container_of(w, struct xfrm_state, km);
1114                 if (x->props.family != family ||
1115                         x->id.spi != spi)
1116                         continue;
1117
1118                 xfrm_state_hold(x);
1119                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1120                 return x;
1121         }
1122         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1123         return NULL;
1124 }
1125 EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1126
1127 static void __xfrm_state_insert(struct xfrm_state *x)
1128 {
1129         struct net *net = xs_net(x);
1130         unsigned int h;
1131
1132         list_add(&x->km.all, &net->xfrm.state_all);
1133
1134         h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1135                           x->props.reqid, x->props.family);
1136         hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1137
1138         h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1139         hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1140
1141         if (x->id.spi) {
1142                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1143                                   x->props.family);
1144
1145                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1146         }
1147
1148         tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1149         if (x->replay_maxage)
1150                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1151
1152         net->xfrm.state_num++;
1153
1154         xfrm_hash_grow_check(net, x->bydst.next != NULL);
1155 }
1156
1157 /* net->xfrm.xfrm_state_lock is held */
1158 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1159 {
1160         struct net *net = xs_net(xnew);
1161         unsigned short family = xnew->props.family;
1162         u32 reqid = xnew->props.reqid;
1163         struct xfrm_state *x;
1164         unsigned int h;
1165         u32 mark = xnew->mark.v & xnew->mark.m;
1166         u32 if_id = xnew->if_id;
1167
1168         h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1169         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1170                 if (x->props.family     == family &&
1171                     x->props.reqid      == reqid &&
1172                     x->if_id            == if_id &&
1173                     (mark & x->mark.m) == x->mark.v &&
1174                     xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1175                     xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1176                         x->genid++;
1177         }
1178 }
1179
1180 void xfrm_state_insert(struct xfrm_state *x)
1181 {
1182         struct net *net = xs_net(x);
1183
1184         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1185         __xfrm_state_bump_genids(x);
1186         __xfrm_state_insert(x);
1187         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1188 }
1189 EXPORT_SYMBOL(xfrm_state_insert);
1190
1191 /* net->xfrm.xfrm_state_lock is held */
1192 static struct xfrm_state *__find_acq_core(struct net *net,
1193                                           const struct xfrm_mark *m,
1194                                           unsigned short family, u8 mode,
1195                                           u32 reqid, u32 if_id, u8 proto,
1196                                           const xfrm_address_t *daddr,
1197                                           const xfrm_address_t *saddr,
1198                                           int create)
1199 {
1200         unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1201         struct xfrm_state *x;
1202         u32 mark = m->v & m->m;
1203
1204         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1205                 if (x->props.reqid  != reqid ||
1206                     x->props.mode   != mode ||
1207                     x->props.family != family ||
1208                     x->km.state     != XFRM_STATE_ACQ ||
1209                     x->id.spi       != 0 ||
1210                     x->id.proto     != proto ||
1211                     (mark & x->mark.m) != x->mark.v ||
1212                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1213                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
1214                         continue;
1215
1216                 xfrm_state_hold(x);
1217                 return x;
1218         }
1219
1220         if (!create)
1221                 return NULL;
1222
1223         x = xfrm_state_alloc(net);
1224         if (likely(x)) {
1225                 switch (family) {
1226                 case AF_INET:
1227                         x->sel.daddr.a4 = daddr->a4;
1228                         x->sel.saddr.a4 = saddr->a4;
1229                         x->sel.prefixlen_d = 32;
1230                         x->sel.prefixlen_s = 32;
1231                         x->props.saddr.a4 = saddr->a4;
1232                         x->id.daddr.a4 = daddr->a4;
1233                         break;
1234
1235                 case AF_INET6:
1236                         x->sel.daddr.in6 = daddr->in6;
1237                         x->sel.saddr.in6 = saddr->in6;
1238                         x->sel.prefixlen_d = 128;
1239                         x->sel.prefixlen_s = 128;
1240                         x->props.saddr.in6 = saddr->in6;
1241                         x->id.daddr.in6 = daddr->in6;
1242                         break;
1243                 }
1244
1245                 x->km.state = XFRM_STATE_ACQ;
1246                 x->id.proto = proto;
1247                 x->props.family = family;
1248                 x->props.mode = mode;
1249                 x->props.reqid = reqid;
1250                 x->if_id = if_id;
1251                 x->mark.v = m->v;
1252                 x->mark.m = m->m;
1253                 x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1254                 xfrm_state_hold(x);
1255                 tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1256                 list_add(&x->km.all, &net->xfrm.state_all);
1257                 hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1258                 h = xfrm_src_hash(net, daddr, saddr, family);
1259                 hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1260
1261                 net->xfrm.state_num++;
1262
1263                 xfrm_hash_grow_check(net, x->bydst.next != NULL);
1264         }
1265
1266         return x;
1267 }
1268
1269 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1270
1271 int xfrm_state_add(struct xfrm_state *x)
1272 {
1273         struct net *net = xs_net(x);
1274         struct xfrm_state *x1, *to_put;
1275         int family;
1276         int err;
1277         u32 mark = x->mark.v & x->mark.m;
1278         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1279
1280         family = x->props.family;
1281
1282         to_put = NULL;
1283
1284         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1285
1286         x1 = __xfrm_state_locate(x, use_spi, family);
1287         if (x1) {
1288                 to_put = x1;
1289                 x1 = NULL;
1290                 err = -EEXIST;
1291                 goto out;
1292         }
1293
1294         if (use_spi && x->km.seq) {
1295                 x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1296                 if (x1 && ((x1->id.proto != x->id.proto) ||
1297                     !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1298                         to_put = x1;
1299                         x1 = NULL;
1300                 }
1301         }
1302
1303         if (use_spi && !x1)
1304                 x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1305                                      x->props.reqid, x->if_id, x->id.proto,
1306                                      &x->id.daddr, &x->props.saddr, 0);
1307
1308         __xfrm_state_bump_genids(x);
1309         __xfrm_state_insert(x);
1310         err = 0;
1311
1312 out:
1313         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1314
1315         if (x1) {
1316                 xfrm_state_delete(x1);
1317                 xfrm_state_put(x1);
1318         }
1319
1320         if (to_put)
1321                 xfrm_state_put(to_put);
1322
1323         return err;
1324 }
1325 EXPORT_SYMBOL(xfrm_state_add);
1326
1327 #ifdef CONFIG_XFRM_MIGRATE
1328 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1329                                            struct xfrm_encap_tmpl *encap)
1330 {
1331         struct net *net = xs_net(orig);
1332         struct xfrm_state *x = xfrm_state_alloc(net);
1333         if (!x)
1334                 goto out;
1335
1336         memcpy(&x->id, &orig->id, sizeof(x->id));
1337         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1338         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1339         x->props.mode = orig->props.mode;
1340         x->props.replay_window = orig->props.replay_window;
1341         x->props.reqid = orig->props.reqid;
1342         x->props.family = orig->props.family;
1343         x->props.saddr = orig->props.saddr;
1344
1345         if (orig->aalg) {
1346                 x->aalg = xfrm_algo_auth_clone(orig->aalg);
1347                 if (!x->aalg)
1348                         goto error;
1349         }
1350         x->props.aalgo = orig->props.aalgo;
1351
1352         if (orig->aead) {
1353                 x->aead = xfrm_algo_aead_clone(orig->aead);
1354                 x->geniv = orig->geniv;
1355                 if (!x->aead)
1356                         goto error;
1357         }
1358         if (orig->ealg) {
1359                 x->ealg = xfrm_algo_clone(orig->ealg);
1360                 if (!x->ealg)
1361                         goto error;
1362         }
1363         x->props.ealgo = orig->props.ealgo;
1364
1365         if (orig->calg) {
1366                 x->calg = xfrm_algo_clone(orig->calg);
1367                 if (!x->calg)
1368                         goto error;
1369         }
1370         x->props.calgo = orig->props.calgo;
1371
1372         if (encap || orig->encap) {
1373                 if (encap)
1374                         x->encap = kmemdup(encap, sizeof(*x->encap),
1375                                         GFP_KERNEL);
1376                 else
1377                         x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1378                                         GFP_KERNEL);
1379
1380                 if (!x->encap)
1381                         goto error;
1382         }
1383
1384         if (orig->coaddr) {
1385                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1386                                     GFP_KERNEL);
1387                 if (!x->coaddr)
1388                         goto error;
1389         }
1390
1391         if (orig->replay_esn) {
1392                 if (xfrm_replay_clone(x, orig))
1393                         goto error;
1394         }
1395
1396         memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1397
1398         if (xfrm_init_state(x) < 0)
1399                 goto error;
1400
1401         x->props.flags = orig->props.flags;
1402         x->props.extra_flags = orig->props.extra_flags;
1403
1404         x->if_id = orig->if_id;
1405         x->tfcpad = orig->tfcpad;
1406         x->replay_maxdiff = orig->replay_maxdiff;
1407         x->replay_maxage = orig->replay_maxage;
1408         x->curlft.add_time = orig->curlft.add_time;
1409         x->km.state = orig->km.state;
1410         x->km.seq = orig->km.seq;
1411         x->replay = orig->replay;
1412         x->preplay = orig->preplay;
1413
1414         return x;
1415
1416  error:
1417         xfrm_state_put(x);
1418 out:
1419         return NULL;
1420 }
1421
1422 struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1423 {
1424         unsigned int h;
1425         struct xfrm_state *x = NULL;
1426
1427         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1428
1429         if (m->reqid) {
1430                 h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1431                                   m->reqid, m->old_family);
1432                 hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1433                         if (x->props.mode != m->mode ||
1434                             x->id.proto != m->proto)
1435                                 continue;
1436                         if (m->reqid && x->props.reqid != m->reqid)
1437                                 continue;
1438                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1439                                              m->old_family) ||
1440                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1441                                              m->old_family))
1442                                 continue;
1443                         xfrm_state_hold(x);
1444                         break;
1445                 }
1446         } else {
1447                 h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1448                                   m->old_family);
1449                 hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1450                         if (x->props.mode != m->mode ||
1451                             x->id.proto != m->proto)
1452                                 continue;
1453                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1454                                              m->old_family) ||
1455                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1456                                              m->old_family))
1457                                 continue;
1458                         xfrm_state_hold(x);
1459                         break;
1460                 }
1461         }
1462
1463         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1464
1465         return x;
1466 }
1467 EXPORT_SYMBOL(xfrm_migrate_state_find);
1468
1469 struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1470                                       struct xfrm_migrate *m,
1471                                       struct xfrm_encap_tmpl *encap)
1472 {
1473         struct xfrm_state *xc;
1474
1475         xc = xfrm_state_clone(x, encap);
1476         if (!xc)
1477                 return NULL;
1478
1479         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1480         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1481
1482         /* add state */
1483         if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1484                 /* a care is needed when the destination address of the
1485                    state is to be updated as it is a part of triplet */
1486                 xfrm_state_insert(xc);
1487         } else {
1488                 if (xfrm_state_add(xc) < 0)
1489                         goto error;
1490         }
1491
1492         return xc;
1493 error:
1494         xfrm_state_put(xc);
1495         return NULL;
1496 }
1497 EXPORT_SYMBOL(xfrm_state_migrate);
1498 #endif
1499
/*
 * Update an existing state from the template state @x.
 *
 * The installed counterpart x1 is located by SPI or by address pair,
 * depending on the protocol.  Outcomes:
 *   -ESRCH   no counterpart exists;
 *   -EEXIST  the counterpart is a kernel-owned state (xfrm_state_kern());
 *   0        the counterpart was larval (ACQ): @x is inserted and the
 *            larval state deleted;
 *   0        the counterpart is VALID: its encapsulation, care-of
 *            address, selector (non-SPI protocols only), lifetime
 *            configuration, output mark and if_id are refreshed from @x,
 *            and @x is marked dead and one reference on it is dropped;
 *   -EINVAL  the counterpart is in any other state, or exactly one of
 *            the two states has an encapsulation, or both have one but
 *            of different types.
 */
int xfrm_state_update(struct xfrm_state *x)
{
        struct xfrm_state *x1, *to_put;
        int err;
        int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
        struct net *net = xs_net(x);

        to_put = NULL;

        spin_lock_bh(&net->xfrm.xfrm_state_lock);
        x1 = __xfrm_state_locate(x, use_spi, x->props.family);

        err = -ESRCH;
        if (!x1)
                goto out;

        if (xfrm_state_kern(x1)) {
                to_put = x1;
                err = -EEXIST;
                goto out;
        }

        if (x1->km.state == XFRM_STATE_ACQ) {
                /*
                 * Replace the larval state: insert @x now and signal the
                 * code below (via x == NULL) to delete x1 after unlock.
                 */
                __xfrm_state_insert(x);
                x = NULL;
        }
        err = 0;

out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);

        if (to_put)
                xfrm_state_put(to_put);

        if (err)
                return err;

        if (!x) {
                xfrm_state_delete(x1);
                xfrm_state_put(x1);
                return 0;
        }

        /* In-place update of a live state; anything but VALID is refused. */
        err = -EINVAL;
        spin_lock_bh(&x1->lock);
        if (likely(x1->km.state == XFRM_STATE_VALID)) {
                /*
                 * Encapsulation may only be refreshed when both states
                 * have one of the same type; a one-sided or mismatching
                 * encapsulation aborts the update.
                 */
                if (x->encap && x1->encap &&
                    x->encap->encap_type == x1->encap->encap_type)
                        memcpy(x1->encap, x->encap, sizeof(*x1->encap));
                else if (x->encap || x1->encap)
                        goto fail;

                if (x->coaddr && x1->coaddr) {
                        memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
                }
                if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
                        memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
                memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
                x1->km.dying = 0;

                /* Re-arm the lifetime timer to fire in one second. */
                tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
                if (x1->curlft.use_time)
                        xfrm_state_check_expire(x1);

                if (x->props.smark.m || x->props.smark.v || x->if_id) {
                        /*
                         * Taken while x1->lock is held; needed because
                         * __xfrm_state_bump_genids() expects
                         * xfrm_state_lock to be held.
                         */
                        spin_lock_bh(&net->xfrm.xfrm_state_lock);

                        if (x->props.smark.m || x->props.smark.v)
                                x1->props.smark = x->props.smark;

                        if (x->if_id)
                                x1->if_id = x->if_id;

                        __xfrm_state_bump_genids(x1);
                        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                }

                err = 0;
                /* The template state is consumed: mark it dead and drop a ref. */
                x->km.state = XFRM_STATE_DEAD;
                __xfrm_state_put(x);
        }

fail:
        spin_unlock_bh(&x1->lock);

        xfrm_state_put(x1);

        return err;
}
EXPORT_SYMBOL(xfrm_state_update);
1590
1591 int xfrm_state_check_expire(struct xfrm_state *x)
1592 {
1593         if (!x->curlft.use_time)
1594                 x->curlft.use_time = ktime_get_real_seconds();
1595
1596         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1597             x->curlft.packets >= x->lft.hard_packet_limit) {
1598                 x->km.state = XFRM_STATE_EXPIRED;
1599                 tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1600                 return -EINVAL;
1601         }
1602
1603         if (!x->km.dying &&
1604             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1605              x->curlft.packets >= x->lft.soft_packet_limit)) {
1606                 x->km.dying = 1;
1607                 km_state_expired(x, 0, 0);
1608         }
1609         return 0;
1610 }
1611 EXPORT_SYMBOL(xfrm_state_check_expire);
1612
1613 struct xfrm_state *
1614 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1615                   u8 proto, unsigned short family)
1616 {
1617         struct xfrm_state *x;
1618
1619         rcu_read_lock();
1620         x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1621         rcu_read_unlock();
1622         return x;
1623 }
1624 EXPORT_SYMBOL(xfrm_state_lookup);
1625
1626 struct xfrm_state *
1627 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1628                          const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1629                          u8 proto, unsigned short family)
1630 {
1631         struct xfrm_state *x;
1632
1633         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1634         x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1635         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1636         return x;
1637 }
1638 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1639
1640 struct xfrm_state *
1641 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1642               u32 if_id, u8 proto, const xfrm_address_t *daddr,
1643               const xfrm_address_t *saddr, int create, unsigned short family)
1644 {
1645         struct xfrm_state *x;
1646
1647         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1648         x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
1649         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1650
1651         return x;
1652 }
1653 EXPORT_SYMBOL(xfrm_find_acq);
1654
1655 #ifdef CONFIG_XFRM_SUB_POLICY
1656 int
1657 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1658                unsigned short family, struct net *net)
1659 {
1660         int i;
1661         int err = 0;
1662         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1663         if (!afinfo)
1664                 return -EAFNOSUPPORT;
1665
1666         spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1667         if (afinfo->tmpl_sort)
1668                 err = afinfo->tmpl_sort(dst, src, n);
1669         else
1670                 for (i = 0; i < n; i++)
1671                         dst[i] = src[i];
1672         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1673         rcu_read_unlock();
1674         return err;
1675 }
1676 EXPORT_SYMBOL(xfrm_tmpl_sort);
1677
1678 int
1679 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1680                 unsigned short family)
1681 {
1682         int i;
1683         int err = 0;
1684         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1685         struct net *net = xs_net(*src);
1686
1687         if (!afinfo)
1688                 return -EAFNOSUPPORT;
1689
1690         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1691         if (afinfo->state_sort)
1692                 err = afinfo->state_sort(dst, src, n);
1693         else
1694                 for (i = 0; i < n; i++)
1695                         dst[i] = src[i];
1696         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1697         rcu_read_unlock();
1698         return err;
1699 }
1700 EXPORT_SYMBOL(xfrm_state_sort);
1701 #endif
1702
1703 /* Silly enough, but I'm lazy to build resolution list */
1704
1705 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1706 {
1707         int i;
1708
1709         for (i = 0; i <= net->xfrm.state_hmask; i++) {
1710                 struct xfrm_state *x;
1711
1712                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1713                         if (x->km.seq == seq &&
1714                             (mark & x->mark.m) == x->mark.v &&
1715                             x->km.state == XFRM_STATE_ACQ) {
1716                                 xfrm_state_hold(x);
1717                                 return x;
1718                         }
1719                 }
1720         }
1721         return NULL;
1722 }
1723
1724 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1725 {
1726         struct xfrm_state *x;
1727
1728         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1729         x = __xfrm_find_acq_byseq(net, mark, seq);
1730         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1731         return x;
1732 }
1733 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1734
1735 u32 xfrm_get_acqseq(void)
1736 {
1737         u32 res;
1738         static atomic_t acqseq;
1739
1740         do {
1741                 res = atomic_inc_return(&acqseq);
1742         } while (!res);
1743
1744         return res;
1745 }
1746 EXPORT_SYMBOL(xfrm_get_acqseq);
1747
1748 int verify_spi_info(u8 proto, u32 min, u32 max)
1749 {
1750         switch (proto) {
1751         case IPPROTO_AH:
1752         case IPPROTO_ESP:
1753                 break;
1754
1755         case IPPROTO_COMP:
1756                 /* IPCOMP spi is 16-bits. */
1757                 if (max >= 0x10000)
1758                         return -EINVAL;
1759                 break;
1760
1761         default:
1762                 return -EINVAL;
1763         }
1764
1765         if (min > max)
1766                 return -EINVAL;
1767
1768         return 0;
1769 }
1770 EXPORT_SYMBOL(verify_spi_info);
1771
1772 int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1773 {
1774         struct net *net = xs_net(x);
1775         unsigned int h;
1776         struct xfrm_state *x0;
1777         int err = -ENOENT;
1778         __be32 minspi = htonl(low);
1779         __be32 maxspi = htonl(high);
1780         u32 mark = x->mark.v & x->mark.m;
1781
1782         spin_lock_bh(&x->lock);
1783         if (x->km.state == XFRM_STATE_DEAD)
1784                 goto unlock;
1785
1786         err = 0;
1787         if (x->id.spi)
1788                 goto unlock;
1789
1790         err = -ENOENT;
1791
1792         if (minspi == maxspi) {
1793                 x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
1794                 if (x0) {
1795                         xfrm_state_put(x0);
1796                         goto unlock;
1797                 }
1798                 x->id.spi = minspi;
1799         } else {
1800                 u32 spi = 0;
1801                 for (h = 0; h < high-low+1; h++) {
1802                         spi = low + prandom_u32()%(high-low+1);
1803                         x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1804                         if (x0 == NULL) {
1805                                 x->id.spi = htonl(spi);
1806                                 break;
1807                         }
1808                         xfrm_state_put(x0);
1809                 }
1810         }
1811         if (x->id.spi) {
1812                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
1813                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1814                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1815                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1816
1817                 err = 0;
1818         }
1819
1820 unlock:
1821         spin_unlock_bh(&x->lock);
1822
1823         return err;
1824 }
1825 EXPORT_SYMBOL(xfrm_alloc_spi);
1826
1827 static bool __xfrm_state_filter_match(struct xfrm_state *x,
1828                                       struct xfrm_address_filter *filter)
1829 {
1830         if (filter) {
1831                 if ((filter->family == AF_INET ||
1832                      filter->family == AF_INET6) &&
1833                     x->props.family != filter->family)
1834                         return false;
1835
1836                 return addr_match(&x->props.saddr, &filter->saddr,
1837                                   filter->splen) &&
1838                        addr_match(&x->id.daddr, &filter->daddr,
1839                                   filter->dplen);
1840         }
1841         return true;
1842 }
1843
1844 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
1845                     int (*func)(struct xfrm_state *, int, void*),
1846                     void *data)
1847 {
1848         struct xfrm_state *state;
1849         struct xfrm_state_walk *x;
1850         int err = 0;
1851
1852         if (walk->seq != 0 && list_empty(&walk->all))
1853                 return 0;
1854
1855         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1856         if (list_empty(&walk->all))
1857                 x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
1858         else
1859                 x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
1860         list_for_each_entry_from(x, &net->xfrm.state_all, all) {
1861                 if (x->state == XFRM_STATE_DEAD)
1862                         continue;
1863                 state = container_of(x, struct xfrm_state, km);
1864                 if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1865                         continue;
1866                 if (!__xfrm_state_filter_match(state, walk->filter))
1867                         continue;
1868                 err = func(state, walk->seq, data);
1869                 if (err) {
1870                         list_move_tail(&walk->all, &x->all);
1871                         goto out;
1872                 }
1873                 walk->seq++;
1874         }
1875         if (walk->seq == 0) {
1876                 err = -ENOENT;
1877                 goto out;
1878         }
1879         list_del_init(&walk->all);
1880 out:
1881         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1882         return err;
1883 }
1884 EXPORT_SYMBOL(xfrm_state_walk);
1885
1886 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1887                           struct xfrm_address_filter *filter)
1888 {
1889         INIT_LIST_HEAD(&walk->all);
1890         walk->proto = proto;
1891         walk->state = XFRM_STATE_DEAD;
1892         walk->seq = 0;
1893         walk->filter = filter;
1894 }
1895 EXPORT_SYMBOL(xfrm_state_walk_init);
1896
1897 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1898 {
1899         kfree(walk->filter);
1900
1901         if (list_empty(&walk->all))
1902                 return;
1903
1904         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1905         list_del(&walk->all);
1906         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1907 }
1908 EXPORT_SYMBOL(xfrm_state_walk_done);
1909
1910 static void xfrm_replay_timer_handler(struct timer_list *t)
1911 {
1912         struct xfrm_state *x = from_timer(x, t, rtimer);
1913
1914         spin_lock(&x->lock);
1915
1916         if (x->km.state == XFRM_STATE_VALID) {
1917                 if (xfrm_aevent_is_on(xs_net(x)))
1918                         x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1919                 else
1920                         x->xflags |= XFRM_TIME_DEFER;
1921         }
1922
1923         spin_unlock(&x->lock);
1924 }
1925
1926 static LIST_HEAD(xfrm_km_list);
1927
1928 void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1929 {
1930         struct xfrm_mgr *km;
1931
1932         rcu_read_lock();
1933         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1934                 if (km->notify_policy)
1935                         km->notify_policy(xp, dir, c);
1936         rcu_read_unlock();
1937 }
1938
1939 void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1940 {
1941         struct xfrm_mgr *km;
1942         rcu_read_lock();
1943         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1944                 if (km->notify)
1945                         km->notify(x, c);
1946         rcu_read_unlock();
1947 }
1948
1949 EXPORT_SYMBOL(km_policy_notify);
1950 EXPORT_SYMBOL(km_state_notify);
1951
1952 void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1953 {
1954         struct km_event c;
1955
1956         c.data.hard = hard;
1957         c.portid = portid;
1958         c.event = XFRM_MSG_EXPIRE;
1959         km_state_notify(x, &c);
1960 }
1961
1962 EXPORT_SYMBOL(km_state_expired);
1963 /*
1964  * We send to all registered managers regardless of failure
1965  * We are happy with one success
1966 */
1967 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1968 {
1969         int err = -EINVAL, acqret;
1970         struct xfrm_mgr *km;
1971
1972         rcu_read_lock();
1973         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1974                 acqret = km->acquire(x, t, pol);
1975                 if (!acqret)
1976                         err = acqret;
1977         }
1978         rcu_read_unlock();
1979         return err;
1980 }
1981 EXPORT_SYMBOL(km_query);
1982
1983 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1984 {
1985         int err = -EINVAL;
1986         struct xfrm_mgr *km;
1987
1988         rcu_read_lock();
1989         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1990                 if (km->new_mapping)
1991                         err = km->new_mapping(x, ipaddr, sport);
1992                 if (!err)
1993                         break;
1994         }
1995         rcu_read_unlock();
1996         return err;
1997 }
1998 EXPORT_SYMBOL(km_new_mapping);
1999
2000 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
2001 {
2002         struct km_event c;
2003
2004         c.data.hard = hard;
2005         c.portid = portid;
2006         c.event = XFRM_MSG_POLEXPIRE;
2007         km_policy_notify(pol, dir, &c);
2008 }
2009 EXPORT_SYMBOL(km_policy_expired);
2010
2011 #ifdef CONFIG_XFRM_MIGRATE
2012 int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2013                const struct xfrm_migrate *m, int num_migrate,
2014                const struct xfrm_kmaddress *k,
2015                const struct xfrm_encap_tmpl *encap)
2016 {
2017         int err = -EINVAL;
2018         int ret;
2019         struct xfrm_mgr *km;
2020
2021         rcu_read_lock();
2022         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2023                 if (km->migrate) {
2024                         ret = km->migrate(sel, dir, type, m, num_migrate, k,
2025                                           encap);
2026                         if (!ret)
2027                                 err = ret;
2028                 }
2029         }
2030         rcu_read_unlock();
2031         return err;
2032 }
2033 EXPORT_SYMBOL(km_migrate);
2034 #endif
2035
2036 int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2037 {
2038         int err = -EINVAL;
2039         int ret;
2040         struct xfrm_mgr *km;
2041
2042         rcu_read_lock();
2043         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2044                 if (km->report) {
2045                         ret = km->report(net, proto, sel, addr);
2046                         if (!ret)
2047                                 err = ret;
2048                 }
2049         }
2050         rcu_read_unlock();
2051         return err;
2052 }
2053 EXPORT_SYMBOL(km_report);
2054
2055 bool km_is_alive(const struct km_event *c)
2056 {
2057         struct xfrm_mgr *km;
2058         bool is_alive = false;
2059
2060         rcu_read_lock();
2061         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2062                 if (km->is_alive && km->is_alive(c)) {
2063                         is_alive = true;
2064                         break;
2065                 }
2066         }
2067         rcu_read_unlock();
2068
2069         return is_alive;
2070 }
2071 EXPORT_SYMBOL(km_is_alive);
2072
2073 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
2074 {
2075         int err;
2076         u8 *data;
2077         struct xfrm_mgr *km;
2078         struct xfrm_policy *pol = NULL;
2079
2080 #ifdef CONFIG_COMPAT
2081         if (in_compat_syscall())
2082                 return -EOPNOTSUPP;
2083 #endif
2084
2085         if (!optval && !optlen) {
2086                 xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
2087                 xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
2088                 __sk_dst_reset(sk);
2089                 return 0;
2090         }
2091
2092         if (optlen <= 0 || optlen > PAGE_SIZE)
2093                 return -EMSGSIZE;
2094
2095         data = memdup_user(optval, optlen);
2096         if (IS_ERR(data))
2097                 return PTR_ERR(data);
2098
2099         err = -EINVAL;
2100         rcu_read_lock();
2101         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2102                 pol = km->compile_policy(sk, optname, data,
2103                                          optlen, &err);
2104                 if (err >= 0)
2105                         break;
2106         }
2107         rcu_read_unlock();
2108
2109         if (err >= 0) {
2110                 xfrm_sk_policy_insert(sk, err, pol);
2111                 xfrm_pol_put(pol);
2112                 __sk_dst_reset(sk);
2113                 err = 0;
2114         }
2115
2116         kfree(data);
2117         return err;
2118 }
2119 EXPORT_SYMBOL(xfrm_user_policy);
2120
2121 static DEFINE_SPINLOCK(xfrm_km_lock);
2122
2123 int xfrm_register_km(struct xfrm_mgr *km)
2124 {
2125         spin_lock_bh(&xfrm_km_lock);
2126         list_add_tail_rcu(&km->list, &xfrm_km_list);
2127         spin_unlock_bh(&xfrm_km_lock);
2128         return 0;
2129 }
2130 EXPORT_SYMBOL(xfrm_register_km);
2131
2132 int xfrm_unregister_km(struct xfrm_mgr *km)
2133 {
2134         spin_lock_bh(&xfrm_km_lock);
2135         list_del_rcu(&km->list);
2136         spin_unlock_bh(&xfrm_km_lock);
2137         synchronize_rcu();
2138         return 0;
2139 }
2140 EXPORT_SYMBOL(xfrm_unregister_km);
2141
2142 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2143 {
2144         int err = 0;
2145
2146         if (WARN_ON(afinfo->family >= NPROTO))
2147                 return -EAFNOSUPPORT;
2148
2149         spin_lock_bh(&xfrm_state_afinfo_lock);
2150         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2151                 err = -EEXIST;
2152         else
2153                 rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2154         spin_unlock_bh(&xfrm_state_afinfo_lock);
2155         return err;
2156 }
2157 EXPORT_SYMBOL(xfrm_state_register_afinfo);
2158
2159 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2160 {
2161         int err = 0, family = afinfo->family;
2162
2163         if (WARN_ON(family >= NPROTO))
2164                 return -EAFNOSUPPORT;
2165
2166         spin_lock_bh(&xfrm_state_afinfo_lock);
2167         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2168                 if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2169                         err = -EINVAL;
2170                 else
2171                         RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2172         }
2173         spin_unlock_bh(&xfrm_state_afinfo_lock);
2174         synchronize_rcu();
2175         return err;
2176 }
2177 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2178
2179 struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2180 {
2181         if (unlikely(family >= NPROTO))
2182                 return NULL;
2183
2184         return rcu_dereference(xfrm_state_afinfo[family]);
2185 }
2186
2187 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2188 {
2189         struct xfrm_state_afinfo *afinfo;
2190         if (unlikely(family >= NPROTO))
2191                 return NULL;
2192         rcu_read_lock();
2193         afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2194         if (unlikely(!afinfo))
2195                 rcu_read_unlock();
2196         return afinfo;
2197 }
2198
2199 void xfrm_flush_gc(void)
2200 {
2201         flush_work(&xfrm_state_gc_work);
2202 }
2203 EXPORT_SYMBOL(xfrm_flush_gc);
2204
2205 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2206 void xfrm_state_delete_tunnel(struct xfrm_state *x)
2207 {
2208         if (x->tunnel) {
2209                 struct xfrm_state *t = x->tunnel;
2210
2211                 if (atomic_read(&t->tunnel_users) == 2)
2212                         xfrm_state_delete(t);
2213                 atomic_dec(&t->tunnel_users);
2214                 xfrm_state_put(t);
2215                 x->tunnel = NULL;
2216         }
2217 }
2218 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2219
2220 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
2221 {
2222         const struct xfrm_type *type = READ_ONCE(x->type);
2223
2224         if (x->km.state == XFRM_STATE_VALID &&
2225             type && type->get_mtu)
2226                 return type->get_mtu(x, mtu);
2227
2228         return mtu - x->props.header_len;
2229 }
2230
2231 int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
2232 {
2233         struct xfrm_state_afinfo *afinfo;
2234         struct xfrm_mode *inner_mode;
2235         int family = x->props.family;
2236         int err;
2237
2238         err = -EAFNOSUPPORT;
2239         afinfo = xfrm_state_get_afinfo(family);
2240         if (!afinfo)
2241                 goto error;
2242
2243         err = 0;
2244         if (afinfo->init_flags)
2245                 err = afinfo->init_flags(x);
2246
2247         rcu_read_unlock();
2248
2249         if (err)
2250                 goto error;
2251
2252         err = -EPROTONOSUPPORT;
2253
2254         if (x->sel.family != AF_UNSPEC) {
2255                 inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2256                 if (inner_mode == NULL)
2257                         goto error;
2258
2259                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2260                     family != x->sel.family) {
2261                         xfrm_put_mode(inner_mode);
2262                         goto error;
2263                 }
2264
2265                 x->inner_mode = inner_mode;
2266         } else {
2267                 struct xfrm_mode *inner_mode_iaf;
2268                 int iafamily = AF_INET;
2269
2270                 inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2271                 if (inner_mode == NULL)
2272                         goto error;
2273
2274                 if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2275                         xfrm_put_mode(inner_mode);
2276                         goto error;
2277                 }
2278                 x->inner_mode = inner_mode;
2279
2280                 if (x->props.family == AF_INET)
2281                         iafamily = AF_INET6;
2282
2283                 inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2284                 if (inner_mode_iaf) {
2285                         if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2286                                 x->inner_mode_iaf = inner_mode_iaf;
2287                         else
2288                                 xfrm_put_mode(inner_mode_iaf);
2289                 }
2290         }
2291
2292         x->type = xfrm_get_type(x->id.proto, family);
2293         if (x->type == NULL)
2294                 goto error;
2295
2296         x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
2297
2298         err = x->type->init_state(x);
2299         if (err)
2300                 goto error;
2301
2302         x->outer_mode = xfrm_get_mode(x->props.mode, family);
2303         if (x->outer_mode == NULL) {
2304                 err = -EPROTONOSUPPORT;
2305                 goto error;
2306         }
2307
2308         if (init_replay) {
2309                 err = xfrm_init_replay(x);
2310                 if (err)
2311                         goto error;
2312         }
2313
2314 error:
2315         return err;
2316 }
2317
2318 EXPORT_SYMBOL(__xfrm_init_state);
2319
2320 int xfrm_init_state(struct xfrm_state *x)
2321 {
2322         int err;
2323
2324         err = __xfrm_init_state(x, true, false);
2325         if (!err)
2326                 x->km.state = XFRM_STATE_VALID;
2327
2328         return err;
2329 }
2330
2331 EXPORT_SYMBOL(xfrm_init_state);
2332
2333 int __net_init xfrm_state_init(struct net *net)
2334 {
2335         unsigned int sz;
2336
2337         if (net_eq(net, &init_net))
2338                 xfrm_state_cache = KMEM_CACHE(xfrm_state,
2339                                               SLAB_HWCACHE_ALIGN | SLAB_PANIC);
2340
2341         INIT_LIST_HEAD(&net->xfrm.state_all);
2342
2343         sz = sizeof(struct hlist_head) * 8;
2344
2345         net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2346         if (!net->xfrm.state_bydst)
2347                 goto out_bydst;
2348         net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2349         if (!net->xfrm.state_bysrc)
2350                 goto out_bysrc;
2351         net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2352         if (!net->xfrm.state_byspi)
2353                 goto out_byspi;
2354         net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2355
2356         net->xfrm.state_num = 0;
2357         INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2358         spin_lock_init(&net->xfrm.xfrm_state_lock);
2359         return 0;
2360
2361 out_byspi:
2362         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2363 out_bysrc:
2364         xfrm_hash_free(net->xfrm.state_bydst, sz);
2365 out_bydst:
2366         return -ENOMEM;
2367 }
2368
2369 void xfrm_state_fini(struct net *net)
2370 {
2371         unsigned int sz;
2372
2373         flush_work(&net->xfrm.state_hash_work);
2374         xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
2375         flush_work(&xfrm_state_gc_work);
2376
2377         WARN_ON(!list_empty(&net->xfrm.state_all));
2378
2379         sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2380         WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2381         xfrm_hash_free(net->xfrm.state_byspi, sz);
2382         WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2383         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2384         WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2385         xfrm_hash_free(net->xfrm.state_bydst, sz);
2386 }
2387
2388 #ifdef CONFIG_AUDITSYSCALL
2389 static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2390                                      struct audit_buffer *audit_buf)
2391 {
2392         struct xfrm_sec_ctx *ctx = x->security;
2393         u32 spi = ntohl(x->id.spi);
2394
2395         if (ctx)
2396                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2397                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2398
2399         switch (x->props.family) {
2400         case AF_INET:
2401                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2402                                  &x->props.saddr.a4, &x->id.daddr.a4);
2403                 break;
2404         case AF_INET6:
2405                 audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2406                                  x->props.saddr.a6, x->id.daddr.a6);
2407                 break;
2408         }
2409
2410         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2411 }
2412
2413 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2414                                       struct audit_buffer *audit_buf)
2415 {
2416         const struct iphdr *iph4;
2417         const struct ipv6hdr *iph6;
2418
2419         switch (family) {
2420         case AF_INET:
2421                 iph4 = ip_hdr(skb);
2422                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2423                                  &iph4->saddr, &iph4->daddr);
2424                 break;
2425         case AF_INET6:
2426                 iph6 = ipv6_hdr(skb);
2427                 audit_log_format(audit_buf,
2428                                  " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2429                                  &iph6->saddr, &iph6->daddr,
2430                                  iph6->flow_lbl[0] & 0x0f,
2431                                  iph6->flow_lbl[1],
2432                                  iph6->flow_lbl[2]);
2433                 break;
2434         }
2435 }
2436
2437 void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2438 {
2439         struct audit_buffer *audit_buf;
2440
2441         audit_buf = xfrm_audit_start("SAD-add");
2442         if (audit_buf == NULL)
2443                 return;
2444         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2445         xfrm_audit_helper_sainfo(x, audit_buf);
2446         audit_log_format(audit_buf, " res=%u", result);
2447         audit_log_end(audit_buf);
2448 }
2449 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2450
2451 void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
2452 {
2453         struct audit_buffer *audit_buf;
2454
2455         audit_buf = xfrm_audit_start("SAD-delete");
2456         if (audit_buf == NULL)
2457                 return;
2458         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2459         xfrm_audit_helper_sainfo(x, audit_buf);
2460         audit_log_format(audit_buf, " res=%u", result);
2461         audit_log_end(audit_buf);
2462 }
2463 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2464
2465 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2466                                       struct sk_buff *skb)
2467 {
2468         struct audit_buffer *audit_buf;
2469         u32 spi;
2470
2471         audit_buf = xfrm_audit_start("SA-replay-overflow");
2472         if (audit_buf == NULL)
2473                 return;
2474         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2475         /* don't record the sequence number because it's inherent in this kind
2476          * of audit message */
2477         spi = ntohl(x->id.spi);
2478         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2479         audit_log_end(audit_buf);
2480 }
2481 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2482
2483 void xfrm_audit_state_replay(struct xfrm_state *x,
2484                              struct sk_buff *skb, __be32 net_seq)
2485 {
2486         struct audit_buffer *audit_buf;
2487         u32 spi;
2488
2489         audit_buf = xfrm_audit_start("SA-replayed-pkt");
2490         if (audit_buf == NULL)
2491                 return;
2492         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2493         spi = ntohl(x->id.spi);
2494         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2495                          spi, spi, ntohl(net_seq));
2496         audit_log_end(audit_buf);
2497 }
2498 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2499
2500 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2501 {
2502         struct audit_buffer *audit_buf;
2503
2504         audit_buf = xfrm_audit_start("SA-notfound");
2505         if (audit_buf == NULL)
2506                 return;
2507         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2508         audit_log_end(audit_buf);
2509 }
2510 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2511
2512 void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2513                                __be32 net_spi, __be32 net_seq)
2514 {
2515         struct audit_buffer *audit_buf;
2516         u32 spi;
2517
2518         audit_buf = xfrm_audit_start("SA-notfound");
2519         if (audit_buf == NULL)
2520                 return;
2521         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2522         spi = ntohl(net_spi);
2523         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2524                          spi, spi, ntohl(net_seq));
2525         audit_log_end(audit_buf);
2526 }
2527 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2528
2529 void xfrm_audit_state_icvfail(struct xfrm_state *x,
2530                               struct sk_buff *skb, u8 proto)
2531 {
2532         struct audit_buffer *audit_buf;
2533         __be32 net_spi;
2534         __be32 net_seq;
2535
2536         audit_buf = xfrm_audit_start("SA-icv-failure");
2537         if (audit_buf == NULL)
2538                 return;
2539         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2540         if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2541                 u32 spi = ntohl(net_spi);
2542                 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2543                                  spi, spi, ntohl(net_seq));
2544         }
2545         audit_log_end(audit_buf);
2546 }
2547 EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2548 #endif /* CONFIG_AUDITSYSCALL */