tcp: tso: allow deferring under reordering state
[linux-block.git] / net / core / lwtunnel.c
CommitLineData
499a2425
RP
1/*
2 * lwtunnel Infrastructure for light weight tunnels like mpls
3 *
4 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/capability.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/slab.h>
18#include <linux/uaccess.h>
19#include <linux/skbuff.h>
20#include <linux/netdevice.h>
21#include <linux/lwtunnel.h>
22#include <linux/in.h>
23#include <linux/init.h>
24#include <linux/err.h>
25
26#include <net/lwtunnel.h>
27#include <net/rtnetlink.h>
ffce4196 28#include <net/ip6_fib.h>
499a2425
RP
29
30struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
31{
32 struct lwtunnel_state *lws;
33
34 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
35
36 return lws;
37}
38EXPORT_SYMBOL(lwtunnel_state_alloc);
39
40const struct lwtunnel_encap_ops __rcu *
41 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
42
43int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
44 unsigned int num)
45{
46 if (num > LWTUNNEL_ENCAP_MAX)
47 return -ERANGE;
48
49 return !cmpxchg((const struct lwtunnel_encap_ops **)
50 &lwtun_encaps[num],
51 NULL, ops) ? 0 : -1;
52}
53EXPORT_SYMBOL(lwtunnel_encap_add_ops);
54
55int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
56 unsigned int encap_type)
57{
58 int ret;
59
60 if (encap_type == LWTUNNEL_ENCAP_NONE ||
61 encap_type > LWTUNNEL_ENCAP_MAX)
62 return -ERANGE;
63
64 ret = (cmpxchg((const struct lwtunnel_encap_ops **)
65 &lwtun_encaps[encap_type],
66 ops, NULL) == ops) ? 0 : -1;
67
68 synchronize_net();
69
70 return ret;
71}
72EXPORT_SYMBOL(lwtunnel_encap_del_ops);
73
74int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
75 struct nlattr *encap, struct lwtunnel_state **lws)
76{
77 const struct lwtunnel_encap_ops *ops;
78 int ret = -EINVAL;
79
80 if (encap_type == LWTUNNEL_ENCAP_NONE ||
81 encap_type > LWTUNNEL_ENCAP_MAX)
82 return ret;
83
84 ret = -EOPNOTSUPP;
85 rcu_read_lock();
86 ops = rcu_dereference(lwtun_encaps[encap_type]);
87 if (likely(ops && ops->build_state))
88 ret = ops->build_state(dev, encap, lws);
89 rcu_read_unlock();
90
91 return ret;
92}
93EXPORT_SYMBOL(lwtunnel_build_state);
94
95int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
96{
97 const struct lwtunnel_encap_ops *ops;
98 struct nlattr *nest;
99 int ret = -EINVAL;
100
101 if (!lwtstate)
102 return 0;
103
104 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
105 lwtstate->type > LWTUNNEL_ENCAP_MAX)
106 return 0;
107
108 ret = -EOPNOTSUPP;
109 nest = nla_nest_start(skb, RTA_ENCAP);
110 rcu_read_lock();
111 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
112 if (likely(ops && ops->fill_encap))
113 ret = ops->fill_encap(skb, lwtstate);
114 rcu_read_unlock();
115
116 if (ret)
117 goto nla_put_failure;
118 nla_nest_end(skb, nest);
119 ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
120 if (ret)
121 goto nla_put_failure;
122
123 return 0;
124
125nla_put_failure:
126 nla_nest_cancel(skb, nest);
127
128 return (ret == -EOPNOTSUPP ? 0 : ret);
129}
130EXPORT_SYMBOL(lwtunnel_fill_encap);
131
132int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
133{
134 const struct lwtunnel_encap_ops *ops;
135 int ret = 0;
136
137 if (!lwtstate)
138 return 0;
139
140 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
141 lwtstate->type > LWTUNNEL_ENCAP_MAX)
142 return 0;
143
144 rcu_read_lock();
145 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
146 if (likely(ops && ops->get_encap_size))
147 ret = nla_total_size(ops->get_encap_size(lwtstate));
148 rcu_read_unlock();
149
150 return ret;
151}
152EXPORT_SYMBOL(lwtunnel_get_encap_size);
153
154int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
155{
156 const struct lwtunnel_encap_ops *ops;
157 int ret = 0;
158
159 if (!a && !b)
160 return 0;
161
162 if (!a || !b)
163 return 1;
164
165 if (a->type != b->type)
166 return 1;
167
168 if (a->type == LWTUNNEL_ENCAP_NONE ||
169 a->type > LWTUNNEL_ENCAP_MAX)
170 return 0;
171
172 rcu_read_lock();
173 ops = rcu_dereference(lwtun_encaps[a->type]);
174 if (likely(ops && ops->cmp_encap))
175 ret = ops->cmp_encap(a, b);
176 rcu_read_unlock();
177
178 return ret;
179}
180EXPORT_SYMBOL(lwtunnel_cmp_encap);
ffce4196
RP
181
182int __lwtunnel_output(struct sock *sk, struct sk_buff *skb,
183 struct lwtunnel_state *lwtstate)
184{
185 const struct lwtunnel_encap_ops *ops;
186 int ret = -EINVAL;
187
188 if (!lwtstate)
189 goto drop;
190
191 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
192 lwtstate->type > LWTUNNEL_ENCAP_MAX)
193 return 0;
194
195 ret = -EOPNOTSUPP;
196 rcu_read_lock();
197 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
198 if (likely(ops && ops->output))
199 ret = ops->output(sk, skb);
200 rcu_read_unlock();
201
202 if (ret == -EOPNOTSUPP)
203 goto drop;
204
205 return ret;
206
207drop:
208 kfree(skb);
209
210 return ret;
211}
212
213int lwtunnel_output6(struct sock *sk, struct sk_buff *skb)
214{
215 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
216 struct lwtunnel_state *lwtstate = NULL;
217
218 if (rt)
219 lwtstate = rt->rt6i_lwtstate;
220
221 return __lwtunnel_output(sk, skb, lwtstate);
222}
223EXPORT_SYMBOL(lwtunnel_output6);
224
225int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
226{
227 struct rtable *rt = (struct rtable *)skb_dst(skb);
228 struct lwtunnel_state *lwtstate = NULL;
229
230 if (rt)
231 lwtstate = rt->rt_lwtstate;
232
233 return __lwtunnel_output(sk, skb, lwtstate);
234}
235EXPORT_SYMBOL(lwtunnel_output);