]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/netfilter/ipvs/ip_vs_ctl.c
Merge branch 'linus' into omap-for-v3.8/cleanup-headers-prepare-multiplatform-v3
[can-eth-gw-linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/* mutex for IPVS sockopts; a sleeping lock is used as [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* IPVS debug level (one of the sysctl variables); statics start zeroed */
static int sysctl_ip_vs_debug_level;

/*
 *      Report the current IPVS debug level
 */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
70 #endif
71
72
73 /*  Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net *net,
80                                      const struct in6_addr *addr)
81 {
82         struct flowi6 fl6 = {
83                 .daddr = *addr,
84         };
85         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86         bool is_local;
87
88         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
89
90         dst_release(dst);
91         return is_local;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash table: for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned int hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned int hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned int hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned int porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned int hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del_init(&dest->d_list);
543         }
544
545         return 1;
546 }
547
548 /*
549  *      Lookup real service by <proto,addr,port> in the real service table.
550  */
551 struct ip_vs_dest *
552 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
553                           const union nf_inet_addr *daddr,
554                           __be16 dport)
555 {
556         struct netns_ipvs *ipvs = net_ipvs(net);
557         unsigned int hash;
558         struct ip_vs_dest *dest;
559
560         /*
561          *      Check for "full" addressed entries
562          *      Return the first found entry
563          */
564         hash = ip_vs_rs_hashkey(af, daddr, dport);
565
566         read_lock(&ipvs->rs_lock);
567         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
568                 if ((dest->af == af)
569                     && ip_vs_addr_equal(af, &dest->addr, daddr)
570                     && (dest->port == dport)
571                     && ((dest->protocol == protocol) ||
572                         dest->vfwmark)) {
573                         /* HIT */
574                         read_unlock(&ipvs->rs_lock);
575                         return dest;
576                 }
577         }
578         read_unlock(&ipvs->rs_lock);
579
580         return NULL;
581 }
582
583 /*
584  *      Lookup destination by {addr,port} in the given service
585  */
586 static struct ip_vs_dest *
587 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
588                   __be16 dport)
589 {
590         struct ip_vs_dest *dest;
591
592         /*
593          * Find the destination for the given service
594          */
595         list_for_each_entry(dest, &svc->destinations, n_list) {
596                 if ((dest->af == svc->af)
597                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
598                     && (dest->port == dport)) {
599                         /* HIT */
600                         return dest;
601                 }
602         }
603
604         return NULL;
605 }
606
607 /*
608  * Find destination by {daddr,dport,vaddr,protocol}
609  * Cretaed to be used in ip_vs_process_message() in
610  * the backup synchronization daemon. It finds the
611  * destination to be bound to the received connection
612  * on the backup.
613  *
614  * ip_vs_lookup_real_service() looked promissing, but
615  * seems not working as expected.
616  */
617 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
618                                    const union nf_inet_addr *daddr,
619                                    __be16 dport,
620                                    const union nf_inet_addr *vaddr,
621                                    __be16 vport, __u16 protocol, __u32 fwmark,
622                                    __u32 flags)
623 {
624         struct ip_vs_dest *dest;
625         struct ip_vs_service *svc;
626         __be16 port = dport;
627
628         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
629         if (!svc)
630                 return NULL;
631         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
632                 port = 0;
633         dest = ip_vs_lookup_dest(svc, daddr, port);
634         if (!dest)
635                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
636         if (dest)
637                 atomic_inc(&dest->refcnt);
638         ip_vs_service_put(svc);
639         return dest;
640 }
641
642 /*
643  *  Lookup dest by {svc,addr,port} in the destination trash.
644  *  The destination trash is used to hold the destinations that are removed
645  *  from the service table but are still referenced by some conn entries.
646  *  The reason to add the destination trash is when the dest is temporary
647  *  down (either by administrator or by monitor program), the dest can be
648  *  picked back from the trash, the remaining connections to the dest can
649  *  continue, and the counting information of the dest is also useful for
650  *  scheduling.
651  */
652 static struct ip_vs_dest *
653 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
654                      __be16 dport)
655 {
656         struct ip_vs_dest *dest, *nxt;
657         struct netns_ipvs *ipvs = net_ipvs(svc->net);
658
659         /*
660          * Find the destination in trash
661          */
662         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
663                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
664                               "dest->refcnt=%d\n",
665                               dest->vfwmark,
666                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
667                               ntohs(dest->port),
668                               atomic_read(&dest->refcnt));
669                 if (dest->af == svc->af &&
670                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
671                     dest->port == dport &&
672                     dest->vfwmark == svc->fwmark &&
673                     dest->protocol == svc->protocol &&
674                     (svc->fwmark ||
675                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
676                       dest->vport == svc->port))) {
677                         /* HIT */
678                         return dest;
679                 }
680
681                 /*
682                  * Try to purge the destination from trash if not referenced
683                  */
684                 if (atomic_read(&dest->refcnt) == 1) {
685                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
686                                       "from trash\n",
687                                       dest->vfwmark,
688                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
689                                       ntohs(dest->port));
690                         list_del(&dest->n_list);
691                         ip_vs_dst_reset(dest);
692                         __ip_vs_unbind_svc(dest);
693                         free_percpu(dest->stats.cpustats);
694                         kfree(dest);
695                 }
696         }
697
698         return NULL;
699 }
700
701
702 /*
703  *  Clean up all the destinations in the trash
704  *  Called by the ip_vs_control_cleanup()
705  *
706  *  When the ip_vs_control_clearup is activated by ipvs module exit,
707  *  the service tables must have been flushed and all the connections
708  *  are expired, and the refcnt of each destination in the trash must
709  *  be 1, so we simply release them here.
710  */
711 static void ip_vs_trash_cleanup(struct net *net)
712 {
713         struct ip_vs_dest *dest, *nxt;
714         struct netns_ipvs *ipvs = net_ipvs(net);
715
716         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
717                 list_del(&dest->n_list);
718                 ip_vs_dst_reset(dest);
719                 __ip_vs_unbind_svc(dest);
720                 free_percpu(dest->stats.cpustats);
721                 kfree(dest);
722         }
723 }
724
725 static void
726 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
727 {
728 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
729
730         spin_lock_bh(&src->lock);
731
732         IP_VS_SHOW_STATS_COUNTER(conns);
733         IP_VS_SHOW_STATS_COUNTER(inpkts);
734         IP_VS_SHOW_STATS_COUNTER(outpkts);
735         IP_VS_SHOW_STATS_COUNTER(inbytes);
736         IP_VS_SHOW_STATS_COUNTER(outbytes);
737
738         ip_vs_read_estimator(dst, src);
739
740         spin_unlock_bh(&src->lock);
741 }
742
743 static void
744 ip_vs_zero_stats(struct ip_vs_stats *stats)
745 {
746         spin_lock_bh(&stats->lock);
747
748         /* get current counters as zero point, rates are zeroed */
749
750 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
751
752         IP_VS_ZERO_STATS_COUNTER(conns);
753         IP_VS_ZERO_STATS_COUNTER(inpkts);
754         IP_VS_ZERO_STATS_COUNTER(outpkts);
755         IP_VS_ZERO_STATS_COUNTER(inbytes);
756         IP_VS_ZERO_STATS_COUNTER(outbytes);
757
758         ip_vs_zero_estimator(stats);
759
760         spin_unlock_bh(&stats->lock);
761 }
762
763 /*
764  *      Update a destination in the given service
765  */
766 static void
767 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
768                     struct ip_vs_dest_user_kern *udest, int add)
769 {
770         struct netns_ipvs *ipvs = net_ipvs(svc->net);
771         int conn_flags;
772
773         /* set the weight and the flags */
774         atomic_set(&dest->weight, udest->weight);
775         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776         conn_flags |= IP_VS_CONN_F_INACTIVE;
777
778         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781         } else {
782                 /*
783                  *    Put the real service in rs_table if not present.
784                  *    For now only for NAT!
785                  */
786                 write_lock_bh(&ipvs->rs_lock);
787                 ip_vs_rs_hash(ipvs, dest);
788                 write_unlock_bh(&ipvs->rs_lock);
789         }
790         atomic_set(&dest->conn_flags, conn_flags);
791
792         /* bind the service */
793         if (!dest->svc) {
794                 __ip_vs_bind_svc(dest, svc);
795         } else {
796                 if (dest->svc != svc) {
797                         __ip_vs_unbind_svc(dest);
798                         ip_vs_zero_stats(&dest->stats);
799                         __ip_vs_bind_svc(dest, svc);
800                 }
801         }
802
803         /* set the dest status flags */
804         dest->flags |= IP_VS_DEST_F_AVAILABLE;
805
806         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808         dest->u_threshold = udest->u_threshold;
809         dest->l_threshold = udest->l_threshold;
810
811         spin_lock_bh(&dest->dst_lock);
812         ip_vs_dst_reset(dest);
813         spin_unlock_bh(&dest->dst_lock);
814
815         if (add)
816                 ip_vs_start_estimator(svc->net, &dest->stats);
817
818         write_lock_bh(&__ip_vs_svc_lock);
819
820         /* Wait until all other svc users go away */
821         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823         if (add) {
824                 list_add(&dest->n_list, &svc->destinations);
825                 svc->num_dests++;
826         }
827
828         /* call the update_service, because server weight may be changed */
829         if (svc->scheduler->update_service)
830                 svc->scheduler->update_service(svc);
831
832         write_unlock_bh(&__ip_vs_svc_lock);
833 }
834
835
836 /*
837  *      Create a destination for the given service
838  */
839 static int
840 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841                struct ip_vs_dest **dest_p)
842 {
843         struct ip_vs_dest *dest;
844         unsigned int atype;
845
846         EnterFunction(2);
847
848 #ifdef CONFIG_IP_VS_IPV6
849         if (svc->af == AF_INET6) {
850                 atype = ipv6_addr_type(&udest->addr.in6);
851                 if ((!(atype & IPV6_ADDR_UNICAST) ||
852                         atype & IPV6_ADDR_LINKLOCAL) &&
853                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854                         return -EINVAL;
855         } else
856 #endif
857         {
858                 atype = inet_addr_type(svc->net, udest->addr.ip);
859                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860                         return -EINVAL;
861         }
862
863         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
864         if (dest == NULL)
865                 return -ENOMEM;
866
867         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
868         if (!dest->stats.cpustats)
869                 goto err_alloc;
870
871         dest->af = svc->af;
872         dest->protocol = svc->protocol;
873         dest->vaddr = svc->addr;
874         dest->vport = svc->port;
875         dest->vfwmark = svc->fwmark;
876         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877         dest->port = udest->port;
878
879         atomic_set(&dest->activeconns, 0);
880         atomic_set(&dest->inactconns, 0);
881         atomic_set(&dest->persistconns, 0);
882         atomic_set(&dest->refcnt, 1);
883
884         INIT_LIST_HEAD(&dest->d_list);
885         spin_lock_init(&dest->dst_lock);
886         spin_lock_init(&dest->stats.lock);
887         __ip_vs_update_dest(svc, dest, udest, 1);
888
889         *dest_p = dest;
890
891         LeaveFunction(2);
892         return 0;
893
894 err_alloc:
895         kfree(dest);
896         return -ENOMEM;
897 }
898
899
900 /*
901  *      Add a destination into an existing service
902  */
903 static int
904 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
905 {
906         struct ip_vs_dest *dest;
907         union nf_inet_addr daddr;
908         __be16 dport = udest->port;
909         int ret;
910
911         EnterFunction(2);
912
913         if (udest->weight < 0) {
914                 pr_err("%s(): server weight less than zero\n", __func__);
915                 return -ERANGE;
916         }
917
918         if (udest->l_threshold > udest->u_threshold) {
919                 pr_err("%s(): lower threshold is higher than upper threshold\n",
920                         __func__);
921                 return -ERANGE;
922         }
923
924         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
925
926         /*
927          * Check if the dest already exists in the list
928          */
929         dest = ip_vs_lookup_dest(svc, &daddr, dport);
930
931         if (dest != NULL) {
932                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
933                 return -EEXIST;
934         }
935
936         /*
937          * Check if the dest already exists in the trash and
938          * is from the same service
939          */
940         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
941
942         if (dest != NULL) {
943                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
944                               "dest->refcnt=%d, service %u/%s:%u\n",
945                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
946                               atomic_read(&dest->refcnt),
947                               dest->vfwmark,
948                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
949                               ntohs(dest->vport));
950
951                 /*
952                  * Get the destination from the trash
953                  */
954                 list_del(&dest->n_list);
955
956                 __ip_vs_update_dest(svc, dest, udest, 1);
957                 ret = 0;
958         } else {
959                 /*
960                  * Allocate and initialize the dest structure
961                  */
962                 ret = ip_vs_new_dest(svc, udest, &dest);
963         }
964         LeaveFunction(2);
965
966         return ret;
967 }
968
969
970 /*
971  *      Edit a destination in the given service
972  */
973 static int
974 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
975 {
976         struct ip_vs_dest *dest;
977         union nf_inet_addr daddr;
978         __be16 dport = udest->port;
979
980         EnterFunction(2);
981
982         if (udest->weight < 0) {
983                 pr_err("%s(): server weight less than zero\n", __func__);
984                 return -ERANGE;
985         }
986
987         if (udest->l_threshold > udest->u_threshold) {
988                 pr_err("%s(): lower threshold is higher than upper threshold\n",
989                         __func__);
990                 return -ERANGE;
991         }
992
993         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
994
995         /*
996          *  Lookup the destination list
997          */
998         dest = ip_vs_lookup_dest(svc, &daddr, dport);
999
1000         if (dest == NULL) {
1001                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1002                 return -ENOENT;
1003         }
1004
1005         __ip_vs_update_dest(svc, dest, udest, 0);
1006         LeaveFunction(2);
1007
1008         return 0;
1009 }
1010
1011
1012 /*
1013  *      Delete a destination (must be already unlinked from the service)
1014  */
1015 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1016 {
1017         struct netns_ipvs *ipvs = net_ipvs(net);
1018
1019         ip_vs_stop_estimator(net, &dest->stats);
1020
1021         /*
1022          *  Remove it from the d-linked list with the real services.
1023          */
1024         write_lock_bh(&ipvs->rs_lock);
1025         ip_vs_rs_unhash(dest);
1026         write_unlock_bh(&ipvs->rs_lock);
1027
1028         /*
1029          *  Decrease the refcnt of the dest, and free the dest
1030          *  if nobody refers to it (refcnt=0). Otherwise, throw
1031          *  the destination into the trash.
1032          */
1033         if (atomic_dec_and_test(&dest->refcnt)) {
1034                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035                               dest->vfwmark,
1036                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037                               ntohs(dest->port));
1038                 ip_vs_dst_reset(dest);
1039                 /* simply decrease svc->refcnt here, let the caller check
1040                    and release the service if nobody refers to it.
1041                    Only user context can release destination and service,
1042                    and only one user context can update virtual service at a
1043                    time, so the operation here is OK */
1044                 atomic_dec(&dest->svc->refcnt);
1045                 free_percpu(dest->stats.cpustats);
1046                 kfree(dest);
1047         } else {
1048                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049                               "dest->refcnt=%d\n",
1050                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051                               ntohs(dest->port),
1052                               atomic_read(&dest->refcnt));
1053                 list_add(&dest->n_list, &ipvs->dest_trash);
1054                 atomic_inc(&dest->refcnt);
1055         }
1056 }
1057
1058
1059 /*
1060  *      Unlink a destination from the given service
1061  */
1062 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1063                                 struct ip_vs_dest *dest,
1064                                 int svcupd)
1065 {
1066         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1067
1068         /*
1069          *  Remove it from the d-linked destination list.
1070          */
1071         list_del(&dest->n_list);
1072         svc->num_dests--;
1073
1074         /*
1075          *  Call the update_service function of its scheduler
1076          */
1077         if (svcupd && svc->scheduler->update_service)
1078                         svc->scheduler->update_service(svc);
1079 }
1080
1081
1082 /*
1083  *      Delete a destination server in the given service
1084  */
1085 static int
1086 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1087 {
1088         struct ip_vs_dest *dest;
1089         __be16 dport = udest->port;
1090
1091         EnterFunction(2);
1092
1093         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1094
1095         if (dest == NULL) {
1096                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097                 return -ENOENT;
1098         }
1099
1100         write_lock_bh(&__ip_vs_svc_lock);
1101
1102         /*
1103          *      Wait until all other svc users go away.
1104          */
1105         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106
1107         /*
1108          *      Unlink dest from the service
1109          */
1110         __ip_vs_unlink_dest(svc, dest, 1);
1111
1112         write_unlock_bh(&__ip_vs_svc_lock);
1113
1114         /*
1115          *      Delete the destination
1116          */
1117         __ip_vs_del_dest(svc->net, dest);
1118
1119         LeaveFunction(2);
1120
1121         return 0;
1122 }
1123
1124
1125 /*
1126  *      Add a service into the service hash table
1127  */
1128 static int
1129 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1130                   struct ip_vs_service **svc_p)
1131 {
1132         int ret = 0;
1133         struct ip_vs_scheduler *sched = NULL;
1134         struct ip_vs_pe *pe = NULL;
1135         struct ip_vs_service *svc = NULL;
1136         struct netns_ipvs *ipvs = net_ipvs(net);
1137
1138         /* increase the module use count */
1139         ip_vs_use_count_inc();
1140
1141         /* Lookup the scheduler by 'u->sched_name' */
1142         sched = ip_vs_scheduler_get(u->sched_name);
1143         if (sched == NULL) {
1144                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1145                 ret = -ENOENT;
1146                 goto out_err;
1147         }
1148
1149         if (u->pe_name && *u->pe_name) {
1150                 pe = ip_vs_pe_getbyname(u->pe_name);
1151                 if (pe == NULL) {
1152                         pr_info("persistence engine module ip_vs_pe_%s "
1153                                 "not found\n", u->pe_name);
1154                         ret = -ENOENT;
1155                         goto out_err;
1156                 }
1157         }
1158
1159 #ifdef CONFIG_IP_VS_IPV6
1160         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1161                 ret = -EINVAL;
1162                 goto out_err;
1163         }
1164 #endif
1165
1166         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1167         if (svc == NULL) {
1168                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1169                 ret = -ENOMEM;
1170                 goto out_err;
1171         }
1172         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1173         if (!svc->stats.cpustats) {
1174                 ret = -ENOMEM;
1175                 goto out_err;
1176         }
1177
1178         /* I'm the first user of the service */
1179         atomic_set(&svc->usecnt, 0);
1180         atomic_set(&svc->refcnt, 0);
1181
1182         svc->af = u->af;
1183         svc->protocol = u->protocol;
1184         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1185         svc->port = u->port;
1186         svc->fwmark = u->fwmark;
1187         svc->flags = u->flags;
1188         svc->timeout = u->timeout * HZ;
1189         svc->netmask = u->netmask;
1190         svc->net = net;
1191
1192         INIT_LIST_HEAD(&svc->destinations);
1193         rwlock_init(&svc->sched_lock);
1194         spin_lock_init(&svc->stats.lock);
1195
1196         /* Bind the scheduler */
1197         ret = ip_vs_bind_scheduler(svc, sched);
1198         if (ret)
1199                 goto out_err;
1200         sched = NULL;
1201
1202         /* Bind the ct retriever */
1203         ip_vs_bind_pe(svc, pe);
1204         pe = NULL;
1205
1206         /* Update the virtual service counters */
1207         if (svc->port == FTPPORT)
1208                 atomic_inc(&ipvs->ftpsvc_counter);
1209         else if (svc->port == 0)
1210                 atomic_inc(&ipvs->nullsvc_counter);
1211
1212         ip_vs_start_estimator(net, &svc->stats);
1213
1214         /* Count only IPv4 services for old get/setsockopt interface */
1215         if (svc->af == AF_INET)
1216                 ipvs->num_services++;
1217
1218         /* Hash the service into the service table */
1219         write_lock_bh(&__ip_vs_svc_lock);
1220         ip_vs_svc_hash(svc);
1221         write_unlock_bh(&__ip_vs_svc_lock);
1222
1223         *svc_p = svc;
1224         /* Now there is a service - full throttle */
1225         ipvs->enable = 1;
1226         return 0;
1227
1228
1229  out_err:
1230         if (svc != NULL) {
1231                 ip_vs_unbind_scheduler(svc);
1232                 if (svc->inc) {
1233                         local_bh_disable();
1234                         ip_vs_app_inc_put(svc->inc);
1235                         local_bh_enable();
1236                 }
1237                 if (svc->stats.cpustats)
1238                         free_percpu(svc->stats.cpustats);
1239                 kfree(svc);
1240         }
1241         ip_vs_scheduler_put(sched);
1242         ip_vs_pe_put(pe);
1243
1244         /* decrease the module use count */
1245         ip_vs_use_count_dec();
1246
1247         return ret;
1248 }
1249
1250
1251 /*
1252  *      Edit a service and bind it with a new scheduler
1253  */
1254 static int
1255 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256 {
1257         struct ip_vs_scheduler *sched, *old_sched;
1258         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1259         int ret = 0;
1260
1261         /*
1262          * Lookup the scheduler, by 'u->sched_name'
1263          */
1264         sched = ip_vs_scheduler_get(u->sched_name);
1265         if (sched == NULL) {
1266                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1267                 return -ENOENT;
1268         }
1269         old_sched = sched;
1270
1271         if (u->pe_name && *u->pe_name) {
1272                 pe = ip_vs_pe_getbyname(u->pe_name);
1273                 if (pe == NULL) {
1274                         pr_info("persistence engine module ip_vs_pe_%s "
1275                                 "not found\n", u->pe_name);
1276                         ret = -ENOENT;
1277                         goto out;
1278                 }
1279                 old_pe = pe;
1280         }
1281
1282 #ifdef CONFIG_IP_VS_IPV6
1283         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1284                 ret = -EINVAL;
1285                 goto out;
1286         }
1287 #endif
1288
1289         write_lock_bh(&__ip_vs_svc_lock);
1290
1291         /*
1292          * Wait until all other svc users go away.
1293          */
1294         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1295
1296         /*
1297          * Set the flags and timeout value
1298          */
1299         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1300         svc->timeout = u->timeout * HZ;
1301         svc->netmask = u->netmask;
1302
1303         old_sched = svc->scheduler;
1304         if (sched != old_sched) {
1305                 /*
1306                  * Unbind the old scheduler
1307                  */
1308                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1309                         old_sched = sched;
1310                         goto out_unlock;
1311                 }
1312
1313                 /*
1314                  * Bind the new scheduler
1315                  */
1316                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317                         /*
1318                          * If ip_vs_bind_scheduler fails, restore the old
1319                          * scheduler.
1320                          * The main reason of failure is out of memory.
1321                          *
1322                          * The question is if the old scheduler can be
1323                          * restored all the time. TODO: if it cannot be
1324                          * restored some time, we must delete the service,
1325                          * otherwise the system may crash.
1326                          */
1327                         ip_vs_bind_scheduler(svc, old_sched);
1328                         old_sched = sched;
1329                         goto out_unlock;
1330                 }
1331         }
1332
1333         old_pe = svc->pe;
1334         if (pe != old_pe) {
1335                 ip_vs_unbind_pe(svc);
1336                 ip_vs_bind_pe(svc, pe);
1337         }
1338
1339 out_unlock:
1340         write_unlock_bh(&__ip_vs_svc_lock);
1341 out:
1342         ip_vs_scheduler_put(old_sched);
1343         ip_vs_pe_put(old_pe);
1344         return ret;
1345 }
1346
1347
1348 /*
1349  *      Delete a service from the service list
1350  *      - The service must be unlinked, unlocked and not referenced!
1351  *      - We are called under _bh lock
1352  */
1353 static void __ip_vs_del_service(struct ip_vs_service *svc)
1354 {
1355         struct ip_vs_dest *dest, *nxt;
1356         struct ip_vs_scheduler *old_sched;
1357         struct ip_vs_pe *old_pe;
1358         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1359
1360         pr_info("%s: enter\n", __func__);
1361
1362         /* Count only IPv4 services for old get/setsockopt interface */
1363         if (svc->af == AF_INET)
1364                 ipvs->num_services--;
1365
1366         ip_vs_stop_estimator(svc->net, &svc->stats);
1367
1368         /* Unbind scheduler */
1369         old_sched = svc->scheduler;
1370         ip_vs_unbind_scheduler(svc);
1371         ip_vs_scheduler_put(old_sched);
1372
1373         /* Unbind persistence engine */
1374         old_pe = svc->pe;
1375         ip_vs_unbind_pe(svc);
1376         ip_vs_pe_put(old_pe);
1377
1378         /* Unbind app inc */
1379         if (svc->inc) {
1380                 ip_vs_app_inc_put(svc->inc);
1381                 svc->inc = NULL;
1382         }
1383
1384         /*
1385          *    Unlink the whole destination list
1386          */
1387         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388                 __ip_vs_unlink_dest(svc, dest, 0);
1389                 __ip_vs_del_dest(svc->net, dest);
1390         }
1391
1392         /*
1393          *    Update the virtual service counters
1394          */
1395         if (svc->port == FTPPORT)
1396                 atomic_dec(&ipvs->ftpsvc_counter);
1397         else if (svc->port == 0)
1398                 atomic_dec(&ipvs->nullsvc_counter);
1399
1400         /*
1401          *    Free the service if nobody refers to it
1402          */
1403         if (atomic_read(&svc->refcnt) == 0) {
1404                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1405                               svc->fwmark,
1406                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407                               ntohs(svc->port), atomic_read(&svc->usecnt));
1408                 free_percpu(svc->stats.cpustats);
1409                 kfree(svc);
1410         }
1411
1412         /* decrease the module use count */
1413         ip_vs_use_count_dec();
1414 }
1415
1416 /*
1417  * Unlink a service from list and try to delete it if its refcnt reached 0
1418  */
1419 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1420 {
1421         /*
1422          * Unhash it from the service table
1423          */
1424         write_lock_bh(&__ip_vs_svc_lock);
1425
1426         ip_vs_svc_unhash(svc);
1427
1428         /*
1429          * Wait until all the svc users go away.
1430          */
1431         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432
1433         __ip_vs_del_service(svc);
1434
1435         write_unlock_bh(&__ip_vs_svc_lock);
1436 }
1437
1438 /*
1439  *      Delete a service from the service list
1440  */
1441 static int ip_vs_del_service(struct ip_vs_service *svc)
1442 {
1443         if (svc == NULL)
1444                 return -EEXIST;
1445         ip_vs_unlink_service(svc);
1446
1447         return 0;
1448 }
1449
1450
1451 /*
1452  *      Flush all the virtual services
1453  */
1454 static int ip_vs_flush(struct net *net)
1455 {
1456         int idx;
1457         struct ip_vs_service *svc, *nxt;
1458
1459         /*
1460          * Flush the service table hashed by <netns,protocol,addr,port>
1461          */
1462         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1464                                          s_list) {
1465                         if (net_eq(svc->net, net))
1466                                 ip_vs_unlink_service(svc);
1467                 }
1468         }
1469
1470         /*
1471          * Flush the service table hashed by fwmark
1472          */
1473         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474                 list_for_each_entry_safe(svc, nxt,
1475                                          &ip_vs_svc_fwm_table[idx], f_list) {
1476                         if (net_eq(svc->net, net))
1477                                 ip_vs_unlink_service(svc);
1478                 }
1479         }
1480
1481         return 0;
1482 }
1483
1484 /*
1485  *      Delete service by {netns} in the service table.
1486  *      Called by __ip_vs_cleanup()
1487  */
1488 void ip_vs_service_net_cleanup(struct net *net)
1489 {
1490         EnterFunction(2);
1491         /* Check for "full" addressed entries */
1492         mutex_lock(&__ip_vs_mutex);
1493         ip_vs_flush(net);
1494         mutex_unlock(&__ip_vs_mutex);
1495         LeaveFunction(2);
1496 }
1497 /*
1498  * Release dst hold by dst_cache
1499  */
1500 static inline void
1501 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1502 {
1503         spin_lock_bh(&dest->dst_lock);
1504         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1505                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506                               dev->name,
1507                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508                               ntohs(dest->port),
1509                               atomic_read(&dest->refcnt));
1510                 ip_vs_dst_reset(dest);
1511         }
1512         spin_unlock_bh(&dest->dst_lock);
1513
1514 }
1515 /*
1516  * Netdev event receiver
1517  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518  * a device that is "unregister" it must be released.
1519  */
1520 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521                             void *ptr)
1522 {
1523         struct net_device *dev = ptr;
1524         struct net *net = dev_net(dev);
1525         struct netns_ipvs *ipvs = net_ipvs(net);
1526         struct ip_vs_service *svc;
1527         struct ip_vs_dest *dest;
1528         unsigned int idx;
1529
1530         if (event != NETDEV_UNREGISTER || !ipvs)
1531                 return NOTIFY_DONE;
1532         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533         EnterFunction(2);
1534         mutex_lock(&__ip_vs_mutex);
1535         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537                         if (net_eq(svc->net, net)) {
1538                                 list_for_each_entry(dest, &svc->destinations,
1539                                                     n_list) {
1540                                         __ip_vs_dev_reset(dest, dev);
1541                                 }
1542                         }
1543                 }
1544
1545                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546                         if (net_eq(svc->net, net)) {
1547                                 list_for_each_entry(dest, &svc->destinations,
1548                                                     n_list) {
1549                                         __ip_vs_dev_reset(dest, dev);
1550                                 }
1551                         }
1552
1553                 }
1554         }
1555
1556         list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1557                 __ip_vs_dev_reset(dest, dev);
1558         }
1559         mutex_unlock(&__ip_vs_mutex);
1560         LeaveFunction(2);
1561         return NOTIFY_DONE;
1562 }
1563
1564 /*
1565  *      Zero counters in a service or all services
1566  */
1567 static int ip_vs_zero_service(struct ip_vs_service *svc)
1568 {
1569         struct ip_vs_dest *dest;
1570
1571         write_lock_bh(&__ip_vs_svc_lock);
1572         list_for_each_entry(dest, &svc->destinations, n_list) {
1573                 ip_vs_zero_stats(&dest->stats);
1574         }
1575         ip_vs_zero_stats(&svc->stats);
1576         write_unlock_bh(&__ip_vs_svc_lock);
1577         return 0;
1578 }
1579
1580 static int ip_vs_zero_all(struct net *net)
1581 {
1582         int idx;
1583         struct ip_vs_service *svc;
1584
1585         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587                         if (net_eq(svc->net, net))
1588                                 ip_vs_zero_service(svc);
1589                 }
1590         }
1591
1592         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594                         if (net_eq(svc->net, net))
1595                                 ip_vs_zero_service(svc);
1596                 }
1597         }
1598
1599         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1600         return 0;
1601 }
1602
1603 #ifdef CONFIG_SYSCTL
1604
/* Lower and upper bounds handed to proc_dointvec_minmax for "sync_retries" */
static int zero;
static int three = 3;
1607
1608 static int
1609 proc_do_defense_mode(ctl_table *table, int write,
1610                      void __user *buffer, size_t *lenp, loff_t *ppos)
1611 {
1612         struct net *net = current->nsproxy->net_ns;
1613         int *valp = table->data;
1614         int val = *valp;
1615         int rc;
1616
1617         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1618         if (write && (*valp != val)) {
1619                 if ((*valp < 0) || (*valp > 3)) {
1620                         /* Restore the correct value */
1621                         *valp = val;
1622                 } else {
1623                         update_defense_level(net_ipvs(net));
1624                 }
1625         }
1626         return rc;
1627 }
1628
1629 static int
1630 proc_do_sync_threshold(ctl_table *table, int write,
1631                        void __user *buffer, size_t *lenp, loff_t *ppos)
1632 {
1633         int *valp = table->data;
1634         int val[2];
1635         int rc;
1636
1637         /* backup the value first */
1638         memcpy(val, valp, sizeof(val));
1639
1640         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1641         if (write && (valp[0] < 0 || valp[1] < 0 ||
1642             (valp[0] >= valp[1] && valp[1]))) {
1643                 /* Restore the correct value */
1644                 memcpy(valp, val, sizeof(val));
1645         }
1646         return rc;
1647 }
1648
1649 static int
1650 proc_do_sync_mode(ctl_table *table, int write,
1651                      void __user *buffer, size_t *lenp, loff_t *ppos)
1652 {
1653         int *valp = table->data;
1654         int val = *valp;
1655         int rc;
1656
1657         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1658         if (write && (*valp != val)) {
1659                 if ((*valp < 0) || (*valp > 1)) {
1660                         /* Restore the correct value */
1661                         *valp = val;
1662                 }
1663         }
1664         return rc;
1665 }
1666
1667 static int
1668 proc_do_sync_ports(ctl_table *table, int write,
1669                    void __user *buffer, size_t *lenp, loff_t *ppos)
1670 {
1671         int *valp = table->data;
1672         int val = *valp;
1673         int rc;
1674
1675         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1676         if (write && (*valp != val)) {
1677                 if (*valp < 1 || !is_power_of_2(*valp)) {
1678                         /* Restore the correct value */
1679                         *valp = val;
1680                 }
1681         }
1682         return rc;
1683 }
1684
/*
 *      IPVS sysctl table (under /proc/sys/net/ipv4/vs/)
 *      Do not change the order or insert new entries without
 *      aligning them with the netns init in ip_vs_control_net_init()
 */
1690
1691 static struct ctl_table vs_vars[] = {
1692         {
1693                 .procname       = "amemthresh",
1694                 .maxlen         = sizeof(int),
1695                 .mode           = 0644,
1696                 .proc_handler   = proc_dointvec,
1697         },
1698         {
1699                 .procname       = "am_droprate",
1700                 .maxlen         = sizeof(int),
1701                 .mode           = 0644,
1702                 .proc_handler   = proc_dointvec,
1703         },
1704         {
1705                 .procname       = "drop_entry",
1706                 .maxlen         = sizeof(int),
1707                 .mode           = 0644,
1708                 .proc_handler   = proc_do_defense_mode,
1709         },
1710         {
1711                 .procname       = "drop_packet",
1712                 .maxlen         = sizeof(int),
1713                 .mode           = 0644,
1714                 .proc_handler   = proc_do_defense_mode,
1715         },
1716 #ifdef CONFIG_IP_VS_NFCT
1717         {
1718                 .procname       = "conntrack",
1719                 .maxlen         = sizeof(int),
1720                 .mode           = 0644,
1721                 .proc_handler   = &proc_dointvec,
1722         },
1723 #endif
1724         {
1725                 .procname       = "secure_tcp",
1726                 .maxlen         = sizeof(int),
1727                 .mode           = 0644,
1728                 .proc_handler   = proc_do_defense_mode,
1729         },
1730         {
1731                 .procname       = "snat_reroute",
1732                 .maxlen         = sizeof(int),
1733                 .mode           = 0644,
1734                 .proc_handler   = &proc_dointvec,
1735         },
1736         {
1737                 .procname       = "sync_version",
1738                 .maxlen         = sizeof(int),
1739                 .mode           = 0644,
1740                 .proc_handler   = &proc_do_sync_mode,
1741         },
1742         {
1743                 .procname       = "sync_ports",
1744                 .maxlen         = sizeof(int),
1745                 .mode           = 0644,
1746                 .proc_handler   = &proc_do_sync_ports,
1747         },
1748         {
1749                 .procname       = "sync_qlen_max",
1750                 .maxlen         = sizeof(int),
1751                 .mode           = 0644,
1752                 .proc_handler   = proc_dointvec,
1753         },
1754         {
1755                 .procname       = "sync_sock_size",
1756                 .maxlen         = sizeof(int),
1757                 .mode           = 0644,
1758                 .proc_handler   = proc_dointvec,
1759         },
1760         {
1761                 .procname       = "cache_bypass",
1762                 .maxlen         = sizeof(int),
1763                 .mode           = 0644,
1764                 .proc_handler   = proc_dointvec,
1765         },
1766         {
1767                 .procname       = "expire_nodest_conn",
1768                 .maxlen         = sizeof(int),
1769                 .mode           = 0644,
1770                 .proc_handler   = proc_dointvec,
1771         },
1772         {
1773                 .procname       = "expire_quiescent_template",
1774                 .maxlen         = sizeof(int),
1775                 .mode           = 0644,
1776                 .proc_handler   = proc_dointvec,
1777         },
1778         {
1779                 .procname       = "sync_threshold",
1780                 .maxlen         =
1781                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1782                 .mode           = 0644,
1783                 .proc_handler   = proc_do_sync_threshold,
1784         },
1785         {
1786                 .procname       = "sync_refresh_period",
1787                 .maxlen         = sizeof(int),
1788                 .mode           = 0644,
1789                 .proc_handler   = proc_dointvec_jiffies,
1790         },
1791         {
1792                 .procname       = "sync_retries",
1793                 .maxlen         = sizeof(int),
1794                 .mode           = 0644,
1795                 .proc_handler   = proc_dointvec_minmax,
1796                 .extra1         = &zero,
1797                 .extra2         = &three,
1798         },
1799         {
1800                 .procname       = "nat_icmp_send",
1801                 .maxlen         = sizeof(int),
1802                 .mode           = 0644,
1803                 .proc_handler   = proc_dointvec,
1804         },
1805         {
1806                 .procname       = "pmtu_disc",
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0644,
1809                 .proc_handler   = proc_dointvec,
1810         },
1811 #ifdef CONFIG_IP_VS_DEBUG
1812         {
1813                 .procname       = "debug_level",
1814                 .data           = &sysctl_ip_vs_debug_level,
1815                 .maxlen         = sizeof(int),
1816                 .mode           = 0644,
1817                 .proc_handler   = proc_dointvec,
1818         },
1819 #endif
1820 #if 0
1821         {
1822                 .procname       = "timeout_established",
1823                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1824                 .maxlen         = sizeof(int),
1825                 .mode           = 0644,
1826                 .proc_handler   = proc_dointvec_jiffies,
1827         },
1828         {
1829                 .procname       = "timeout_synsent",
1830                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1831                 .maxlen         = sizeof(int),
1832                 .mode           = 0644,
1833                 .proc_handler   = proc_dointvec_jiffies,
1834         },
1835         {
1836                 .procname       = "timeout_synrecv",
1837                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1838                 .maxlen         = sizeof(int),
1839                 .mode           = 0644,
1840                 .proc_handler   = proc_dointvec_jiffies,
1841         },
1842         {
1843                 .procname       = "timeout_finwait",
1844                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1845                 .maxlen         = sizeof(int),
1846                 .mode           = 0644,
1847                 .proc_handler   = proc_dointvec_jiffies,
1848         },
1849         {
1850                 .procname       = "timeout_timewait",
1851                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1852                 .maxlen         = sizeof(int),
1853                 .mode           = 0644,
1854                 .proc_handler   = proc_dointvec_jiffies,
1855         },
1856         {
1857                 .procname       = "timeout_close",
1858                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1859                 .maxlen         = sizeof(int),
1860                 .mode           = 0644,
1861                 .proc_handler   = proc_dointvec_jiffies,
1862         },
1863         {
1864                 .procname       = "timeout_closewait",
1865                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1866                 .maxlen         = sizeof(int),
1867                 .mode           = 0644,
1868                 .proc_handler   = proc_dointvec_jiffies,
1869         },
1870         {
1871                 .procname       = "timeout_lastack",
1872                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1873                 .maxlen         = sizeof(int),
1874                 .mode           = 0644,
1875                 .proc_handler   = proc_dointvec_jiffies,
1876         },
1877         {
1878                 .procname       = "timeout_listen",
1879                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1880                 .maxlen         = sizeof(int),
1881                 .mode           = 0644,
1882                 .proc_handler   = proc_dointvec_jiffies,
1883         },
1884         {
1885                 .procname       = "timeout_synack",
1886                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1887                 .maxlen         = sizeof(int),
1888                 .mode           = 0644,
1889                 .proc_handler   = proc_dointvec_jiffies,
1890         },
1891         {
1892                 .procname       = "timeout_udp",
1893                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1894                 .maxlen         = sizeof(int),
1895                 .mode           = 0644,
1896                 .proc_handler   = proc_dointvec_jiffies,
1897         },
1898         {
1899                 .procname       = "timeout_icmp",
1900                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1901                 .maxlen         = sizeof(int),
1902                 .mode           = 0644,
1903                 .proc_handler   = proc_dointvec_jiffies,
1904         },
1905 #endif
1906         { }
1907 };
1908
1909 #endif
1910
1911 #ifdef CONFIG_PROC_FS
1912
/*
 *	Per-open iteration state for the service listing seq_file.
 */
struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it*/
	struct list_head *table;	/* hash table the current entry was found in */
	int bucket;			/* bucket index of the current entry in that table */
};
1918
1919 /*
1920  *      Write the contents of the VS rule table to a PROCfs file.
1921  *      (It is kept just for backward compatibility)
1922  */
1923 static inline const char *ip_vs_fwd_name(unsigned int flags)
1924 {
1925         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1926         case IP_VS_CONN_F_LOCALNODE:
1927                 return "Local";
1928         case IP_VS_CONN_F_TUNNEL:
1929                 return "Tunnel";
1930         case IP_VS_CONN_F_DROUTE:
1931                 return "Route";
1932         default:
1933                 return "Masq";
1934         }
1935 }
1936
1937
1938 /* Get the Nth entry in the two lists */
1939 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1940 {
1941         struct net *net = seq_file_net(seq);
1942         struct ip_vs_iter *iter = seq->private;
1943         int idx;
1944         struct ip_vs_service *svc;
1945
1946         /* look in hash by protocol */
1947         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1948                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1949                         if (net_eq(svc->net, net) && pos-- == 0) {
1950                                 iter->table = ip_vs_svc_table;
1951                                 iter->bucket = idx;
1952                                 return svc;
1953                         }
1954                 }
1955         }
1956
1957         /* keep looking in fwmark */
1958         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1959                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1960                         if (net_eq(svc->net, net) && pos-- == 0) {
1961                                 iter->table = ip_vs_svc_fwm_table;
1962                                 iter->bucket = idx;
1963                                 return svc;
1964                         }
1965                 }
1966         }
1967
1968         return NULL;
1969 }
1970
1971 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1972 __acquires(__ip_vs_svc_lock)
1973 {
1974
1975         read_lock_bh(&__ip_vs_svc_lock);
1976         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1977 }
1978
1979
1980 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1981 {
1982         struct list_head *e;
1983         struct ip_vs_iter *iter;
1984         struct ip_vs_service *svc;
1985
1986         ++*pos;
1987         if (v == SEQ_START_TOKEN)
1988                 return ip_vs_info_array(seq,0);
1989
1990         svc = v;
1991         iter = seq->private;
1992
1993         if (iter->table == ip_vs_svc_table) {
1994                 /* next service in table hashed by protocol */
1995                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1996                         return list_entry(e, struct ip_vs_service, s_list);
1997
1998
1999                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2000                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2001                                             s_list) {
2002                                 return svc;
2003                         }
2004                 }
2005
2006                 iter->table = ip_vs_svc_fwm_table;
2007                 iter->bucket = -1;
2008                 goto scan_fwmark;
2009         }
2010
2011         /* next service in hashed by fwmark */
2012         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2013                 return list_entry(e, struct ip_vs_service, f_list);
2014
2015  scan_fwmark:
2016         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2017                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2018                                     f_list)
2019                         return svc;
2020         }
2021
2022         return NULL;
2023 }
2024
2025 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2026 __releases(__ip_vs_svc_lock)
2027 {
2028         read_unlock_bh(&__ip_vs_svc_lock);
2029 }
2030
2031
2032 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2033 {
2034         if (v == SEQ_START_TOKEN) {
2035                 seq_printf(seq,
2036                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2037                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2038                 seq_puts(seq,
2039                          "Prot LocalAddress:Port Scheduler Flags\n");
2040                 seq_puts(seq,
2041                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2042         } else {
2043                 const struct ip_vs_service *svc = v;
2044                 const struct ip_vs_iter *iter = seq->private;
2045                 const struct ip_vs_dest *dest;
2046
2047                 if (iter->table == ip_vs_svc_table) {
2048 #ifdef CONFIG_IP_VS_IPV6
2049                         if (svc->af == AF_INET6)
2050                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2051                                            ip_vs_proto_name(svc->protocol),
2052                                            &svc->addr.in6,
2053                                            ntohs(svc->port),
2054                                            svc->scheduler->name);
2055                         else
2056 #endif
2057                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2058                                            ip_vs_proto_name(svc->protocol),
2059                                            ntohl(svc->addr.ip),
2060                                            ntohs(svc->port),
2061                                            svc->scheduler->name,
2062                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2063                 } else {
2064                         seq_printf(seq, "FWM  %08X %s %s",
2065                                    svc->fwmark, svc->scheduler->name,
2066                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2067                 }
2068
2069                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2070                         seq_printf(seq, "persistent %d %08X\n",
2071                                 svc->timeout,
2072                                 ntohl(svc->netmask));
2073                 else
2074                         seq_putc(seq, '\n');
2075
2076                 list_for_each_entry(dest, &svc->destinations, n_list) {
2077 #ifdef CONFIG_IP_VS_IPV6
2078                         if (dest->af == AF_INET6)
2079                                 seq_printf(seq,
2080                                            "  -> [%pI6]:%04X"
2081                                            "      %-7s %-6d %-10d %-10d\n",
2082                                            &dest->addr.in6,
2083                                            ntohs(dest->port),
2084                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2085                                            atomic_read(&dest->weight),
2086                                            atomic_read(&dest->activeconns),
2087                                            atomic_read(&dest->inactconns));
2088                         else
2089 #endif
2090                                 seq_printf(seq,
2091                                            "  -> %08X:%04X      "
2092                                            "%-7s %-6d %-10d %-10d\n",
2093                                            ntohl(dest->addr.ip),
2094                                            ntohs(dest->port),
2095                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2096                                            atomic_read(&dest->weight),
2097                                            atomic_read(&dest->activeconns),
2098                                            atomic_read(&dest->inactconns));
2099
2100                 }
2101         }
2102         return 0;
2103 }
2104
2105 static const struct seq_operations ip_vs_info_seq_ops = {
2106         .start = ip_vs_info_seq_start,
2107         .next  = ip_vs_info_seq_next,
2108         .stop  = ip_vs_info_seq_stop,
2109         .show  = ip_vs_info_seq_show,
2110 };
2111
2112 static int ip_vs_info_open(struct inode *inode, struct file *file)
2113 {
2114         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2115                         sizeof(struct ip_vs_iter));
2116 }
2117
2118 static const struct file_operations ip_vs_info_fops = {
2119         .owner   = THIS_MODULE,
2120         .open    = ip_vs_info_open,
2121         .read    = seq_read,
2122         .llseek  = seq_lseek,
2123         .release = seq_release_net,
2124 };
2125
2126 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2127 {
2128         struct net *net = seq_file_single_net(seq);
2129         struct ip_vs_stats_user show;
2130
2131 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2132         seq_puts(seq,
2133                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2134         seq_printf(seq,
2135                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2136
2137         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2138         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2139                    show.inpkts, show.outpkts,
2140                    (unsigned long long) show.inbytes,
2141                    (unsigned long long) show.outbytes);
2142
2143 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2144         seq_puts(seq,
2145                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2146         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2147                         show.cps, show.inpps, show.outpps,
2148                         show.inbps, show.outbps);
2149
2150         return 0;
2151 }
2152
2153 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2154 {
2155         return single_open_net(inode, file, ip_vs_stats_show);
2156 }
2157
2158 static const struct file_operations ip_vs_stats_fops = {
2159         .owner = THIS_MODULE,
2160         .open = ip_vs_stats_seq_open,
2161         .read = seq_read,
2162         .llseek = seq_lseek,
2163         .release = single_release_net,
2164 };
2165
2166 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2167 {
2168         struct net *net = seq_file_single_net(seq);
2169         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2170         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2171         struct ip_vs_stats_user rates;
2172         int i;
2173
2174 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2175         seq_puts(seq,
2176                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2177         seq_printf(seq,
2178                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2179
2180         for_each_possible_cpu(i) {
2181                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2182                 unsigned int start;
2183                 __u64 inbytes, outbytes;
2184
2185                 do {
2186                         start = u64_stats_fetch_begin_bh(&u->syncp);
2187                         inbytes = u->ustats.inbytes;
2188                         outbytes = u->ustats.outbytes;
2189                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2190
2191                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2192                            i, u->ustats.conns, u->ustats.inpkts,
2193                            u->ustats.outpkts, (__u64)inbytes,
2194                            (__u64)outbytes);
2195         }
2196
2197         spin_lock_bh(&tot_stats->lock);
2198
2199         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2200                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2201                    tot_stats->ustats.outpkts,
2202                    (unsigned long long) tot_stats->ustats.inbytes,
2203                    (unsigned long long) tot_stats->ustats.outbytes);
2204
2205         ip_vs_read_estimator(&rates, tot_stats);
2206
2207         spin_unlock_bh(&tot_stats->lock);
2208
2209 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2210         seq_puts(seq,
2211                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2212         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2213                         rates.cps,
2214                         rates.inpps,
2215                         rates.outpps,
2216                         rates.inbps,
2217                         rates.outbps);
2218
2219         return 0;
2220 }
2221
2222 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2223 {
2224         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2225 }
2226
2227 static const struct file_operations ip_vs_stats_percpu_fops = {
2228         .owner = THIS_MODULE,
2229         .open = ip_vs_stats_percpu_seq_open,
2230         .read = seq_read,
2231         .llseek = seq_lseek,
2232         .release = single_release_net,
2233 };
2234 #endif
2235
2236 /*
2237  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2238  */
2239 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2240 {
2241 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2242         struct ip_vs_proto_data *pd;
2243 #endif
2244
2245         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2246                   u->tcp_timeout,
2247                   u->tcp_fin_timeout,
2248                   u->udp_timeout);
2249
2250 #ifdef CONFIG_IP_VS_PROTO_TCP
2251         if (u->tcp_timeout) {
2252                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2253                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2254                         = u->tcp_timeout * HZ;
2255         }
2256
2257         if (u->tcp_fin_timeout) {
2258                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2259                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2260                         = u->tcp_fin_timeout * HZ;
2261         }
2262 #endif
2263
2264 #ifdef CONFIG_IP_VS_PROTO_UDP
2265         if (u->udp_timeout) {
2266                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2267                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2268                         = u->udp_timeout * HZ;
2269         }
2270 #endif
2271         return 0;
2272 }
2273
2274
2275 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2276 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2277 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2278                                  sizeof(struct ip_vs_dest_user))
2279 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2280 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2281 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2282
2283 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2284         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2285         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2286         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2287         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2288         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2289         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2290         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2291         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2292         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2293         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2294         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2295 };
2296
2297 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2298                                   struct ip_vs_service_user *usvc_compat)
2299 {
2300         memset(usvc, 0, sizeof(*usvc));
2301
2302         usvc->af                = AF_INET;
2303         usvc->protocol          = usvc_compat->protocol;
2304         usvc->addr.ip           = usvc_compat->addr;
2305         usvc->port              = usvc_compat->port;
2306         usvc->fwmark            = usvc_compat->fwmark;
2307
2308         /* Deep copy of sched_name is not needed here */
2309         usvc->sched_name        = usvc_compat->sched_name;
2310
2311         usvc->flags             = usvc_compat->flags;
2312         usvc->timeout           = usvc_compat->timeout;
2313         usvc->netmask           = usvc_compat->netmask;
2314 }
2315
2316 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2317                                    struct ip_vs_dest_user *udest_compat)
2318 {
2319         memset(udest, 0, sizeof(*udest));
2320
2321         udest->addr.ip          = udest_compat->addr;
2322         udest->port             = udest_compat->port;
2323         udest->conn_flags       = udest_compat->conn_flags;
2324         udest->weight           = udest_compat->weight;
2325         udest->u_threshold      = udest_compat->u_threshold;
2326         udest->l_threshold      = udest_compat->l_threshold;
2327 }
2328
2329 static int
2330 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2331 {
2332         struct net *net = sock_net(sk);
2333         int ret;
2334         unsigned char arg[MAX_ARG_LEN];
2335         struct ip_vs_service_user *usvc_compat;
2336         struct ip_vs_service_user_kern usvc;
2337         struct ip_vs_service *svc;
2338         struct ip_vs_dest_user *udest_compat;
2339         struct ip_vs_dest_user_kern udest;
2340         struct netns_ipvs *ipvs = net_ipvs(net);
2341
2342         if (!capable(CAP_NET_ADMIN))
2343                 return -EPERM;
2344
2345         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2346                 return -EINVAL;
2347         if (len < 0 || len >  MAX_ARG_LEN)
2348                 return -EINVAL;
2349         if (len != set_arglen[SET_CMDID(cmd)]) {
2350                 pr_err("set_ctl: len %u != %u\n",
2351                        len, set_arglen[SET_CMDID(cmd)]);
2352                 return -EINVAL;
2353         }
2354
2355         if (copy_from_user(arg, user, len) != 0)
2356                 return -EFAULT;
2357
2358         /* increase the module use count */
2359         ip_vs_use_count_inc();
2360
2361         /* Handle daemons since they have another lock */
2362         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2363             cmd == IP_VS_SO_SET_STOPDAEMON) {
2364                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2365
2366                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2367                         ret = -ERESTARTSYS;
2368                         goto out_dec;
2369                 }
2370                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2371                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2372                                                 dm->syncid);
2373                 else
2374                         ret = stop_sync_thread(net, dm->state);
2375                 mutex_unlock(&ipvs->sync_mutex);
2376                 goto out_dec;
2377         }
2378
2379         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2380                 ret = -ERESTARTSYS;
2381                 goto out_dec;
2382         }
2383
2384         if (cmd == IP_VS_SO_SET_FLUSH) {
2385                 /* Flush the virtual service */
2386                 ret = ip_vs_flush(net);
2387                 goto out_unlock;
2388         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2389                 /* Set timeout values for (tcp tcpfin udp) */
2390                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2391                 goto out_unlock;
2392         }
2393
2394         usvc_compat = (struct ip_vs_service_user *)arg;
2395         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2396
2397         /* We only use the new structs internally, so copy userspace compat
2398          * structs to extended internal versions */
2399         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2400         ip_vs_copy_udest_compat(&udest, udest_compat);
2401
2402         if (cmd == IP_VS_SO_SET_ZERO) {
2403                 /* if no service address is set, zero counters in all */
2404                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2405                         ret = ip_vs_zero_all(net);
2406                         goto out_unlock;
2407                 }
2408         }
2409
2410         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2411         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2412             usvc.protocol != IPPROTO_SCTP) {
2413                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2414                        usvc.protocol, &usvc.addr.ip,
2415                        ntohs(usvc.port), usvc.sched_name);
2416                 ret = -EFAULT;
2417                 goto out_unlock;
2418         }
2419
2420         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2421         if (usvc.fwmark == 0)
2422                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2423                                            &usvc.addr, usvc.port);
2424         else
2425                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2426
2427         if (cmd != IP_VS_SO_SET_ADD
2428             && (svc == NULL || svc->protocol != usvc.protocol)) {
2429                 ret = -ESRCH;
2430                 goto out_unlock;
2431         }
2432
2433         switch (cmd) {
2434         case IP_VS_SO_SET_ADD:
2435                 if (svc != NULL)
2436                         ret = -EEXIST;
2437                 else
2438                         ret = ip_vs_add_service(net, &usvc, &svc);
2439                 break;
2440         case IP_VS_SO_SET_EDIT:
2441                 ret = ip_vs_edit_service(svc, &usvc);
2442                 break;
2443         case IP_VS_SO_SET_DEL:
2444                 ret = ip_vs_del_service(svc);
2445                 if (!ret)
2446                         goto out_unlock;
2447                 break;
2448         case IP_VS_SO_SET_ZERO:
2449                 ret = ip_vs_zero_service(svc);
2450                 break;
2451         case IP_VS_SO_SET_ADDDEST:
2452                 ret = ip_vs_add_dest(svc, &udest);
2453                 break;
2454         case IP_VS_SO_SET_EDITDEST:
2455                 ret = ip_vs_edit_dest(svc, &udest);
2456                 break;
2457         case IP_VS_SO_SET_DELDEST:
2458                 ret = ip_vs_del_dest(svc, &udest);
2459                 break;
2460         default:
2461                 ret = -EINVAL;
2462         }
2463
2464   out_unlock:
2465         mutex_unlock(&__ip_vs_mutex);
2466   out_dec:
2467         /* decrease the module use count */
2468         ip_vs_use_count_dec();
2469
2470         return ret;
2471 }
2472
2473
2474 static void
2475 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2476 {
2477         dst->protocol = src->protocol;
2478         dst->addr = src->addr.ip;
2479         dst->port = src->port;
2480         dst->fwmark = src->fwmark;
2481         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2482         dst->flags = src->flags;
2483         dst->timeout = src->timeout / HZ;
2484         dst->netmask = src->netmask;
2485         dst->num_dests = src->num_dests;
2486         ip_vs_copy_stats(&dst->stats, &src->stats);
2487 }
2488
2489 static inline int
2490 __ip_vs_get_service_entries(struct net *net,
2491                             const struct ip_vs_get_services *get,
2492                             struct ip_vs_get_services __user *uptr)
2493 {
2494         int idx, count=0;
2495         struct ip_vs_service *svc;
2496         struct ip_vs_service_entry entry;
2497         int ret = 0;
2498
2499         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2500                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2501                         /* Only expose IPv4 entries to old interface */
2502                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2503                                 continue;
2504
2505                         if (count >= get->num_services)
2506                                 goto out;
2507                         memset(&entry, 0, sizeof(entry));
2508                         ip_vs_copy_service(&entry, svc);
2509                         if (copy_to_user(&uptr->entrytable[count],
2510                                          &entry, sizeof(entry))) {
2511                                 ret = -EFAULT;
2512                                 goto out;
2513                         }
2514                         count++;
2515                 }
2516         }
2517
2518         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2519                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2520                         /* Only expose IPv4 entries to old interface */
2521                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2522                                 continue;
2523
2524                         if (count >= get->num_services)
2525                                 goto out;
2526                         memset(&entry, 0, sizeof(entry));
2527                         ip_vs_copy_service(&entry, svc);
2528                         if (copy_to_user(&uptr->entrytable[count],
2529                                          &entry, sizeof(entry))) {
2530                                 ret = -EFAULT;
2531                                 goto out;
2532                         }
2533                         count++;
2534                 }
2535         }
2536 out:
2537         return ret;
2538 }
2539
2540 static inline int
2541 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2542                          struct ip_vs_get_dests __user *uptr)
2543 {
2544         struct ip_vs_service *svc;
2545         union nf_inet_addr addr = { .ip = get->addr };
2546         int ret = 0;
2547
2548         if (get->fwmark)
2549                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2550         else
2551                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2552                                            get->port);
2553
2554         if (svc) {
2555                 int count = 0;
2556                 struct ip_vs_dest *dest;
2557                 struct ip_vs_dest_entry entry;
2558
2559                 list_for_each_entry(dest, &svc->destinations, n_list) {
2560                         if (count >= get->num_dests)
2561                                 break;
2562
2563                         entry.addr = dest->addr.ip;
2564                         entry.port = dest->port;
2565                         entry.conn_flags = atomic_read(&dest->conn_flags);
2566                         entry.weight = atomic_read(&dest->weight);
2567                         entry.u_threshold = dest->u_threshold;
2568                         entry.l_threshold = dest->l_threshold;
2569                         entry.activeconns = atomic_read(&dest->activeconns);
2570                         entry.inactconns = atomic_read(&dest->inactconns);
2571                         entry.persistconns = atomic_read(&dest->persistconns);
2572                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2573                         if (copy_to_user(&uptr->entrytable[count],
2574                                          &entry, sizeof(entry))) {
2575                                 ret = -EFAULT;
2576                                 break;
2577                         }
2578                         count++;
2579                 }
2580         } else
2581                 ret = -ESRCH;
2582         return ret;
2583 }
2584
2585 static inline void
2586 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2587 {
2588 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2589         struct ip_vs_proto_data *pd;
2590 #endif
2591
2592         memset(u, 0, sizeof (*u));
2593
2594 #ifdef CONFIG_IP_VS_PROTO_TCP
2595         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2596         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2597         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2598 #endif
2599 #ifdef CONFIG_IP_VS_PROTO_UDP
2600         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2601         u->udp_timeout =
2602                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2603 #endif
2604 }
2605
2606
/*
 * Helpers for the getsockopt() argument-length table.
 *
 * GET_CMDID() converts a command number into an index relative to
 * IP_VS_BASE_CTL.  Its argument is parenthesized so that an expression
 * with lower precedence than '-' (e.g. a conditional expression) still
 * expands to the intended value.
 *
 * The GET_*_ARG_LEN values are the fixed-size part of each request that
 * is copied in from user space before a command is dispatched.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* the daemon reply consists of two ip_vs_daemon_user records */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2614
2615 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2616         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2617         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2618         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2619         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2620         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2621         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2622         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2623 };
2624
2625 static int
2626 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2627 {
2628         unsigned char arg[128];
2629         int ret = 0;
2630         unsigned int copylen;
2631         struct net *net = sock_net(sk);
2632         struct netns_ipvs *ipvs = net_ipvs(net);
2633
2634         BUG_ON(!net);
2635         if (!capable(CAP_NET_ADMIN))
2636                 return -EPERM;
2637
2638         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2639                 return -EINVAL;
2640
2641         if (*len < get_arglen[GET_CMDID(cmd)]) {
2642                 pr_err("get_ctl: len %u < %u\n",
2643                        *len, get_arglen[GET_CMDID(cmd)]);
2644                 return -EINVAL;
2645         }
2646
2647         copylen = get_arglen[GET_CMDID(cmd)];
2648         if (copylen > 128)
2649                 return -EINVAL;
2650
2651         if (copy_from_user(arg, user, copylen) != 0)
2652                 return -EFAULT;
2653         /*
2654          * Handle daemons first since it has its own locking
2655          */
2656         if (cmd == IP_VS_SO_GET_DAEMON) {
2657                 struct ip_vs_daemon_user d[2];
2658
2659                 memset(&d, 0, sizeof(d));
2660                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2661                         return -ERESTARTSYS;
2662
2663                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2664                         d[0].state = IP_VS_STATE_MASTER;
2665                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2666                                 sizeof(d[0].mcast_ifn));
2667                         d[0].syncid = ipvs->master_syncid;
2668                 }
2669                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2670                         d[1].state = IP_VS_STATE_BACKUP;
2671                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2672                                 sizeof(d[1].mcast_ifn));
2673                         d[1].syncid = ipvs->backup_syncid;
2674                 }
2675                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2676                         ret = -EFAULT;
2677                 mutex_unlock(&ipvs->sync_mutex);
2678                 return ret;
2679         }
2680
2681         if (mutex_lock_interruptible(&__ip_vs_mutex))
2682                 return -ERESTARTSYS;
2683
2684         switch (cmd) {
2685         case IP_VS_SO_GET_VERSION:
2686         {
2687                 char buf[64];
2688
2689                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2690                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2691                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2692                         ret = -EFAULT;
2693                         goto out;
2694                 }
2695                 *len = strlen(buf)+1;
2696         }
2697         break;
2698
2699         case IP_VS_SO_GET_INFO:
2700         {
2701                 struct ip_vs_getinfo info;
2702                 info.version = IP_VS_VERSION_CODE;
2703                 info.size = ip_vs_conn_tab_size;
2704                 info.num_services = ipvs->num_services;
2705                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2706                         ret = -EFAULT;
2707         }
2708         break;
2709
2710         case IP_VS_SO_GET_SERVICES:
2711         {
2712                 struct ip_vs_get_services *get;
2713                 int size;
2714
2715                 get = (struct ip_vs_get_services *)arg;
2716                 size = sizeof(*get) +
2717                         sizeof(struct ip_vs_service_entry) * get->num_services;
2718                 if (*len != size) {
2719                         pr_err("length: %u != %u\n", *len, size);
2720                         ret = -EINVAL;
2721                         goto out;
2722                 }
2723                 ret = __ip_vs_get_service_entries(net, get, user);
2724         }
2725         break;
2726
2727         case IP_VS_SO_GET_SERVICE:
2728         {
2729                 struct ip_vs_service_entry *entry;
2730                 struct ip_vs_service *svc;
2731                 union nf_inet_addr addr;
2732
2733                 entry = (struct ip_vs_service_entry *)arg;
2734                 addr.ip = entry->addr;
2735                 if (entry->fwmark)
2736                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2737                 else
2738                         svc = __ip_vs_service_find(net, AF_INET,
2739                                                    entry->protocol, &addr,
2740                                                    entry->port);
2741                 if (svc) {
2742                         ip_vs_copy_service(entry, svc);
2743                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2744                                 ret = -EFAULT;
2745                 } else
2746                         ret = -ESRCH;
2747         }
2748         break;
2749
2750         case IP_VS_SO_GET_DESTS:
2751         {
2752                 struct ip_vs_get_dests *get;
2753                 int size;
2754
2755                 get = (struct ip_vs_get_dests *)arg;
2756                 size = sizeof(*get) +
2757                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2758                 if (*len != size) {
2759                         pr_err("length: %u != %u\n", *len, size);
2760                         ret = -EINVAL;
2761                         goto out;
2762                 }
2763                 ret = __ip_vs_get_dest_entries(net, get, user);
2764         }
2765         break;
2766
2767         case IP_VS_SO_GET_TIMEOUT:
2768         {
2769                 struct ip_vs_timeout_user t;
2770
2771                 __ip_vs_get_timeouts(net, &t);
2772                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2773                         ret = -EFAULT;
2774         }
2775         break;
2776
2777         default:
2778                 ret = -EINVAL;
2779         }
2780
2781 out:
2782         mutex_unlock(&__ip_vs_mutex);
2783         return ret;
2784 }
2785
2786
2787 static struct nf_sockopt_ops ip_vs_sockopts = {
2788         .pf             = PF_INET,
2789         .set_optmin     = IP_VS_BASE_CTL,
2790         .set_optmax     = IP_VS_SO_SET_MAX+1,
2791         .set            = do_ip_vs_set_ctl,
2792         .get_optmin     = IP_VS_BASE_CTL,
2793         .get_optmax     = IP_VS_SO_GET_MAX+1,
2794         .get            = do_ip_vs_get_ctl,
2795         .owner          = THIS_MODULE,
2796 };
2797
2798 /*
2799  * Generic Netlink interface
2800  */
2801
2802 /* IPVS genetlink family */
2803 static struct genl_family ip_vs_genl_family = {
2804         .id             = GENL_ID_GENERATE,
2805         .hdrsize        = 0,
2806         .name           = IPVS_GENL_NAME,
2807         .version        = IPVS_GENL_VERSION,
2808         .maxattr        = IPVS_CMD_MAX,
2809         .netnsok        = true,         /* Make ipvsadm to work on netns */
2810 };
2811
2812 /* Policy used for first-level command attributes */
2813 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2814         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2815         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2816         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2817         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2818         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2819         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2820 };
2821
2822 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2823 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2824         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2825         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2826                                             .len = IP_VS_IFNAME_MAXLEN },
2827         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2828 };
2829
2830 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2831 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2832         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2833         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2834         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2835                                             .len = sizeof(union nf_inet_addr) },
2836         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2837         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2838         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2839                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2840         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2841                                             .len = IP_VS_PENAME_MAXLEN },
2842         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2843                                             .len = sizeof(struct ip_vs_flags) },
2844         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2845         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2846         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2847 };
2848
2849 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2850 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2851         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2852                                             .len = sizeof(union nf_inet_addr) },
2853         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2854         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2855         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2856         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2857         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2858         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2859         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2860         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2861         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2862 };
2863
2864 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2865                                  struct ip_vs_stats *stats)
2866 {
2867         struct ip_vs_stats_user ustats;
2868         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2869         if (!nl_stats)
2870                 return -EMSGSIZE;
2871
2872         ip_vs_copy_stats(&ustats, stats);
2873
2874         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2875             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2876             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2877             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2878             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2879             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2880             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2881             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2882             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2883             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2884                 goto nla_put_failure;
2885         nla_nest_end(skb, nl_stats);
2886
2887         return 0;
2888
2889 nla_put_failure:
2890         nla_nest_cancel(skb, nl_stats);
2891         return -EMSGSIZE;
2892 }
2893
2894 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2895                                    struct ip_vs_service *svc)
2896 {
2897         struct nlattr *nl_service;
2898         struct ip_vs_flags flags = { .flags = svc->flags,
2899                                      .mask = ~0 };
2900
2901         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2902         if (!nl_service)
2903                 return -EMSGSIZE;
2904
2905         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2906                 goto nla_put_failure;
2907         if (svc->fwmark) {
2908                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2909                         goto nla_put_failure;
2910         } else {
2911                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2912                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2913                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2914                         goto nla_put_failure;
2915         }
2916
2917         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2918             (svc->pe &&
2919              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2920             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2921             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2922             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2923                 goto nla_put_failure;
2924         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2925                 goto nla_put_failure;
2926
2927         nla_nest_end(skb, nl_service);
2928
2929         return 0;
2930
2931 nla_put_failure:
2932         nla_nest_cancel(skb, nl_service);
2933         return -EMSGSIZE;
2934 }
2935
2936 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2937                                    struct ip_vs_service *svc,
2938                                    struct netlink_callback *cb)
2939 {
2940         void *hdr;
2941
2942         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2943                           &ip_vs_genl_family, NLM_F_MULTI,
2944                           IPVS_CMD_NEW_SERVICE);
2945         if (!hdr)
2946                 return -EMSGSIZE;
2947
2948         if (ip_vs_genl_fill_service(skb, svc) < 0)
2949                 goto nla_put_failure;
2950
2951         return genlmsg_end(skb, hdr);
2952
2953 nla_put_failure:
2954         genlmsg_cancel(skb, hdr);
2955         return -EMSGSIZE;
2956 }
2957
2958 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2959                                     struct netlink_callback *cb)
2960 {
2961         int idx = 0, i;
2962         int start = cb->args[0];
2963         struct ip_vs_service *svc;
2964         struct net *net = skb_sknet(skb);
2965
2966         mutex_lock(&__ip_vs_mutex);
2967         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2968                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2969                         if (++idx <= start || !net_eq(svc->net, net))
2970                                 continue;
2971                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2972                                 idx--;
2973                                 goto nla_put_failure;
2974                         }
2975                 }
2976         }
2977
2978         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2979                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2980                         if (++idx <= start || !net_eq(svc->net, net))
2981                                 continue;
2982                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2983                                 idx--;
2984                                 goto nla_put_failure;
2985                         }
2986                 }
2987         }
2988
2989 nla_put_failure:
2990         mutex_unlock(&__ip_vs_mutex);
2991         cb->args[0] = idx;
2992
2993         return skb->len;
2994 }
2995
2996 static int ip_vs_genl_parse_service(struct net *net,
2997                                     struct ip_vs_service_user_kern *usvc,
2998                                     struct nlattr *nla, int full_entry,
2999                                     struct ip_vs_service **ret_svc)
3000 {
3001         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3002         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3003         struct ip_vs_service *svc;
3004
3005         /* Parse mandatory identifying service fields first */
3006         if (nla == NULL ||
3007             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3008                 return -EINVAL;
3009
3010         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3011         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3012         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3013         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3014         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3015
3016         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3017                 return -EINVAL;
3018
3019         memset(usvc, 0, sizeof(*usvc));
3020
3021         usvc->af = nla_get_u16(nla_af);
3022 #ifdef CONFIG_IP_VS_IPV6
3023         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3024 #else
3025         if (usvc->af != AF_INET)
3026 #endif
3027                 return -EAFNOSUPPORT;
3028
3029         if (nla_fwmark) {
3030                 usvc->protocol = IPPROTO_TCP;
3031                 usvc->fwmark = nla_get_u32(nla_fwmark);
3032         } else {
3033                 usvc->protocol = nla_get_u16(nla_protocol);
3034                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3035                 usvc->port = nla_get_u16(nla_port);
3036                 usvc->fwmark = 0;
3037         }
3038
3039         if (usvc->fwmark)
3040                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3041         else
3042                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3043                                            &usvc->addr, usvc->port);
3044         *ret_svc = svc;
3045
3046         /* If a full entry was requested, check for the additional fields */
3047         if (full_entry) {
3048                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3049                               *nla_netmask;
3050                 struct ip_vs_flags flags;
3051
3052                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3053                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3054                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3055                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3056                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3057
3058                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3059                         return -EINVAL;
3060
3061                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3062
3063                 /* prefill flags from service if it already exists */
3064                 if (svc)
3065                         usvc->flags = svc->flags;
3066
3067                 /* set new flags from userland */
3068                 usvc->flags = (usvc->flags & ~flags.mask) |
3069                               (flags.flags & flags.mask);
3070                 usvc->sched_name = nla_data(nla_sched);
3071                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3072                 usvc->timeout = nla_get_u32(nla_timeout);
3073                 usvc->netmask = nla_get_u32(nla_netmask);
3074         }
3075
3076         return 0;
3077 }
3078
3079 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3080                                                      struct nlattr *nla)
3081 {
3082         struct ip_vs_service_user_kern usvc;
3083         struct ip_vs_service *svc;
3084         int ret;
3085
3086         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3087         return ret ? ERR_PTR(ret) : svc;
3088 }
3089
3090 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3091 {
3092         struct nlattr *nl_dest;
3093
3094         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3095         if (!nl_dest)
3096                 return -EMSGSIZE;
3097
3098         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3099             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3100             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3101                         (atomic_read(&dest->conn_flags) &
3102                          IP_VS_CONN_F_FWD_MASK)) ||
3103             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3104                         atomic_read(&dest->weight)) ||
3105             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3106             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3107             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3108                         atomic_read(&dest->activeconns)) ||
3109             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3110                         atomic_read(&dest->inactconns)) ||
3111             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3112                         atomic_read(&dest->persistconns)))
3113                 goto nla_put_failure;
3114         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3115                 goto nla_put_failure;
3116
3117         nla_nest_end(skb, nl_dest);
3118
3119         return 0;
3120
3121 nla_put_failure:
3122         nla_nest_cancel(skb, nl_dest);
3123         return -EMSGSIZE;
3124 }
3125
3126 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3127                                 struct netlink_callback *cb)
3128 {
3129         void *hdr;
3130
3131         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3132                           &ip_vs_genl_family, NLM_F_MULTI,
3133                           IPVS_CMD_NEW_DEST);
3134         if (!hdr)
3135                 return -EMSGSIZE;
3136
3137         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3138                 goto nla_put_failure;
3139
3140         return genlmsg_end(skb, hdr);
3141
3142 nla_put_failure:
3143         genlmsg_cancel(skb, hdr);
3144         return -EMSGSIZE;
3145 }
3146
3147 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3148                                  struct netlink_callback *cb)
3149 {
3150         int idx = 0;
3151         int start = cb->args[0];
3152         struct ip_vs_service *svc;
3153         struct ip_vs_dest *dest;
3154         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3155         struct net *net = skb_sknet(skb);
3156
3157         mutex_lock(&__ip_vs_mutex);
3158
3159         /* Try to find the service for which to dump destinations */
3160         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3161                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3162                 goto out_err;
3163
3164
3165         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3166         if (IS_ERR(svc) || svc == NULL)
3167                 goto out_err;
3168
3169         /* Dump the destinations */
3170         list_for_each_entry(dest, &svc->destinations, n_list) {
3171                 if (++idx <= start)
3172                         continue;
3173                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3174                         idx--;
3175                         goto nla_put_failure;
3176                 }
3177         }
3178
3179 nla_put_failure:
3180         cb->args[0] = idx;
3181
3182 out_err:
3183         mutex_unlock(&__ip_vs_mutex);
3184
3185         return skb->len;
3186 }
3187
3188 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3189                                  struct nlattr *nla, int full_entry)
3190 {
3191         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3192         struct nlattr *nla_addr, *nla_port;
3193
3194         /* Parse mandatory identifying destination fields first */
3195         if (nla == NULL ||
3196             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3197                 return -EINVAL;
3198
3199         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3200         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3201
3202         if (!(nla_addr && nla_port))
3203                 return -EINVAL;
3204
3205         memset(udest, 0, sizeof(*udest));
3206
3207         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3208         udest->port = nla_get_u16(nla_port);
3209
3210         /* If a full entry was requested, check for the additional fields */
3211         if (full_entry) {
3212                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3213                               *nla_l_thresh;
3214
3215                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3216                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3217                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3218                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3219
3220                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3221                         return -EINVAL;
3222
3223                 udest->conn_flags = nla_get_u32(nla_fwd)
3224                                     & IP_VS_CONN_F_FWD_MASK;
3225                 udest->weight = nla_get_u32(nla_weight);
3226                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3227                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3228         }
3229
3230         return 0;
3231 }
3232
3233 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3234                                   const char *mcast_ifn, __be32 syncid)
3235 {
3236         struct nlattr *nl_daemon;
3237
3238         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3239         if (!nl_daemon)
3240                 return -EMSGSIZE;
3241
3242         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3243             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3244             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3245                 goto nla_put_failure;
3246         nla_nest_end(skb, nl_daemon);
3247
3248         return 0;
3249
3250 nla_put_failure:
3251         nla_nest_cancel(skb, nl_daemon);
3252         return -EMSGSIZE;
3253 }
3254
3255 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3256                                   const char *mcast_ifn, __be32 syncid,
3257                                   struct netlink_callback *cb)
3258 {
3259         void *hdr;
3260         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3261                           &ip_vs_genl_family, NLM_F_MULTI,
3262                           IPVS_CMD_NEW_DAEMON);
3263         if (!hdr)
3264                 return -EMSGSIZE;
3265
3266         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3267                 goto nla_put_failure;
3268
3269         return genlmsg_end(skb, hdr);
3270
3271 nla_put_failure:
3272         genlmsg_cancel(skb, hdr);
3273         return -EMSGSIZE;
3274 }
3275
3276 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3277                                    struct netlink_callback *cb)
3278 {
3279         struct net *net = skb_sknet(skb);
3280         struct netns_ipvs *ipvs = net_ipvs(net);
3281
3282         mutex_lock(&ipvs->sync_mutex);
3283         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3284                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3285                                            ipvs->master_mcast_ifn,
3286                                            ipvs->master_syncid, cb) < 0)
3287                         goto nla_put_failure;
3288
3289                 cb->args[0] = 1;
3290         }
3291
3292         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3293                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3294                                            ipvs->backup_mcast_ifn,
3295                                            ipvs->backup_syncid, cb) < 0)
3296                         goto nla_put_failure;
3297
3298                 cb->args[1] = 1;
3299         }
3300
3301 nla_put_failure:
3302         mutex_unlock(&ipvs->sync_mutex);
3303
3304         return skb->len;
3305 }
3306
3307 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3308 {
3309         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3310               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3311               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3312                 return -EINVAL;
3313
3314         return start_sync_thread(net,
3315                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3316                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3317                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3318 }
3319
3320 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3321 {
3322         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3323                 return -EINVAL;
3324
3325         return stop_sync_thread(net,
3326                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3327 }
3328
3329 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3330 {
3331         struct ip_vs_timeout_user t;
3332
3333         __ip_vs_get_timeouts(net, &t);
3334
3335         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3336                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3337
3338         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3339                 t.tcp_fin_timeout =
3340                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3341
3342         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3343                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3344
3345         return ip_vs_set_timeout(net, &t);
3346 }
3347
3348 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3349 {
3350         int ret = 0, cmd;
3351         struct net *net;
3352         struct netns_ipvs *ipvs;
3353
3354         net = skb_sknet(skb);
3355         ipvs = net_ipvs(net);
3356         cmd = info->genlhdr->cmd;
3357
3358         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3359                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3360
3361                 mutex_lock(&ipvs->sync_mutex);
3362                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3363                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3364                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3365                                      ip_vs_daemon_policy)) {
3366                         ret = -EINVAL;
3367                         goto out;
3368                 }
3369
3370                 if (cmd == IPVS_CMD_NEW_DAEMON)
3371                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3372                 else
3373                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3374 out:
3375                 mutex_unlock(&ipvs->sync_mutex);
3376         }
3377         return ret;
3378 }
3379
3380 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3381 {
3382         struct ip_vs_service *svc = NULL;
3383         struct ip_vs_service_user_kern usvc;
3384         struct ip_vs_dest_user_kern udest;
3385         int ret = 0, cmd;
3386         int need_full_svc = 0, need_full_dest = 0;
3387         struct net *net;
3388
3389         net = skb_sknet(skb);
3390         cmd = info->genlhdr->cmd;
3391
3392         mutex_lock(&__ip_vs_mutex);
3393
3394         if (cmd == IPVS_CMD_FLUSH) {
3395                 ret = ip_vs_flush(net);
3396                 goto out;
3397         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3398                 ret = ip_vs_genl_set_config(net, info->attrs);
3399                 goto out;
3400         } else if (cmd == IPVS_CMD_ZERO &&
3401                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3402                 ret = ip_vs_zero_all(net);
3403                 goto out;
3404         }
3405
3406         /* All following commands require a service argument, so check if we
3407          * received a valid one. We need a full service specification when
3408          * adding / editing a service. Only identifying members otherwise. */
3409         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3410                 need_full_svc = 1;
3411
3412         ret = ip_vs_genl_parse_service(net, &usvc,
3413                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3414                                        need_full_svc, &svc);
3415         if (ret)
3416                 goto out;
3417
3418         /* Unless we're adding a new service, the service must already exist */
3419         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3420                 ret = -ESRCH;
3421                 goto out;
3422         }
3423
3424         /* Destination commands require a valid destination argument. For
3425          * adding / editing a destination, we need a full destination
3426          * specification. */
3427         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3428             cmd == IPVS_CMD_DEL_DEST) {
3429                 if (cmd != IPVS_CMD_DEL_DEST)
3430                         need_full_dest = 1;
3431
3432                 ret = ip_vs_genl_parse_dest(&udest,
3433                                             info->attrs[IPVS_CMD_ATTR_DEST],
3434                                             need_full_dest);
3435                 if (ret)
3436                         goto out;
3437         }
3438
3439         switch (cmd) {
3440         case IPVS_CMD_NEW_SERVICE:
3441                 if (svc == NULL)
3442                         ret = ip_vs_add_service(net, &usvc, &svc);
3443                 else
3444                         ret = -EEXIST;
3445                 break;
3446         case IPVS_CMD_SET_SERVICE:
3447                 ret = ip_vs_edit_service(svc, &usvc);
3448                 break;
3449         case IPVS_CMD_DEL_SERVICE:
3450                 ret = ip_vs_del_service(svc);
3451                 /* do not use svc, it can be freed */
3452                 break;
3453         case IPVS_CMD_NEW_DEST:
3454                 ret = ip_vs_add_dest(svc, &udest);
3455                 break;
3456         case IPVS_CMD_SET_DEST:
3457                 ret = ip_vs_edit_dest(svc, &udest);
3458                 break;
3459         case IPVS_CMD_DEL_DEST:
3460                 ret = ip_vs_del_dest(svc, &udest);
3461                 break;
3462         case IPVS_CMD_ZERO:
3463                 ret = ip_vs_zero_service(svc);
3464                 break;
3465         default:
3466                 ret = -EINVAL;
3467         }
3468
3469 out:
3470         mutex_unlock(&__ip_vs_mutex);
3471
3472         return ret;
3473 }
3474
3475 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3476 {
3477         struct sk_buff *msg;
3478         void *reply;
3479         int ret, cmd, reply_cmd;
3480         struct net *net;
3481
3482         net = skb_sknet(skb);
3483         cmd = info->genlhdr->cmd;
3484
3485         if (cmd == IPVS_CMD_GET_SERVICE)
3486                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3487         else if (cmd == IPVS_CMD_GET_INFO)
3488                 reply_cmd = IPVS_CMD_SET_INFO;
3489         else if (cmd == IPVS_CMD_GET_CONFIG)
3490                 reply_cmd = IPVS_CMD_SET_CONFIG;
3491         else {
3492                 pr_err("unknown Generic Netlink command\n");
3493                 return -EINVAL;
3494         }
3495
3496         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3497         if (!msg)
3498                 return -ENOMEM;
3499
3500         mutex_lock(&__ip_vs_mutex);
3501
3502         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3503         if (reply == NULL)
3504                 goto nla_put_failure;
3505
3506         switch (cmd) {
3507         case IPVS_CMD_GET_SERVICE:
3508         {
3509                 struct ip_vs_service *svc;
3510
3511                 svc = ip_vs_genl_find_service(net,
3512                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3513                 if (IS_ERR(svc)) {
3514                         ret = PTR_ERR(svc);
3515                         goto out_err;
3516                 } else if (svc) {
3517                         ret = ip_vs_genl_fill_service(msg, svc);
3518                         if (ret)
3519                                 goto nla_put_failure;
3520                 } else {
3521                         ret = -ESRCH;
3522                         goto out_err;
3523                 }
3524
3525                 break;
3526         }
3527
3528         case IPVS_CMD_GET_CONFIG:
3529         {
3530                 struct ip_vs_timeout_user t;
3531
3532                 __ip_vs_get_timeouts(net, &t);
3533 #ifdef CONFIG_IP_VS_PROTO_TCP
3534                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3535                                 t.tcp_timeout) ||
3536                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3537                                 t.tcp_fin_timeout))
3538                         goto nla_put_failure;
3539 #endif
3540 #ifdef CONFIG_IP_VS_PROTO_UDP
3541                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3542                         goto nla_put_failure;
3543 #endif
3544
3545                 break;
3546         }
3547
3548         case IPVS_CMD_GET_INFO:
3549                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3550                                 IP_VS_VERSION_CODE) ||
3551                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3552                                 ip_vs_conn_tab_size))
3553                         goto nla_put_failure;
3554                 break;
3555         }
3556
3557         genlmsg_end(msg, reply);
3558         ret = genlmsg_reply(msg, info);
3559         goto out;
3560
3561 nla_put_failure:
3562         pr_err("not enough space in Netlink message\n");
3563         ret = -EMSGSIZE;
3564
3565 out_err:
3566         nlmsg_free(msg);
3567 out:
3568         mutex_unlock(&__ip_vs_mutex);
3569
3570         return ret;
3571 }
3572
3573
3574 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3575         {
3576                 .cmd    = IPVS_CMD_NEW_SERVICE,
3577                 .flags  = GENL_ADMIN_PERM,
3578                 .policy = ip_vs_cmd_policy,
3579                 .doit   = ip_vs_genl_set_cmd,
3580         },
3581         {
3582                 .cmd    = IPVS_CMD_SET_SERVICE,
3583                 .flags  = GENL_ADMIN_PERM,
3584                 .policy = ip_vs_cmd_policy,
3585                 .doit   = ip_vs_genl_set_cmd,
3586         },
3587         {
3588                 .cmd    = IPVS_CMD_DEL_SERVICE,
3589                 .flags  = GENL_ADMIN_PERM,
3590                 .policy = ip_vs_cmd_policy,
3591                 .doit   = ip_vs_genl_set_cmd,
3592         },
3593         {
3594                 .cmd    = IPVS_CMD_GET_SERVICE,
3595                 .flags  = GENL_ADMIN_PERM,
3596                 .doit   = ip_vs_genl_get_cmd,
3597                 .dumpit = ip_vs_genl_dump_services,
3598                 .policy = ip_vs_cmd_policy,
3599         },
3600         {
3601                 .cmd    = IPVS_CMD_NEW_DEST,
3602                 .flags  = GENL_ADMIN_PERM,
3603                 .policy = ip_vs_cmd_policy,
3604                 .doit   = ip_vs_genl_set_cmd,
3605         },
3606         {
3607                 .cmd    = IPVS_CMD_SET_DEST,
3608                 .flags  = GENL_ADMIN_PERM,
3609                 .policy = ip_vs_cmd_policy,
3610                 .doit   = ip_vs_genl_set_cmd,
3611         },
3612         {
3613                 .cmd    = IPVS_CMD_DEL_DEST,
3614                 .flags  = GENL_ADMIN_PERM,
3615                 .policy = ip_vs_cmd_policy,
3616                 .doit   = ip_vs_genl_set_cmd,
3617         },
3618         {
3619                 .cmd    = IPVS_CMD_GET_DEST,
3620                 .flags  = GENL_ADMIN_PERM,
3621                 .policy = ip_vs_cmd_policy,
3622                 .dumpit = ip_vs_genl_dump_dests,
3623         },
3624         {
3625                 .cmd    = IPVS_CMD_NEW_DAEMON,
3626                 .flags  = GENL_ADMIN_PERM,
3627                 .policy = ip_vs_cmd_policy,
3628                 .doit   = ip_vs_genl_set_daemon,
3629         },
3630         {
3631                 .cmd    = IPVS_CMD_DEL_DAEMON,
3632                 .flags  = GENL_ADMIN_PERM,
3633                 .policy = ip_vs_cmd_policy,
3634                 .doit   = ip_vs_genl_set_daemon,
3635         },
3636         {
3637                 .cmd    = IPVS_CMD_GET_DAEMON,
3638                 .flags  = GENL_ADMIN_PERM,
3639                 .dumpit = ip_vs_genl_dump_daemons,
3640         },
3641         {
3642                 .cmd    = IPVS_CMD_SET_CONFIG,
3643                 .flags  = GENL_ADMIN_PERM,
3644                 .policy = ip_vs_cmd_policy,
3645                 .doit   = ip_vs_genl_set_cmd,
3646         },
3647         {
3648                 .cmd    = IPVS_CMD_GET_CONFIG,
3649                 .flags  = GENL_ADMIN_PERM,
3650                 .doit   = ip_vs_genl_get_cmd,
3651         },
3652         {
3653                 .cmd    = IPVS_CMD_GET_INFO,
3654                 .flags  = GENL_ADMIN_PERM,
3655                 .doit   = ip_vs_genl_get_cmd,
3656         },
3657         {
3658                 .cmd    = IPVS_CMD_ZERO,
3659                 .flags  = GENL_ADMIN_PERM,
3660                 .policy = ip_vs_cmd_policy,
3661                 .doit   = ip_vs_genl_set_cmd,
3662         },
3663         {
3664                 .cmd    = IPVS_CMD_FLUSH,
3665                 .flags  = GENL_ADMIN_PERM,
3666                 .doit   = ip_vs_genl_set_cmd,
3667         },
3668 };
3669
3670 static int __init ip_vs_genl_register(void)
3671 {
3672         return genl_register_family_with_ops(&ip_vs_genl_family,
3673                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3674 }
3675
3676 static void ip_vs_genl_unregister(void)
3677 {
3678         genl_unregister_family(&ip_vs_genl_family);
3679 }
3680
3681 /* End of Generic Netlink interface definitions */
3682
/*
 * Per-netns init/exit functions.
 */
3686 #ifdef CONFIG_SYSCTL
3687 static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3688 {
3689         int idx;
3690         struct netns_ipvs *ipvs = net_ipvs(net);
3691         struct ctl_table *tbl;
3692
3693         atomic_set(&ipvs->dropentry, 0);
3694         spin_lock_init(&ipvs->dropentry_lock);
3695         spin_lock_init(&ipvs->droppacket_lock);
3696         spin_lock_init(&ipvs->securetcp_lock);
3697
3698         if (!net_eq(net, &init_net)) {
3699                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3700                 if (tbl == NULL)
3701                         return -ENOMEM;
3702         } else
3703                 tbl = vs_vars;
3704         /* Initialize sysctl defaults */
3705         idx = 0;
3706         ipvs->sysctl_amemthresh = 1024;
3707         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3708         ipvs->sysctl_am_droprate = 10;
3709         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3710         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3711         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3712 #ifdef CONFIG_IP_VS_NFCT
3713         tbl[idx++].data = &ipvs->sysctl_conntrack;
3714 #endif
3715         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3716         ipvs->sysctl_snat_reroute = 1;
3717         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3718         ipvs->sysctl_sync_ver = 1;
3719         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3720         ipvs->sysctl_sync_ports = 1;
3721         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3722         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3723         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3724         ipvs->sysctl_sync_sock_size = 0;
3725         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3726         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3727         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3728         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3729         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3730         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3731         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3732         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3733         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3734         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3735         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3736         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3737         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3738         ipvs->sysctl_pmtu_disc = 1;
3739         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3740
3741
3742         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3743         if (ipvs->sysctl_hdr == NULL) {
3744                 if (!net_eq(net, &init_net))
3745                         kfree(tbl);
3746                 return -ENOMEM;
3747         }
3748         ip_vs_start_estimator(net, &ipvs->tot_stats);
3749         ipvs->sysctl_tbl = tbl;
3750         /* Schedule defense work */
3751         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3752         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3753
3754         return 0;
3755 }
3756
3757 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3758 {
3759         struct netns_ipvs *ipvs = net_ipvs(net);
3760
3761         cancel_delayed_work_sync(&ipvs->defense_work);
3762         cancel_work_sync(&ipvs->defense_work.work);
3763         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3764 }
3765
3766 #else
3767
3768 static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3769 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3770
3771 #endif
3772
3773 static struct notifier_block ip_vs_dst_notifier = {
3774         .notifier_call = ip_vs_dst_event,
3775 };
3776
3777 int __net_init ip_vs_control_net_init(struct net *net)
3778 {
3779         int idx;
3780         struct netns_ipvs *ipvs = net_ipvs(net);
3781
3782         rwlock_init(&ipvs->rs_lock);
3783
3784         /* Initialize rs_table */
3785         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3786                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3787
3788         INIT_LIST_HEAD(&ipvs->dest_trash);
3789         atomic_set(&ipvs->ftpsvc_counter, 0);
3790         atomic_set(&ipvs->nullsvc_counter, 0);
3791
3792         /* procfs stats */
3793         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3794         if (!ipvs->tot_stats.cpustats)
3795                 return -ENOMEM;
3796
3797         spin_lock_init(&ipvs->tot_stats.lock);
3798
3799         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3800         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3801         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3802                              &ip_vs_stats_percpu_fops);
3803
3804         if (ip_vs_control_net_init_sysctl(net))
3805                 goto err;
3806
3807         return 0;
3808
3809 err:
3810         free_percpu(ipvs->tot_stats.cpustats);
3811         return -ENOMEM;
3812 }
3813
3814 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3815 {
3816         struct netns_ipvs *ipvs = net_ipvs(net);
3817
3818         ip_vs_trash_cleanup(net);
3819         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3820         ip_vs_control_net_cleanup_sysctl(net);
3821         proc_net_remove(net, "ip_vs_stats_percpu");
3822         proc_net_remove(net, "ip_vs_stats");
3823         proc_net_remove(net, "ip_vs");
3824         free_percpu(ipvs->tot_stats.cpustats);
3825 }
3826
3827 int __init ip_vs_register_nl_ioctl(void)
3828 {
3829         int ret;
3830
3831         ret = nf_register_sockopt(&ip_vs_sockopts);
3832         if (ret) {
3833                 pr_err("cannot register sockopt.\n");
3834                 goto err_sock;
3835         }
3836
3837         ret = ip_vs_genl_register();
3838         if (ret) {
3839                 pr_err("cannot register Generic Netlink interface.\n");
3840                 goto err_genl;
3841         }
3842         return 0;
3843
3844 err_genl:
3845         nf_unregister_sockopt(&ip_vs_sockopts);
3846 err_sock:
3847         return ret;
3848 }
3849
3850 void ip_vs_unregister_nl_ioctl(void)
3851 {
3852         ip_vs_genl_unregister();
3853         nf_unregister_sockopt(&ip_vs_sockopts);
3854 }
3855
3856 int __init ip_vs_control_init(void)
3857 {
3858         int idx;
3859         int ret;
3860
3861         EnterFunction(2);
3862
3863         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3864         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3865                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3866                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3867         }
3868
3869         smp_wmb();      /* Do we really need it now ? */
3870
3871         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3872         if (ret < 0)
3873                 return ret;
3874
3875         LeaveFunction(2);
3876         return 0;
3877 }
3878
3879
3880 void ip_vs_control_cleanup(void)
3881 {
3882         EnterFunction(2);
3883         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3884         LeaveFunction(2);
3885 }