/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-hw-handler.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <asm/atomic.h>

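/* Expands to the string literal plus its size (including the trailing NUL),
 * i.e. the final two arguments expected by the str[n]icmp() calls below. */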
#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned fail_count;		/* Cumulative failure count */

	struct path path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	struct hw_handler hw_handler;
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned suspended;		/* Has dm core suspended our I/O? */

	struct work_struct process_queued_ios;
	struct bio_list queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;
};

/*
 * Context information attached to each bio we process.
 */
struct mpath_io {
	struct pgpath *pgpath;
	struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static kmem_cache_t *_mpio_cache;

static void process_queued_ios(void *data);
static void trigger_event(void *data);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kmalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		memset(pgpath, 0, sizeof(*pgpath));
		pgpath->path.is_active = 1;
	}

	return pgpath;
}

static inline void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kmalloc(sizeof(*pg), GFP_KERNEL);
	if (!pg)
		return NULL;

	memset(pg, 0, sizeof(*pg));
	INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(void)
{
	struct multipath *m;

	m = kmalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		memset(m, 0, sizeof(*m));
		INIT_LIST_HEAD(&m->priority_groups);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
		INIT_WORK(&m->trigger_event, trigger_event, m);
		m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
					      mempool_free_slab, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;
	struct hw_handler *hwh = &m->hw_handler;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	if (hwh->type) {
		hwh->type->destroy(hwh);
		dm_put_hw_handler(hwh->type);
	}

	mempool_destroy(m->mpio_pool);
	kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	struct hw_handler *hwh = &m->hw_handler;

	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (hwh->type && hwh->type->pg_init) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
{
	struct path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

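/*
 * Map a bio onto the current path.  Returns 1 if the bio was remapped
 * and should be submitted, 0 if it was queued for the daemon to
 * resubmit, or -EIO if no usable path was available.
 */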
static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
		  unsigned was_queued)
{
	int r = 1;
	unsigned long flags;
	struct pgpath *pgpath;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path && !m->suspended)) {
		/* Queue for the daemon to resubmit */
		bio_list_add(&m->queued_ios, bio);
		m->queue_size++;
		if (m->pg_init_required || !m->queue_io)
			schedule_work(&m->process_queued_ios);
		pgpath = NULL;
		r = 0;
	} else if (!pgpath)
		r = -EIO;		/* Failed */
	else
		bio->bi_bdev = pgpath->path.dev->bdev;

	mpio->pgpath = pgpath;

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path)
		schedule_work(&m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct bio *bio = NULL, *next;
	struct mpath_io *mpio;
	union map_info *info;

	spin_lock_irqsave(&m->lock, flags);
	bio = bio_list_get(&m->queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	while (bio) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		info = dm_get_mapinfo(bio);
		mpio = info->ptr;

		r = map_io(m, bio, mpio, 1);
		if (r < 0)
			bio_endio(bio, bio->bi_size, r);
		else if (r == 1)
			generic_make_request(bio);

		bio = next;
	}
}

static void process_queued_ios(void *data)
{
	struct multipath *m = (struct multipath *) data;
	struct hw_handler *hwh = &m->hw_handler;
	struct pgpath *pgpath;
	unsigned init_required, must_queue = 0;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path && !m->suspended))
		must_queue = 1;

	init_required = m->pg_init_required;
	if (init_required)
		m->pg_init_required = 0;

	spin_unlock_irqrestore(&m->lock, flags);

	if (init_required)
		hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path);

	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(void *data)
{
	struct multipath *m = (struct multipath *) data;

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
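/*
 * For illustration (device numbers and repeat counts are hypothetical),
 * a two-group round-robin target could be built from:
 *
 *	1 queue_if_no_path 0 2 1
 *	round-robin 0 2 1 8:16 1000 8:32 1000
 *	round-robin 0 1 1 8:48 1000
 *
 * i.e. one feature arg, no hardware handler, two priority groups
 * starting with group 1, and a repeat count of 1000 per path.
 */
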
struct param {
	unsigned min;
	unsigned max;
	char *error;
};

#define ESTR(s) ("dm-multipath: " s)

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
	if (!str ||
	    (sscanf(str, "%u", v) != 1) ||
	    (*v < param->min) ||
	    (*v > param->max)) {
		*error = param->error;
		return -EINVAL;
	}

	return 0;
}

struct arg_set {
	unsigned argc;
	char **argv;
};

static char *shift(struct arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON(as->argc < n);
	as->argc -= n;
	as->argv += n;
}

static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, ESTR("invalid number of path selector args")},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = ESTR("unknown path selector type");
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r)
		return -EINVAL;

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = ESTR("path selector constructor failed");
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = ESTR("no device given");
		return NULL;
	}

	p = alloc_pgpath();
	if (!p)
		return NULL;

	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &p->path.dev);
	if (r) {
		ti->error = ESTR("error getting device");
		goto bad;
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

 bad:
	free_pgpath(p);
	return NULL;
}

static struct priority_group *parse_priority_group(struct arg_set *as,
						   struct multipath *m,
						   struct dm_target *ti)
{
	static struct param _params[] = {
		{1, 1024, ESTR("invalid number of paths")},
		{0, 1024, ESTR("invalid number of selector args")}
	};

	int r;
	unsigned i, nr_selector_args, nr_params;
	struct priority_group *pg;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = ESTR("not enough priority group arguments");
		return NULL;
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = ESTR("couldn't allocate priority group");
		return NULL;
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_params = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct arg_set path_args;

		if (as->argc < nr_params)
			goto bad;

		path_args.argc = nr_params;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (!pgpath)
			goto bad;

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		consume(as, nr_params);
	}

	return pg;

 bad:
	free_priority_group(pg, ti);
	return NULL;
}

static int parse_hw_handler(struct arg_set *as, struct multipath *m,
			    struct dm_target *ti)
{
	int r;
	struct hw_handler_type *hwht;
	unsigned hw_argc;

	static struct param _params[] = {
		{0, 1024, ESTR("invalid number of hardware handler args")},
	};

	r = read_param(_params, shift(as), &hw_argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!hw_argc)
		return 0;

	hwht = dm_get_hw_handler(shift(as));
	if (!hwht) {
		ti->error = ESTR("unknown hardware handler type");
		return -EINVAL;
	}

	r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
	if (r) {
		dm_put_hw_handler(hwht);
		ti->error = ESTR("hardware handler constructor failed");
		return r;
	}

	m->hw_handler.type = hwht;
	consume(as, hw_argc - 1);

	return 0;
}

static int parse_features(struct arg_set *as, struct multipath *m,
			  struct dm_target *ti)
{
	int r;
	unsigned argc;

	static struct param _params[] = {
		{0, 1, ESTR("invalid number of feature args")},
	};

	r = read_param(_params, shift(as), &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
		return queue_if_no_path(m, 1);
	else {
		ti->error = "Unrecognised multipath feature request";
		return -EINVAL;
	}
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, ESTR("invalid number of priority groups")},
		{1, 1024, ESTR("invalid initial priority group number")},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath();
	if (!m) {
		ti->error = ESTR("can't allocate multipath");
		return -EINVAL;
	}

	r = parse_features(&as, m, ti);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m, ti);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m, ti);
		if (!pg) {
			r = -EINVAL;
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = ESTR("priority group count mismatch");
		r = -EINVAL;
		goto bad;
	}

	ti->private = m;
	m->ti = ti;

	return 0;

 bad:
	free_multipath(m);
	return r;
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	free_multipath(m);
}

/*
 * Map bios, recording original fields for later in case we have to resubmit
 */
static int multipath_map(struct dm_target *ti, struct bio *bio,
			 union map_info *map_context)
{
	int r;
	struct mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
	dm_bio_record(&mpio->details, bio);

	map_context->ptr = mpio;
	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
	r = map_io(m, bio, mpio, 0);
	if (r < 0)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->path.is_active)
		goto out;

	DMWARN("dm-multipath: Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->path.is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->path.is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->path.is_active = 1;

	m->current_pgpath = NULL;
	if (!m->nr_valid_paths++)
		schedule_work(&m->process_queued_ios);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = 0;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * pg_init must call this when it has completed its initialisation
 */
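/*
 * A minimal sketch of a caller (example_pg_init and example_send_init_cmd
 * are hypothetical, not real in-tree code):
 *
 *	static void example_pg_init(struct hw_handler *hwh, unsigned bypassed,
 *				    struct path *path)
 *	{
 *		int err = example_send_init_cmd(path);
 *
 *		dm_pg_init_complete(path, err ? MP_FAIL_PATH : 0);
 *	}
 */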
void dm_pg_init_complete(struct path *path, unsigned err_flags)
{
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* We insist on failing the path if the PG is already bypassed. */
	if (err_flags && pg->bypassed)
		err_flags |= MP_FAIL_PATH;

	if (err_flags & MP_FAIL_PATH)
		fail_path(pgpath);

	if (err_flags & MP_BYPASS_PG)
		bypass_pg(m, pg, 1);

	spin_lock_irqsave(&m->lock, flags);
	if (!err_flags)
		m->queue_io = 0;
	else {
		m->current_pgpath = NULL;
		m->current_pg = NULL;
	}
	schedule_work(&m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * end_io handling
 */
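/*
 * Returns 0 if the I/O completed, 1 if it was requeued for the daemon
 * to retry, or -EIO to fail the bio.
 */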
static int do_end_io(struct multipath *m, struct bio *bio,
		     int error, struct mpath_io *mpio)
{
	struct hw_handler *hwh = &m->hw_handler;
	unsigned err_flags = MP_FAIL_PATH;	/* Default behavior */

	if (!error)
		return 0;	/* I/O complete */

	spin_lock(&m->lock);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path || m->suspended) {
			spin_unlock(&m->lock);
			return -EIO;
		} else {
			spin_unlock(&m->lock);
			goto requeue;
		}
	}
	spin_unlock(&m->lock);

	if (hwh->type && hwh->type->error)
		err_flags = hwh->type->error(hwh, bio);

	if (mpio->pgpath) {
		if (err_flags & MP_FAIL_PATH)
			fail_path(mpio->pgpath);

		if (err_flags & MP_BYPASS_PG)
			bypass_pg(m, mpio->pgpath->pg, 1);
	}

	if (err_flags & MP_ERROR_IO)
		return -EIO;

requeue:
	dm_bio_restore(&mpio->details, bio);

	/* queue for the daemon to resubmit or fail */
	spin_lock(&m->lock);
	bio_list_add(&m->queued_ios, bio);
	m->queue_size++;
	if (!m->queue_io)
		schedule_work(&m->process_queued_ios);
	spin_unlock(&m->lock);

	return 1;	/* io not complete */
}

static int multipath_end_io(struct dm_target *ti, struct bio *bio,
			    int error, union map_info *map_context)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct mpath_io *mpio = (struct mpath_io *) map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	r = do_end_io(m, bio, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path);
	}
	if (r <= 0)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path failed we will now error any queued I/O.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->suspended = 1;
	if (m->queue_if_no_path)
		schedule_work(&m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->suspended = 0;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
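/*
 * For example (hypothetical device numbers, a selector with no status
 * method, and no hardware handler), the info output for a two-group map
 * might read:
 *
 *	1 0 0 2 1 A 0 2 0 8:16 A 0 8:32 A 0 E 0 1 0 8:48 A 0
 *
 * i.e. one feature arg (0 I/Os queued), no handler status, two groups
 * starting with group 1, group 1 active with two live paths, group 2
 * enabled with one.
 */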
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct hw_handler *hwh = &m->hw_handler;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("1 %u ", m->queue_size);
	else if (m->queue_if_no_path)
		DMEMIT("1 queue_if_no_path ");
	else
		DMEMIT("0 ");

	if (hwh->type && hwh->type->status)
		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
	else if (!hwh->type || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", hwh->type->name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = 1;

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->path.is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

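/*
 * These messages arrive via "dmsetup message <device> 0 <message>",
 * e.g. (hypothetical map name and path):
 *
 *	dmsetup message mpath0 0 fail_path 8:16
 *	dmsetup message mpath0 0 switch_group 2
 *	dmsetup message mpath0 0 queue_if_no_path
 */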
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	if (argc == 1) {
		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
			return queue_if_no_path(m, 1);
		else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
			return queue_if_no_path(m, 0);
	}

	if (argc != 2)
		goto error;

	if (!strnicmp(argv[0], MESG_STR("disable_group")))
		return bypass_pg_num(m, argv[1], 1);
	else if (!strnicmp(argv[0], MESG_STR("enable_group")))
		return bypass_pg_num(m, argv[1], 0);
	else if (!strnicmp(argv[0], MESG_STR("switch_group")))
		return switch_pg_num(m, argv[1]);
	else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
		action = reinstate_path;
	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
		action = fail_path;
	else
		goto error;

	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("dm-multipath message: error getting device %s",
		       argv[1]);
		return -EINVAL;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

	return r;

error:
	DMWARN("Unrecognised multipath message received.");
	return -EINVAL;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 0, 4},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map = multipath_map,
	.end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = kmem_cache_create("dm_mpath", sizeof(struct mpath_io),
					0, 0, NULL, NULL);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("%s: register failed %d", multipath_target.name, r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	DMINFO("dm-multipath version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	int r;

	r = dm_unregister_target(&multipath_target);
	if (r < 0)
		DMERR("%s: target unregister failed %d",
		      multipath_target.name, r);
	kmem_cache_destroy(_mpio_cache);
}

EXPORT_SYMBOL_GPL(dm_pg_init_complete);

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");