]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/libstdc++-v3/contrib/libstdc++-v3-4.4/doc/xml/manual/parallel_mode.xml
update
[l4.git] / l4 / pkg / libstdc++-v3 / contrib / libstdc++-v3-4.4 / doc / xml / manual / parallel_mode.xml
1 <?xml version='1.0'?>
2 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" 
3  "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" 
4 [ ]>
5
6 <chapter id="manual.ext.parallel_mode" xreflabel="Parallel Mode">
7 <?dbhtml filename="parallel_mode.html"?>
8  
9 <chapterinfo>
10   <keywordset>
11     <keyword>
12       C++
13     </keyword>
14     <keyword>
15       library
16     </keyword>
17     <keyword>
18       parallel
19     </keyword>
20   </keywordset>
21 </chapterinfo>
22
23 <title>Parallel Mode</title>
24
25 <para> The libstdc++ parallel mode is an experimental parallel
26 implementation of many algorithms of the C++ Standard Library.
27 </para>
28
29 <para>
30 Several of the standard algorithms, for instance
31 <function>std::sort</function>, are made parallel using OpenMP
32 annotations. These parallel mode constructs can be invoked by
33 explicit source declaration or by compiling existing sources with a
34 specific compiler flag.
35 </para>
36
37
38 <sect1 id="manual.ext.parallel_mode.intro" xreflabel="Intro">
39   <title>Intro</title>
40
41 <para>The following library components in the include
42 <filename class="headerfile">numeric</filename> are included in the parallel mode:</para>
43 <itemizedlist>
44   <listitem><para><function>std::accumulate</function></para></listitem>
45   <listitem><para><function>std::adjacent_difference</function></para></listitem>
46   <listitem><para><function>std::inner_product</function></para></listitem>
47   <listitem><para><function>std::partial_sum</function></para></listitem>
48 </itemizedlist>
49
50 <para>The following library components in the include
51 <filename class="headerfile">algorithm</filename> are included in the parallel mode:</para>
52 <itemizedlist>
53   <listitem><para><function>std::adjacent_find</function></para></listitem>
54   <listitem><para><function>std::count</function></para></listitem>
55   <listitem><para><function>std::count_if</function></para></listitem>
56   <listitem><para><function>std::equal</function></para></listitem>
57   <listitem><para><function>std::find</function></para></listitem>
58   <listitem><para><function>std::find_if</function></para></listitem>
59   <listitem><para><function>std::find_first_of</function></para></listitem>
60   <listitem><para><function>std::for_each</function></para></listitem>
61   <listitem><para><function>std::generate</function></para></listitem>
62   <listitem><para><function>std::generate_n</function></para></listitem>
63   <listitem><para><function>std::lexicographical_compare</function></para></listitem>
64   <listitem><para><function>std::mismatch</function></para></listitem>
65   <listitem><para><function>std::search</function></para></listitem>
66   <listitem><para><function>std::search_n</function></para></listitem>
67   <listitem><para><function>std::transform</function></para></listitem>
68   <listitem><para><function>std::replace</function></para></listitem>
69   <listitem><para><function>std::replace_if</function></para></listitem>
70   <listitem><para><function>std::max_element</function></para></listitem>
71   <listitem><para><function>std::merge</function></para></listitem>
72   <listitem><para><function>std::min_element</function></para></listitem>
73   <listitem><para><function>std::nth_element</function></para></listitem>
74   <listitem><para><function>std::partial_sort</function></para></listitem>
75   <listitem><para><function>std::partition</function></para></listitem>
76   <listitem><para><function>std::random_shuffle</function></para></listitem>
77   <listitem><para><function>std::set_union</function></para></listitem>
78   <listitem><para><function>std::set_intersection</function></para></listitem>
79   <listitem><para><function>std::set_symmetric_difference</function></para></listitem>
80   <listitem><para><function>std::set_difference</function></para></listitem>
81   <listitem><para><function>std::sort</function></para></listitem>
82   <listitem><para><function>std::stable_sort</function></para></listitem>
83   <listitem><para><function>std::unique_copy</function></para></listitem>
84 </itemizedlist>
85
86 </sect1>
87
88 <sect1 id="manual.ext.parallel_mode.semantics" xreflabel="Semantics">
89   <title>Semantics</title>
90
91 <para> The parallel mode STL algorithms are currently not exception-safe,
92 i.e. user-defined functors must not throw exceptions.
93 Also, the order of execution is not guaranteed for some functions, of course.
94 Therefore, user-defined functors should not have any concurrent side effects.
95 </para>
96
97 <para> Since the current GCC OpenMP implementation does not support
98 OpenMP parallel regions in concurrent threads,
99 it is not possible to call parallel STL algorithms in
100 concurrent threads, either.
101 It might work with other compilers, though.</para>
102
103 </sect1>
104
105 <sect1 id="manual.ext.parallel_mode.using" xreflabel="Using">
106   <title>Using</title>
107
108 <sect2 id="parallel_mode.using.prereq_flags" xreflabel="using.prereq_flags">
109   <title>Prerequisite Compiler Flags</title>
110
111 <para>
112   Any use of parallel functionality requires additional compiler
113   and runtime support, in particular support for OpenMP. Adding this support is
114   not difficult: just compile your application with the compiler
115   flag <literal>-fopenmp</literal>. This will link
116   in <code>libgomp</code>, the GNU
117   OpenMP <ulink url="http://gcc.gnu.org/onlinedocs/libgomp/">implementation</ulink>,
118   whose presence is mandatory. 
119 </para>
120
121 <para>
122 In addition, hardware that supports atomic operations and a compiler
123   capable of producing atomic operations is mandatory: GCC defaults to no
124   support for atomic operations on some common hardware
125   architectures. Activating atomic operations may require explicit
126   compiler flags on some targets (like sparc and x86), such
127   as <literal>-march=i686</literal>,
128   <literal>-march=native</literal> or <literal>-mcpu=v9</literal>. See
129   the GCC manual for more information.
130 </para>
131
132 </sect2>
133
134 <sect2 id="parallel_mode.using.parallel_mode" xreflabel="using.parallel_mode">
135   <title>Using Parallel Mode</title>
136
137 <para>
138   To use the libstdc++ parallel mode, compile your application with
139   the prerequisite flags as detailed above, and in addition
140   add <constant>-D_GLIBCXX_PARALLEL</constant>. This will convert all
141   use of the standard (sequential) algorithms to the appropriate parallel
142   equivalents. Please note that this doesn't necessarily mean that
143   everything will end up being executed in a parallel manner, but
144   rather that the heuristics and settings coded into the parallel
145   versions will be used to determine if all, some, or no algorithms
146   will be executed using parallel variants.
147 </para>
148
149 <para>Note that the <constant>_GLIBCXX_PARALLEL</constant> define may change the
150   sizes and behavior of standard class templates such as
151   <function>std::search</function>, and therefore one can only link code
152   compiled with parallel mode and code compiled without parallel mode
153   if no instantiation of a container is passed between the two
154   translation units. Parallel mode functionality has distinct linkage,
155   and cannot be confused with normal mode symbols.
156 </para>
157 </sect2>
158
159 <sect2 id="parallel_mode.using.specific" xreflabel="using.specific">
160   <title>Using Specific Parallel Components</title>
161
162 <para>When it is not feasible to recompile your entire application, or
163   only specific algorithms need to be parallel-aware, individual
164   parallel algorithms can be made available explicitly. These
165   parallel algorithms are functionally equivalent to the standard
166   drop-in algorithms used in parallel mode, but they are available in
167   a separate namespace as GNU extensions and may be used in programs
168   compiled with either release mode or with parallel mode.
169 </para>
170
171
172 <para>An example of using a parallel version
173 of <function>std::sort</function>, but no other parallel algorithms, is:
174 </para>
175
176 <programlisting>
177 #include &lt;vector&gt;
178 #include &lt;parallel/algorithm&gt;
179
180 int main()
181 {
182   std::vector&lt;int&gt; v(100);
183
184   // ...
185
186   // Explicitly force a call to parallel sort.
187   __gnu_parallel::sort(v.begin(), v.end());
188   return 0;
189 }
190 </programlisting>
191
192 <para>
193 Then compile this code with the prerequisite compiler flags
194 (<literal>-fopenmp</literal> and any necessary architecture-specific
195 flags for atomic operations).
196 </para>
197
198 <para> The following table provides the names and headers of all the
199   parallel algorithms that can be used in a similar manner:
200 </para>
201
202 <table frame='all'>
203 <title>Parallel Algorithms</title>
204 <tgroup cols='4' align='left' colsep='1' rowsep='1'>
205 <colspec colname='c1'></colspec>
206 <colspec colname='c2'></colspec>
207 <colspec colname='c3'></colspec>
208 <colspec colname='c4'></colspec>
209
210 <thead>
211   <row>
212     <entry>Algorithm</entry>
213     <entry>Header</entry>
214     <entry>Parallel algorithm</entry>
215     <entry>Parallel header</entry>
216   </row>
217 </thead>
218
219 <tbody>
220   <row>
221     <entry><function>std::accumulate</function></entry>
222     <entry><filename class="headerfile">numeric</filename></entry>
223     <entry><function>__gnu_parallel::accumulate</function></entry>
224     <entry><filename class="headerfile">parallel/numeric</filename></entry>
225   </row>
226   <row>
227     <entry><function>std::adjacent_difference</function></entry>
228     <entry><filename class="headerfile">numeric</filename></entry>
229     <entry><function>__gnu_parallel::adjacent_difference</function></entry>
230     <entry><filename class="headerfile">parallel/numeric</filename></entry>
231   </row>
232   <row>
233     <entry><function>std::inner_product</function></entry>
234     <entry><filename class="headerfile">numeric</filename></entry>
235     <entry><function>__gnu_parallel::inner_product</function></entry>
236     <entry><filename class="headerfile">parallel/numeric</filename></entry>
237   </row>
238   <row>
239     <entry><function>std::partial_sum</function></entry>
240     <entry><filename class="headerfile">numeric</filename></entry>
241     <entry><function>__gnu_parallel::partial_sum</function></entry>
242     <entry><filename class="headerfile">parallel/numeric</filename></entry>
243   </row>
244   <row>
245     <entry><function>std::adjacent_find</function></entry>
246     <entry><filename class="headerfile">algorithm</filename></entry>
247     <entry><function>__gnu_parallel::adjacent_find</function></entry>
248     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
249   </row>
250
251   <row>
252     <entry><function>std::count</function></entry>
253     <entry><filename class="headerfile">algorithm</filename></entry>
254     <entry><function>__gnu_parallel::count</function></entry>
255     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
256   </row>
257
258   <row>
259     <entry><function>std::count_if</function></entry>
260     <entry><filename class="headerfile">algorithm</filename></entry>
261     <entry><function>__gnu_parallel::count_if</function></entry>
262     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
263   </row>
264
265   <row>
266     <entry><function>std::equal</function></entry>
267     <entry><filename class="headerfile">algorithm</filename></entry>
268     <entry><function>__gnu_parallel::equal</function></entry>
269     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
270   </row>
271
272   <row>
273     <entry><function>std::find</function></entry>
274     <entry><filename class="headerfile">algorithm</filename></entry>
275     <entry><function>__gnu_parallel::find</function></entry>
276     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
277   </row>
278
279   <row>
280     <entry><function>std::find_if</function></entry>
281     <entry><filename class="headerfile">algorithm</filename></entry>
282     <entry><function>__gnu_parallel::find_if</function></entry>
283     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
284   </row>
285
286   <row>
287     <entry><function>std::find_first_of</function></entry>
288     <entry><filename class="headerfile">algorithm</filename></entry>
289     <entry><function>__gnu_parallel::find_first_of</function></entry>
290     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
291   </row>
292
293   <row>
294     <entry><function>std::for_each</function></entry>
295     <entry><filename class="headerfile">algorithm</filename></entry>
296     <entry><function>__gnu_parallel::for_each</function></entry>
297     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
298   </row>
299
300   <row>
301     <entry><function>std::generate</function></entry>
302     <entry><filename class="headerfile">algorithm</filename></entry>
303     <entry><function>__gnu_parallel::generate</function></entry>
304     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
305   </row>
306
307   <row>
308     <entry><function>std::generate_n</function></entry>
309     <entry><filename class="headerfile">algorithm</filename></entry>
310     <entry><function>__gnu_parallel::generate_n</function></entry>
311     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
312   </row>
313
314   <row>
315     <entry><function>std::lexicographical_compare</function></entry>
316     <entry><filename class="headerfile">algorithm</filename></entry>
317     <entry><function>__gnu_parallel::lexicographical_compare</function></entry>
318     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
319   </row>
320
321   <row>
322     <entry><function>std::mismatch</function></entry>
323     <entry><filename class="headerfile">algorithm</filename></entry>
324     <entry><function>__gnu_parallel::mismatch</function></entry>
325     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
326   </row>
327
328   <row>
329     <entry><function>std::search</function></entry>
330     <entry><filename class="headerfile">algorithm</filename></entry>
331     <entry><function>__gnu_parallel::search</function></entry>
332     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
333   </row>
334
335   <row>
336     <entry><function>std::search_n</function></entry>
337     <entry><filename class="headerfile">algorithm</filename></entry>
338     <entry><function>__gnu_parallel::search_n</function></entry>
339     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
340   </row>
341
342   <row>
343     <entry><function>std::transform</function></entry>
344     <entry><filename class="headerfile">algorithm</filename></entry>
345     <entry><function>__gnu_parallel::transform</function></entry>
346     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
347   </row>
348
349   <row>
350     <entry><function>std::replace</function></entry>
351     <entry><filename class="headerfile">algorithm</filename></entry>
352     <entry><function>__gnu_parallel::replace</function></entry>
353     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
354   </row>
355
356   <row>
357     <entry><function>std::replace_if</function></entry>
358     <entry><filename class="headerfile">algorithm</filename></entry>
359     <entry><function>__gnu_parallel::replace_if</function></entry>
360     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
361   </row>
362
363   <row>
364     <entry><function>std::max_element</function></entry>
365     <entry><filename class="headerfile">algorithm</filename></entry>
366     <entry><function>__gnu_parallel::max_element</function></entry>
367     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
368   </row>
369
370   <row>
371     <entry><function>std::merge</function></entry>
372     <entry><filename class="headerfile">algorithm</filename></entry>
373     <entry><function>__gnu_parallel::merge</function></entry>
374     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
375   </row>
376
377   <row>
378     <entry><function>std::min_element</function></entry>
379     <entry><filename class="headerfile">algorithm</filename></entry>
380     <entry><function>__gnu_parallel::min_element</function></entry>
381     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
382   </row>
383
384   <row>
385     <entry><function>std::nth_element</function></entry>
386     <entry><filename class="headerfile">algorithm</filename></entry>
387     <entry><function>__gnu_parallel::nth_element</function></entry>
388     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
389   </row>
390
391   <row>
392     <entry><function>std::partial_sort</function></entry>
393     <entry><filename class="headerfile">algorithm</filename></entry>
394     <entry><function>__gnu_parallel::partial_sort</function></entry>
395     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
396   </row>
397
398   <row>
399     <entry><function>std::partition</function></entry>
400     <entry><filename class="headerfile">algorithm</filename></entry>
401     <entry><function>__gnu_parallel::partition</function></entry>
402     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
403   </row>
404
405   <row>
406     <entry><function>std::random_shuffle</function></entry>
407     <entry><filename class="headerfile">algorithm</filename></entry>
408     <entry><function>__gnu_parallel::random_shuffle</function></entry>
409     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
410   </row>
411
412   <row>
413     <entry><function>std::set_union</function></entry>
414     <entry><filename class="headerfile">algorithm</filename></entry>
415     <entry><function>__gnu_parallel::set_union</function></entry>
416     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
417   </row>
418
419   <row>
420     <entry><function>std::set_intersection</function></entry>
421     <entry><filename class="headerfile">algorithm</filename></entry>
422     <entry><function>__gnu_parallel::set_intersection</function></entry>
423     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
424   </row>
425
426   <row>
427     <entry><function>std::set_symmetric_difference</function></entry>
428     <entry><filename class="headerfile">algorithm</filename></entry>
429     <entry><function>__gnu_parallel::set_symmetric_difference</function></entry>
430     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
431   </row>
432
433   <row>
434     <entry><function>std::set_difference</function></entry>
435     <entry><filename class="headerfile">algorithm</filename></entry>
436     <entry><function>__gnu_parallel::set_difference</function></entry>
437     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
438   </row>
439
440   <row>
441     <entry><function>std::sort</function></entry>
442     <entry><filename class="headerfile">algorithm</filename></entry>
443     <entry><function>__gnu_parallel::sort</function></entry>
444     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
445   </row>
446
447   <row>
448     <entry><function>std::stable_sort</function></entry>
449     <entry><filename class="headerfile">algorithm</filename></entry>
450     <entry><function>__gnu_parallel::stable_sort</function></entry>
451     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
452   </row>
453
454   <row>
455     <entry><function>std::unique_copy</function></entry>
456     <entry><filename class="headerfile">algorithm</filename></entry>
457     <entry><function>__gnu_parallel::unique_copy</function></entry>
458     <entry><filename class="headerfile">parallel/algorithm</filename></entry>
459   </row>
460 </tbody>
461 </tgroup>
462 </table>
463
464 </sect2>
465
466 </sect1>
467
468 <sect1 id="manual.ext.parallel_mode.design" xreflabel="Design">
469   <title>Design</title>
470   <para>
471   </para>
472 <sect2 id="manual.ext.parallel_mode.design.intro" xreflabel="Intro">
473   <title>Interface Basics</title>
474
475 <para>
476 All parallel algorithms are intended to have signatures that are
477 equivalent to the ISO C++ algorithms replaced. For instance, the
478 <function>std::adjacent_find</function> function is declared as:
479 </para>
480 <programlisting>
481 namespace std
482 {
483   template&lt;typename _FIter&gt;
484     _FIter
485     adjacent_find(_FIter, _FIter);
486 }
487 </programlisting>
488
489 <para>
490 Which means that there should be something equivalent for the parallel
491 version. Indeed, this is the case:
492 </para>
493
494 <programlisting>
495 namespace std
496 {
497   namespace __parallel
498   {
499     template&lt;typename _FIter&gt;
500       _FIter
501       adjacent_find(_FIter, _FIter);
502
503     ...
504   }
505 }
506 </programlisting>
507
508 <para>But... why the ellipses?
509 </para>
510
511 <para> The ellipses in the example above represent additional overloads
512 required for the parallel version of the function. These additional
513 overloads are used to dispatch calls from the ISO C++ function
514 signature to the appropriate parallel function (or sequential
515 function, if no parallel functions are deemed worthy), based on either
516 compile-time or run-time conditions.
517 </para>
518
519 <para> The available signature options are specific for the different
520 algorithms/algorithm classes.</para>
521
522 <para> The general view of overloads for the parallel algorithms looks like this:
523 </para>
524 <itemizedlist>
525    <listitem><para>ISO C++ signature</para></listitem>
526    <listitem><para>ISO C++ signature + sequential_tag argument</para></listitem>
527    <listitem><para>ISO C++ signature + algorithm-specific tag type
528     (several signatures)</para></listitem>
529 </itemizedlist>
530
531 <para> Please note that the implementation may use additional functions
532 (designated with the <code>_switch</code> suffix) to dispatch from the
533 ISO C++ signature to the correct parallel version. Also, some of the
534 algorithms do not have support for run-time conditions, so the last
535 overload is therefore missing.
536 </para>
537
538
539 </sect2>
540
541 <sect2 id="manual.ext.parallel_mode.design.tuning" xreflabel="Tuning">
542   <title>Configuration and Tuning</title>
543
544
545 <sect3 id="parallel_mode.design.tuning.omp" xreflabel="OpenMP Environment">
546   <title>Setting up the OpenMP Environment</title>
547
548 <para>
549 Several aspects of the overall runtime environment can be manipulated
550 by standard OpenMP function calls.
551 </para>
552
553 <para>
554 To specify the number of threads to be used for the algorithms globally,
555 use the function <function>omp_set_num_threads</function>. An example:
556 </para>
557
558 <programlisting>
559 #include &lt;stdlib.h&gt;
560 #include &lt;omp.h&gt;
561
562 int main()
563 {
564   // Explicitly set number of threads.
565   const int threads_wanted = 20;
566   omp_set_dynamic(false);
567   omp_set_num_threads(threads_wanted);
568
569   // Call parallel mode algorithms.
570
571   return 0;
572 }
573 </programlisting>
574
575 <para>
576  Some algorithms allow the number of threads to be set for a particular call,
577  by augmenting the algorithm variant.
578  See the next section for further information.
579 </para>
580
581 <para>
582 Other parts of the runtime environment able to be manipulated include
583 nested parallelism (<function>omp_set_nested</function>), schedule kind
584 (<function>omp_set_schedule</function>), and others. See the OpenMP
585 documentation for more information.
586 </para>
587
588 </sect3>
589
590 <sect3 id="parallel_mode.design.tuning.compile" xreflabel="Compile Switches">
591   <title>Compile Time Switches</title>
592
593 <para>
594 To force an algorithm to execute sequentially, even though parallelism
595 is switched on in general via the macro <constant>_GLIBCXX_PARALLEL</constant>,
596 add <classname>__gnu_parallel::sequential_tag()</classname> to the end
597 of the algorithm's argument list.
598 </para>
599
600 <para>
601 Like so:
602 </para>
603
604 <programlisting>
605 std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
606 </programlisting>
607
608 <para>
609 Some parallel algorithm variants can be excluded from compilation by
610 preprocessor defines. See the doxygen documentation on
611 <code>compiletime_settings.h</code> and <code>features.h</code> for details.
612 </para>
613
614 <para>
615 For some algorithms, the desired variant can be chosen at compile-time by
616 appending a tag object. The available options are specific to the particular
617 algorithm (class).
618 </para>
619
620 <para>
621 For the "embarrassingly parallel" algorithms, there is only one "tag object
622 type", the enum _Parallelism.
623 It takes one of the following values,
624 <code>__gnu_parallel::parallel_tag</code>,
625 <code>__gnu_parallel::balanced_tag</code>,
626 <code>__gnu_parallel::unbalanced_tag</code>,
627 <code>__gnu_parallel::omp_loop_tag</code>,
628 <code>__gnu_parallel::omp_loop_static_tag</code>.
629 This means that the actual parallelization strategy is chosen at run-time.
630 (Choosing the variants at compile-time will come soon.)
631 </para>
632
633 <para>
634 For the following algorithms in general, we have
635 <code>__gnu_parallel::parallel_tag</code> and
636 <code>__gnu_parallel::default_parallel_tag</code>, in addition to
637 <code>__gnu_parallel::sequential_tag</code>.
638 <code>__gnu_parallel::default_parallel_tag</code> chooses the default 
639 algorithm at compile-time, as does omitting the tag.
640 <code>__gnu_parallel::parallel_tag</code> postpones the decision to run-time
641 (see next section).
642 For all tags, the number of threads desired for this call can optionally be
643 passed to the respective tag's constructor.
644 </para>
645
646 <para>
647 The <code>multiway_merge</code> algorithm comes with the additional choices,
648 <code>__gnu_parallel::exact_tag</code> and
649 <code>__gnu_parallel::sampling_tag</code>.
650 Exact and sampling are the two available splitting strategies.
651 </para>
652
653 <para>
654 For the <code>sort</code> and <code>stable_sort</code> algorithms, there are
655 several additional choices, namely
656 <code>__gnu_parallel::multiway_mergesort_tag</code>,
657 <code>__gnu_parallel::multiway_mergesort_exact_tag</code>, 
658 <code>__gnu_parallel::multiway_mergesort_sampling_tag</code>,
659 <code>__gnu_parallel::quicksort_tag</code>, and
660 <code>__gnu_parallel::balanced_quicksort_tag</code>.
661 Multiway mergesort comes with the two splitting strategies for multi-way
662 merging. The quicksort options cannot be used for <code>stable_sort</code>.
663 </para>
664
665 </sect3>
666
667 <sect3 id="parallel_mode.design.tuning.settings" xreflabel="_Settings">
668   <title>Run Time Settings and Defaults</title>
669
670 <para>
671 The default parallelization strategy, the choice of specific algorithm
672 strategy, the minimum threshold limits for individual parallel
673 algorithms, and aspects of the underlying hardware can be specified as
674 desired via manipulation
675 of <classname>__gnu_parallel::_Settings</classname> member data.
676 </para>
677
678 <para>
679 First off, the choice of parallelization strategy: serial, parallel,
680 or heuristically deduced. This corresponds
681 to <code>__gnu_parallel::_Settings::algorithm_strategy</code> and is a
682 value of enum <type>__gnu_parallel::_AlgorithmStrategy</type>
683 type. Choices
684 include: <type>heuristic</type>, <type>force_sequential</type>,
685 and <type>force_parallel</type>. The default is <type>heuristic</type>.
686 </para>
687
688
689 <para>
690 Next, the sub-choices for algorithm variant, if not fixed at compile-time.
691 Specific algorithms like <function>find</function> or <function>sort</function>
692 can be implemented in multiple ways: when this is the case,
693 a <classname>__gnu_parallel::_Settings</classname> member exists to
694 pick the default strategy. For
695 example, <code>__gnu_parallel::_Settings::sort_algorithm</code> can
696 have any values of
697 enum <type>__gnu_parallel::_SortAlgorithm</type>: <type>MWMS</type>, <type>QS</type>,
698 or <type>QS_BALANCED</type>.
699 </para>
700
701 <para>
702 Likewise for setting the minimal threshold for algorithm
703 parallelization.  Parallelism always incurs some overhead. Thus, it is
704 not helpful to parallelize operations on very small sets of
705 data. Because of this, measures are taken to avoid parallelizing below
706 a certain, pre-determined threshold. For each algorithm, a minimum
707 problem size is encoded as a variable in the
708 active <classname>__gnu_parallel::_Settings</classname> object.  This
709 threshold variable follows the following naming scheme:
710 <code>__gnu_parallel::_Settings::[algorithm]_minimal_n</code>.  So,
711 for <function>fill</function>, the threshold variable
712 is <code>__gnu_parallel::_Settings::fill_minimal_n</code>.
713 </para>
714
715 <para>
716 Finally, hardware details like L1/L2 cache size can be hardwired
717 via <code>__gnu_parallel::_Settings::L1_cache_size</code> and friends.
718 </para>
719
720 <para>
721 </para>
722
723 <para>
724 All these configuration variables can be changed by the user, if
725 desired.
726 There exists one global instance of the class <classname>_Settings</classname>,
727 i.e., it is a singleton. It can be read and written by calling
728 <code>__gnu_parallel::_Settings::get</code> and
729 <code>__gnu_parallel::_Settings::set</code>, respectively.
730 Please note that the first call returns a const object, so direct manipulation
731 is forbidden.
732 See <ulink url="http://gcc.gnu.org/onlinedocs/libstdc++/latest-doxygen/a00640.html">
733   <filename class="headerfile">settings.h</filename></ulink>
734 for complete details.
735 </para>
736
737 <para>
738 A small example of tuning the default:
739 </para>
740
741 <programlisting>
742 #include &lt;parallel/algorithm&gt;
743 #include &lt;parallel/settings.h&gt;
744
745 int main()
746 {
747   __gnu_parallel::_Settings s;
748   s.algorithm_strategy = __gnu_parallel::force_parallel;
749   __gnu_parallel::_Settings::set(s);
750
751   // Do work... all algorithms will be parallelized, always.
752
753   return 0;
754 }
755 </programlisting>
756
757 </sect3>
758
759 </sect2>
760
761 <sect2 id="manual.ext.parallel_mode.design.impl" xreflabel="Impl">
762   <title>Implementation Namespaces</title>
763
764 <para> One namespace contains versions of code that are always
765 explicitly sequential:
766 <code>__gnu_serial</code>.
767 </para>
768
769 <para> Two namespaces contain the parallel mode:
770 <code>std::__parallel</code> and <code>__gnu_parallel</code>. 
771 </para>
772
773 <para> Parallel implementations of standard components, including
774 template helpers to select parallelism, are defined in <code>namespace
775 std::__parallel</code>. For instance, <function>std::transform</function> from <filename class="headerfile">algorithm</filename> has a parallel counterpart in
776 <function>std::__parallel::transform</function> from <filename class="headerfile">parallel/algorithm</filename>. In addition, these parallel
777 implementations are injected into <code>namespace
778 __gnu_parallel</code> with using declarations.
779 </para>
780
781 <para> Support and general infrastructure is in <code>namespace
782 __gnu_parallel</code>.
783 </para>
784
785 <para> More information, and an organized index of types and functions
786 related to the parallel mode on a per-namespace basis, can be found in
787 the generated source documentation.
788 </para>
789
790 </sect2>
791
792 </sect1>
793
794 <sect1 id="manual.ext.parallel_mode.test" xreflabel="Testing">
795   <title>Testing</title>
796
797   <para> 
798     Both the normal conformance and regression tests and the
799     supplemental performance tests work.
800   </para>
801
802   <para> 
803     To run the conformance and regression tests with the parallel mode
804     active,
805   </para>
806
807   <screen>
808   <userinput>make check-parallel</userinput>
809   </screen>
810   
811   <para>
812     The log and summary files for conformance testing are in the
813     <filename class="directory">testsuite/parallel</filename> directory.
814   </para>
815
816   <para> 
817     To run the performance tests with the parallel mode active,
818   </para>
819
820   <screen>
821   <userinput>make check-performance-parallel</userinput>
822   </screen>
823
824   <para>
825     The result file for performance testing is in the
826     <filename class="directory">testsuite</filename> directory, in the file
827     <filename>libstdc++_performance.sum</filename>. In addition, the
828     policy-based containers have their own visualizations, which have
829     more software dependencies than the usual bare-bones text
830     file, and can be generated by using the <code>make
831     doc-performance</code> rule in the testsuite's Makefile.
832 </para>
833 </sect1>
834
835 <bibliography id="parallel_mode.biblio" xreflabel="parallel_mode.biblio">
836 <title>Bibliography</title>
837
838   <biblioentry>
839     <title>
840       Parallelization of Bulk Operations for STL Dictionaries
841     </title>
842
843     <author>
844       <firstname>Johannes</firstname>
845       <surname>Singler</surname>
846     </author>
847     <author>
848       <firstname>Leonor</firstname>
849       <surname>Frias</surname>
850     </author>
851
852     <copyright>
853       <year>2007</year>
854       <holder></holder>
855     </copyright>
856
857     <publisher>
858       <publishername>
859         Workshop on Highly Parallel Processing on a Chip (HPPC) 2007. (LNCS)
860       </publishername>
861     </publisher>
862   </biblioentry> 
863
864   <biblioentry>
865     <title>
866       The Multi-Core Standard Template Library
867     </title>
868
869     <author>
870       <firstname>Johannes</firstname>
871       <surname>Singler</surname>
872     </author>
873     <author>
874       <firstname>Peter</firstname>
875       <surname>Sanders</surname>
876     </author>
877     <author>
878       <firstname>Felix</firstname>
879       <surname>Putze</surname>
880     </author>
881
882     <copyright>
883       <year>2007</year>
884       <holder></holder>
885     </copyright>
886
887     <publisher>
888       <publishername>
889          Euro-Par 2007: Parallel Processing. (LNCS 4641)
890       </publishername>
891     </publisher>
892   </biblioentry> 
893
894 </bibliography>
895
896 </chapter>