]> rtime.felk.cvut.cz Git - l4.git/blob - l4/pkg/libstdc++-v3/contrib/libstdc++-v3-4.3.3/include/parallel/partition.h
update
[l4.git] / l4 / pkg / libstdc++-v3 / contrib / libstdc++-v3-4.3.3 / include / parallel / partition.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING.  If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction.  Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License.  This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/partition.h
32  *  @brief Parallel implementation of std::partition(),
33  *  std::nth_element(), and std::partial_sort().
34  *  This file is a GNU parallel extension to the Standard C++ Library.
35  */
36
37 // Written by Johannes Singler and Felix Putze.
38
39 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
40 #define _GLIBCXX_PARALLEL_PARTITION_H 1
41
42 #include <parallel/basic_iterator.h>
43 #include <parallel/sort.h>
44 #include <parallel/random_number.h>
45 #include <bits/stl_algo.h>
46 #include <parallel/parallel.h>
47
48 /** @brief Decide whether to declare certain variables volatile. */
49 #define _GLIBCXX_VOLATILE volatile
50
51 namespace __gnu_parallel
52 {
53 /** @brief Parallel implementation of std::partition.
54   *  @param begin Begin iterator of input sequence to split.
55   *  @param end End iterator of input sequence to split.
56   *  @param pred Partition predicate, possibly including some kind of pivot.
57   *  @param num_threads Maximum number of threads to use for this task.
58   *  @return Number of elements not fulfilling the predicate. */
59 template<typename RandomAccessIterator, typename Predicate>
60   typename std::iterator_traits<RandomAccessIterator>::difference_type
61   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
62                      Predicate pred, thread_index_t num_threads)
63   {
64     typedef std::iterator_traits<RandomAccessIterator> traits_type;
65     typedef typename traits_type::value_type value_type;
66     typedef typename traits_type::difference_type difference_type;
67
68     difference_type n = end - begin;
69
70     _GLIBCXX_CALL(n)
71
72     const _Settings& __s = _Settings::get();
73
74     // Shared.
75     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
76     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
77     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
78
79     bool* reserved_left = NULL, * reserved_right = NULL;
80
81     difference_type chunk_size;
82
83     omp_lock_t result_lock;
84     omp_init_lock(&result_lock);
85
86     //at least two chunks per thread
87     if(right - left + 1 >= 2 * num_threads * chunk_size)
88 #   pragma omp parallel num_threads(num_threads)
89       {
90 #       pragma omp single
91           {
92             num_threads = omp_get_num_threads();
93             reserved_left = new bool[num_threads];
94             reserved_right = new bool[num_threads];
95
96             if (__s.partition_chunk_share > 0.0)
97               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
98                                     (double)n * __s.partition_chunk_share
99                                                      / (double)num_threads);
100             else
101               chunk_size = __s.partition_chunk_size;
102           }
103
104         while (right - left + 1 >= 2 * num_threads * chunk_size)
105           {
106 #           pragma omp single
107               {
108                 difference_type num_chunks = (right - left + 1) / chunk_size;
109
110                 for (int r = 0; r < num_threads; ++r)
111                   {
112                     reserved_left[r] = false;
113                     reserved_right[r] = false;
114                   }
115                 leftover_left = 0;
116                 leftover_right = 0;
117               } //implicit barrier
118
119             // Private.
120             difference_type thread_left, thread_left_border,
121                             thread_right, thread_right_border;
122             thread_left = left + 1;
123
124             // Just to satisfy the condition below.
125             thread_left_border = thread_left - 1;
126             thread_right = n - 1;
127             thread_right_border = thread_right + 1;
128
129             bool iam_finished = false;
130             while (!iam_finished)
131               {
132                 if (thread_left > thread_left_border)
133                   {
134                     omp_set_lock(&result_lock);
135                     if (left + (chunk_size - 1) > right)
136                       iam_finished = true;
137                     else
138                       {
139                         thread_left = left;
140                         thread_left_border = left + (chunk_size - 1);
141                         left += chunk_size;
142                       }
143                     omp_unset_lock(&result_lock);
144                   }
145
146                 if (thread_right < thread_right_border)
147                   {
148                     omp_set_lock(&result_lock);
149                     if (left > right - (chunk_size - 1))
150                       iam_finished = true;
151                     else
152                       {
153                         thread_right = right;
154                         thread_right_border = right - (chunk_size - 1);
155                         right -= chunk_size;
156                       }
157                     omp_unset_lock(&result_lock);
158                   }
159
160                 if (iam_finished)
161                   break;
162
163                 // Swap as usual.
164                 while (thread_left < thread_right)
165                   {
166                     while (pred(begin[thread_left])
167                             && thread_left <= thread_left_border)
168                       ++thread_left;
169                     while (!pred(begin[thread_right])
170                             && thread_right >= thread_right_border)
171                       --thread_right;
172
173                     if (thread_left > thread_left_border
174                         || thread_right < thread_right_border)
175                       // Fetch new chunk(s).
176                       break;
177
178                     std::swap(begin[thread_left], begin[thread_right]);
179                     ++thread_left;
180                     --thread_right;
181                   }
182               }
183
184             // Now swap the leftover chunks to the right places.
185             if (thread_left <= thread_left_border)
186 #             pragma omp atomic
187               ++leftover_left;
188             if (thread_right >= thread_right_border)
189 #             pragma omp atomic
190               ++leftover_right;
191
192 #           pragma omp barrier
193
194 #           pragma omp single
195               {
196                 leftnew = left - leftover_left * chunk_size;
197                 rightnew = right + leftover_right * chunk_size;
198               }
199
200 #           pragma omp barrier
201
202             // <=> thread_left_border + (chunk_size - 1) >= leftnew
203             if (thread_left <= thread_left_border
204                 && thread_left_border >= leftnew)
205               {
206                 // Chunk already in place, reserve spot.
207                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
208                     = true;
209               }
210
211             // <=> thread_right_border - (chunk_size - 1) <= rightnew
212             if (thread_right >= thread_right_border
213                 && thread_right_border <= rightnew)
214               {
215                 // Chunk already in place, reserve spot.
216                 reserved_right[((thread_right_border - 1) - right)
217                                / chunk_size] = true;
218               }
219
220 #           pragma omp barrier
221
222             if (thread_left <= thread_left_border
223                 && thread_left_border < leftnew)
224               {
225                 // Find spot and swap.
226                 difference_type swapstart = -1;
227                 omp_set_lock(&result_lock);
228                 for (int r = 0; r < leftover_left; ++r)
229                   if (!reserved_left[r])
230                     {
231                       reserved_left[r] = true;
232                       swapstart = left - (r + 1) * chunk_size;
233                       break;
234                     }
235                 omp_unset_lock(&result_lock);
236
237 #if _GLIBCXX_ASSERTIONS
238                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
239 #endif
240
241                 std::swap_ranges(begin + thread_left_border
242                                  - (chunk_size - 1),
243                                  begin + thread_left_border + 1,
244                                  begin + swapstart);
245               }
246
247             if (thread_right >= thread_right_border
248                 && thread_right_border > rightnew)
249               {
250                 // Find spot and swap
251                 difference_type swapstart = -1;
252                 omp_set_lock(&result_lock);
253                 for (int r = 0; r < leftover_right; ++r)
254                   if (!reserved_right[r])
255                     {
256                       reserved_right[r] = true;
257                       swapstart = right + r * chunk_size + 1;
258                       break;
259                     }
260                 omp_unset_lock(&result_lock);
261
262 #if _GLIBCXX_ASSERTIONS
263                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
264 #endif
265
266                 std::swap_ranges(begin + thread_right_border,
267                                  begin + thread_right_border + chunk_size,
268                                  begin + swapstart);
269               }
270 #if _GLIBCXX_ASSERTIONS
271 #             pragma omp barrier
272
273 #             pragma omp single
274                 {
275                   for (int r = 0; r < leftover_left; ++r)
276                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
277                   for (int r = 0; r < leftover_right; ++r)
278                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
279                 }
280
281 #             pragma omp barrier
282 #endif
283
284 #             pragma omp barrier
285
286               left = leftnew;
287               right = rightnew;
288           }
289 #         pragma omp flush(left, right)
290       } // end "recursion" //parallel
291
292     difference_type final_left = left, final_right = right;
293
294     while (final_left < final_right)
295       {
296         // Go right until key is geq than pivot.
297         while (pred(begin[final_left]) && final_left < final_right)
298           ++final_left;
299
300         // Go left until key is less than pivot.
301         while (!pred(begin[final_right]) && final_left < final_right)
302           --final_right;
303
304         if (final_left == final_right)
305           break;
306         std::swap(begin[final_left], begin[final_right]);
307         ++final_left;
308         --final_right;
309       }
310
311     // All elements on the left side are < piv, all elements on the
312     // right are >= piv
313     delete[] reserved_left;
314     delete[] reserved_right;
315
316     omp_destroy_lock(&result_lock);
317
318     // Element "between" final_left and final_right might not have
319     // been regarded yet
320     if (final_left < n && !pred(begin[final_left]))
321       // Really swapped.
322       return final_left;
323     else
324       return final_left + 1;
325   }
326
327 /**
328   *  @brief Parallel implementation of std::nth_element().
329   *  @param begin Begin iterator of input sequence.
330   *  @param nth Iterator of element that must be in position afterwards.
331   *  @param end End iterator of input sequence.
332   *  @param comp Comparator.
333   */
334 template<typename RandomAccessIterator, typename Comparator>
335   void 
336   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, 
337                        RandomAccessIterator end, Comparator comp)
338   {
339     typedef std::iterator_traits<RandomAccessIterator> traits_type;
340     typedef typename traits_type::value_type value_type;
341     typedef typename traits_type::difference_type difference_type;
342
343     _GLIBCXX_CALL(end - begin)
344
345     RandomAccessIterator split;
346     random_number rng;
347
348     difference_type minimum_length =
349       std::max<difference_type>(2, _Settings::get().partition_minimal_n);
350
351     // Break if input range to small.
352     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
353       {
354         difference_type n = end - begin;
355
356         RandomAccessIterator pivot_pos = begin +  rng(n);
357
358         // Swap pivot_pos value to end.
359         if (pivot_pos != (end - 1))
360           std::swap(*pivot_pos, *(end - 1));
361         pivot_pos = end - 1;
362
363         // XXX Comparator must have first_value_type, second_value_type,
364         // result_type
365         // Comparator == __gnu_parallel::lexicographic<S, int,
366         // __gnu_parallel::less<S, S> >
367         // pivot_pos == std::pair<S, int>*
368         // XXX binder2nd only for RandomAccessIterators??
369         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
370           pred(comp, *pivot_pos);
371
372         // Divide, leave pivot unchanged in last place.
373         RandomAccessIterator split_pos1, split_pos2;
374         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
375                                                 get_max_threads());
376
377         // Left side: < pivot_pos; right side: >= pivot_pos
378
379         // Swap pivot back to middle.
380         if (split_pos1 != pivot_pos)
381           std::swap(*split_pos1, *pivot_pos);
382         pivot_pos = split_pos1;
383
384         // In case all elements are equal, split_pos1 == 0
385         if ((split_pos1 + 1 - begin) < (n >> 7)
386             || (end - split_pos1) < (n >> 7))
387           {
388             // Very unequal split, one part smaller than one 128th
389             // elements not strictly larger than the pivot.
390             __gnu_parallel::unary_negate<__gnu_parallel::
391               binder1st<Comparator, value_type, value_type, bool>, value_type>
392               pred(__gnu_parallel::binder1st<Comparator, value_type,
393                    value_type, bool>(comp, *pivot_pos));
394
395             // Find other end of pivot-equal range.
396             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
397                                                      end, pred);
398           }
399         else
400           // Only skip the pivot.
401           split_pos2 = split_pos1 + 1;
402
403         // Compare iterators.
404         if (split_pos2 <= nth)
405           begin = split_pos2;
406         else if (nth < split_pos1)
407           end = split_pos1;
408         else
409           break;
410       }
411
412     // Only at most _Settings::partition_minimal_n elements left.
413     __gnu_sequential::sort(begin, end, comp);
414   }
415
416 /** @brief Parallel implementation of std::partial_sort().
417 *  @param begin Begin iterator of input sequence.
418 *  @param middle Sort until this position.
419 *  @param end End iterator of input sequence.
420 *  @param comp Comparator. */
421 template<typename RandomAccessIterator, typename Comparator>
422   void
423   parallel_partial_sort(RandomAccessIterator begin,
424                         RandomAccessIterator middle,
425                         RandomAccessIterator end, Comparator comp)
426   {
427     parallel_nth_element(begin, middle, end, comp);
428     std::sort(begin, middle, comp);
429   }
430
431 } //namespace __gnu_parallel
432
433 #undef _GLIBCXX_VOLATILE
434
435 #endif