l4/pkg/libstdc++-v3/contrib/libstdc++-v3-4.3.3/include/parallel/partition.h

   1 // -*- C++ -*-
   2
   3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the terms
   7 // of the GNU General Public License as published by the Free Software
   8 // Foundation; either version 2, or (at your option) any later
   9 // version.
  10
  11 // This library is distributed in the hope that it will be useful, but
  12 // WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 // General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License
  17 // along with this library; see the file COPYING.  If not, write to
  18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
  19 // MA 02111-1307, USA.
  20
  21 // As a special exception, you may use this file as part of a free
  22 // software library without restriction.  Specifically, if other files
  23 // instantiate templates or use macros or inline functions from this
  24 // file, or you compile this file and link it with other files to
  25 // produce an executable, this file does not by itself cause the
  26 // resulting executable to be covered by the GNU General Public
  27 // License.  This exception does not however invalidate any other
  28 // reasons why the executable file might be covered by the GNU General
  29 // Public License.
  30
  31 /** @file parallel/partition.h
  32  *  @brief Parallel implementation of std::partition(),
  33  *  std::nth_element(), and std::partial_sort().
  34  *  This file is a GNU parallel extension to the Standard C++ Library.
  35  */
  36
  37 // Written by Johannes Singler and Felix Putze.
  38
  39 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
  40 #define _GLIBCXX_PARALLEL_PARTITION_H 1
  41
  42 #include <parallel/basic_iterator.h>
  43 #include <parallel/sort.h>
  44 #include <parallel/random_number.h>
  45 #include <bits/stl_algo.h>
  46 #include <parallel/parallel.h>
  47
  48 /** @brief Decide whether to declare certain variables volatile. */
  49 #define _GLIBCXX_VOLATILE volatile
  50
  51 namespace __gnu_parallel
  52 {
  53 /** @brief Parallel implementation of std::partition.
  54   *  @param begin Begin iterator of input sequence to split.
  55   *  @param end End iterator of input sequence to split.
  56   *  @param pred Partition predicate, possibly including some kind of pivot.
  57   *  @param num_threads Maximum number of threads to use for this task.
  58   *  @return Number of elements not fulfilling the predicate. */
  59 template<typename RandomAccessIterator, typename Predicate>
  60   typename std::iterator_traits<RandomAccessIterator>::difference_type
  61   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
  62                      Predicate pred, thread_index_t num_threads)
  63   {
  64     typedef std::iterator_traits<RandomAccessIterator> traits_type;
  65     typedef typename traits_type::value_type value_type;
  66     typedef typename traits_type::difference_type difference_type;
  67
  68     difference_type n = end - begin;
  69
  70     _GLIBCXX_CALL(n)
  71
  72     const _Settings& __s = _Settings::get();
  73
  74     // Shared.
  75     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
  76     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
  77     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
  78
  79     bool* reserved_left = NULL, * reserved_right = NULL;
  80
  81     difference_type chunk_size;
  82
  83     omp_lock_t result_lock;
  84     omp_init_lock(&result_lock);
  85
  86     //at least two chunks per thread
  87     if(right - left + 1 >= 2 * num_threads * chunk_size)
  88 #   pragma omp parallel num_threads(num_threads)
  89       {
  90 #       pragma omp single
  91           {
  92             num_threads = omp_get_num_threads();
  93             reserved_left = new bool[num_threads];
  94             reserved_right = new bool[num_threads];
  95
  96             if (__s.partition_chunk_share > 0.0)
  97               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
  98                                     (double)n * __s.partition_chunk_share
  99                                                      / (double)num_threads);
 100             else
 101               chunk_size = __s.partition_chunk_size;
 102           }
 103
 104         while (right - left + 1 >= 2 * num_threads * chunk_size)
 105           {
 106 #           pragma omp single
 107               {
 108                 difference_type num_chunks = (right - left + 1) / chunk_size;
 109
 110                 for (int r = 0; r < num_threads; ++r)
 111                   {
 112                     reserved_left[r] = false;
 113                     reserved_right[r] = false;
 114                   }
 115                 leftover_left = 0;
 116                 leftover_right = 0;
 117               } //implicit barrier
 118
 119             // Private.
 120             difference_type thread_left, thread_left_border,
 121                             thread_right, thread_right_border;
 122             thread_left = left + 1;
 123
 124             // Just to satisfy the condition below.
 125             thread_left_border = thread_left - 1;
 126             thread_right = n - 1;
 127             thread_right_border = thread_right + 1;
 128
 129             bool iam_finished = false;
 130             while (!iam_finished)
 131               {
 132                 if (thread_left > thread_left_border)
 133                   {
 134                     omp_set_lock(&result_lock);
 135                     if (left + (chunk_size - 1) > right)
 136                       iam_finished = true;
 137                     else
 138                       {
 139                         thread_left = left;
 140                         thread_left_border = left + (chunk_size - 1);
 141                         left += chunk_size;
 142                       }
 143                     omp_unset_lock(&result_lock);
 144                   }
 145
 146                 if (thread_right < thread_right_border)
 147                   {
 148                     omp_set_lock(&result_lock);
 149                     if (left > right - (chunk_size - 1))
 150                       iam_finished = true;
 151                     else
 152                       {
 153                         thread_right = right;
 154                         thread_right_border = right - (chunk_size - 1);
 155                         right -= chunk_size;
 156                       }
 157                     omp_unset_lock(&result_lock);
 158                   }
 159
 160                 if (iam_finished)
 161                   break;
 162
 163                 // Swap as usual.
 164                 while (thread_left < thread_right)
 165                   {
 166                     while (pred(begin[thread_left])
 167                             && thread_left <= thread_left_border)
 168                       ++thread_left;
 169                     while (!pred(begin[thread_right])
 170                             && thread_right >= thread_right_border)
 171                       --thread_right;
 172
 173                     if (thread_left > thread_left_border
 174                         || thread_right < thread_right_border)
 175                       // Fetch new chunk(s).
 176                       break;
 177
 178                     std::swap(begin[thread_left], begin[thread_right]);
 179                     ++thread_left;
 180                     --thread_right;
 181                   }
 182               }
 183
 184             // Now swap the leftover chunks to the right places.
 185             if (thread_left <= thread_left_border)
 186 #             pragma omp atomic
 187               ++leftover_left;
 188             if (thread_right >= thread_right_border)
 189 #             pragma omp atomic
 190               ++leftover_right;
 191
 192 #           pragma omp barrier
 193
 194 #           pragma omp single
 195               {
 196                 leftnew = left - leftover_left * chunk_size;
 197                 rightnew = right + leftover_right * chunk_size;
 198               }
 199
 200 #           pragma omp barrier
 201
 202             // <=> thread_left_border + (chunk_size - 1) >= leftnew
 203             if (thread_left <= thread_left_border
 204                 && thread_left_border >= leftnew)
 205               {
 206                 // Chunk already in place, reserve spot.
 207                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
 208                     = true;
 209               }
 210
 211             // <=> thread_right_border - (chunk_size - 1) <= rightnew
 212             if (thread_right >= thread_right_border
 213                 && thread_right_border <= rightnew)
 214               {
 215                 // Chunk already in place, reserve spot.
 216                 reserved_right[((thread_right_border - 1) - right)
 217                                / chunk_size] = true;
 218               }
 219
 220 #           pragma omp barrier
 221
 222             if (thread_left <= thread_left_border
 223                 && thread_left_border < leftnew)
 224               {
 225                 // Find spot and swap.
 226                 difference_type swapstart = -1;
 227                 omp_set_lock(&result_lock);
 228                 for (int r = 0; r < leftover_left; ++r)
 229                   if (!reserved_left[r])
 230                     {
 231                       reserved_left[r] = true;
 232                       swapstart = left - (r + 1) * chunk_size;
 233                       break;
 234                     }
 235                 omp_unset_lock(&result_lock);
 236
 237 #if _GLIBCXX_ASSERTIONS
 238                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
 239 #endif
 240
 241                 std::swap_ranges(begin + thread_left_border
 242                                  - (chunk_size - 1),
 243                                  begin + thread_left_border + 1,
 244                                  begin + swapstart);
 245               }
 246
 247             if (thread_right >= thread_right_border
 248                 && thread_right_border > rightnew)
 249               {
 250                 // Find spot and swap
 251                 difference_type swapstart = -1;
 252                 omp_set_lock(&result_lock);
 253                 for (int r = 0; r < leftover_right; ++r)
 254                   if (!reserved_right[r])
 255                     {
 256                       reserved_right[r] = true;
 257                       swapstart = right + r * chunk_size + 1;
 258                       break;
 259                     }
 260                 omp_unset_lock(&result_lock);
 261
 262 #if _GLIBCXX_ASSERTIONS
 263                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
 264 #endif
 265
 266                 std::swap_ranges(begin + thread_right_border,
 267                                  begin + thread_right_border + chunk_size,
 268                                  begin + swapstart);
 269               }
 270 #if _GLIBCXX_ASSERTIONS
 271 #             pragma omp barrier
 272
 273 #             pragma omp single
 274                 {
 275                   for (int r = 0; r < leftover_left; ++r)
 276                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
 277                   for (int r = 0; r < leftover_right; ++r)
 278                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
 279                 }
 280
 281 #             pragma omp barrier
 282 #endif
 283
 284 #             pragma omp barrier
 285
 286               left = leftnew;
 287               right = rightnew;
 288           }
 289 #         pragma omp flush(left, right)
 290       } // end "recursion" //parallel
 291
 292     difference_type final_left = left, final_right = right;
 293
 294     while (final_left < final_right)
 295       {
 296         // Go right until key is geq than pivot.
 297         while (pred(begin[final_left]) && final_left < final_right)
 298           ++final_left;
 299
 300         // Go left until key is less than pivot.
 301         while (!pred(begin[final_right]) && final_left < final_right)
 302           --final_right;
 303
 304         if (final_left == final_right)
 305           break;
 306         std::swap(begin[final_left], begin[final_right]);
 307         ++final_left;
 308         --final_right;
 309       }
 310
 311     // All elements on the left side are < piv, all elements on the
 312     // right are >= piv
 313     delete[] reserved_left;
 314     delete[] reserved_right;
 315
 316     omp_destroy_lock(&result_lock);
 317
 318     // Element "between" final_left and final_right might not have
 319     // been regarded yet
 320     if (final_left < n && !pred(begin[final_left]))
 321       // Really swapped.
 322       return final_left;
 323     else
 324       return final_left + 1;
 325   }
 326
 327 /**
 328   *  @brief Parallel implementation of std::nth_element().
 329   *  @param begin Begin iterator of input sequence.
 330   *  @param nth Iterator of element that must be in position afterwards.
 331   *  @param end End iterator of input sequence.
 332   *  @param comp Comparator.
 333   */
 334 template<typename RandomAccessIterator, typename Comparator>
 335   void
 336   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
 337                        RandomAccessIterator end, Comparator comp)
 338   {
 339     typedef std::iterator_traits<RandomAccessIterator> traits_type;
 340     typedef typename traits_type::value_type value_type;
 341     typedef typename traits_type::difference_type difference_type;
 342
 343     _GLIBCXX_CALL(end - begin)
 344
 345     RandomAccessIterator split;
 346     random_number rng;
 347
 348     difference_type minimum_length =
 349       std::max<difference_type>(2, _Settings::get().partition_minimal_n);
 350
 351     // Break if input range to small.
 352     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
 353       {
 354         difference_type n = end - begin;
 355
 356         RandomAccessIterator pivot_pos = begin +  rng(n);
 357
 358         // Swap pivot_pos value to end.
 359         if (pivot_pos != (end - 1))
 360           std::swap(*pivot_pos, *(end - 1));
 361         pivot_pos = end - 1;
 362
 363         // XXX Comparator must have first_value_type, second_value_type,
 364         // result_type
 365         // Comparator == __gnu_parallel::lexicographic<S, int,
 366         // __gnu_parallel::less<S, S> >
 367         // pivot_pos == std::pair<S, int>*
 368         // XXX binder2nd only for RandomAccessIterators??
 369         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
 370           pred(comp, *pivot_pos);
 371
 372         // Divide, leave pivot unchanged in last place.
 373         RandomAccessIterator split_pos1, split_pos2;
 374         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
 375                                                 get_max_threads());
 376
 377         // Left side: < pivot_pos; right side: >= pivot_pos
 378
 379         // Swap pivot back to middle.
 380         if (split_pos1 != pivot_pos)
 381           std::swap(*split_pos1, *pivot_pos);
 382         pivot_pos = split_pos1;
 383
 384         // In case all elements are equal, split_pos1 == 0
 385         if ((split_pos1 + 1 - begin) < (n >> 7)
 386             || (end - split_pos1) < (n >> 7))
 387           {
 388             // Very unequal split, one part smaller than one 128th
 389             // elements not strictly larger than the pivot.
 390             __gnu_parallel::unary_negate<__gnu_parallel::
 391               binder1st<Comparator, value_type, value_type, bool>, value_type>
 392               pred(__gnu_parallel::binder1st<Comparator, value_type,
 393                    value_type, bool>(comp, *pivot_pos));
 394
 395             // Find other end of pivot-equal range.
 396             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
 397                                                      end, pred);
 398           }
 399         else
 400           // Only skip the pivot.
 401           split_pos2 = split_pos1 + 1;
 402
 403         // Compare iterators.
 404         if (split_pos2 <= nth)
 405           begin = split_pos2;
 406         else if (nth < split_pos1)
 407           end = split_pos1;
 408         else
 409           break;
 410       }
 411
 412     // Only at most _Settings::partition_minimal_n elements left.
 413     __gnu_sequential::sort(begin, end, comp);
 414   }
 415
 416 /** @brief Parallel implementation of std::partial_sort().
 417 *  @param begin Begin iterator of input sequence.
 418 *  @param middle Sort until this position.
 419 *  @param end End iterator of input sequence.
 420 *  @param comp Comparator. */
 421 template<typename RandomAccessIterator, typename Comparator>
 422   void
 423   parallel_partial_sort(RandomAccessIterator begin,
 424                         RandomAccessIterator middle,
 425                         RandomAccessIterator end, Comparator comp)
 426   {
 427     parallel_nth_element(begin, middle, end, comp);
 428     std::sort(begin, middle, comp);
 429   }
 430
 431 } //namespace __gnu_parallel
 432
 433 #undef _GLIBCXX_VOLATILE
 434
 435 #endif