Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_scan_H
18 #define __TBB_parallel_scan_H
19 
20 #include "task.h"
21 #include "aligned_space.h"
22 #include <new>
23 #include "partitioner.h"
24 
25 namespace tbb {
26 
28 
29 struct pre_scan_tag {
30  static bool is_final_scan() {return false;}
31  operator bool() {return is_final_scan();}
32 };
33 
35 
37  static bool is_final_scan() {return true;}
38  operator bool() {return is_final_scan();}
39 };
40 
42 namespace internal {
43 
45 
46  template<typename Range, typename Body>
47  class final_sum: public task {
48  public:
49  Body my_body;
50  private:
54  public:
55  final_sum( Body& body_ ) :
56  my_body(body_,split())
57  {
58  poison_pointer(my_stuff_last);
59  }
61  my_range.begin()->~Range();
62  }
63  void finish_construction( const Range& range_, Body* stuff_last_ ) {
64  new( my_range.begin() ) Range(range_);
65  my_stuff_last = stuff_last_;
66  }
67  private:
69  my_body( *my_range.begin(), final_scan_tag() );
70  if( my_stuff_last )
71  my_stuff_last->assign(my_body);
72  return NULL;
73  }
74  };
75 
77 
78  template<typename Range, typename Body>
79  class sum_node: public task {
81  public:
82  final_sum_type *my_incoming;
83  final_sum_type *my_body;
85  private:
86  final_sum_type *my_left_sum;
90  Range my_range;
91  sum_node( const Range range_, bool left_is_final_ ) :
92  my_stuff_last(NULL),
93  my_left_sum(NULL),
94  my_left(NULL),
95  my_right(NULL),
96  my_left_is_final(left_is_final_),
97  my_range(range_)
98  {
99  // Poison fields that will be set by second pass.
100  poison_pointer(my_body);
101  poison_pointer(my_incoming);
102  }
103  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
104  if( !n ) {
105  f.recycle_as_child_of( *this );
106  f.finish_construction( range_, stuff_last_ );
107  return &f;
108  } else {
109  n->my_body = &f;
110  n->my_incoming = incoming_;
111  n->my_stuff_last = stuff_last_;
112  return n;
113  }
114  }
116  if( my_body ) {
117  if( my_incoming )
118  my_left_sum->my_body.reverse_join( my_incoming->my_body );
119  recycle_as_continuation();
120  sum_node& c = *this;
121  task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
122  task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
123  set_ref_count( (a!=NULL)+(b!=NULL) );
124  my_body = NULL;
125  if( a ) spawn(*b);
126  else a = b;
127  return a;
128  } else {
129  return NULL;
130  }
131  }
132  template<typename Range_,typename Body_,typename Partitioner_>
133  friend class start_scan;
134 
135  template<typename Range_,typename Body_>
136  friend class finish_scan;
137  };
138 
140 
141  template<typename Range, typename Body>
142  class finish_scan: public task {
145  final_sum_type** const my_sum;
146  sum_node_type*& my_return_slot;
147  public:
148  final_sum_type* my_right_zombie;
149  sum_node_type& my_result;
150 
152  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
153  if( my_result.my_left )
154  my_result.my_left_is_final = false;
155  if( my_right_zombie && my_sum )
156  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
157  __TBB_ASSERT( !my_return_slot, NULL );
158  if( my_right_zombie || my_result.my_right ) {
159  my_return_slot = &my_result;
160  } else {
161  destroy( my_result );
162  }
163  if( my_right_zombie && !my_sum && !my_result.my_right ) {
164  destroy(*my_right_zombie);
165  my_right_zombie = NULL;
166  }
167  return NULL;
168  }
169 
170  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
171  my_sum(sum_),
172  my_return_slot(return_slot_),
173  my_right_zombie(NULL),
174  my_result(result_)
175  {
176  __TBB_ASSERT( !my_return_slot, NULL );
177  }
178  };
179 
181 
182  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
183  class start_scan: public task {
186  final_sum_type* my_body;
188  final_sum_type** my_sum;
189  sum_node_type** my_return_slot;
191  sum_node_type* my_parent_sum;
194  Range my_range;
195  typename Partitioner::partition_type my_partition;
196  task* execute() __TBB_override ;
197  public:
198  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
199  my_body(parent_.my_body),
200  my_sum(parent_.my_sum),
201  my_return_slot(&return_slot_),
202  my_parent_sum(parent_sum_),
203  my_is_final(parent_.my_is_final),
204  my_is_right_child(false),
205  my_range(parent_.my_range,split()),
206  my_partition(parent_.my_partition,split())
207  {
208  __TBB_ASSERT( !*my_return_slot, NULL );
209  }
210 
211  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
212  my_body(&body_),
213  my_sum(NULL),
214  my_return_slot(&return_slot_),
215  my_parent_sum(NULL),
216  my_is_final(true),
217  my_is_right_child(false),
218  my_range(range_),
219  my_partition(partitioner_)
220  {
221  __TBB_ASSERT( !*my_return_slot, NULL );
222  }
223 
224  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
225  if( !range_.empty() ) {
226  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
227  internal::sum_node<Range,Body>* root = NULL;
228  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
229  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
230  /*my_return_slot=*/root,
231  range_,
232  *temp_body,
233  partitioner_ );
234  temp_body->my_body.reverse_join(body_);
235  task::spawn_root_and_wait( pass1 );
236  if( root ) {
237  root->my_body = temp_body;
238  root->my_incoming = NULL;
239  root->my_stuff_last = &body_;
240  task::spawn_root_and_wait( *root );
241  } else {
242  body_.assign(temp_body->my_body);
243  temp_body->finish_construction( range_, NULL );
244  temp_body->destroy(*temp_body);
245  }
246  }
247  }
248  };
249 
250  template<typename Range, typename Body, typename Partitioner>
252  typedef internal::finish_scan<Range,Body> finish_pass1_type;
253  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
254  // Inspecting p->result.left_sum would ordinarily be a race condition.
255  // But we inspect it only if we are not a stolen task, in which case we
256  // know that task assigning to p->result.left_sum has completed.
257  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
258  if( treat_as_stolen ) {
259  // Invocation is for right child that has been really stolen or needs to be virtually stolen
260  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
261  my_is_final = false;
262  }
263  task* next_task = NULL;
264  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
265  if( my_is_final )
266  (my_body->my_body)( my_range, final_scan_tag() );
267  else if( my_sum )
268  (my_body->my_body)( my_range, pre_scan_tag() );
269  if( my_sum )
270  *my_sum = my_body;
271  __TBB_ASSERT( !*my_return_slot, NULL );
272  } else {
273  sum_node_type* result;
274  if( my_parent_sum )
275  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
276  else
277  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
278  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
279  // Split off right child
280  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
281  b.my_is_right_child = true;
282  // Left child is recycling of *this. Must recycle this before spawning b,
283  // otherwise b might complete and decrement c.ref_count() to zero, which
284  // would cause c.execute() to run prematurely.
285  recycle_as_child_of(c);
286  c.set_ref_count(2);
287  c.spawn(b);
288  my_sum = &result->my_left_sum;
289  my_return_slot = &result->my_left;
290  my_is_right_child = false;
291  next_task = this;
292  my_parent_sum = result;
293  __TBB_ASSERT( !*my_return_slot, NULL );
294  }
295  return next_task;
296  }
297 
298  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
300  Value my_sum;
301  const Value& identity_element;
302  const Scan& my_scan;
303  const ReverseJoin& my_reverse_join;
304  public:
305  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
306  : my_sum(identity)
307  , identity_element(identity)
308  , my_scan(scan)
309  , my_reverse_join(rev_join) {}
310 
312  : my_sum(b.identity_element)
313  , identity_element(b.identity_element)
314  , my_scan(b.my_scan)
315  , my_reverse_join(b.my_reverse_join) {}
316 
317  template<typename Tag>
318  void operator()( const Range& r, Tag tag ) {
319  my_sum = my_scan(r, my_sum, tag);
320  }
321 
323  my_sum = my_reverse_join(a.my_sum, my_sum);
324  }
325 
327  my_sum = b.my_sum;
328  }
329 
330  Value result() const {
331  return my_sum;
332  }
333  };
334 } // namespace internal
336 
337 // Requirements on Range concept are documented in blocked_range.h
338 
356 
358 
359 template<typename Range, typename Body>
360 void parallel_scan( const Range& range, Body& body ) {
362 }
363 
365 
366 template<typename Range, typename Body>
367 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
369 }
370 
372 
373 template<typename Range, typename Body>
374 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
376 }
377 
379 
380 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
381 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
382  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
384  return body.result();
385 }
386 
388 
389 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
390 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
391  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
392  tbb::parallel_scan(range,body,partitioner);
393  return body.result();
394 }
395 
397 
398 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
399 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
400  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
401  tbb::parallel_scan(range,body,partitioner);
402  return body.result();
403 }
404 
406 
407 } // namespace tbb
408 
409 #endif /* __TBB_parallel_scan_H */
410 
Combine partial results.
final_sum_type * my_body
void assign(lambda_scan_body &b)
#define __TBB_override
Definition: tbb_stddef.h:240
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:80
final_sum_type * my_left_sum
Definition: parallel_scan.h:86
void reverse_join(lambda_scan_body &a)
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:633
sum_node< Range, Body > sum_node_type
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:91
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:305
The graph class.
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:68
sum_node_type ** my_return_slot
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum< Range, Body > final_sum_type
aligned_space< Range > my_range
Definition: parallel_scan.h:51
static bool is_final_scan()
Definition: parallel_scan.h:30
final_sum_type **const my_sum
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
Initial task to split the work.
A simple partitioner.
Definition: partitioner.h:583
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
task * execute() __TBB_override
Should be overridden by derived classes.
sum_node< Range, Body > sum_node_type
final_sum_type * my_body
Definition: parallel_scan.h:83
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:778
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:36
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:395
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
sum_node_type * my_parent_sum
sum_node_type *& my_return_slot
An auto partitioner.
Definition: partitioner.h:610
final_sum_type ** my_sum
Partitioner::partition_type my_partition
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:596
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum_type * my_right_zombie
T * begin() const
Pointer to beginning of array.
Definition: aligned_space.h:35
void operator()(const Range &r, Tag tag)
final_sum< Range, Body > final_sum_type
static bool is_final_scan()
Definition: parallel_scan.h:37
int ref_count() const
The internal reference count.
Definition: task.h:867
Base class for user-defined tasks.
Definition: task.h:589
lambda_scan_body(lambda_scan_body &b, split)
Split work to be done in the scan.
Definition: parallel_scan.h:79
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:29
Base class for types that should not be assigned.
Definition: tbb_stddef.h:320
void const char const char int ITT_FORMAT __itt_group_sync p
final_sum_type * my_incoming
Definition: parallel_scan.h:82
Performs final scan for a leaf.
Definition: parallel_scan.h:47
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:53
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:695
const ReverseJoin & my_reverse_join
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:63

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.