LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in future, e.g. to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
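
// Illustrative sketch, not part of kmp_csupport.cpp: roughly how a compiler
// targeting this runtime can bracket a program with the two entry points
// above. The ident_t initializer and function name are schematic assumptions;
// real compilers emit a psource string of the form ";file;routine;line;col;;".
#include "kmp.h"

static ident_t loc_demo_main = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;main;1;1;;"};

int demo_main(void) {
  __kmpc_begin(&loc_demo_main, 0); // no-op unless KMP_INITIAL_THREAD_BIND or
                                   // KMP_IGNORE_MPPBEG=0 is set (see above)
  /* ... translated user code ... */
  __kmpc_end(&loc_demo_main); // unregisters this root only if
                              // KMP_IGNORE_MPPEND=0 (see above)
  return 0;
}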
80
99kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
121kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
134kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
144kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
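
// Illustrative note, not part of kmp_csupport.cpp: __kmpc_ok_to_fork() above
// filters parallel regions in debug builds by parsing the semicolon-separated
// loc->psource string against the KMP_PAR_RANGE setting (routine name, file
// name, and line range; see kmp_settings.cpp for the exact syntax). A
// schematic psource value such as
//
//   ";demo.c;bar;42;1;;"
//
// is parsed as file "demo.c", routine "bar", line 42. If line 42 falls inside
// the configured range, the function returns (__kmp_par_range > 0), otherwise
// (__kmp_par_range < 0), so a positive setting forks only regions inside the
// range and a negative setting forks only regions outside it.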
211
218kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe saving thr_state would be enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 kmp_team_t *parent_team = master_th->th.th_team;
292 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293 if (lwt)
294 ompt_frame = &(lwt->ompt_task_info.frame);
295 else {
296 int tid = __kmp_tid_from_gtid(gtid);
297 ompt_frame = &(
298 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299 }
300 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
301 }
302 OMPT_STORE_RETURN_ADDRESS(gtid);
303#endif
304
305#if INCLUDE_SSC_MARKS
306 SSC_MARK_FORKING();
307#endif
308 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
309 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
310 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311 kmp_va_addr_of(ap));
312#if INCLUDE_SSC_MARKS
313 SSC_MARK_JOINING();
314#endif
315 __kmp_join_call(loc, gtid
316#if OMPT_SUPPORT
317 ,
318 fork_context_intel
319#endif
320 );
321
322 va_end(ap);
323 }
324
325#if KMP_STATS_ENABLED
326 if (previous_state == stats_state_e::SERIAL_REGION) {
327 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
328 KMP_SET_THREAD_STATE(previous_state);
329 } else {
330 KMP_POP_PARTITIONED_TIMER();
331 }
332#endif // KMP_STATS_ENABLED
333}
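
// Illustrative sketch, not part of kmp_csupport.cpp: a schematic of how a
// compiler lowers "#pragma omp parallel num_threads(4)" onto the entry points
// above. The outlined microtask receives the global and bound thread ids by
// reference, followed by the captured arguments; all names and the ident_t
// initializer below are hypothetical.
static ident_t loc_par = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;compute;10;1;;"};

static void compute_microtask(kmp_int32 *gtid, kmp_int32 *btid, int *shared_n) {
  // body of the parallel region; *gtid identifies the executing thread
  (void)btid;
  (void)shared_n;
}

void compute(int n) {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_par);
  __kmpc_push_num_threads(&loc_par, gtid, 4); // num_threads(4) clause
  __kmpc_fork_call(&loc_par, /*argc=*/1, (kmpc_micro)compute_microtask, &n);
}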
334
346void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
347 kmp_int32 num_teams, kmp_int32 num_threads) {
348 KA_TRACE(20,
349 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
350 global_tid, num_teams, num_threads));
351 __kmp_assert_valid_gtid(global_tid);
352 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
353}
354
371void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
372 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
373 kmp_int32 num_threads) {
374 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
375 " num_teams_ub=%d num_threads=%d\n",
376 global_tid, num_teams_lb, num_teams_ub, num_threads));
377 __kmp_assert_valid_gtid(global_tid);
378 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
379 num_threads);
380}
381
392void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
393 ...) {
394 int gtid = __kmp_entry_gtid();
395 kmp_info_t *this_thr = __kmp_threads[gtid];
396 va_list ap;
397 va_start(ap, microtask);
398
399#if KMP_STATS_ENABLED
400 KMP_COUNT_BLOCK(OMP_TEAMS);
401 stats_state_e previous_state = KMP_GET_THREAD_STATE();
402 if (previous_state == stats_state_e::SERIAL_REGION) {
403 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
404 } else {
405 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
406 }
407#endif
408
409 // remember teams entry point and nesting level
410 this_thr->th.th_teams_microtask = microtask;
411 this_thr->th.th_teams_level =
412 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
413
414#if OMPT_SUPPORT
415 kmp_team_t *parent_team = this_thr->th.th_team;
416 int tid = __kmp_tid_from_gtid(gtid);
417 if (ompt_enabled.enabled) {
418 parent_team->t.t_implicit_task_taskdata[tid]
419 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
420 }
421 OMPT_STORE_RETURN_ADDRESS(gtid);
422#endif
423
424 // check if __kmpc_push_num_teams was called; if not, set the default
425 // number of teams
426 if (this_thr->th.th_teams_size.nteams == 0) {
427 __kmp_push_num_teams(loc, gtid, 0, 0);
428 }
429 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
431 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
432
433 __kmp_fork_call(
434 loc, gtid, fork_context_intel, argc,
435 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
436 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
437 __kmp_join_call(loc, gtid
438#if OMPT_SUPPORT
439 ,
440 fork_context_intel
441#endif
442 );
443
444 // Pop current CG root off list
445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
446 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
447 this_thr->th.th_cg_roots = tmp->up;
448 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
449 " to node %p. cg_nthreads was %d\n",
450 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
451 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
452 int i = tmp->cg_nthreads--;
453 if (i == 1) { // check if we are the last thread in CG (not always the case)
454 __kmp_free(tmp);
455 }
456 // Restore current task's thread_limit from CG root
457 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
458 this_thr->th.th_current_task->td_icvs.thread_limit =
459 this_thr->th.th_cg_roots->cg_thread_limit;
460
461 this_thr->th.th_teams_microtask = NULL;
462 this_thr->th.th_teams_level = 0;
463 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
464 va_end(ap);
465#if KMP_STATS_ENABLED
466 if (previous_state == stats_state_e::SERIAL_REGION) {
467 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
468 KMP_SET_THREAD_STATE(previous_state);
469 } else {
470 KMP_POP_PARTITIONED_TIMER();
471 }
472#endif // KMP_STATS_ENABLED
473}
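
// Illustrative sketch, not part of kmp_csupport.cpp: schematic lowering of
// "#pragma omp teams num_teams(8) thread_limit(4)" onto the teams entry
// points above; names and the ident_t initializer are hypothetical.
static ident_t loc_teams = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;league;20;1;;"};

static void teams_microtask(kmp_int32 *gtid, kmp_int32 *btid, double *data) {
  // executed once per team by each team's initial thread
  (void)btid;
  (void)data;
}

void league(double *data) {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_teams);
  __kmpc_push_num_teams(&loc_teams, gtid, /*num_teams=*/8, /*num_threads=*/4);
  __kmpc_fork_teams(&loc_teams, /*argc=*/1, (kmpc_micro)teams_microtask, data);
}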
474
475// I don't think this function should ever have been exported.
476// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
477// openmp code ever called it, but it's been exported from the RTL for so
478// long that I'm afraid to remove the definition.
479int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
480
493void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
494 // The implementation is now in kmp_runtime.cpp so that it can share static
495 // functions with kmp_fork_call since the tasks to be done are similar in
496 // each case.
497 __kmp_assert_valid_gtid(global_tid);
498#if OMPT_SUPPORT
499 OMPT_STORE_RETURN_ADDRESS(global_tid);
500#endif
501 __kmp_serialized_parallel(loc, global_tid);
502}
503
511void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
512 kmp_internal_control_t *top;
513 kmp_info_t *this_thr;
514 kmp_team_t *serial_team;
515
516 KC_TRACE(10,
517 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
518
519 /* skip all this code for autopar serialized loops since it results in
520 unacceptable overhead */
521 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
522 return;
523
524 // Not autopar code
525 __kmp_assert_valid_gtid(global_tid);
526 if (!TCR_4(__kmp_init_parallel))
527 __kmp_parallel_initialize();
528
529 __kmp_resume_if_soft_paused();
530
531 this_thr = __kmp_threads[global_tid];
532 serial_team = this_thr->th.th_serial_team;
533
534 kmp_task_team_t *task_team = this_thr->th.th_task_team;
535 // we need to wait for the proxy tasks before finishing the thread
536 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
537 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
538
539 KMP_MB();
540 KMP_DEBUG_ASSERT(serial_team);
541 KMP_ASSERT(serial_team->t.t_serialized);
542 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
543 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
545 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
546
547#if OMPT_SUPPORT
548 if (ompt_enabled.enabled &&
549 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
550 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
551 if (ompt_enabled.ompt_callback_implicit_task) {
552 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
553 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
554 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
555 }
556
557 // clear the task id only after unlinking the task
558 ompt_data_t *parent_task_data;
559 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
560
561 if (ompt_enabled.ompt_callback_parallel_end) {
562 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
563 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
564 ompt_parallel_invoker_program | ompt_parallel_team,
565 OMPT_LOAD_RETURN_ADDRESS(global_tid));
566 }
567 __ompt_lw_taskteam_unlink(this_thr);
568 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
569 }
570#endif
571
572 /* If necessary, pop the internal control stack values and replace the team
573 * values */
574 top = serial_team->t.t_control_stack_top;
575 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
576 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
577 serial_team->t.t_control_stack_top = top->next;
578 __kmp_free(top);
579 }
580
581 // if( serial_team -> t.t_serialized > 1 )
582 serial_team->t.t_level--;
583
584 /* pop dispatch buffers stack */
585 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
586 {
587 dispatch_private_info_t *disp_buffer =
588 serial_team->t.t_dispatch->th_disp_buffer;
589 serial_team->t.t_dispatch->th_disp_buffer =
590 serial_team->t.t_dispatch->th_disp_buffer->next;
591 __kmp_free(disp_buffer);
592 }
593 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
594
595 --serial_team->t.t_serialized;
596 if (serial_team->t.t_serialized == 0) {
597
598 /* return to the parallel section */
599
600#if KMP_ARCH_X86 || KMP_ARCH_X86_64
601 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
602 __kmp_clear_x87_fpu_status_word();
603 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
604 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
605 }
606#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
607
608#if OMPD_SUPPORT
609 if (ompd_state & OMPD_ENABLE_BP)
610 ompd_bp_parallel_end();
611#endif
612
613 this_thr->th.th_team = serial_team->t.t_parent;
614 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
615
616 /* restore values cached in the thread */
617 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
618 this_thr->th.th_team_master =
619 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
620 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
621
622 /* TODO the below shouldn't need to be adjusted for serialized teams */
623 this_thr->th.th_dispatch =
624 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
625
626 __kmp_pop_current_task_from_thread(this_thr);
627
628 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
629 this_thr->th.th_current_task->td_flags.executing = 1;
630
631 if (__kmp_tasking_mode != tskm_immediate_exec) {
632 // Copy the task team from the new child / old parent team to the thread.
633 this_thr->th.th_task_team =
634 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
635 KA_TRACE(20,
636 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
637 "team %p\n",
638 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
639 }
640 } else {
641 if (__kmp_tasking_mode != tskm_immediate_exec) {
642 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643 "depth of serial team %p to %d\n",
644 global_tid, serial_team, serial_team->t.t_serialized));
645 }
646 }
647
648 if (__kmp_env_consistency_check)
649 __kmp_pop_parallel(global_tid, NULL);
650#if OMPT_SUPPORT
651 if (ompt_enabled.enabled)
652 this_thr->th.ompt_thread_info.state =
653 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
654 : ompt_state_work_parallel);
655#endif
656}
657
666void __kmpc_flush(ident_t *loc) {
667 KC_TRACE(10, ("__kmpc_flush: called\n"));
668
669 /* need an explicit __mf() here since the library uses volatile instead */
670 KMP_MB(); /* Flush all pending memory write invalidates. */
671
672#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
673#if KMP_MIC
674// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
675// We shouldn't need it, though, since the ABI rules require that
676// * If the compiler generates NGO stores it also generates the fence
677// * If users hand-code NGO stores they should insert the fence
678// therefore no incomplete unordered stores should be visible.
679#else
680 // C74404
681 // This is to address non-temporal store instructions (sfence needed).
682 // The clflush instruction is also addressed (mfence needed).
683 // Probably the non-temporal load movntdqa instruction should also be
684 // addressed.
685 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
686 if (!__kmp_cpuinfo.initialized) {
687 __kmp_query_cpuid(&__kmp_cpuinfo);
688 }
689 if (!__kmp_cpuinfo.sse2) {
690 // CPU cannot execute SSE2 instructions.
691 } else {
692#if KMP_COMPILER_ICC
693 _mm_mfence();
694#elif KMP_COMPILER_MSVC
695 MemoryBarrier();
696#else
697 __sync_synchronize();
698#endif // KMP_COMPILER_ICC
699 }
700#endif // KMP_MIC
701#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
702 KMP_ARCH_RISCV64)
703// Nothing to see here; move along.
704#elif KMP_ARCH_PPC64
705// Nothing needed here (we have a real MB above).
706#else
707#error Unknown or unsupported architecture
708#endif
709
710#if OMPT_SUPPORT && OMPT_OPTIONAL
711 if (ompt_enabled.ompt_callback_flush) {
712 ompt_callbacks.ompt_callback(ompt_callback_flush)(
713 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
714 }
715#endif
716}
717
718/* -------------------------------------------------------------------------- */
726void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
727 KMP_COUNT_BLOCK(OMP_BARRIER);
728 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
729 __kmp_assert_valid_gtid(global_tid);
730
731 if (!TCR_4(__kmp_init_parallel))
732 __kmp_parallel_initialize();
733
734 __kmp_resume_if_soft_paused();
735
736 if (__kmp_env_consistency_check) {
737 if (loc == 0) {
738 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
739 }
740 __kmp_check_barrier(global_tid, ct_barrier, loc);
741 }
742
743#if OMPT_SUPPORT
744 ompt_frame_t *ompt_frame;
745 if (ompt_enabled.enabled) {
746 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
747 if (ompt_frame->enter_frame.ptr == NULL)
748 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
749 }
750 OMPT_STORE_RETURN_ADDRESS(global_tid);
751#endif
752 __kmp_threads[global_tid]->th.th_ident = loc;
753 // TODO: explicit barrier_wait_id:
754 // this function is called when 'barrier' directive is present or
755 // implicit barrier at the end of a worksharing construct.
756 // 1) better to add a per-thread barrier counter to a thread data structure
757 // 2) set to 0 when a new team is created
758 // 3) no sync is required
759
760 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
761#if OMPT_SUPPORT && OMPT_OPTIONAL
762 if (ompt_enabled.enabled) {
763 ompt_frame->enter_frame = ompt_data_none;
764 }
765#endif
766}
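
// Illustrative sketch, not part of kmp_csupport.cpp: inside an outlined
// parallel region, "#pragma omp barrier" is lowered to a single call; the
// loc/gtid parameters stand in for the values available in the outlined
// function.
static void barrier_sketch(ident_t *loc, kmp_int32 gtid) {
  __kmpc_barrier(loc, gtid);
}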
767
768/* The BARRIER for a MASTER section is always explicit */
775kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
776 int status = 0;
777
778 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
779 __kmp_assert_valid_gtid(global_tid);
780
781 if (!TCR_4(__kmp_init_parallel))
782 __kmp_parallel_initialize();
783
784 __kmp_resume_if_soft_paused();
785
786 if (KMP_MASTER_GTID(global_tid)) {
787 KMP_COUNT_BLOCK(OMP_MASTER);
788 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
789 status = 1;
790 }
791
792#if OMPT_SUPPORT && OMPT_OPTIONAL
793 if (status) {
794 if (ompt_enabled.ompt_callback_masked) {
795 kmp_info_t *this_thr = __kmp_threads[global_tid];
796 kmp_team_t *team = this_thr->th.th_team;
797
798 int tid = __kmp_tid_from_gtid(global_tid);
799 ompt_callbacks.ompt_callback(ompt_callback_masked)(
800 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
802 OMPT_GET_RETURN_ADDRESS(0));
803 }
804 }
805#endif
806
807 if (__kmp_env_consistency_check) {
808#if KMP_USE_DYNAMIC_LOCK
809 if (status)
810 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
811 else
812 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
813#else
814 if (status)
815 __kmp_push_sync(global_tid, ct_master, loc, NULL);
816 else
817 __kmp_check_sync(global_tid, ct_master, loc, NULL);
818#endif
819 }
820
821 return status;
822}
823
832void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
833 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
834 __kmp_assert_valid_gtid(global_tid);
835 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
836 KMP_POP_PARTITIONED_TIMER();
837
838#if OMPT_SUPPORT && OMPT_OPTIONAL
839 kmp_info_t *this_thr = __kmp_threads[global_tid];
840 kmp_team_t *team = this_thr->th.th_team;
841 if (ompt_enabled.ompt_callback_masked) {
842 int tid = __kmp_tid_from_gtid(global_tid);
843 ompt_callbacks.ompt_callback(ompt_callback_masked)(
844 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
845 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
846 OMPT_GET_RETURN_ADDRESS(0));
847 }
848#endif
849
850 if (__kmp_env_consistency_check) {
851 if (KMP_MASTER_GTID(global_tid))
852 __kmp_pop_sync(global_tid, ct_master, loc);
853 }
854}
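
// Illustrative sketch, not part of kmp_csupport.cpp: schematic lowering of
// "#pragma omp master" inside an outlined parallel region. Only the primary
// thread enters the guarded block, and no barrier is implied, so none is
// emitted; loc/gtid stand in for values available in the outlined function.
static void master_block_sketch(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    /* master-only code */
    __kmpc_end_master(loc, gtid);
  }
}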
855
864kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
865 int status = 0;
866 int tid;
867 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
868 __kmp_assert_valid_gtid(global_tid);
869
870 if (!TCR_4(__kmp_init_parallel))
871 __kmp_parallel_initialize();
872
873 __kmp_resume_if_soft_paused();
874
875 tid = __kmp_tid_from_gtid(global_tid);
876 if (tid == filter) {
877 KMP_COUNT_BLOCK(OMP_MASKED);
878 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
879 status = 1;
880 }
881
882#if OMPT_SUPPORT && OMPT_OPTIONAL
883 if (status) {
884 if (ompt_enabled.ompt_callback_masked) {
885 kmp_info_t *this_thr = __kmp_threads[global_tid];
886 kmp_team_t *team = this_thr->th.th_team;
887 ompt_callbacks.ompt_callback(ompt_callback_masked)(
888 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
889 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
890 OMPT_GET_RETURN_ADDRESS(0));
891 }
892 }
893#endif
894
895 if (__kmp_env_consistency_check) {
896#if KMP_USE_DYNAMIC_LOCK
897 if (status)
898 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
899 else
900 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
901#else
902 if (status)
903 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
904 else
905 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
906#endif
907 }
908
909 return status;
910}
911
920void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
921 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
922 __kmp_assert_valid_gtid(global_tid);
923 KMP_POP_PARTITIONED_TIMER();
924
925#if OMPT_SUPPORT && OMPT_OPTIONAL
926 kmp_info_t *this_thr = __kmp_threads[global_tid];
927 kmp_team_t *team = this_thr->th.th_team;
928 if (ompt_enabled.ompt_callback_masked) {
929 int tid = __kmp_tid_from_gtid(global_tid);
930 ompt_callbacks.ompt_callback(ompt_callback_masked)(
931 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
932 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
933 OMPT_GET_RETURN_ADDRESS(0));
934 }
935#endif
936
937 if (__kmp_env_consistency_check) {
938 __kmp_pop_sync(global_tid, ct_masked, loc);
939 }
940}
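
// Illustrative sketch, not part of kmp_csupport.cpp: "#pragma omp masked
// filter(expr)" follows the same pattern as master, with the evaluated filter
// expression forwarded as the third argument; loc/gtid/filter stand in for
// values available in the outlined function.
static void masked_block_sketch(ident_t *loc, kmp_int32 gtid, kmp_int32 filter) {
  if (__kmpc_masked(loc, gtid, filter)) {
    /* code for the selected thread */
    __kmpc_end_masked(loc, gtid);
  }
}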
941
949void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
950 int cid = 0;
951 kmp_info_t *th;
952 KMP_DEBUG_ASSERT(__kmp_init_serial);
953
954 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
955 __kmp_assert_valid_gtid(gtid);
956
957 if (!TCR_4(__kmp_init_parallel))
958 __kmp_parallel_initialize();
959
960 __kmp_resume_if_soft_paused();
961
962#if USE_ITT_BUILD
963 __kmp_itt_ordered_prep(gtid);
964// TODO: ordered_wait_id
965#endif /* USE_ITT_BUILD */
966
967 th = __kmp_threads[gtid];
968
969#if OMPT_SUPPORT && OMPT_OPTIONAL
970 kmp_team_t *team;
971 ompt_wait_id_t lck;
972 void *codeptr_ra;
973 OMPT_STORE_RETURN_ADDRESS(gtid);
974 if (ompt_enabled.enabled) {
975 team = __kmp_team_from_gtid(gtid);
976 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
977 /* OMPT state update */
978 th->th.ompt_thread_info.wait_id = lck;
979 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
980
981 /* OMPT event callback */
982 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
983 if (ompt_enabled.ompt_callback_mutex_acquire) {
984 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
985 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
986 codeptr_ra);
987 }
988 }
989#endif
990
991 if (th->th.th_dispatch->th_deo_fcn != 0)
992 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
993 else
994 __kmp_parallel_deo(&gtid, &cid, loc);
995
996#if OMPT_SUPPORT && OMPT_OPTIONAL
997 if (ompt_enabled.enabled) {
998 /* OMPT state update */
999 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1000 th->th.ompt_thread_info.wait_id = 0;
1001
1002 /* OMPT event callback */
1003 if (ompt_enabled.ompt_callback_mutex_acquired) {
1004 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1005 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1006 }
1007 }
1008#endif
1009
1010#if USE_ITT_BUILD
1011 __kmp_itt_ordered_start(gtid);
1012#endif /* USE_ITT_BUILD */
1013}
1014
1022void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1023 int cid = 0;
1024 kmp_info_t *th;
1025
1026 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1027 __kmp_assert_valid_gtid(gtid);
1028
1029#if USE_ITT_BUILD
1030 __kmp_itt_ordered_end(gtid);
1031// TODO: ordered_wait_id
1032#endif /* USE_ITT_BUILD */
1033
1034 th = __kmp_threads[gtid];
1035
1036 if (th->th.th_dispatch->th_dxo_fcn != 0)
1037 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1038 else
1039 __kmp_parallel_dxo(&gtid, &cid, loc);
1040
1041#if OMPT_SUPPORT && OMPT_OPTIONAL
1042 OMPT_STORE_RETURN_ADDRESS(gtid);
1043 if (ompt_enabled.ompt_callback_mutex_released) {
1044 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1045 ompt_mutex_ordered,
1046 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1047 ->t.t_ordered.dt.t_value,
1048 OMPT_LOAD_RETURN_ADDRESS(gtid));
1049 }
1050#endif
1051}
1052
1053#if KMP_USE_DYNAMIC_LOCK
1054
1055static __forceinline void
1056__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1057 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1058 // Pointer to the allocated indirect lock is written to crit, while indexing
1059 // is ignored.
1060 void *idx;
1061 kmp_indirect_lock_t **lck;
1062 lck = (kmp_indirect_lock_t **)crit;
1063 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1064 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1065 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1066 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1067 KA_TRACE(20,
1068 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1069#if USE_ITT_BUILD
1070 __kmp_itt_critical_creating(ilk->lock, loc);
1071#endif
1072 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1073 if (status == 0) {
1074#if USE_ITT_BUILD
1075 __kmp_itt_critical_destroyed(ilk->lock);
1076#endif
1077 // We don't really need to destroy the unclaimed lock here since it will be
1078 // cleaned up at program exit.
1079 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1080 }
1081 KMP_DEBUG_ASSERT(*lck != NULL);
1082}
1083
1084// Fast-path acquire tas lock
1085#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1086 { \
1087 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1088 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1089 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1090 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1091 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1092 kmp_uint32 spins; \
1093 KMP_FSYNC_PREPARE(l); \
1094 KMP_INIT_YIELD(spins); \
1095 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1096 do { \
1097 if (TCR_4(__kmp_nth) > \
1098 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1099 KMP_YIELD(TRUE); \
1100 } else { \
1101 KMP_YIELD_SPIN(spins); \
1102 } \
1103 __kmp_spin_backoff(&backoff); \
1104 } while ( \
1105 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1106 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1107 } \
1108 KMP_FSYNC_ACQUIRED(l); \
1109 }
1110
1111// Fast-path test tas lock
1112#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1113 { \
1114 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1115 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1116 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1117 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1118 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1119 }
1120
1121// Fast-path release tas lock
1122#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1123 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1124
1125#if KMP_USE_FUTEX
1126
1127#include <sys/syscall.h>
1128#include <unistd.h>
1129#ifndef FUTEX_WAIT
1130#define FUTEX_WAIT 0
1131#endif
1132#ifndef FUTEX_WAKE
1133#define FUTEX_WAKE 1
1134#endif
1135
1136// Fast-path acquire futex lock
1137#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1138 { \
1139 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1140 kmp_int32 gtid_code = (gtid + 1) << 1; \
1141 KMP_MB(); \
1142 KMP_FSYNC_PREPARE(ftx); \
1143 kmp_int32 poll_val; \
1144 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1145 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1146 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1147 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1148 if (!cond) { \
1149 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1150 poll_val | \
1151 KMP_LOCK_BUSY(1, futex))) { \
1152 continue; \
1153 } \
1154 poll_val |= KMP_LOCK_BUSY(1, futex); \
1155 } \
1156 kmp_int32 rc; \
1157 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1158 NULL, NULL, 0)) != 0) { \
1159 continue; \
1160 } \
1161 gtid_code |= 1; \
1162 } \
1163 KMP_FSYNC_ACQUIRED(ftx); \
1164 }
1165
1166// Fast-path test futex lock
1167#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1168 { \
1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1171 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1172 KMP_FSYNC_ACQUIRED(ftx); \
1173 rc = TRUE; \
1174 } else { \
1175 rc = FALSE; \
1176 } \
1177 }
1178
1179// Fast-path release futex lock
1180#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1181 { \
1182 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1183 KMP_MB(); \
1184 KMP_FSYNC_RELEASING(ftx); \
1185 kmp_int32 poll_val = \
1186 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1187 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1188 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1189 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1190 } \
1191 KMP_MB(); \
1192 KMP_YIELD_OVERSUB(); \
1193 }
1194
1195#endif // KMP_USE_FUTEX
1196
1197#else // KMP_USE_DYNAMIC_LOCK
1198
1199static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1200 ident_t const *loc,
1201 kmp_int32 gtid) {
1202 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1203
1204 // Because of the double-check, the following load doesn't need to be volatile
1205 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1206
1207 if (lck == NULL) {
1208 void *idx;
1209
1210 // Allocate & initialize the lock.
1211 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1212 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1213 __kmp_init_user_lock_with_checks(lck);
1214 __kmp_set_user_lock_location(lck, loc);
1215#if USE_ITT_BUILD
1216 __kmp_itt_critical_creating(lck);
1217// __kmp_itt_critical_creating() should be called *before* the first usage
1218// of the underlying lock. It is the only place where we can guarantee it.
1219// There is a chance the lock will be destroyed without ever being used, but
1220// that is not a problem, because this is not a real event seen by the user
1221// but rather sets a name for the object (lock). See more details in kmp_itt.h.
1222#endif /* USE_ITT_BUILD */
1223
1224 // Use a cmpxchg instruction to slam the start of the critical section with
1225 // the lock pointer. If another thread beat us to it, deallocate the lock,
1226 // and use the lock that the other thread allocated.
1227 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1228
1229 if (status == 0) {
1230// Deallocate the lock and reload the value.
1231#if USE_ITT_BUILD
1232 __kmp_itt_critical_destroyed(lck);
1233// Let ITT know the lock is destroyed and the same memory location may be reused
1234// for another purpose.
1235#endif /* USE_ITT_BUILD */
1236 __kmp_destroy_user_lock_with_checks(lck);
1237 __kmp_user_lock_free(&idx, gtid, lck);
1238 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1239 KMP_DEBUG_ASSERT(lck != NULL);
1240 }
1241 }
1242 return lck;
1243}
1244
1245#endif // KMP_USE_DYNAMIC_LOCK
1246
1257void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1258 kmp_critical_name *crit) {
1259#if KMP_USE_DYNAMIC_LOCK
1260#if OMPT_SUPPORT && OMPT_OPTIONAL
1261 OMPT_STORE_RETURN_ADDRESS(global_tid);
1262#endif // OMPT_SUPPORT
1263 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1264#else
1265 KMP_COUNT_BLOCK(OMP_CRITICAL);
1266#if OMPT_SUPPORT && OMPT_OPTIONAL
1267 ompt_state_t prev_state = ompt_state_undefined;
1268 ompt_thread_info_t ti;
1269#endif
1270 kmp_user_lock_p lck;
1271
1272 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1273 __kmp_assert_valid_gtid(global_tid);
1274
1275 // TODO: add THR_OVHD_STATE
1276
1277 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1278 KMP_CHECK_USER_LOCK_INIT();
1279
1280 if ((__kmp_user_lock_kind == lk_tas) &&
1281 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1282 lck = (kmp_user_lock_p)crit;
1283 }
1284#if KMP_USE_FUTEX
1285 else if ((__kmp_user_lock_kind == lk_futex) &&
1286 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1287 lck = (kmp_user_lock_p)crit;
1288 }
1289#endif
1290 else { // ticket, queuing or drdpa
1291 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1292 }
1293
1294 if (__kmp_env_consistency_check)
1295 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1296
1297 // Since the critical directive binds to all threads, not just the current
1298 // team, we have to check this even if we are in a serialized team.
1299 // Also, even if we are the uber thread, we still have to acquire the lock,
1300 // as we have to contend with sibling threads.
1301
1302#if USE_ITT_BUILD
1303 __kmp_itt_critical_acquiring(lck);
1304#endif /* USE_ITT_BUILD */
1305#if OMPT_SUPPORT && OMPT_OPTIONAL
1306 OMPT_STORE_RETURN_ADDRESS(global_tid);
1307 void *codeptr_ra = NULL;
1308 if (ompt_enabled.enabled) {
1309 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1310 /* OMPT state update */
1311 prev_state = ti.state;
1312 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1313 ti.state = ompt_state_wait_critical;
1314
1315 /* OMPT event callback */
1316 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1317 if (ompt_enabled.ompt_callback_mutex_acquire) {
1318 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1319 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1320 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1321 }
1322 }
1323#endif
1324 // Value of 'crit' should be good for using as a critical_id of the critical
1325 // section directive.
1326 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1327
1328#if USE_ITT_BUILD
1329 __kmp_itt_critical_acquired(lck);
1330#endif /* USE_ITT_BUILD */
1331#if OMPT_SUPPORT && OMPT_OPTIONAL
1332 if (ompt_enabled.enabled) {
1333 /* OMPT state update */
1334 ti.state = prev_state;
1335 ti.wait_id = 0;
1336
1337 /* OMPT event callback */
1338 if (ompt_enabled.ompt_callback_mutex_acquired) {
1339 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1340 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1341 }
1342 }
1343#endif
1344 KMP_POP_PARTITIONED_TIMER();
1345
1346 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1347 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1348#endif // KMP_USE_DYNAMIC_LOCK
1349}
1350
1351#if KMP_USE_DYNAMIC_LOCK
1352
1353// Converts the given hint to an internal lock implementation
1354static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1355#if KMP_USE_TSX
1356#define KMP_TSX_LOCK(seq) lockseq_##seq
1357#else
1358#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1359#endif
1360
1361#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1362#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1363#else
1364#define KMP_CPUINFO_RTM 0
1365#endif
1366
1367 // Hints that do not require further logic
1368 if (hint & kmp_lock_hint_hle)
1369 return KMP_TSX_LOCK(hle);
1370 if (hint & kmp_lock_hint_rtm)
1371 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1372 if (hint & kmp_lock_hint_adaptive)
1373 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1374
1375 // Rule out conflicting hints first by returning the default lock
1376 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1377 return __kmp_user_lock_seq;
1378 if ((hint & omp_lock_hint_speculative) &&
1379 (hint & omp_lock_hint_nonspeculative))
1380 return __kmp_user_lock_seq;
1381
1382 // Do not even consider speculation when it appears to be contended
1383 if (hint & omp_lock_hint_contended)
1384 return lockseq_queuing;
1385
1386 // Uncontended lock without speculation
1387 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1388 return lockseq_tas;
1389
1390 // Use RTM lock for speculation
1391 if (hint & omp_lock_hint_speculative)
1392 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1393
1394 return __kmp_user_lock_seq;
1395}
1396
1397#if OMPT_SUPPORT && OMPT_OPTIONAL
1398#if KMP_USE_DYNAMIC_LOCK
1399static kmp_mutex_impl_t
1400__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1401 if (user_lock) {
1402 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1403 case 0:
1404 break;
1405#if KMP_USE_FUTEX
1406 case locktag_futex:
1407 return kmp_mutex_impl_queuing;
1408#endif
1409 case locktag_tas:
1410 return kmp_mutex_impl_spin;
1411#if KMP_USE_TSX
1412 case locktag_hle:
1413 case locktag_rtm_spin:
1414 return kmp_mutex_impl_speculative;
1415#endif
1416 default:
1417 return kmp_mutex_impl_none;
1418 }
1419 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1420 }
1421 KMP_ASSERT(ilock);
1422 switch (ilock->type) {
1423#if KMP_USE_TSX
1424 case locktag_adaptive:
1425 case locktag_rtm_queuing:
1426 return kmp_mutex_impl_speculative;
1427#endif
1428 case locktag_nested_tas:
1429 return kmp_mutex_impl_spin;
1430#if KMP_USE_FUTEX
1431 case locktag_nested_futex:
1432#endif
1433 case locktag_ticket:
1434 case locktag_queuing:
1435 case locktag_drdpa:
1436 case locktag_nested_ticket:
1437 case locktag_nested_queuing:
1438 case locktag_nested_drdpa:
1439 return kmp_mutex_impl_queuing;
1440 default:
1441 return kmp_mutex_impl_none;
1442 }
1443}
1444#else
1445// For locks without dynamic binding
1446static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1447 switch (__kmp_user_lock_kind) {
1448 case lk_tas:
1449 return kmp_mutex_impl_spin;
1450#if KMP_USE_FUTEX
1451 case lk_futex:
1452#endif
1453 case lk_ticket:
1454 case lk_queuing:
1455 case lk_drdpa:
1456 return kmp_mutex_impl_queuing;
1457#if KMP_USE_TSX
1458 case lk_hle:
1459 case lk_rtm_queuing:
1460 case lk_rtm_spin:
1461 case lk_adaptive:
1462 return kmp_mutex_impl_speculative;
1463#endif
1464 default:
1465 return kmp_mutex_impl_none;
1466 }
1467}
1468#endif // KMP_USE_DYNAMIC_LOCK
1469#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1470
1484void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1485 kmp_critical_name *crit, uint32_t hint) {
1486 KMP_COUNT_BLOCK(OMP_CRITICAL);
1487 kmp_user_lock_p lck;
1488#if OMPT_SUPPORT && OMPT_OPTIONAL
1489 ompt_state_t prev_state = ompt_state_undefined;
1490 ompt_thread_info_t ti;
1491 // This is the case, if called from __kmpc_critical:
1492 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1493 if (!codeptr)
1494 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1495#endif
1496
1497 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1498 __kmp_assert_valid_gtid(global_tid);
1499
1500 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1501 // Check if it is initialized.
1502 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1503 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1504 if (*lk == 0) {
1505 if (KMP_IS_D_LOCK(lockseq)) {
1506 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1507 KMP_GET_D_TAG(lockseq));
1508 } else {
1509 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1510 }
1511 }
1512 // Branch for accessing the actual lock object and set operation. This
1513 // branching is inevitable since this lock initialization does not follow the
1514 // normal dispatch path (lock table is not used).
1515 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1516 lck = (kmp_user_lock_p)lk;
1517 if (__kmp_env_consistency_check) {
1518 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1519 __kmp_map_hint_to_lock(hint));
1520 }
1521#if USE_ITT_BUILD
1522 __kmp_itt_critical_acquiring(lck);
1523#endif
1524#if OMPT_SUPPORT && OMPT_OPTIONAL
1525 if (ompt_enabled.enabled) {
1526 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1527 /* OMPT state update */
1528 prev_state = ti.state;
1529 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1530 ti.state = ompt_state_wait_critical;
1531
1532 /* OMPT event callback */
1533 if (ompt_enabled.ompt_callback_mutex_acquire) {
1534 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1535 ompt_mutex_critical, (unsigned int)hint,
1536 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1537 codeptr);
1538 }
1539 }
1540#endif
1541#if KMP_USE_INLINED_TAS
1542 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1543 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1544 } else
1545#elif KMP_USE_INLINED_FUTEX
1546 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1547 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1548 } else
1549#endif
1550 {
1551 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1552 }
1553 } else {
1554 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1555 lck = ilk->lock;
1556 if (__kmp_env_consistency_check) {
1557 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1558 __kmp_map_hint_to_lock(hint));
1559 }
1560#if USE_ITT_BUILD
1561 __kmp_itt_critical_acquiring(lck);
1562#endif
1563#if OMPT_SUPPORT && OMPT_OPTIONAL
1564 if (ompt_enabled.enabled) {
1565 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1566 /* OMPT state update */
1567 prev_state = ti.state;
1568 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1569 ti.state = ompt_state_wait_critical;
1570
1571 /* OMPT event callback */
1572 if (ompt_enabled.ompt_callback_mutex_acquire) {
1573 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1574 ompt_mutex_critical, (unsigned int)hint,
1575 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1576 codeptr);
1577 }
1578 }
1579#endif
1580 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1581 }
1582 KMP_POP_PARTITIONED_TIMER();
1583
1584#if USE_ITT_BUILD
1585 __kmp_itt_critical_acquired(lck);
1586#endif /* USE_ITT_BUILD */
1587#if OMPT_SUPPORT && OMPT_OPTIONAL
1588 if (ompt_enabled.enabled) {
1589 /* OMPT state update */
1590 ti.state = prev_state;
1591 ti.wait_id = 0;
1592
1593 /* OMPT event callback */
1594 if (ompt_enabled.ompt_callback_mutex_acquired) {
1595 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1596 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1597 }
1598 }
1599#endif
1600
1601 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1602 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1603} // __kmpc_critical_with_hint
1604
1605#endif // KMP_USE_DYNAMIC_LOCK
1606
1616void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1617 kmp_critical_name *crit) {
1618 kmp_user_lock_p lck;
1619
1620 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1621
1622#if KMP_USE_DYNAMIC_LOCK
1623 int locktag = KMP_EXTRACT_D_TAG(crit);
1624 if (locktag) {
1625 lck = (kmp_user_lock_p)crit;
1626 KMP_ASSERT(lck != NULL);
1627 if (__kmp_env_consistency_check) {
1628 __kmp_pop_sync(global_tid, ct_critical, loc);
1629 }
1630#if USE_ITT_BUILD
1631 __kmp_itt_critical_releasing(lck);
1632#endif
1633#if KMP_USE_INLINED_TAS
1634 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1635 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1636 } else
1637#elif KMP_USE_INLINED_FUTEX
1638 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1639 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1640 } else
1641#endif
1642 {
1643 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1644 }
1645 } else {
1646 kmp_indirect_lock_t *ilk =
1647 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1648 KMP_ASSERT(ilk != NULL);
1649 lck = ilk->lock;
1650 if (__kmp_env_consistency_check) {
1651 __kmp_pop_sync(global_tid, ct_critical, loc);
1652 }
1653#if USE_ITT_BUILD
1654 __kmp_itt_critical_releasing(lck);
1655#endif
1656 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1657 }
1658
1659#else // KMP_USE_DYNAMIC_LOCK
1660
1661 if ((__kmp_user_lock_kind == lk_tas) &&
1662 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1663 lck = (kmp_user_lock_p)crit;
1664 }
1665#if KMP_USE_FUTEX
1666 else if ((__kmp_user_lock_kind == lk_futex) &&
1667 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1668 lck = (kmp_user_lock_p)crit;
1669 }
1670#endif
1671 else { // ticket, queuing or drdpa
1672 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1673 }
1674
1675 KMP_ASSERT(lck != NULL);
1676
1677 if (__kmp_env_consistency_check)
1678 __kmp_pop_sync(global_tid, ct_critical, loc);
1679
1680#if USE_ITT_BUILD
1681 __kmp_itt_critical_releasing(lck);
1682#endif /* USE_ITT_BUILD */
1683 // Value of 'crit' should be good for using as a critical_id of the critical
1684 // section directive.
1685 __kmp_release_user_lock_with_checks(lck, global_tid);
1686
1687#endif // KMP_USE_DYNAMIC_LOCK
1688
1689#if OMPT_SUPPORT && OMPT_OPTIONAL
1690 /* OMPT release event triggers after lock is released; place here to trigger
1691 * for all #if branches */
1692 OMPT_STORE_RETURN_ADDRESS(global_tid);
1693 if (ompt_enabled.ompt_callback_mutex_released) {
1694 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1695 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1696 OMPT_LOAD_RETURN_ADDRESS(0));
1697 }
1698#endif
1699
1700 KMP_POP_PARTITIONED_TIMER();
1701 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1702}
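
// Illustrative sketch, not part of kmp_csupport.cpp: schematic lowering of
// "#pragma omp critical (io)". The compiler emits one zero-initialized
// kmp_critical_name per critical name (the variable name below is
// hypothetical); the runtime installs the actual lock on first use. A
// hint(...) clause would route through __kmpc_critical_with_hint() instead.
static kmp_critical_name crit_io_sketch = {0};

static void critical_block_sketch(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &crit_io_sketch);
  /* ... guarded code ... */
  __kmpc_end_critical(loc, gtid, &crit_io_sketch);
}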
1703
1713kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1714 int status;
1715 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1716 __kmp_assert_valid_gtid(global_tid);
1717
1718 if (!TCR_4(__kmp_init_parallel))
1719 __kmp_parallel_initialize();
1720
1721 __kmp_resume_if_soft_paused();
1722
1723 if (__kmp_env_consistency_check)
1724 __kmp_check_barrier(global_tid, ct_barrier, loc);
1725
1726#if OMPT_SUPPORT
1727 ompt_frame_t *ompt_frame;
1728 if (ompt_enabled.enabled) {
1729 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1730 if (ompt_frame->enter_frame.ptr == NULL)
1731 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1732 }
1733 OMPT_STORE_RETURN_ADDRESS(global_tid);
1734#endif
1735#if USE_ITT_NOTIFY
1736 __kmp_threads[global_tid]->th.th_ident = loc;
1737#endif
1738 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1739#if OMPT_SUPPORT && OMPT_OPTIONAL
1740 if (ompt_enabled.enabled) {
1741 ompt_frame->enter_frame = ompt_data_none;
1742 }
1743#endif
1744
1745 return (status != 0) ? 0 : 1;
1746}
1747
1757void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1758 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1759 __kmp_assert_valid_gtid(global_tid);
1760 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1761}
1762
1773kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1774 kmp_int32 ret;
1775 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1776 __kmp_assert_valid_gtid(global_tid);
1777
1778 if (!TCR_4(__kmp_init_parallel))
1779 __kmp_parallel_initialize();
1780
1781 __kmp_resume_if_soft_paused();
1782
1783 if (__kmp_env_consistency_check) {
1784 if (loc == 0) {
1785 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1786 }
1787 __kmp_check_barrier(global_tid, ct_barrier, loc);
1788 }
1789
1790#if OMPT_SUPPORT
1791 ompt_frame_t *ompt_frame;
1792 if (ompt_enabled.enabled) {
1793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1794 if (ompt_frame->enter_frame.ptr == NULL)
1795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1796 }
1797 OMPT_STORE_RETURN_ADDRESS(global_tid);
1798#endif
1799#if USE_ITT_NOTIFY
1800 __kmp_threads[global_tid]->th.th_ident = loc;
1801#endif
1802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1803#if OMPT_SUPPORT && OMPT_OPTIONAL
1804 if (ompt_enabled.enabled) {
1805 ompt_frame->enter_frame = ompt_data_none;
1806 }
1807#endif
1808
1809 ret = __kmpc_master(loc, global_tid);
1810
1811 if (__kmp_env_consistency_check) {
1812 /* there's no __kmpc_end_master called; so the (stats) */
1813 /* actions of __kmpc_end_master are done here */
1814 if (ret) {
1815 /* only one thread should do the pop since only */
1816 /* one did the push (see __kmpc_master()) */
1817 __kmp_pop_sync(global_tid, ct_master, loc);
1818 }
1819 }
1820
1821 return (ret);
1822}
1823
1824/* The BARRIER for a SINGLE process section is always explicit */
1836kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1837 __kmp_assert_valid_gtid(global_tid);
1838 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1839
1840 if (rc) {
1841 // We are going to execute the single statement, so we should count it.
1842 KMP_COUNT_BLOCK(OMP_SINGLE);
1843 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1844 }
1845
1846#if OMPT_SUPPORT && OMPT_OPTIONAL
1847 kmp_info_t *this_thr = __kmp_threads[global_tid];
1848 kmp_team_t *team = this_thr->th.th_team;
1849 int tid = __kmp_tid_from_gtid(global_tid);
1850
1851 if (ompt_enabled.enabled) {
1852 if (rc) {
1853 if (ompt_enabled.ompt_callback_work) {
1854 ompt_callbacks.ompt_callback(ompt_callback_work)(
1855 ompt_work_single_executor, ompt_scope_begin,
1856 &(team->t.ompt_team_info.parallel_data),
1857 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1858 1, OMPT_GET_RETURN_ADDRESS(0));
1859 }
1860 } else {
1861 if (ompt_enabled.ompt_callback_work) {
1862 ompt_callbacks.ompt_callback(ompt_callback_work)(
1863 ompt_work_single_other, ompt_scope_begin,
1864 &(team->t.ompt_team_info.parallel_data),
1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1866 1, OMPT_GET_RETURN_ADDRESS(0));
1867 ompt_callbacks.ompt_callback(ompt_callback_work)(
1868 ompt_work_single_other, ompt_scope_end,
1869 &(team->t.ompt_team_info.parallel_data),
1870 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1871 1, OMPT_GET_RETURN_ADDRESS(0));
1872 }
1873 }
1874 }
1875#endif
1876
1877 return rc;
1878}
1879
1889void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1890 __kmp_assert_valid_gtid(global_tid);
1891 __kmp_exit_single(global_tid);
1892 KMP_POP_PARTITIONED_TIMER();
1893
1894#if OMPT_SUPPORT && OMPT_OPTIONAL
1895 kmp_info_t *this_thr = __kmp_threads[global_tid];
1896 kmp_team_t *team = this_thr->th.th_team;
1897 int tid = __kmp_tid_from_gtid(global_tid);
1898
1899 if (ompt_enabled.ompt_callback_work) {
1900 ompt_callbacks.ompt_callback(ompt_callback_work)(
1901 ompt_work_single_executor, ompt_scope_end,
1902 &(team->t.ompt_team_info.parallel_data),
1903 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1904 OMPT_GET_RETURN_ADDRESS(0));
1905 }
1906#endif
1907}
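
// Illustrative sketch, not part of kmp_csupport.cpp: schematic lowering of
// "#pragma omp single" without a nowait clause; only the winning thread runs
// the block and every thread joins the closing barrier. loc/gtid stand in for
// values available in the outlined function.
static void single_block_sketch(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    /* single-only code */
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // implicit barrier, omitted for nowait
}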
1908
1916void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1917 KMP_POP_PARTITIONED_TIMER();
1918 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1919
1920#if OMPT_SUPPORT && OMPT_OPTIONAL
1921 if (ompt_enabled.ompt_callback_work) {
1922 ompt_work_t ompt_work_type = ompt_work_loop;
1923 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1924 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1925 // Determine workshare type
1926 if (loc != NULL) {
1927 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1928 ompt_work_type = ompt_work_loop;
1929 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1930 ompt_work_type = ompt_work_sections;
1931 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1932 ompt_work_type = ompt_work_distribute;
1933 } else {
1934 // use default set above.
1935 // a warning about this case is provided in __kmpc_for_static_init
1936 }
1937 KMP_DEBUG_ASSERT(ompt_work_type);
1938 }
1939 ompt_callbacks.ompt_callback(ompt_callback_work)(
1940 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1941 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1942 }
1943#endif
1944 if (__kmp_env_consistency_check)
1945 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1946}
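
// Illustrative sketch, not part of kmp_csupport.cpp: a statically scheduled
// "#pragma omp for" pairs __kmpc_for_static_fini() above with
// __kmpc_for_static_init_4(), which lives in kmp_sched.cpp and is assumed
// here to have its usual kmp.h declaration; kmp_sch_static selects the plain
// block schedule. Names are hypothetical.
static void static_for_sketch(ident_t *loc, kmp_int32 gtid, kmp_int32 n) {
  kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    /* loop body for iteration i */
  }
  __kmpc_for_static_fini(loc, gtid);
  __kmpc_barrier(loc, gtid); // implicit barrier, omitted for nowait
}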
1947
1948// User routines which take C-style arguments (call by value)
1949// different from the Fortran equivalent routines
1950
1951void ompc_set_num_threads(int arg) {
1952 // !!!!! TODO: check the per-task binding
1953 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1954}
1955
1956void ompc_set_dynamic(int flag) {
1957 kmp_info_t *thread;
1958
1959 /* For the thread-private implementation of the internal controls */
1960 thread = __kmp_entry_thread();
1961
1962 __kmp_save_internal_controls(thread);
1963
1964 set__dynamic(thread, flag ? true : false);
1965}
1966
1967void ompc_set_nested(int flag) {
1968 kmp_info_t *thread;
1969
1970 /* For the thread-private internal controls implementation */
1971 thread = __kmp_entry_thread();
1972
1973 __kmp_save_internal_controls(thread);
1974
1975 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1976}
1977
1978void ompc_set_max_active_levels(int max_active_levels) {
1979 /* TO DO */
1980 /* we want per-task implementation of this internal control */
1981
1982 /* For the per-thread internal controls implementation */
1983 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1984}
1985
1986void ompc_set_schedule(omp_sched_t kind, int modifier) {
1987 // !!!!! TODO: check the per-task binding
1988 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1989}
1990
1991int ompc_get_ancestor_thread_num(int level) {
1992 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1993}
1994
1995int ompc_get_team_size(int level) {
1996 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1997}
1998
1999/* OpenMP 5.0 Affinity Format API */
2000void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2001 if (!__kmp_init_serial) {
2002 __kmp_serial_initialize();
2003 }
2004 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2005 format, KMP_STRLEN(format) + 1);
2006}
2007
2008size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2009 size_t format_size;
2010 if (!__kmp_init_serial) {
2011 __kmp_serial_initialize();
2012 }
2013 format_size = KMP_STRLEN(__kmp_affinity_format);
2014 if (buffer && size) {
2015 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2016 format_size + 1);
2017 }
2018 return format_size;
2019}
2020
2021void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2022 int gtid;
2023 if (!TCR_4(__kmp_init_middle)) {
2024 __kmp_middle_initialize();
2025 }
2026 __kmp_assign_root_init_mask();
2027 gtid = __kmp_get_gtid();
2028 __kmp_aux_display_affinity(gtid, format);
2029}
2030
2031size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2032 char const *format) {
2033 int gtid;
2034 size_t num_required;
2035 kmp_str_buf_t capture_buf;
2036 if (!TCR_4(__kmp_init_middle)) {
2037 __kmp_middle_initialize();
2038 }
2039 __kmp_assign_root_init_mask();
2040 gtid = __kmp_get_gtid();
2041 __kmp_str_buf_init(&capture_buf);
2042 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2043 if (buffer && buf_size) {
2044 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2045 capture_buf.used + 1);
2046 }
2047 __kmp_str_buf_free(&capture_buf);
2048 return num_required;
2049}
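// Illustrative sketch (not part of the runtime): ompc_get_affinity_format()
// and ompc_capture_affinity() follow a "query the size, then fill" contract --
// both return the number of characters required and only copy when a non-NULL
// buffer with a nonzero size is supplied. Assuming the omp_* affinity-format
// API is routed through these entry points, a typical user-level pattern is:
//
//   omp_set_affinity_format("host=%H tid=%n affinity=%A");
//   size_t needed = omp_capture_affinity(NULL, 0, NULL); // size query only
//   char *buf = (char *)malloc(needed + 1);
//   omp_capture_affinity(buf, needed + 1, NULL);         // fills the buffer
//   /* ... use buf ... */
//   free(buf);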
2050
2051void kmpc_set_stacksize(int arg) {
2052 // __kmp_aux_set_stacksize initializes the library if needed
2053 __kmp_aux_set_stacksize(arg);
2054}
2055
2056void kmpc_set_stacksize_s(size_t arg) {
2057 // __kmp_aux_set_stacksize initializes the library if needed
2058 __kmp_aux_set_stacksize(arg);
2059}
2060
2061void kmpc_set_blocktime(int arg) {
2062 int gtid, tid;
2063 kmp_info_t *thread;
2064
2065 gtid = __kmp_entry_gtid();
2066 tid = __kmp_tid_from_gtid(gtid);
2067 thread = __kmp_thread_from_gtid(gtid);
2068
2069 __kmp_aux_set_blocktime(arg, thread, tid);
2070}
2071
2072void kmpc_set_library(int arg) {
2073 // __kmp_user_set_library initializes the library if needed
2074 __kmp_user_set_library((enum library_type)arg);
2075}
2076
2077void kmpc_set_defaults(char const *str) {
2078 // __kmp_aux_set_defaults initializes the library if needed
2079 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2080}
2081
2082void kmpc_set_disp_num_buffers(int arg) {
2083 // ignore after initialization because some teams have already
2084 // allocated dispatch buffers
2085 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2086 arg <= KMP_MAX_DISP_NUM_BUFF) {
2087 __kmp_dispatch_num_buffers = arg;
2088 }
2089}
2090
2091int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2092#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2093 return -1;
2094#else
2095 if (!TCR_4(__kmp_init_middle)) {
2096 __kmp_middle_initialize();
2097 }
2098 __kmp_assign_root_init_mask();
2099 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2100#endif
2101}
2102
2103int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2104#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2105 return -1;
2106#else
2107 if (!TCR_4(__kmp_init_middle)) {
2108 __kmp_middle_initialize();
2109 }
2110 __kmp_assign_root_init_mask();
2111 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2112#endif
2113}
2114
2115int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2116#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2117 return -1;
2118#else
2119 if (!TCR_4(__kmp_init_middle)) {
2120 __kmp_middle_initialize();
2121 }
2122 __kmp_assign_root_init_mask();
2123 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2124#endif
2125}
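// Illustrative sketch (not part of the runtime): these kmpc_* routines back
// the KMP affinity-mask extension API; the user-visible names and return
// conventions below are assumptions based on that extension. A typical
// sequence on a build with affinity support:
//
//   kmp_affinity_mask_t mask;
//   kmp_create_affinity_mask(&mask);
//   if (kmp_set_affinity_mask_proc(3, &mask) == 0) // add logical proc 3
//     kmp_set_affinity(&mask);                     // bind the calling thread
//   kmp_destroy_affinity_mask(&mask);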
2126
2127/* -------------------------------------------------------------------------- */
2172void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2173 void *cpy_data, void (*cpy_func)(void *, void *),
2174 kmp_int32 didit) {
2175 void **data_ptr;
2176 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2177 __kmp_assert_valid_gtid(gtid);
2178
2179 KMP_MB();
2180
2181 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2182
2183 if (__kmp_env_consistency_check) {
2184 if (loc == 0) {
2185 KMP_WARNING(ConstructIdentInvalid);
2186 }
2187 }
2188
2189 // ToDo: Optimize the following two barriers into some kind of split barrier
2190
2191 if (didit)
2192 *data_ptr = cpy_data;
2193
2194#if OMPT_SUPPORT
2195 ompt_frame_t *ompt_frame;
2196 if (ompt_enabled.enabled) {
2197 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2198 if (ompt_frame->enter_frame.ptr == NULL)
2199 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2200 }
2201 OMPT_STORE_RETURN_ADDRESS(gtid);
2202#endif
2203/* This barrier is not a barrier region boundary */
2204#if USE_ITT_NOTIFY
2205 __kmp_threads[gtid]->th.th_ident = loc;
2206#endif
2207 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2208
2209 if (!didit)
2210 (*cpy_func)(cpy_data, *data_ptr);
2211
2212 // Consider next barrier a user-visible barrier for barrier region boundaries
2213 // Nesting checks are already handled by the single construct checks
2214 {
2215#if OMPT_SUPPORT
2216 OMPT_STORE_RETURN_ADDRESS(gtid);
2217#endif
2218#if USE_ITT_NOTIFY
2219 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2220// tasks can overwrite the location)
2221#endif
2222 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2223#if OMPT_SUPPORT && OMPT_OPTIONAL
2224 if (ompt_enabled.enabled) {
2225 ompt_frame->enter_frame = ompt_data_none;
2226 }
2227#endif
2228 }
2229}
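// Illustrative sketch (not part of the runtime): with a compiler that lowers
// "#pragma omp single copyprivate(x)" onto this entry point, the generated
// code follows the publish/copy protocol implemented above (the executing
// thread stores its pointer, a barrier publishes it, the other threads copy,
// and a second barrier keeps the source alive until everyone is done). Names
// below are illustrative:
//
//   static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
//   ...
//   kmp_int32 did_it = __kmpc_single(&loc, gtid);
//   if (did_it) {
//     x = produce_value(); // executed by the chosen thread only
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, did_it);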
2230
2231/* -------------------------------------------------------------------------- */
2232
2233#define INIT_LOCK __kmp_init_user_lock_with_checks
2234#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2235#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2236#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2237#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2238#define ACQUIRE_NESTED_LOCK_TIMED \
2239 __kmp_acquire_nested_user_lock_with_checks_timed
2240#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2241#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2242#define TEST_LOCK __kmp_test_user_lock_with_checks
2243#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2244#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2245#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2246
2247// TODO: Make check abort messages use location info & pass it into
2248// with_checks routines
2249
2250#if KMP_USE_DYNAMIC_LOCK
2251
2252// internal lock initializer
2253static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2254 kmp_dyna_lockseq_t seq) {
2255 if (KMP_IS_D_LOCK(seq)) {
2256 KMP_INIT_D_LOCK(lock, seq);
2257#if USE_ITT_BUILD
2258 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2259#endif
2260 } else {
2261 KMP_INIT_I_LOCK(lock, seq);
2262#if USE_ITT_BUILD
2263 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2264 __kmp_itt_lock_creating(ilk->lock, loc);
2265#endif
2266 }
2267}
2268
2269// internal nest lock initializer
2270static __forceinline void
2271__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2272 kmp_dyna_lockseq_t seq) {
2273#if KMP_USE_TSX
2274 // Don't have nested lock implementation for speculative locks
2275 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2276 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2277 seq = __kmp_user_lock_seq;
2278#endif
2279 switch (seq) {
2280 case lockseq_tas:
2281 seq = lockseq_nested_tas;
2282 break;
2283#if KMP_USE_FUTEX
2284 case lockseq_futex:
2285 seq = lockseq_nested_futex;
2286 break;
2287#endif
2288 case lockseq_ticket:
2289 seq = lockseq_nested_ticket;
2290 break;
2291 case lockseq_queuing:
2292 seq = lockseq_nested_queuing;
2293 break;
2294 case lockseq_drdpa:
2295 seq = lockseq_nested_drdpa;
2296 break;
2297 default:
2298 seq = lockseq_nested_queuing;
2299 }
2300 KMP_INIT_I_LOCK(lock, seq);
2301#if USE_ITT_BUILD
2302 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2303 __kmp_itt_lock_creating(ilk->lock, loc);
2304#endif
2305}
2306
2307/* initialize the lock with a hint */
2308void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2309 uintptr_t hint) {
2310 KMP_DEBUG_ASSERT(__kmp_init_serial);
2311 if (__kmp_env_consistency_check && user_lock == NULL) {
2312 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2313 }
2314
2315 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2316
2317#if OMPT_SUPPORT && OMPT_OPTIONAL
2318 // This is the case, if called from omp_init_lock_with_hint:
2319 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2320 if (!codeptr)
2321 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2322 if (ompt_enabled.ompt_callback_lock_init) {
2323 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2324 ompt_mutex_lock, (omp_lock_hint_t)hint,
2325 __ompt_get_mutex_impl_type(user_lock),
2326 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2327 }
2328#endif
2329}
2330
2331/* initialize the lock with a hint */
2332void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2333 void **user_lock, uintptr_t hint) {
2334 KMP_DEBUG_ASSERT(__kmp_init_serial);
2335 if (__kmp_env_consistency_check && user_lock == NULL) {
2336 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2337 }
2338
2339 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2340
2341#if OMPT_SUPPORT && OMPT_OPTIONAL
2342 // This is the case, if called from omp_init_lock_with_hint:
2343 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2344 if (!codeptr)
2345 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2346 if (ompt_enabled.ompt_callback_lock_init) {
2347 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2348 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2349 __ompt_get_mutex_impl_type(user_lock),
2350 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2351 }
2352#endif
2353}
2354
2355#endif // KMP_USE_DYNAMIC_LOCK
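// Illustrative sketch (not part of the runtime): as the comments above note,
// these entry points are reached from omp_init_lock_with_hint() and
// omp_init_nest_lock_with_hint(). At the user level the hint only selects the
// underlying lock implementation (via __kmp_map_hint_to_lock); the rest of the
// lock API is unchanged:
//
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_contended);
//   omp_set_lock(&l);
//   /* ... protected work ... */
//   omp_unset_lock(&l);
//   omp_destroy_lock(&l);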
2356
2357/* initialize the lock */
2358void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2359#if KMP_USE_DYNAMIC_LOCK
2360
2361 KMP_DEBUG_ASSERT(__kmp_init_serial);
2362 if (__kmp_env_consistency_check && user_lock == NULL) {
2363 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2364 }
2365 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2366
2367#if OMPT_SUPPORT && OMPT_OPTIONAL
2368 // This is the case, if called from omp_init_lock_with_hint:
2369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2370 if (!codeptr)
2371 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2372 if (ompt_enabled.ompt_callback_lock_init) {
2373 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2374 ompt_mutex_lock, omp_lock_hint_none,
2375 __ompt_get_mutex_impl_type(user_lock),
2376 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2377 }
2378#endif
2379
2380#else // KMP_USE_DYNAMIC_LOCK
2381
2382 static char const *const func = "omp_init_lock";
2383 kmp_user_lock_p lck;
2384 KMP_DEBUG_ASSERT(__kmp_init_serial);
2385
2386 if (__kmp_env_consistency_check) {
2387 if (user_lock == NULL) {
2388 KMP_FATAL(LockIsUninitialized, func);
2389 }
2390 }
2391
2392 KMP_CHECK_USER_LOCK_INIT();
2393
2394 if ((__kmp_user_lock_kind == lk_tas) &&
2395 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2396 lck = (kmp_user_lock_p)user_lock;
2397 }
2398#if KMP_USE_FUTEX
2399 else if ((__kmp_user_lock_kind == lk_futex) &&
2400 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2401 lck = (kmp_user_lock_p)user_lock;
2402 }
2403#endif
2404 else {
2405 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2406 }
2407 INIT_LOCK(lck);
2408 __kmp_set_user_lock_location(lck, loc);
2409
2410#if OMPT_SUPPORT && OMPT_OPTIONAL
2411 // This is the case, if called from omp_init_lock_with_hint:
2412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2413 if (!codeptr)
2414 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2415 if (ompt_enabled.ompt_callback_lock_init) {
2416 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2417 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2418 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2419 }
2420#endif
2421
2422#if USE_ITT_BUILD
2423 __kmp_itt_lock_creating(lck);
2424#endif /* USE_ITT_BUILD */
2425
2426#endif // KMP_USE_DYNAMIC_LOCK
2427} // __kmpc_init_lock
2428
2429/* initialize the lock */
2430void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2431#if KMP_USE_DYNAMIC_LOCK
2432
2433 KMP_DEBUG_ASSERT(__kmp_init_serial);
2434 if (__kmp_env_consistency_check && user_lock == NULL) {
2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2436 }
2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2438
2439#if OMPT_SUPPORT && OMPT_OPTIONAL
2440 // This is the case, if called from omp_init_lock_with_hint:
2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2442 if (!codeptr)
2443 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2444 if (ompt_enabled.ompt_callback_lock_init) {
2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2446 ompt_mutex_nest_lock, omp_lock_hint_none,
2447 __ompt_get_mutex_impl_type(user_lock),
2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449 }
2450#endif
2451
2452#else // KMP_USE_DYNAMIC_LOCK
2453
2454 static char const *const func = "omp_init_nest_lock";
2455 kmp_user_lock_p lck;
2456 KMP_DEBUG_ASSERT(__kmp_init_serial);
2457
2458 if (__kmp_env_consistency_check) {
2459 if (user_lock == NULL) {
2460 KMP_FATAL(LockIsUninitialized, func);
2461 }
2462 }
2463
2464 KMP_CHECK_USER_LOCK_INIT();
2465
2466 if ((__kmp_user_lock_kind == lk_tas) &&
2467 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2468 OMP_NEST_LOCK_T_SIZE)) {
2469 lck = (kmp_user_lock_p)user_lock;
2470 }
2471#if KMP_USE_FUTEX
2472 else if ((__kmp_user_lock_kind == lk_futex) &&
2473 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2474 OMP_NEST_LOCK_T_SIZE)) {
2475 lck = (kmp_user_lock_p)user_lock;
2476 }
2477#endif
2478 else {
2479 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2480 }
2481
2482 INIT_NESTED_LOCK(lck);
2483 __kmp_set_user_lock_location(lck, loc);
2484
2485#if OMPT_SUPPORT && OMPT_OPTIONAL
2486 // This is the case, if called from omp_init_lock_with_hint:
2487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2488 if (!codeptr)
2489 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2490 if (ompt_enabled.ompt_callback_lock_init) {
2491 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2492 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2493 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2494 }
2495#endif
2496
2497#if USE_ITT_BUILD
2498 __kmp_itt_lock_creating(lck);
2499#endif /* USE_ITT_BUILD */
2500
2501#endif // KMP_USE_DYNAMIC_LOCK
2502} // __kmpc_init_nest_lock
2503
2504void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2505#if KMP_USE_DYNAMIC_LOCK
2506
2507#if USE_ITT_BUILD
2508 kmp_user_lock_p lck;
2509 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2510 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2511 } else {
2512 lck = (kmp_user_lock_p)user_lock;
2513 }
2514 __kmp_itt_lock_destroyed(lck);
2515#endif
2516#if OMPT_SUPPORT && OMPT_OPTIONAL
2517 // This is the case, if called from omp_init_lock_with_hint:
2518 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2519 if (!codeptr)
2520 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2521 if (ompt_enabled.ompt_callback_lock_destroy) {
2522 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2523 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2524 }
2525#endif
2526 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2527#else
2528 kmp_user_lock_p lck;
2529
2530 if ((__kmp_user_lock_kind == lk_tas) &&
2531 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2532 lck = (kmp_user_lock_p)user_lock;
2533 }
2534#if KMP_USE_FUTEX
2535 else if ((__kmp_user_lock_kind == lk_futex) &&
2536 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2537 lck = (kmp_user_lock_p)user_lock;
2538 }
2539#endif
2540 else {
2541 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2542 }
2543
2544#if OMPT_SUPPORT && OMPT_OPTIONAL
2545 // This is the case, if called from omp_init_lock_with_hint:
2546 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2547 if (!codeptr)
2548 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2549 if (ompt_enabled.ompt_callback_lock_destroy) {
2550 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2551 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2552 }
2553#endif
2554
2555#if USE_ITT_BUILD
2556 __kmp_itt_lock_destroyed(lck);
2557#endif /* USE_ITT_BUILD */
2558 DESTROY_LOCK(lck);
2559
2560 if ((__kmp_user_lock_kind == lk_tas) &&
2561 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2562 ;
2563 }
2564#if KMP_USE_FUTEX
2565 else if ((__kmp_user_lock_kind == lk_futex) &&
2566 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2567 ;
2568 }
2569#endif
2570 else {
2571 __kmp_user_lock_free(user_lock, gtid, lck);
2572 }
2573#endif // KMP_USE_DYNAMIC_LOCK
2574} // __kmpc_destroy_lock
2575
2576/* destroy the lock */
2577void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2578#if KMP_USE_DYNAMIC_LOCK
2579
2580#if USE_ITT_BUILD
2581 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2582 __kmp_itt_lock_destroyed(ilk->lock);
2583#endif
2584#if OMPT_SUPPORT && OMPT_OPTIONAL
2585 // This is the case, if called from omp_init_lock_with_hint:
2586 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2587 if (!codeptr)
2588 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2589 if (ompt_enabled.ompt_callback_lock_destroy) {
2590 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2591 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2592 }
2593#endif
2594 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2595
2596#else // KMP_USE_DYNAMIC_LOCK
2597
2598 kmp_user_lock_p lck;
2599
2600 if ((__kmp_user_lock_kind == lk_tas) &&
2601 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2602 OMP_NEST_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
2605#if KMP_USE_FUTEX
2606 else if ((__kmp_user_lock_kind == lk_futex) &&
2607 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2608 OMP_NEST_LOCK_T_SIZE)) {
2609 lck = (kmp_user_lock_p)user_lock;
2610 }
2611#endif
2612 else {
2613 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2614 }
2615
2616#if OMPT_SUPPORT && OMPT_OPTIONAL
2617 // This is the case, if called from omp_init_lock_with_hint:
2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619 if (!codeptr)
2620 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621 if (ompt_enabled.ompt_callback_lock_destroy) {
2622 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2623 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2624 }
2625#endif
2626
2627#if USE_ITT_BUILD
2628 __kmp_itt_lock_destroyed(lck);
2629#endif /* USE_ITT_BUILD */
2630
2631 DESTROY_NESTED_LOCK(lck);
2632
2633 if ((__kmp_user_lock_kind == lk_tas) &&
2634 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2635 OMP_NEST_LOCK_T_SIZE)) {
2636 ;
2637 }
2638#if KMP_USE_FUTEX
2639 else if ((__kmp_user_lock_kind == lk_futex) &&
2640 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2641 OMP_NEST_LOCK_T_SIZE)) {
2642 ;
2643 }
2644#endif
2645 else {
2646 __kmp_user_lock_free(user_lock, gtid, lck);
2647 }
2648#endif // KMP_USE_DYNAMIC_LOCK
2649} // __kmpc_destroy_nest_lock
2650
2651void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2652 KMP_COUNT_BLOCK(OMP_set_lock);
2653#if KMP_USE_DYNAMIC_LOCK
2654 int tag = KMP_EXTRACT_D_TAG(user_lock);
2655#if USE_ITT_BUILD
2656 __kmp_itt_lock_acquiring(
2657 (kmp_user_lock_p)
2658 user_lock); // itt function will get to the right lock object.
2659#endif
2660#if OMPT_SUPPORT && OMPT_OPTIONAL
2661 // This is the case, if called from omp_init_lock_with_hint:
2662 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2663 if (!codeptr)
2664 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2665 if (ompt_enabled.ompt_callback_mutex_acquire) {
2666 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2667 ompt_mutex_lock, omp_lock_hint_none,
2668 __ompt_get_mutex_impl_type(user_lock),
2669 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2670 }
2671#endif
2672#if KMP_USE_INLINED_TAS
2673 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2674 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2675 } else
2676#elif KMP_USE_INLINED_FUTEX
2677 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2678 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2679 } else
2680#endif
2681 {
2682 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2683 }
2684#if USE_ITT_BUILD
2685 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2686#endif
2687#if OMPT_SUPPORT && OMPT_OPTIONAL
2688 if (ompt_enabled.ompt_callback_mutex_acquired) {
2689 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2690 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2691 }
2692#endif
2693
2694#else // KMP_USE_DYNAMIC_LOCK
2695
2696 kmp_user_lock_p lck;
2697
2698 if ((__kmp_user_lock_kind == lk_tas) &&
2699 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2700 lck = (kmp_user_lock_p)user_lock;
2701 }
2702#if KMP_USE_FUTEX
2703 else if ((__kmp_user_lock_kind == lk_futex) &&
2704 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2705 lck = (kmp_user_lock_p)user_lock;
2706 }
2707#endif
2708 else {
2709 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2710 }
2711
2712#if USE_ITT_BUILD
2713 __kmp_itt_lock_acquiring(lck);
2714#endif /* USE_ITT_BUILD */
2715#if OMPT_SUPPORT && OMPT_OPTIONAL
2716 // This is the case, if called from omp_init_lock_with_hint:
2717 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2718 if (!codeptr)
2719 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2720 if (ompt_enabled.ompt_callback_mutex_acquire) {
2721 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2722 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2723 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2724 }
2725#endif
2726
2727 ACQUIRE_LOCK(lck, gtid);
2728
2729#if USE_ITT_BUILD
2730 __kmp_itt_lock_acquired(lck);
2731#endif /* USE_ITT_BUILD */
2732
2733#if OMPT_SUPPORT && OMPT_OPTIONAL
2734 if (ompt_enabled.ompt_callback_mutex_acquired) {
2735 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2736 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2737 }
2738#endif
2739
2740#endif // KMP_USE_DYNAMIC_LOCK
2741}
2742
2743void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2744#if KMP_USE_DYNAMIC_LOCK
2745
2746#if USE_ITT_BUILD
2747 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2748#endif
2749#if OMPT_SUPPORT && OMPT_OPTIONAL
2750 // This is the case, if called from omp_init_lock_with_hint:
2751 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2752 if (!codeptr)
2753 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2754 if (ompt_enabled.enabled) {
2755 if (ompt_enabled.ompt_callback_mutex_acquire) {
2756 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2757 ompt_mutex_nest_lock, omp_lock_hint_none,
2758 __ompt_get_mutex_impl_type(user_lock),
2759 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2760 }
2761 }
2762#endif
2763 int acquire_status =
2764 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2765 (void)acquire_status;
2766#if USE_ITT_BUILD
2767 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2768#endif
2769
2770#if OMPT_SUPPORT && OMPT_OPTIONAL
2771 if (ompt_enabled.enabled) {
2772 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2773 if (ompt_enabled.ompt_callback_mutex_acquired) {
2774 // lock_first
2775 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2776 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2777 codeptr);
2778 }
2779 } else {
2780 if (ompt_enabled.ompt_callback_nest_lock) {
2781 // lock_next
2782 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2783 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2784 }
2785 }
2786 }
2787#endif
2788
2789#else // KMP_USE_DYNAMIC_LOCK
2790 int acquire_status;
2791 kmp_user_lock_p lck;
2792
2793 if ((__kmp_user_lock_kind == lk_tas) &&
2794 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2795 OMP_NEST_LOCK_T_SIZE)) {
2796 lck = (kmp_user_lock_p)user_lock;
2797 }
2798#if KMP_USE_FUTEX
2799 else if ((__kmp_user_lock_kind == lk_futex) &&
2800 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2801 OMP_NEST_LOCK_T_SIZE)) {
2802 lck = (kmp_user_lock_p)user_lock;
2803 }
2804#endif
2805 else {
2806 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2807 }
2808
2809#if USE_ITT_BUILD
2810 __kmp_itt_lock_acquiring(lck);
2811#endif /* USE_ITT_BUILD */
2812#if OMPT_SUPPORT && OMPT_OPTIONAL
2813 // This is the case, if called from omp_init_lock_with_hint:
2814 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2815 if (!codeptr)
2816 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2817 if (ompt_enabled.enabled) {
2818 if (ompt_enabled.ompt_callback_mutex_acquire) {
2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2820 ompt_mutex_nest_lock, omp_lock_hint_none,
2821 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2822 codeptr);
2823 }
2824 }
2825#endif
2826
2827 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2828
2829#if USE_ITT_BUILD
2830 __kmp_itt_lock_acquired(lck);
2831#endif /* USE_ITT_BUILD */
2832
2833#if OMPT_SUPPORT && OMPT_OPTIONAL
2834 if (ompt_enabled.enabled) {
2835 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2836 if (ompt_enabled.ompt_callback_mutex_acquired) {
2837 // lock_first
2838 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2839 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2840 }
2841 } else {
2842 if (ompt_enabled.ompt_callback_nest_lock) {
2843 // lock_next
2844 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2845 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2846 }
2847 }
2848 }
2849#endif
2850
2851#endif // KMP_USE_DYNAMIC_LOCK
2852}
2853
2854void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2855#if KMP_USE_DYNAMIC_LOCK
2856
2857 int tag = KMP_EXTRACT_D_TAG(user_lock);
2858#if USE_ITT_BUILD
2859 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2860#endif
2861#if KMP_USE_INLINED_TAS
2862 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2863 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2864 } else
2865#elif KMP_USE_INLINED_FUTEX
2866 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2867 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2868 } else
2869#endif
2870 {
2871 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2872 }
2873
2874#if OMPT_SUPPORT && OMPT_OPTIONAL
2875 // This is the case, if called from omp_init_lock_with_hint:
2876 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2877 if (!codeptr)
2878 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2879 if (ompt_enabled.ompt_callback_mutex_released) {
2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2881 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2882 }
2883#endif
2884
2885#else // KMP_USE_DYNAMIC_LOCK
2886
2887 kmp_user_lock_p lck;
2888
2889 /* Can't use serial interval since not block structured */
2890 /* release the lock */
2891
2892 if ((__kmp_user_lock_kind == lk_tas) &&
2893 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2894#if KMP_OS_LINUX && \
2895 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2896// "fast" path implemented to fix customer performance issue
2897#if USE_ITT_BUILD
2898 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2899#endif /* USE_ITT_BUILD */
2900 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2901 KMP_MB();
2902
2903#if OMPT_SUPPORT && OMPT_OPTIONAL
2904 // This is the case, if called from omp_init_lock_with_hint:
2905 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2906 if (!codeptr)
2907 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2908 if (ompt_enabled.ompt_callback_mutex_released) {
2909 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2910 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2911 }
2912#endif
2913
2914 return;
2915#else
2916 lck = (kmp_user_lock_p)user_lock;
2917#endif
2918 }
2919#if KMP_USE_FUTEX
2920 else if ((__kmp_user_lock_kind == lk_futex) &&
2921 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2922 lck = (kmp_user_lock_p)user_lock;
2923 }
2924#endif
2925 else {
2926 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2927 }
2928
2929#if USE_ITT_BUILD
2930 __kmp_itt_lock_releasing(lck);
2931#endif /* USE_ITT_BUILD */
2932
2933 RELEASE_LOCK(lck, gtid);
2934
2935#if OMPT_SUPPORT && OMPT_OPTIONAL
2936 // This is the case, if called from omp_init_lock_with_hint:
2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2938 if (!codeptr)
2939 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2940 if (ompt_enabled.ompt_callback_mutex_released) {
2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2943 }
2944#endif
2945
2946#endif // KMP_USE_DYNAMIC_LOCK
2947}
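// Illustrative sketch (not part of the runtime): with toolchains that lower
// the omp_*_lock API onto these __kmpc_* entry points (as the OMPT comments
// above assume), a plain lock's lifetime passes through this file as follows:
//
//   omp_lock_t l;
//   omp_init_lock(&l);    // __kmpc_init_lock
//   omp_set_lock(&l);     // __kmpc_set_lock, blocks until acquired
//   /* ... protected region ... */
//   omp_unset_lock(&l);   // __kmpc_unset_lock, may take the inlined TAS path
//   omp_destroy_lock(&l); // __kmpc_destroy_lock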
2948
2949/* release the lock */
2950void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2951#if KMP_USE_DYNAMIC_LOCK
2952
2953#if USE_ITT_BUILD
2954 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2955#endif
2956 int release_status =
2957 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2958 (void)release_status;
2959
2960#if OMPT_SUPPORT && OMPT_OPTIONAL
2961 // This is the case, if called from omp_init_lock_with_hint:
2962 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2963 if (!codeptr)
2964 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2965 if (ompt_enabled.enabled) {
2966 if (release_status == KMP_LOCK_RELEASED) {
2967 if (ompt_enabled.ompt_callback_mutex_released) {
2968 // release_lock_last
2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2970 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2971 codeptr);
2972 }
2973 } else if (ompt_enabled.ompt_callback_nest_lock) {
2974 // release_lock_prev
2975 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2976 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2977 }
2978 }
2979#endif
2980
2981#else // KMP_USE_DYNAMIC_LOCK
2982
2983 kmp_user_lock_p lck;
2984
2985 /* Can't use serial interval since not block structured */
2986
2987 if ((__kmp_user_lock_kind == lk_tas) &&
2988 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2989 OMP_NEST_LOCK_T_SIZE)) {
2990#if KMP_OS_LINUX && \
2991 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2992 // "fast" path implemented to fix customer performance issue
2993 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2994#if USE_ITT_BUILD
2995 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2996#endif /* USE_ITT_BUILD */
2997
2998#if OMPT_SUPPORT && OMPT_OPTIONAL
2999 int release_status = KMP_LOCK_STILL_HELD;
3000#endif
3001
3002 if (--(tl->lk.depth_locked) == 0) {
3003 TCW_4(tl->lk.poll, 0);
3004#if OMPT_SUPPORT && OMPT_OPTIONAL
3005 release_status = KMP_LOCK_RELEASED;
3006#endif
3007 }
3008 KMP_MB();
3009
3010#if OMPT_SUPPORT && OMPT_OPTIONAL
3011 // This is the case, if called from omp_init_lock_with_hint:
3012 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3013 if (!codeptr)
3014 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3015 if (ompt_enabled.enabled) {
3016 if (release_status == KMP_LOCK_RELEASED) {
3017 if (ompt_enabled.ompt_callback_mutex_released) {
3018 // release_lock_last
3019 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3020 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3021 }
3022 } else if (ompt_enabled.ompt_callback_nest_lock) {
3023 // release_lock_previous
3024 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3025 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3026 }
3027 }
3028#endif
3029
3030 return;
3031#else
3032 lck = (kmp_user_lock_p)user_lock;
3033#endif
3034 }
3035#if KMP_USE_FUTEX
3036 else if ((__kmp_user_lock_kind == lk_futex) &&
3037 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3038 OMP_NEST_LOCK_T_SIZE)) {
3039 lck = (kmp_user_lock_p)user_lock;
3040 }
3041#endif
3042 else {
3043 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3044 }
3045
3046#if USE_ITT_BUILD
3047 __kmp_itt_lock_releasing(lck);
3048#endif /* USE_ITT_BUILD */
3049
3050 int release_status;
3051 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3052#if OMPT_SUPPORT && OMPT_OPTIONAL
3053 // This is the case, if called from omp_init_lock_with_hint:
3054 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3055 if (!codeptr)
3056 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3057 if (ompt_enabled.enabled) {
3058 if (release_status == KMP_LOCK_RELEASED) {
3059 if (ompt_enabled.ompt_callback_mutex_released) {
3060 // release_lock_last
3061 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3062 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3063 }
3064 } else if (ompt_enabled.ompt_callback_nest_lock) {
3065 // release_lock_previous
3066 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3067 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3068 }
3069 }
3070#endif
3071
3072#endif // KMP_USE_DYNAMIC_LOCK
3073}
3074
3075/* try to acquire the lock */
3076int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3077 KMP_COUNT_BLOCK(OMP_test_lock);
3078
3079#if KMP_USE_DYNAMIC_LOCK
3080 int rc;
3081 int tag = KMP_EXTRACT_D_TAG(user_lock);
3082#if USE_ITT_BUILD
3083 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3084#endif
3085#if OMPT_SUPPORT && OMPT_OPTIONAL
3086 // This is the case, if called from omp_init_lock_with_hint:
3087 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3088 if (!codeptr)
3089 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3090 if (ompt_enabled.ompt_callback_mutex_acquire) {
3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3092 ompt_mutex_lock, omp_lock_hint_none,
3093 __ompt_get_mutex_impl_type(user_lock),
3094 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3095 }
3096#endif
3097#if KMP_USE_INLINED_TAS
3098 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3099 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3100 } else
3101#elif KMP_USE_INLINED_FUTEX
3102 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3103 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3104 } else
3105#endif
3106 {
3107 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3108 }
3109 if (rc) {
3110#if USE_ITT_BUILD
3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3112#endif
3113#if OMPT_SUPPORT && OMPT_OPTIONAL
3114 if (ompt_enabled.ompt_callback_mutex_acquired) {
3115 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3116 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3117 }
3118#endif
3119 return FTN_TRUE;
3120 } else {
3121#if USE_ITT_BUILD
3122 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3123#endif
3124 return FTN_FALSE;
3125 }
3126
3127#else // KMP_USE_DYNAMIC_LOCK
3128
3129 kmp_user_lock_p lck;
3130 int rc;
3131
3132 if ((__kmp_user_lock_kind == lk_tas) &&
3133 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3134 lck = (kmp_user_lock_p)user_lock;
3135 }
3136#if KMP_USE_FUTEX
3137 else if ((__kmp_user_lock_kind == lk_futex) &&
3138 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3139 lck = (kmp_user_lock_p)user_lock;
3140 }
3141#endif
3142 else {
3143 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3144 }
3145
3146#if USE_ITT_BUILD
3147 __kmp_itt_lock_acquiring(lck);
3148#endif /* USE_ITT_BUILD */
3149#if OMPT_SUPPORT && OMPT_OPTIONAL
3150 // This is the case, if called from omp_init_lock_with_hint:
3151 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3152 if (!codeptr)
3153 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3154 if (ompt_enabled.ompt_callback_mutex_acquire) {
3155 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3156 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3157 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3158 }
3159#endif
3160
3161 rc = TEST_LOCK(lck, gtid);
3162#if USE_ITT_BUILD
3163 if (rc) {
3164 __kmp_itt_lock_acquired(lck);
3165 } else {
3166 __kmp_itt_lock_cancelled(lck);
3167 }
3168#endif /* USE_ITT_BUILD */
3169#if OMPT_SUPPORT && OMPT_OPTIONAL
3170 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3171 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3172 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3173 }
3174#endif
3175
3176 return (rc ? FTN_TRUE : FTN_FALSE);
3177
3178 /* Can't use serial interval since not block structured */
3179
3180#endif // KMP_USE_DYNAMIC_LOCK
3181}
3182
3183/* try to acquire the lock */
3184int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3185#if KMP_USE_DYNAMIC_LOCK
3186 int rc;
3187#if USE_ITT_BUILD
3188 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3189#endif
3190#if OMPT_SUPPORT && OMPT_OPTIONAL
3191 // This is the case, if called from omp_init_lock_with_hint:
3192 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3193 if (!codeptr)
3194 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3195 if (ompt_enabled.ompt_callback_mutex_acquire) {
3196 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3197 ompt_mutex_nest_lock, omp_lock_hint_none,
3198 __ompt_get_mutex_impl_type(user_lock),
3199 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3200 }
3201#endif
3202 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3203#if USE_ITT_BUILD
3204 if (rc) {
3205 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3206 } else {
3207 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3208 }
3209#endif
3210#if OMPT_SUPPORT && OMPT_OPTIONAL
3211 if (ompt_enabled.enabled && rc) {
3212 if (rc == 1) {
3213 if (ompt_enabled.ompt_callback_mutex_acquired) {
3214 // lock_first
3215 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3217 codeptr);
3218 }
3219 } else {
3220 if (ompt_enabled.ompt_callback_nest_lock) {
3221 // lock_next
3222 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3223 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3224 }
3225 }
3226 }
3227#endif
3228 return rc;
3229
3230#else // KMP_USE_DYNAMIC_LOCK
3231
3232 kmp_user_lock_p lck;
3233 int rc;
3234
3235 if ((__kmp_user_lock_kind == lk_tas) &&
3236 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3237 OMP_NEST_LOCK_T_SIZE)) {
3238 lck = (kmp_user_lock_p)user_lock;
3239 }
3240#if KMP_USE_FUTEX
3241 else if ((__kmp_user_lock_kind == lk_futex) &&
3242 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3243 OMP_NEST_LOCK_T_SIZE)) {
3244 lck = (kmp_user_lock_p)user_lock;
3245 }
3246#endif
3247 else {
3248 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3249 }
3250
3251#if USE_ITT_BUILD
3252 __kmp_itt_lock_acquiring(lck);
3253#endif /* USE_ITT_BUILD */
3254
3255#if OMPT_SUPPORT && OMPT_OPTIONAL
3256 // This is the case, if called from omp_init_lock_with_hint:
3257 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3258 if (!codeptr)
3259 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3260 if (ompt_enabled.enabled &&
3261 ompt_enabled.ompt_callback_mutex_acquire) {
3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263 ompt_mutex_nest_lock, omp_lock_hint_none,
3264 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3265 codeptr);
3266 }
3267#endif
3268
3269 rc = TEST_NESTED_LOCK(lck, gtid);
3270#if USE_ITT_BUILD
3271 if (rc) {
3272 __kmp_itt_lock_acquired(lck);
3273 } else {
3274 __kmp_itt_lock_cancelled(lck);
3275 }
3276#endif /* USE_ITT_BUILD */
3277#if OMPT_SUPPORT && OMPT_OPTIONAL
3278 if (ompt_enabled.enabled && rc) {
3279 if (rc == 1) {
3280 if (ompt_enabled.ompt_callback_mutex_acquired) {
3281 // lock_first
3282 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3283 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3284 }
3285 } else {
3286 if (ompt_enabled.ompt_callback_nest_lock) {
3287 // lock_next
3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3290 }
3291 }
3292 }
3293#endif
3294 return rc;
3295
3296 /* Can't use serial interval since not block structured */
3297
3298#endif // KMP_USE_DYNAMIC_LOCK
3299}
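// Illustrative sketch (not part of the runtime): for nestable locks the test
// entry point reports the nesting depth rather than a plain boolean --
// __kmpc_test_nest_lock() returns 0 on failure and the new nesting count on
// success, which is why the OMPT code above distinguishes rc == 1 (first
// acquisition) from deeper re-acquisitions. Assuming omp_test_nest_lock() is
// routed through this entry point:
//
//   omp_nest_lock_t l;
//   omp_init_nest_lock(&l);
//   int d1 = omp_test_nest_lock(&l); // 1: first acquisition by this thread
//   int d2 = omp_test_nest_lock(&l); // 2: re-acquisition by the owner
//   omp_unset_nest_lock(&l);         // depth back to 1
//   omp_unset_nest_lock(&l);         // depth 0, lock released
//   omp_destroy_nest_lock(&l);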
3300
3301// Interface to fast scalable reduce methods routines
3302
3303// keep the selected method in a thread local structure for cross-function
3304// usage: will be used in __kmpc_end_reduce* functions;
3305// another solution: to re-determine the method one more time in
3306// __kmpc_end_reduce* functions (new prototype required then)
3307// AT: which solution is better?
3308#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3309 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3310
3311#define __KMP_GET_REDUCTION_METHOD(gtid) \
3312 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3313
3314// description of the packed_reduction_method variable: look at the macros in
3315// kmp.h
3316
3317// used in a critical section reduce block
3318static __forceinline void
3319__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3320 kmp_critical_name *crit) {
3321
3322 // this lock was visible to a customer and to the threading profile tool as a
3323 // serial overhead span (although it's used for an internal purpose only)
3324 // why was it visible in the previous implementation?
3325 // should we keep it visible in the new reduce block?
3326 kmp_user_lock_p lck;
3327
3328#if KMP_USE_DYNAMIC_LOCK
3329
3330 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3331 // Check if it is initialized.
3332 if (*lk == 0) {
3333 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3334 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3335 KMP_GET_D_TAG(__kmp_user_lock_seq));
3336 } else {
3337 __kmp_init_indirect_csptr(crit, loc, global_tid,
3338 KMP_GET_I_TAG(__kmp_user_lock_seq));
3339 }
3340 }
3341 // Branch for accessing the actual lock object and set operation. This
3342 // branching is inevitable since this lock initialization does not follow the
3343 // normal dispatch path (lock table is not used).
3344 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3345 lck = (kmp_user_lock_p)lk;
3346 KMP_DEBUG_ASSERT(lck != NULL);
3347 if (__kmp_env_consistency_check) {
3348 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3349 }
3350 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3351 } else {
3352 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3353 lck = ilk->lock;
3354 KMP_DEBUG_ASSERT(lck != NULL);
3355 if (__kmp_env_consistency_check) {
3356 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3357 }
3358 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3359 }
3360
3361#else // KMP_USE_DYNAMIC_LOCK
3362
3363 // We know that the fast reduction code is only emitted by Intel compilers
3364 // with 32 byte critical sections. If there isn't enough space, then we
3365 // have to use a pointer.
3366 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3367 lck = (kmp_user_lock_p)crit;
3368 } else {
3369 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3370 }
3371 KMP_DEBUG_ASSERT(lck != NULL);
3372
3373 if (__kmp_env_consistency_check)
3374 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3375
3376 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3377
3378#endif // KMP_USE_DYNAMIC_LOCK
3379}
3380
3381// used in a critical section reduce block
3382static __forceinline void
3383__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3384 kmp_critical_name *crit) {
3385
3386 kmp_user_lock_p lck;
3387
3388#if KMP_USE_DYNAMIC_LOCK
3389
3390 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3391 lck = (kmp_user_lock_p)crit;
3392 if (__kmp_env_consistency_check)
3393 __kmp_pop_sync(global_tid, ct_critical, loc);
3394 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3395 } else {
3396 kmp_indirect_lock_t *ilk =
3397 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3398 if (__kmp_env_consistency_check)
3399 __kmp_pop_sync(global_tid, ct_critical, loc);
3400 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3401 }
3402
3403#else // KMP_USE_DYNAMIC_LOCK
3404
3405 // We know that the fast reduction code is only emitted by Intel compilers
3406 // with 32 byte critical sections. If there isn't enough space, then we have
3407 // to use a pointer.
3408 if (__kmp_base_user_lock_size > 32) {
3409 lck = *((kmp_user_lock_p *)crit);
3410 KMP_ASSERT(lck != NULL);
3411 } else {
3412 lck = (kmp_user_lock_p)crit;
3413 }
3414
3415 if (__kmp_env_consistency_check)
3416 __kmp_pop_sync(global_tid, ct_critical, loc);
3417
3418 __kmp_release_user_lock_with_checks(lck, global_tid);
3419
3420#endif // KMP_USE_DYNAMIC_LOCK
3421} // __kmp_end_critical_section_reduce_block
3422
3423static __forceinline int
3424__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3425 int *task_state) {
3426 kmp_team_t *team;
3427
3428 // Check if we are inside a teams construct.
3429 if (th->th.th_teams_microtask) {
3430 *team_p = team = th->th.th_team;
3431 if (team->t.t_level == th->th.th_teams_level) {
3432 // This is reduction at teams construct.
3433 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3434 // Let's swap teams temporarily for the reduction.
3435 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3436 th->th.th_team = team->t.t_parent;
3437 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3438 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3439 *task_state = th->th.th_task_state;
3440 th->th.th_task_state = 0;
3441
3442 return 1;
3443 }
3444 }
3445 return 0;
3446}
3447
3448static __forceinline void
3449__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3450 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3451 th->th.th_info.ds.ds_tid = 0;
3452 th->th.th_team = team;
3453 th->th.th_team_nproc = team->t.t_nproc;
3454 th->th.th_task_team = team->t.t_task_team[task_state];
3455 __kmp_type_convert(task_state, &(th->th.th_task_state));
3456}
3457
3458/* 2.a.i. Reduce Block without a terminating barrier */
3474kmp_int32
3475__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3476 size_t reduce_size, void *reduce_data,
3477 void (*reduce_func)(void *lhs_data, void *rhs_data),
3478 kmp_critical_name *lck) {
3479
3480 KMP_COUNT_BLOCK(REDUCE_nowait);
3481 int retval = 0;
3482 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3483 kmp_info_t *th;
3484 kmp_team_t *team;
3485 int teams_swapped = 0, task_state;
3486 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3487 __kmp_assert_valid_gtid(global_tid);
3488
3489 // why do we need this initialization here at all?
3490 // Reduction clause cannot be used as a stand-alone directive.
3491
3492 // do not call __kmp_serial_initialize(), it will be called by
3493 // __kmp_parallel_initialize() if needed
3494 // possible detection of false-positive race by the threadchecker ???
3495 if (!TCR_4(__kmp_init_parallel))
3496 __kmp_parallel_initialize();
3497
3498 __kmp_resume_if_soft_paused();
3499
3500// check correctness of reduce block nesting
3501#if KMP_USE_DYNAMIC_LOCK
3502 if (__kmp_env_consistency_check)
3503 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3504#else
3505 if (__kmp_env_consistency_check)
3506 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3507#endif
3508
3509 th = __kmp_thread_from_gtid(global_tid);
3510 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3511
3512 // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3513 // functions, so it must be kept in a variable
3514 // the variable should be either a construct-specific or a thread-specific
3515 // property, not a team-specific property
3516 // (a thread can reach the next reduce block on the next construct, and the
3517 // reduce method may differ on that construct)
3518 // an ident_t "loc" parameter could be used as a construct-specific property
3519 // (but what if loc == 0?)
3520 // (if a construct-specific or team-specific variable were shared,
3521 // unnecessary extra synchronization would be needed)
3522 // a thread-specific variable avoids both issues above (next construct and
3523 // extra syncs)
3524 // a thread-specific "th_local.reduction_method" variable is used currently
3525 // each thread executes the 'determine' and 'set' lines (no need to restrict
3526 // this to one thread, which would only add extra synchronization)
3527
3528 packed_reduction_method = __kmp_determine_reduction_method(
3529 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3530 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3531
3532 OMPT_REDUCTION_DECL(th, global_tid);
3533 if (packed_reduction_method == critical_reduce_block) {
3534
3535 OMPT_REDUCTION_BEGIN;
3536
3537 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3538 retval = 1;
3539
3540 } else if (packed_reduction_method == empty_reduce_block) {
3541
3542 OMPT_REDUCTION_BEGIN;
3543
3544 // usage: if team size == 1, no synchronization is required ( Intel
3545 // platforms only )
3546 retval = 1;
3547
3548 } else if (packed_reduction_method == atomic_reduce_block) {
3549
3550 retval = 2;
3551
3552 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3553 // won't be called by the code gen)
3554 // (this is not ideal, because the checking block has been closed by
3555 // this 'pop',
3556 // but the atomic operation has not been executed yet; it will be executed
3557 // slightly later, literally on the next instruction)
3558 if (__kmp_env_consistency_check)
3559 __kmp_pop_sync(global_tid, ct_reduce, loc);
3560
3561 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3562 tree_reduce_block)) {
3563
3564// AT: performance issue: a real barrier here
3565// AT: (if the primary thread is slow, other threads are blocked here waiting
3566// for the primary thread to come and release them)
3567// AT: (this is not what a customer expects when specifying the NOWAIT clause)
3568// AT: (specifying NOWAIT won't improve performance; it will only be confusing
3569// to a customer)
3570// AT: another implementation of *barrier_gather*nowait() (or some other
3571// design) might be faster and more in line with the sense of NOWAIT
3572// AT: TO DO: run the EPCC test and compare times
3573
3574// this barrier should be invisible to a customer and to the threading profile
3575// tool (it's neither a terminating barrier nor customer's code, it's
3576// used for an internal purpose)
3577#if OMPT_SUPPORT
3578 // JP: can this barrier potentially lead to task scheduling?
3579 // JP: as long as there is a barrier in the implementation, OMPT should and
3580 // will provide the barrier events
3581 // so we set up the necessary frame/return addresses.
3582 ompt_frame_t *ompt_frame;
3583 if (ompt_enabled.enabled) {
3584 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3585 if (ompt_frame->enter_frame.ptr == NULL)
3586 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3587 }
3588 OMPT_STORE_RETURN_ADDRESS(global_tid);
3589#endif
3590#if USE_ITT_NOTIFY
3591 __kmp_threads[global_tid]->th.th_ident = loc;
3592#endif
3593 retval =
3594 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3595 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3596 retval = (retval != 0) ? (0) : (1);
3597#if OMPT_SUPPORT && OMPT_OPTIONAL
3598 if (ompt_enabled.enabled) {
3599 ompt_frame->enter_frame = ompt_data_none;
3600 }
3601#endif
3602
3603 // all other workers except primary thread should do this pop here
3604 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3605 if (__kmp_env_consistency_check) {
3606 if (retval == 0) {
3607 __kmp_pop_sync(global_tid, ct_reduce, loc);
3608 }
3609 }
3610
3611 } else {
3612
3613 // should never reach this block
3614 KMP_ASSERT(0); // "unexpected method"
3615 }
3616 if (teams_swapped) {
3617 __kmp_restore_swapped_teams(th, team, task_state);
3618 }
3619 KA_TRACE(
3620 10,
3621 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3622 global_tid, packed_reduction_method, retval));
3623
3624 return retval;
3625}
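// Illustrative sketch (not part of the runtime): the return value of
// __kmpc_reduce_nowait() selects the path the code generator emits for a
// nowait reduction -- 1: combine directly and call __kmpc_end_reduce_nowait();
// 2: combine with atomics and call nothing else; 0: another thread already
// owns the combined value, do nothing. A rough per-thread sketch for
// "reduction(+:sum)" (reduce_fn and crit_name are illustrative):
//
//   switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(sum_priv), &sum_priv,
//                                reduce_fn, &crit_name)) {
//   case 1:
//     sum += sum_priv; // combine under the critical/tree path
//     __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
//     break;
//   case 2:
//     __atomic_fetch_add(&sum, sum_priv, __ATOMIC_RELAXED); // atomic combine
//     break;
//   default:
//     break; // nothing to do
//   }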
3626
3635void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3636 kmp_critical_name *lck) {
3637
3638 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3639
3640 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3641 __kmp_assert_valid_gtid(global_tid);
3642
3643 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3644
3645 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3646
3647 if (packed_reduction_method == critical_reduce_block) {
3648
3649 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3650 OMPT_REDUCTION_END;
3651
3652 } else if (packed_reduction_method == empty_reduce_block) {
3653
3654 // usage: if team size == 1, no synchronization is required ( on Intel
3655 // platforms only )
3656
3657 OMPT_REDUCTION_END;
3658
3659 } else if (packed_reduction_method == atomic_reduce_block) {
3660
3661 // neither primary thread nor other workers should get here
3662 // (code gen does not generate this call in case 2: atomic reduce block)
3663 // actually it would be better to remove this else-if entirely;
3664 // after removal this value would be checked by the 'else' branch and assert
3665
3666 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3667 tree_reduce_block)) {
3668
3669 // only primary thread gets here
3670 // OMPT: tree reduction is annotated in the barrier code
3671
3672 } else {
3673
3674 // should never reach this block
3675 KMP_ASSERT(0); // "unexpected method"
3676 }
3677
3678 if (__kmp_env_consistency_check)
3679 __kmp_pop_sync(global_tid, ct_reduce, loc);
3680
3681 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3682 global_tid, packed_reduction_method));
3683
3684 return;
3685}
3686
3687/* 2.a.ii. Reduce Block with a terminating barrier */
3688
3704kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3705 size_t reduce_size, void *reduce_data,
3706 void (*reduce_func)(void *lhs_data, void *rhs_data),
3707 kmp_critical_name *lck) {
3708 KMP_COUNT_BLOCK(REDUCE_wait);
3709 int retval = 0;
3710 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3711 kmp_info_t *th;
3712 kmp_team_t *team;
3713 int teams_swapped = 0, task_state;
3714
3715 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3716 __kmp_assert_valid_gtid(global_tid);
3717
3718 // why do we need this initialization here at all?
3719 // Reduction clause cannot be used as a stand-alone directive.
3720
3721 // do not call __kmp_serial_initialize(), it will be called by
3722 // __kmp_parallel_initialize() if needed
3723 // possible detection of false-positive race by the threadchecker ???
3724 if (!TCR_4(__kmp_init_parallel))
3725 __kmp_parallel_initialize();
3726
3727 __kmp_resume_if_soft_paused();
3728
3729// check correctness of reduce block nesting
3730#if KMP_USE_DYNAMIC_LOCK
3731 if (__kmp_env_consistency_check)
3732 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3733#else
3734 if (__kmp_env_consistency_check)
3735 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3736#endif
3737
3738 th = __kmp_thread_from_gtid(global_tid);
3739 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3740
3741 packed_reduction_method = __kmp_determine_reduction_method(
3742 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3743 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3744
3745 OMPT_REDUCTION_DECL(th, global_tid);
3746
3747 if (packed_reduction_method == critical_reduce_block) {
3748
3749 OMPT_REDUCTION_BEGIN;
3750 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3751 retval = 1;
3752
3753 } else if (packed_reduction_method == empty_reduce_block) {
3754
3755 OMPT_REDUCTION_BEGIN;
3756 // usage: if team size == 1, no synchronization is required ( Intel
3757 // platforms only )
3758 retval = 1;
3759
3760 } else if (packed_reduction_method == atomic_reduce_block) {
3761
3762 retval = 2;
3763
3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3765 tree_reduce_block)) {
3766
3767// case tree_reduce_block:
3768// this barrier should be visible to a customer and to the threading profile
3769// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3770#if OMPT_SUPPORT
3771 ompt_frame_t *ompt_frame;
3772 if (ompt_enabled.enabled) {
3773 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3774 if (ompt_frame->enter_frame.ptr == NULL)
3775 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3776 }
3777 OMPT_STORE_RETURN_ADDRESS(global_tid);
3778#endif
3779#if USE_ITT_NOTIFY
3780 __kmp_threads[global_tid]->th.th_ident =
3781 loc; // needed for correct notification of frames
3782#endif
3783 retval =
3784 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3785 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3786 retval = (retval != 0) ? (0) : (1);
3787#if OMPT_SUPPORT && OMPT_OPTIONAL
3788 if (ompt_enabled.enabled) {
3789 ompt_frame->enter_frame = ompt_data_none;
3790 }
3791#endif
3792
3793 // all workers except the primary thread should do this pop here
3794 // (no worker other than the primary will enter __kmpc_end_reduce())
3795 if (__kmp_env_consistency_check) {
3796 if (retval == 0) { // 0: all other workers; 1: primary thread
3797 __kmp_pop_sync(global_tid, ct_reduce, loc);
3798 }
3799 }
3800
3801 } else {
3802
3803 // should never reach this block
3804 KMP_ASSERT(0); // "unexpected method"
3805 }
3806 if (teams_swapped) {
3807 __kmp_restore_swapped_teams(th, team, task_state);
3808 }
3809
3810 KA_TRACE(10,
3811 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3812 global_tid, packed_reduction_method, retval));
3813 return retval;
3814}
3815
3826void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3827 kmp_critical_name *lck) {
3828
3829 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3830 kmp_info_t *th;
3831 kmp_team_t *team;
3832 int teams_swapped = 0, task_state;
3833
3834 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3835 __kmp_assert_valid_gtid(global_tid);
3836
3837 th = __kmp_thread_from_gtid(global_tid);
3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3839
3840 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3841
3842 // this barrier should be visible to the user and to the threading profile
3843 // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3844 OMPT_REDUCTION_DECL(th, global_tid);
3845
3846 if (packed_reduction_method == critical_reduce_block) {
3847 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3848
3849 OMPT_REDUCTION_END;
3850
3851// TODO: implicit barrier: should be exposed
3852#if OMPT_SUPPORT
3853 ompt_frame_t *ompt_frame;
3854 if (ompt_enabled.enabled) {
3855 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3856 if (ompt_frame->enter_frame.ptr == NULL)
3857 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3858 }
3859 OMPT_STORE_RETURN_ADDRESS(global_tid);
3860#endif
3861#if USE_ITT_NOTIFY
3862 __kmp_threads[global_tid]->th.th_ident = loc;
3863#endif
3864 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3865#if OMPT_SUPPORT && OMPT_OPTIONAL
3866 if (ompt_enabled.enabled) {
3867 ompt_frame->enter_frame = ompt_data_none;
3868 }
3869#endif
3870
3871 } else if (packed_reduction_method == empty_reduce_block) {
3872
3873 OMPT_REDUCTION_END;
3874
3875// usage: if team size == 1, no synchronization is required (Intel platforms only)
3876
3877// TODO: implicit barrier: should be exposed
3878#if OMPT_SUPPORT
3879 ompt_frame_t *ompt_frame;
3880 if (ompt_enabled.enabled) {
3881 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3882 if (ompt_frame->enter_frame.ptr == NULL)
3883 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3884 }
3885 OMPT_STORE_RETURN_ADDRESS(global_tid);
3886#endif
3887#if USE_ITT_NOTIFY
3888 __kmp_threads[global_tid]->th.th_ident = loc;
3889#endif
3890 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3891#if OMPT_SUPPORT && OMPT_OPTIONAL
3892 if (ompt_enabled.enabled) {
3893 ompt_frame->enter_frame = ompt_data_none;
3894 }
3895#endif
3896
3897 } else if (packed_reduction_method == atomic_reduce_block) {
3898
3899#if OMPT_SUPPORT
3900 ompt_frame_t *ompt_frame;
3901 if (ompt_enabled.enabled) {
3902 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3903 if (ompt_frame->enter_frame.ptr == NULL)
3904 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3905 }
3906 OMPT_STORE_RETURN_ADDRESS(global_tid);
3907#endif
3908// TODO: implicit barrier: should be exposed
3909#if USE_ITT_NOTIFY
3910 __kmp_threads[global_tid]->th.th_ident = loc;
3911#endif
3912 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3913#if OMPT_SUPPORT && OMPT_OPTIONAL
3914 if (ompt_enabled.enabled) {
3915 ompt_frame->enter_frame = ompt_data_none;
3916 }
3917#endif
3918
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920 tree_reduce_block)) {
3921
3922 // only primary thread executes here (primary releases all other workers)
3923 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3924 global_tid);
3925
3926 } else {
3927
3928 // should never reach this block
3929 KMP_ASSERT(0); // "unexpected method"
3930 }
3931 if (teams_swapped) {
3932 __kmp_restore_swapped_teams(th, team, task_state);
3933 }
3934
3935 if (__kmp_env_consistency_check)
3936 __kmp_pop_sync(global_tid, ct_reduce, loc);
3937
3938 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3939 global_tid, packed_reduction_method));
3940
3941 return;
3942}
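// Illustrative sketch (not part of the runtime): the calling pattern a
// compiler might emit around __kmpc_reduce()/__kmpc_end_reduce() for a
// blocking '+' reduction, based on the return codes above (1: this thread
// combines the data and calls __kmpc_end_reduce(); 2: atomic path; 0: nothing
// more to do).  All names except the __kmpc_* entry points are hypothetical.
static void example_reduce_codegen(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *shared_sum, kmp_int32 local_sum,
                                   void (*combiner)(void *, void *),
                                   kmp_critical_name *crit) {
  kmp_int32 rdata = local_sum; // private copy handed to the runtime
  switch (__kmpc_reduce(loc, gtid, 1, sizeof(rdata), &rdata, combiner, crit)) {
  case 1: // combine the (possibly tree/critical pre-reduced) data, then end
    *shared_sum += rdata;
    __kmpc_end_reduce(loc, gtid, crit);
    break;
  case 2: // atomic path: update the shared variable with an atomic RMW
    __sync_fetch_and_add(shared_sum, local_sum);
    __kmpc_end_reduce(loc, gtid, crit); // still called: supplies the barrier
    break;
  default: // 0: tree-reduction worker; released by the primary thread
    break;
  }
}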
3943
3944#undef __KMP_GET_REDUCTION_METHOD
3945#undef __KMP_SET_REDUCTION_METHOD
3946
3947/* end of interface to fast scalable reduce routines */
3948
3949kmp_uint64 __kmpc_get_taskid() {
3950
3951 kmp_int32 gtid;
3952 kmp_info_t *thread;
3953
3954 gtid = __kmp_get_gtid();
3955 if (gtid < 0) {
3956 return 0;
3957 }
3958 thread = __kmp_thread_from_gtid(gtid);
3959 return thread->th.th_current_task->td_task_id;
3960
3961} // __kmpc_get_taskid
3962
3963kmp_uint64 __kmpc_get_parent_taskid() {
3964
3965 kmp_int32 gtid;
3966 kmp_info_t *thread;
3967 kmp_taskdata_t *parent_task;
3968
3969 gtid = __kmp_get_gtid();
3970 if (gtid < 0) {
3971 return 0;
3972 }
3973 thread = __kmp_thread_from_gtid(gtid);
3974 parent_task = thread->th.th_current_task->td_parent;
3975 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3976
3977} // __kmpc_get_parent_taskid
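// Illustrative sketch (not part of the runtime): tool or tracing code can use
// the two helpers above to identify the current task.  Both return 0 when the
// calling thread is unknown to the runtime (gtid < 0), and
// __kmpc_get_parent_taskid() also returns 0 when the current task has no
// parent.
static void example_query_task_ids(kmp_uint64 *task_id, kmp_uint64 *parent_id) {
  *task_id = __kmpc_get_taskid();
  *parent_id = __kmpc_get_parent_taskid();
}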
3978
3990void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3991 const struct kmp_dim *dims) {
3992 __kmp_assert_valid_gtid(gtid);
3993 int j, idx;
3994 kmp_int64 last, trace_count;
3995 kmp_info_t *th = __kmp_threads[gtid];
3996 kmp_team_t *team = th->th.th_team;
3997 kmp_uint32 *flags;
3998 kmp_disp_t *pr_buf = th->th.th_dispatch;
3999 dispatch_shared_info_t *sh_buf;
4000
4001 KA_TRACE(
4002 20,
4003 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4004 gtid, num_dims, !team->t.t_serialized));
4005 KMP_DEBUG_ASSERT(dims != NULL);
4006 KMP_DEBUG_ASSERT(num_dims > 0);
4007
4008 if (team->t.t_serialized) {
4009 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4010 return; // no dependencies if team is serialized
4011 }
4012 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4013 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4014 // the next loop
4015 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4016
4017 // Save bounds info into allocated private buffer
4018 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4019 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4020 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4021 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4022 pr_buf->th_doacross_info[0] =
4023 (kmp_int64)num_dims; // first element is number of dimensions
4024 // Save also address of num_done in order to access it later without knowing
4025 // the buffer index
4026 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4027 pr_buf->th_doacross_info[2] = dims[0].lo;
4028 pr_buf->th_doacross_info[3] = dims[0].up;
4029 pr_buf->th_doacross_info[4] = dims[0].st;
4030 last = 5;
4031 for (j = 1; j < num_dims; ++j) {
4032 kmp_int64
4033 range_length; // holds the range of each dimension other than the first (dims[0])
4034 if (dims[j].st == 1) { // most common case
4035 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4036 range_length = dims[j].up - dims[j].lo + 1;
4037 } else {
4038 if (dims[j].st > 0) {
4039 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4040 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4041 } else { // negative increment
4042 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4043 range_length =
4044 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4045 }
4046 }
4047 pr_buf->th_doacross_info[last++] = range_length;
4048 pr_buf->th_doacross_info[last++] = dims[j].lo;
4049 pr_buf->th_doacross_info[last++] = dims[j].up;
4050 pr_buf->th_doacross_info[last++] = dims[j].st;
4051 }
4052
4053 // Compute total trip count.
4054 // Start with range of dims[0] which we don't need to keep in the buffer.
4055 if (dims[0].st == 1) { // most common case
4056 trace_count = dims[0].up - dims[0].lo + 1;
4057 } else if (dims[0].st > 0) {
4058 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4059 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4060 } else { // negative increment
4061 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4062 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4063 }
4064 for (j = 1; j < num_dims; ++j) {
4065 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4066 }
4067 KMP_DEBUG_ASSERT(trace_count > 0);
4068
4069 // Check that the shared buffer is not still occupied by a previous loop
4070 // (i.e. by buffer index idx - __kmp_dispatch_num_buffers)
4071 if (idx != sh_buf->doacross_buf_idx) {
4072 // Shared buffer is occupied, wait for it to be free
4073 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4074 __kmp_eq_4, NULL);
4075 }
4076#if KMP_32_BIT_ARCH
4077 // Check if we are the first thread. After the CAS the first thread gets 0,
4078 // others get 1 if initialization is in progress, allocated pointer otherwise.
4079 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4080 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4081 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4082#else
4083 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4084 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4085#endif
4086 if (flags == NULL) {
4087 // we are the first thread, allocate the array of flags
4088 size_t size =
4089 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4090 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4091 KMP_MB();
4092 sh_buf->doacross_flags = flags;
4093 } else if (flags == (kmp_uint32 *)1) {
4094#if KMP_32_BIT_ARCH
4095 // initialization is still in progress, need to wait
4096 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4097#else
4098 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4099#endif
4100 KMP_YIELD(TRUE);
4101 KMP_MB();
4102 } else {
4103 KMP_MB();
4104 }
4105 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4106 pr_buf->th_doacross_flags =
4107 sh_buf->doacross_flags; // save private copy in order to not
4108 // touch shared buffer on each iteration
4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4110}
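// Layout of the private bookkeeping buffer (th_doacross_info) built by
// __kmpc_doacross_init() above, derived from the stores in that function:
//   [0]                 number of dimensions (num_dims)
//   [1]                 address of sh_buf->doacross_num_done
//   [2], [3], [4]       lo, up, st of dimension 0
//   [4*j+1 .. 4*j+4]    range_length, lo, up, st of dimension j (j >= 1)
// The range of dimension 0 is not stored; it only contributes to the total
// trip count used to size the shared flag array (one bit per iteration,
// tested and set at 32-bit granularity by the wait/post routines below).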
4111
4112void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4113 __kmp_assert_valid_gtid(gtid);
4114 kmp_int64 shft;
4115 size_t num_dims, i;
4116 kmp_uint32 flag;
4117 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4118 kmp_info_t *th = __kmp_threads[gtid];
4119 kmp_team_t *team = th->th.th_team;
4120 kmp_disp_t *pr_buf;
4121 kmp_int64 lo, up, st;
4122
4123 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4124 if (team->t.t_serialized) {
4125 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4126 return; // no dependencies if team is serialized
4127 }
4128
4129 // calculate sequential iteration number and check out-of-bounds condition
4130 pr_buf = th->th.th_dispatch;
4131 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4132 num_dims = (size_t)pr_buf->th_doacross_info[0];
4133 lo = pr_buf->th_doacross_info[2];
4134 up = pr_buf->th_doacross_info[3];
4135 st = pr_buf->th_doacross_info[4];
4136#if OMPT_SUPPORT && OMPT_OPTIONAL
4137 ompt_dependence_t deps[num_dims];
4138#endif
4139 if (st == 1) { // most common case
4140 if (vec[0] < lo || vec[0] > up) {
4141 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4142 "bounds [%lld,%lld]\n",
4143 gtid, vec[0], lo, up));
4144 return;
4145 }
4146 iter_number = vec[0] - lo;
4147 } else if (st > 0) {
4148 if (vec[0] < lo || vec[0] > up) {
4149 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4150 "bounds [%lld,%lld]\n",
4151 gtid, vec[0], lo, up));
4152 return;
4153 }
4154 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4155 } else { // negative increment
4156 if (vec[0] > lo || vec[0] < up) {
4157 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4158 "bounds [%lld,%lld]\n",
4159 gtid, vec[0], lo, up));
4160 return;
4161 }
4162 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4163 }
4164#if OMPT_SUPPORT && OMPT_OPTIONAL
4165 deps[0].variable.value = iter_number;
4166 deps[0].dependence_type = ompt_dependence_type_sink;
4167#endif
4168 for (i = 1; i < num_dims; ++i) {
4169 kmp_int64 iter, ln;
4170 size_t j = i * 4;
4171 ln = pr_buf->th_doacross_info[j + 1];
4172 lo = pr_buf->th_doacross_info[j + 2];
4173 up = pr_buf->th_doacross_info[j + 3];
4174 st = pr_buf->th_doacross_info[j + 4];
4175 if (st == 1) {
4176 if (vec[i] < lo || vec[i] > up) {
4177 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4178 "bounds [%lld,%lld]\n",
4179 gtid, vec[i], lo, up));
4180 return;
4181 }
4182 iter = vec[i] - lo;
4183 } else if (st > 0) {
4184 if (vec[i] < lo || vec[i] > up) {
4185 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4186 "bounds [%lld,%lld]\n",
4187 gtid, vec[i], lo, up));
4188 return;
4189 }
4190 iter = (kmp_uint64)(vec[i] - lo) / st;
4191 } else { // st < 0
4192 if (vec[i] > lo || vec[i] < up) {
4193 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4194 "bounds [%lld,%lld]\n",
4195 gtid, vec[i], lo, up));
4196 return;
4197 }
4198 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4199 }
4200 iter_number = iter + ln * iter_number;
4201#if OMPT_SUPPORT && OMPT_OPTIONAL
4202 deps[i].variable.value = iter;
4203 deps[i].dependence_type = ompt_dependence_type_sink;
4204#endif
4205 }
4206 shft = iter_number % 32; // use 32-bit granularity
4207 iter_number >>= 5; // divided by 32
4208 flag = 1 << shft;
4209 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4210 KMP_YIELD(TRUE);
4211 }
4212 KMP_MB();
4213#if OMPT_SUPPORT && OMPT_OPTIONAL
4214 if (ompt_enabled.ompt_callback_dependences) {
4215 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4216 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4217 }
4218#endif
4219 KA_TRACE(20,
4220 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4221 gtid, (iter_number << 5) + shft));
4222}
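// Worked example of the linearization and flag check above (hypothetical 2-D
// doacross nest): with dimension 0 spanning 0..99 (st 1) and dimension 1
// spanning 0..9 (st 1), the sink iteration (3, 7) maps to
//   iter_number = 7 + 10 * 3 = 37,
// so the wait spins on bit 37 % 32 = 5 of 32-bit word 37 >> 5 = 1 in the
// shared flag array until the matching __kmpc_doacross_post() sets it.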
4223
4224void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4225 __kmp_assert_valid_gtid(gtid);
4226 kmp_int64 shft;
4227 size_t num_dims, i;
4228 kmp_uint32 flag;
4229 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4230 kmp_info_t *th = __kmp_threads[gtid];
4231 kmp_team_t *team = th->th.th_team;
4232 kmp_disp_t *pr_buf;
4233 kmp_int64 lo, st;
4234
4235 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4236 if (team->t.t_serialized) {
4237 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4238 return; // no dependencies if team is serialized
4239 }
4240
4241 // calculate sequential iteration number (same as in "wait" but no
4242 // out-of-bounds checks)
4243 pr_buf = th->th.th_dispatch;
4244 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4245 num_dims = (size_t)pr_buf->th_doacross_info[0];
4246 lo = pr_buf->th_doacross_info[2];
4247 st = pr_buf->th_doacross_info[4];
4248#if OMPT_SUPPORT && OMPT_OPTIONAL
4249 ompt_dependence_t deps[num_dims];
4250#endif
4251 if (st == 1) { // most common case
4252 iter_number = vec[0] - lo;
4253 } else if (st > 0) {
4254 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4255 } else { // negative increment
4256 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4257 }
4258#if OMPT_SUPPORT && OMPT_OPTIONAL
4259 deps[0].variable.value = iter_number;
4260 deps[0].dependence_type = ompt_dependence_type_source;
4261#endif
4262 for (i = 1; i < num_dims; ++i) {
4263 kmp_int64 iter, ln;
4264 size_t j = i * 4;
4265 ln = pr_buf->th_doacross_info[j + 1];
4266 lo = pr_buf->th_doacross_info[j + 2];
4267 st = pr_buf->th_doacross_info[j + 4];
4268 if (st == 1) {
4269 iter = vec[i] - lo;
4270 } else if (st > 0) {
4271 iter = (kmp_uint64)(vec[i] - lo) / st;
4272 } else { // st < 0
4273 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4274 }
4275 iter_number = iter + ln * iter_number;
4276#if OMPT_SUPPORT && OMPT_OPTIONAL
4277 deps[i].variable.value = iter;
4278 deps[i].dependence_type = ompt_dependence_type_source;
4279#endif
4280 }
4281#if OMPT_SUPPORT && OMPT_OPTIONAL
4282 if (ompt_enabled.ompt_callback_dependences) {
4283 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4284 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4285 }
4286#endif
4287 shft = iter_number % 32; // use 32-bit granularity
4288 iter_number >>= 5; // divided by 32
4289 flag = 1 << shft;
4290 KMP_MB();
4291 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4292 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4293 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4294 (iter_number << 5) + shft));
4295}
4296
4297void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4298 __kmp_assert_valid_gtid(gtid);
4299 kmp_int32 num_done;
4300 kmp_info_t *th = __kmp_threads[gtid];
4301 kmp_team_t *team = th->th.th_team;
4302 kmp_disp_t *pr_buf = th->th.th_dispatch;
4303
4304 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4305 if (team->t.t_serialized) {
4306 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4307 return; // nothing to do
4308 }
4309 num_done =
4310 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4311 if (num_done == th->th.th_team_nproc) {
4312 // we are the last thread, need to free shared resources
4313 int idx = pr_buf->th_doacross_buf_idx - 1;
4314 dispatch_shared_info_t *sh_buf =
4315 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4316 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4317 (kmp_int64)&sh_buf->doacross_num_done);
4318 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4319 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4320 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4321 sh_buf->doacross_flags = NULL;
4322 sh_buf->doacross_num_done = 0;
4323 sh_buf->doacross_buf_idx +=
4324 __kmp_dispatch_num_buffers; // free buffer for future re-use
4325 }
4326 // free private resources (need to keep buffer index forever)
4327 pr_buf->th_doacross_flags = NULL;
4328 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4329 pr_buf->th_doacross_info = NULL;
4330 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4331}
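// Illustrative sketch (not part of the runtime): the call sequence a compiler
// might emit for a one-dimensional doacross loop such as
//   #pragma omp for ordered(1)
//   for (i = 0; i < n; i++) {
//   #pragma omp ordered depend(sink : i - 1)
//     ...                       // consume the result of iteration i-1
//   #pragma omp ordered depend(source)
//   }
// Everything except the __kmpc_doacross_* entry points is hypothetical, and
// the worksharing-loop bookkeeping (per-thread bounds, chunking) is omitted.
static void example_doacross_codegen(ident_t *loc, int gtid, kmp_int64 n) {
  struct kmp_dim dim;
  dim.lo = 0;     // lower bound of the loop
  dim.up = n - 1; // upper bound of the loop
  dim.st = 1;     // stride
  __kmpc_doacross_init(loc, gtid, 1, &dim);
  for (kmp_int64 i = 0; i < n; i++) { // per-thread bounds omitted
    kmp_int64 sink = i - 1;
    __kmpc_doacross_wait(loc, gtid, &sink); // depend(sink : i - 1)
    /* ... loop body ... */
    __kmpc_doacross_post(loc, gtid, &i); // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
}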
4332
4333/* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
4334void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4335 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4336}
4337
4338void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4339 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
4340}
4341
4342void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4343 omp_allocator_handle_t free_allocator) {
4344 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4345 free_allocator);
4346}
4347
4348void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4349 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4350}
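// Minimal usage sketch (not part of the runtime) for the C/C++ allocator entry
// points above; the omp_alloc family and the omp_default_mem_alloc handle are
// the standard OpenMP 5.x memory-management API declared in omp.h.
static void example_omp_alloc_usage(void) {
  double *a = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  double *b = (double *)omp_calloc(1024, sizeof(double), omp_default_mem_alloc);
  a = (double *)omp_realloc(a, 2048 * sizeof(double), omp_default_mem_alloc,
                            omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc);
  omp_free(a, omp_default_mem_alloc);
}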
4351
4352int __kmpc_get_target_offload(void) {
4353 if (!__kmp_init_serial) {
4354 __kmp_serial_initialize();
4355 }
4356 return __kmp_target_offload;
4357}
4358
4359int __kmpc_pause_resource(kmp_pause_status_t level) {
4360 if (!__kmp_init_serial) {
4361 return 1; // Can't pause if runtime is not initialized
4362 }
4363 return __kmp_pause_resource(level);
4364}
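// User-level sketch (not part of the runtime): the standard OpenMP 5.0 pause
// API declared in omp.h, which is expected to reach the same
// __kmp_pause_resource() machinery used by this internal entry point.
static void example_pause_usage(void) {
  // Ask the runtime to release its resources (e.g. idle threads) between
  // distinct OpenMP phases of an application; 0 means the request succeeded.
  if (omp_pause_resource_all(omp_pause_soft) != 0) {
    /* pause request was not honored by the runtime */
  }
}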
4365
4366void __kmpc_error(ident_t *loc, int severity, const char *message) {
4367 if (!__kmp_init_serial)
4368 __kmp_serial_initialize();
4369
4370 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4371
4372#if OMPT_SUPPORT
4373 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4374 ompt_callbacks.ompt_callback(ompt_callback_error)(
4375 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4376 OMPT_GET_RETURN_ADDRESS(0));
4377 }
4378#endif // OMPT_SUPPORT
4379
4380 char *src_loc;
4381 if (loc && loc->psource) {
4382 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4383 src_loc =
4384 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4385 __kmp_str_loc_free(&str_loc);
4386 } else {
4387 src_loc = __kmp_str_format("unknown");
4388 }
4389
4390 if (severity == severity_warning)
4391 KMP_WARNING(UserDirectedWarning, src_loc, message);
4392 else
4393 KMP_FATAL(UserDirectedError, src_loc, message);
4394
4395 __kmp_str_free(&src_loc);
4396}
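// Illustrative sketch (not part of the runtime): the OpenMP 5.1 'error'
// directive that a compiler would typically lower to __kmpc_error() when
// at(execution) is requested; the exact lowering is compiler-specific.
// severity(warning) corresponds to severity_warning above, severity(fatal)
// to severity_fatal, and the message clause supplies the message text.
static void example_error_directive(void) {
#pragma omp error at(execution) severity(warning) message("check input sizes")
}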
4397
4398#ifdef KMP_USE_VERSION_SYMBOLS
4399// For GOMP compatibility there are two versions of each omp_* API.
4400// One is the plain C symbol and one is the Fortran symbol with an appended
4401// underscore. When we implement a specific ompc_* version of an omp_*
4402// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4403// instead of the Fortran versions in kmp_ftn_entry.h
4404extern "C" {
4405// Have to undef these from omp.h so they aren't translated into
4406// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4407#ifdef omp_set_affinity_format
4408#undef omp_set_affinity_format
4409#endif
4410#ifdef omp_get_affinity_format
4411#undef omp_get_affinity_format
4412#endif
4413#ifdef omp_display_affinity
4414#undef omp_display_affinity
4415#endif
4416#ifdef omp_capture_affinity
4417#undef omp_capture_affinity
4418#endif
4419KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4420 "OMP_5.0");
4421KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4422 "OMP_5.0");
4423KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4424 "OMP_5.0");
4425KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4426 "OMP_5.0");
4427} // extern "C"
4428#endif