00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #define __STDC_LIMIT_MACROS
00037 #include <stdint.h>
00038 #ifdef USE_PCH
00039 #include "lno_pch.h"
00040 #endif // USE_PCH
00041 #pragma hdrstop
00042
00043 #include <math.h>
00044 #include <float.h>
00045 #include <sys/types.h>
00046 #include <limits.h>
00047 #include "pu_info.h"
00048 #include "lnoutils.h"
00049 #include "lnopt_main.h"
00050 #include "stab.h"
00051 #include "targ_const.h"
00052 #include "wn_simp.h"
00053 #include "stdlib.h"
00054 #include "lwn_util.h"
00055 #include "strtab.h"
00056 #include "config.h"
00057 #include "optimizer.h"
00058 #include "opt_du.h"
00059 #include "name.h"
00060 #include "wintrinsic.h"
00061 #include "lno_bv.h"
00062 #include "dep_graph.h"
00063 #include "debug.h"
00064 #include "scalar_expand.h"
00065 #include "cxx_memory.h"
00066 #include "reduc.h"
00067 #include "snl_utils.h"
00068 #include "sxlist.h"
00069 #include "snl_dist.h"
00070 #include "permute.h"
00071 #include "sxlimit.h"
00072 #include "parallel.h"
00073 #include "fiz_fuse.h"
00074 #include "ara.h"
00075 #include "snl_deps.h"
00076 #include "lego_util.h"
00077 #include "tile.h"
00078 #include "model.h"
00079 #include "cache_model.h"
00080 #include "config_cache.h"
00081 #include "parmodel.h"
00082 #include "sdlist.h"
00083 #include "doacross.h"
00084 #include "prompf.h"
00085 #include "anl_driver.h"
00086 #include "parids.h"
00087 #include "cond.h"
00088 #include "move.h"
00089 #include "tlog.h"
00090 #include "call_info.h"
00091 #include "cross_snl.h"
00092
00093 #define MAX_PARALLEL_NLOOPS 5
00094
00095 static double Doacross_Cost(WN* wn_outer, INT permutation[], INT nloops,
00096 INT parallel_depth, SNL_DEP_MATRIX** sdm_inv, BOOL sdm_scl[],
00097 SX_INFO* sx_info, SD_INFO* sd_info, INT sd_split_depth,
00098 double machine_cycles, double *cache_cycles_per_iter, double work_estimate,
00099 INT* doacross_tile_size_p, INT sync_distances[],
00100 INT* doacross_overhead);
00101 static double Parallel_Cost(WN* wn_outer, INT permutation[], INT nloops,
00102 INT parallel_depth, INT sd_split_depth, INT split_depth, SX_PLIST* plist,
00103 double machine_cycles, double *cache_cycles_per_iter);
00104 static INT Parallelizable(WN* wn_outer, INT permutation[], INT nloops,
00105 INT parallel_depth, SNL_DEP_MATRIX** sdm_inv, BOOL sdm_scl[],
00106 SX_INFO* sx_info, SD_INFO* sd_info, INT sd_split_depth);
00107
00108 #ifdef KEY
00109 INT Last_Apo_Loop_Id = 0;
00110 #endif
00111
00112
00113
00114
00115
00116
00117
00118 extern BOOL Cannot_Concurrentize(WN* wn_loop)
00119 {
00120 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00121 return dli->Pragma_Cannot_Concurrentize
00122 || dli->Serial_Version_of_Concurrent_Loop
00123 || dli->Inside_Critical_Section
00124 || dli->Has_Threadprivate;
00125 }
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139 PARALLEL_INFO::PARALLEL_INFO(INT nloops)
00140 {
00141 _cost = (double) DBL_MAX;
00142 _parallel_depth = -1;
00143 _nloops = nloops;
00144 INT i;
00145 for (i = 0; i < nloops; i++)
00146 _permutation[i] = i;
00147 _int_type = INT_NONE;
00148 _split_depth = -1;
00149 _sd_split_depth = -1;
00150
00151 _is_doacross = FALSE;
00152 _doacross_overhead = 0;
00153 _doacross_tile_size = 0;
00154 for (i = 0; i < 2; i++)
00155 _sync_distances[i] = 0;
00156 }
00157
00158 static WN* Parallel_Loop(PARALLEL_INFO* parallel_info,
00159 DOLOOP_STACK* loop_stack);
00160
00161
00162
00163
00164
00165
00166 static void ap_tlog_info(
00167 PARALLEL_INFO* parallel_info,
00168 DOLOOP_STACK* loop_stack,
00169 const char* message)
00170 {
00171 char out_string[80];
00172 WN* wn_parallel = Parallel_Loop(parallel_info, loop_stack);
00173 if (wn_parallel == NULL)
00174 return;
00175 INT nloops = parallel_info->Nloops();
00176 INT inner_depth = loop_stack->Elements() - 1;
00177 INT outer_depth = inner_depth - nloops + 1;
00178 INT required_length = strlen(WB_Whirl_Symbol(wn_parallel)) + 13;
00179 char* in_string = CXX_NEW_ARRAY(char, required_length, &LNO_local_pool);
00180 sprintf(in_string, "%s %d ",
00181 WB_Whirl_Symbol(wn_parallel), (INT) WN_linenum(wn_parallel));
00182 for (INT i = 0; i < parallel_info->Nloops(); i++) {
00183 sprintf(&out_string[5*i], "%2d%2s", parallel_info->Permutation(i),
00184 parallel_info->Parallel_Depth() == i + outer_depth ?
00185 (parallel_info->Is_Doacross() ? "-X" : "-P") : "");;
00186 if (i < parallel_info->Nloops() - 1)
00187 sprintf(&out_string[5*i+4], ",");
00188 }
00189 Generate_Tlog("LNO","auto_parallelization",
00190 Srcpos_To_Line(WN_Get_Linenum(wn_parallel)),
00191 (char *)WB_Whirl_Symbol(wn_parallel),
00192 in_string, out_string, message);
00193 }
00194
00195
00196
00197
00198
00199
00200
00201
00202 static void Mark_Parallelizable_Loop(WN* wn_outer,
00203 INT permutation[],
00204 INT nloops,
00205 INT parallel_depth,
00206 SNL_DEP_MATRIX** sdm_inv,
00207 BOOL sdm_scl[],
00208 SX_INFO* sx_info,
00209 SD_INFO* sd_info,
00210 INT sd_split_depth)
00211
00212 {
00213 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
00214 ? Parallel_Debug_Level : 0;
00215 INT split_depth = Parallelizable(wn_outer, permutation, nloops,
00216 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info, sd_split_depth);
00217 if (split_depth == Do_Loop_Depth(wn_outer) + nloops)
00218 return;
00219 INT outer_depth = Do_Loop_Depth(wn_outer);
00220 INT original_depth = outer_depth + permutation[parallel_depth - outer_depth];
00221 WN* wn_parallel = SNL_Get_Inner_Snl_Loop(wn_outer, original_depth
00222 - outer_depth + 1);
00223 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_parallel);
00224 if (!dli->Parallelizable && parallel_debug_level >= 1) {
00225 fprintf(stdout, "Loop %s at %d is parallelizable\n",
00226 WB_Whirl_Symbol(wn_parallel), (INT) WN_linenum(wn_parallel));
00227 fprintf(TFile, "Loop %s at %d is parallelizable\n",
00228 WB_Whirl_Symbol(wn_parallel), (INT) WN_linenum(wn_parallel));
00229 }
00230 dli->Parallelizable = TRUE;
00231 }
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249 PARALLEL_INFO::PARALLEL_INFO(WN* wn_outer,
00250 INT permutation[],
00251 INT nloops,
00252 INT parallel_depth,
00253 INTERCHANGE_TYPE int_type,
00254 SNL_DEP_MATRIX** sdm_inv,
00255 BOOL sdm_scl[],
00256 SX_INFO* sx_info,
00257 SD_INFO* sd_info,
00258 INT sd_split_depth,
00259 double machine_cycles,
00260 double work_estimate)
00261 {
00262 _wn_outer = wn_outer;
00263 _nloops = nloops;
00264 INT i;
00265 for (i = 0; i < nloops; i++)
00266 _permutation[i] = permutation[i];
00267 _int_type = int_type;
00268 _is_doacross = FALSE;
00269 _doacross_overhead = 0;
00270 for (i = 0; i < 2; i++)
00271 _sync_distances[i] = NULL_DIST;
00272 _sd_split_depth = sd_split_depth;
00273 _split_depth = Parallelizable(wn_outer, permutation, nloops,
00274 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info, _sd_split_depth);
00275 BOOL is_doall = (_split_depth != Do_Loop_Depth(wn_outer) + nloops);
00276 double doall_cost=DBL_MAX;
00277 double doacross_cost=DBL_MAX;
00278 double cache_cycles_per_iter;
00279
00280
00281
00282
00283
00284
00285
00286 switch (LNO_Run_Doacross) {
00287 case 0: if (is_doall) {
00288 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00289 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00290 machine_cycles, &cache_cycles_per_iter);
00291 }
00292 break;
00293 case 1: if (is_doall) {
00294 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00295 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00296 machine_cycles, &cache_cycles_per_iter);
00297 } else if (LNO_Pseudo_Lower) {
00298 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00299 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00300 sd_split_depth, machine_cycles, &cache_cycles_per_iter,
00301 work_estimate, &_doacross_tile_size, _sync_distances,
00302 &_doacross_overhead);
00303 }
00304 break;
00305 case 2: if (is_doall)
00306 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00307 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00308 machine_cycles, &cache_cycles_per_iter);
00309 if (LNO_Pseudo_Lower)
00310 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00311 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00312 sd_split_depth, machine_cycles, &cache_cycles_per_iter,
00313 work_estimate, &_doacross_tile_size, _sync_distances,
00314 &_doacross_overhead);
00315 break;
00316 case 3: if (LNO_Pseudo_Lower)
00317 {
00318 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00319 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00320 sd_split_depth, machine_cycles, &cache_cycles_per_iter,
00321 work_estimate, &_doacross_tile_size, _sync_distances,
00322 &_doacross_overhead);
00323 BOOL is_doacross= (doacross_cost != DBL_MAX);
00324 if (!is_doacross && is_doall) {
00325 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00326 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00327 machine_cycles, &cache_cycles_per_iter);
00328 }
00329 }
00330 break;
00331 case 4: if (LNO_Pseudo_Lower)
00332 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00333 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00334 sd_split_depth, machine_cycles, &cache_cycles_per_iter,
00335 work_estimate, &_doacross_tile_size, _sync_distances,
00336 &_doacross_overhead);
00337 break;
00338 default: FmtAssert(0,("Invalid -LNO:doacross value"));
00339 }
00340
00341 if (doall_cost==DBL_MAX && doacross_cost==DBL_MAX) {
00342
00343 _parallel_depth = -1;
00344 _work_estimate = 0;
00345 _cost = DBL_MAX;
00346 _is_doacross = FALSE;
00347 _doacross_tile_size = 0;
00348 _sync_distances[0] = 0;
00349 _sync_distances[1] = 0;
00350 _doacross_overhead = 0;
00351 } else if (doall_cost<doacross_cost) {
00352
00353 _parallel_depth = parallel_depth;
00354 _work_estimate = (int) Compute_Work_Estimate(work_estimate,
00355 cache_cycles_per_iter);
00356 _cost = doall_cost;
00357 _is_doacross = FALSE;
00358 _doacross_tile_size = 0;
00359 _sync_distances[0] = 0;
00360 _sync_distances[1] = 0;
00361 _doacross_overhead = 0;
00362 } else {
00363
00364 _parallel_depth = parallel_depth;
00365 _work_estimate = (int) Compute_Work_Estimate(work_estimate,
00366 cache_cycles_per_iter);
00367 _cost = doacross_cost;
00368 _is_doacross = TRUE;
00369 _split_depth = -1;
00370 }
00371 }
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384 double Compute_Work_Estimate(double machine, double cache)
00385 {
00386
00387
00388
00389 if (machine >= cache)
00390 return machine + cache / 2.0;
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409 const double ratio = cache / machine;
00410 return machine + cache / pow(2.0, 1.0 + log10(ratio) / log10(4.0));
00411 }
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428 static double Parallel_Cost(WN* wn_outer,
00429 INT permutation[],
00430 INT nloops,
00431 INT parallel_depth,
00432 INT sd_split_depth,
00433 INT split_depth,
00434 SX_PLIST* plist,
00435 double machine_cycles,
00436 double *cache_cycles_per_iter)
00437 {
00438 *cache_cycles_per_iter = 0.0;
00439 if (parallel_depth == -1)
00440 return (double) DBL_MAX;
00441 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
00442 ? Parallel_Debug_Level : 0;
00443 PAR_STAT::id_count = 0;
00444 PAR_STAT* ps = CXX_NEW(PAR_STAT(wn_outer, nloops, &LNO_local_pool),
00445 &LNO_local_pool);
00446 #ifdef Is_True_On
00447 ps->Sanity_Check(stdout);
00448 #endif
00449 if (parallel_debug_level >= 3) {
00450 fprintf(stdout, "Before:\n");
00451 ps->Print(stdout);
00452 }
00453 ps = ps->Parallel_Interchange(wn_outer, permutation, nloops,
00454 parallel_depth, sd_split_depth, split_depth);
00455 #ifdef Is_True_On
00456 ps->Sanity_Check(stdout);
00457 #endif
00458 double cost = ps->Cycle_Count(wn_outer, permutation, nloops,
00459 parallel_depth, plist, split_depth, machine_cycles,
00460 cache_cycles_per_iter);
00461 if (parallel_debug_level >= 3) {
00462 ps->Sanity_Check(stdout);
00463 fprintf(stdout, "After:\n");
00464 ps->Print(stdout);
00465 }
00466 return cost;
00467 }
00468
00469
00470
00471
00472
00473
00474
00475 static void SNL_Finalize_Index_Variables(WN* wn_outer,
00476 INT nloops)
00477 {
00478 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00479 DOLOOP_STACK stack(&LNO_local_pool);
00480 Build_Doloop_Stack(wn_inner, &stack);
00481 INT outer_depth = Do_Loop_Depth(wn_outer);
00482 INT inner_depth = Do_Loop_Depth(wn_inner);
00483 for (INT i = outer_depth; i <= inner_depth; i++) {
00484 WN* wn_loop = stack.Bottom_nth(i);
00485 if (Index_Variable_Live_At_Exit(wn_loop))
00486 Finalize_Index_Variable(wn_loop, TRUE);
00487 }
00488 }
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499 extern WN* Minimal_Kernel(WN* wn_outer,
00500 INT nloops)
00501 {
00502 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00503 DU_MANAGER* du = Du_Mgr;
00504 if (!SNL_Fix_Array_Deps_On_Index_Variable(wn_outer, nloops))
00505 return NULL;
00506 WN* wn_new_outer = NULL;
00507 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00508 for (WN* wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
00509 if (WN_opcode(wn) != OPC_DO_LOOP)
00510 continue;
00511 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
00512 if (dli->Pragma_Cannot_Concurrentize)
00513 break;
00514 if (dli->Serial_Version_of_Concurrent_Loop)
00515 break;
00516 if (dli->Inside_Critical_Section)
00517 break;
00518 if (dli->Has_Threadprivate)
00519 break;
00520 if (!Do_Loop_Is_Good(wn))
00521 break;
00522 if (!Upper_Bound_Standardize(WN_end(wn), TRUE))
00523 break;
00524 if (Index_Variable_Live_At_Entry(wn))
00525 break;
00526 if (Index_Variable_Live_At_Exit(wn) && dli->Has_Exits)
00527 break;
00528 if (Need_Fix_Array_Deps_On_Index_Variable(wn))
00529 break;
00530 wn_new_outer = wn;
00531 if (wn == wn_outer)
00532 break;
00533 }
00534 if (wn_new_outer == NULL)
00535 return NULL;
00536 INT lost_loops = Do_Loop_Depth(wn_new_outer) - Do_Loop_Depth(wn_outer);
00537 INT new_nloops = nloops - lost_loops;
00538 SNL_Finalize_Index_Variables(wn_new_outer, new_nloops);
00539 SNL_Sink_Out_Sandwiched_Statements(wn_new_outer, new_nloops, TRUE, dg, du);
00540 return wn_new_outer;
00541 }
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551 static INT Safe_Depth(ACCESS_ARRAY* aa,
00552 INT depth)
00553 {
00554 INT safe_depth = 0;
00555 for (INT dim = 0; dim < aa->Num_Vec(); dim++) {
00556 ACCESS_VECTOR* av = aa->Dim(dim);
00557 if (av->Too_Messy || av->Contains_Non_Lin_Symb())
00558 return depth + 1;
00559 if (av->Non_Const_Loops() > safe_depth)
00560 safe_depth = av->Non_Const_Loops();
00561 }
00562 return safe_depth;
00563 }
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574 static WN* General_Kernel(WN* wn_outer,
00575 INT nloops)
00576 {
00577 WN* wn_new_outer = NULL;
00578 INT safe_depth = Do_Loop_Depth(wn_outer);
00579 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00580 WN* wn = 0;
00581 for (wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
00582 if (WN_opcode(wn) != OPC_DO_LOOP)
00583 continue;
00584 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
00585 INT safe_depth_lb = Safe_Depth(dli->LB, dli->Depth);
00586 if (safe_depth_lb > safe_depth)
00587 safe_depth = safe_depth_lb;
00588 INT safe_depth_ub = Safe_Depth(dli->UB, dli->Depth);
00589 if (safe_depth_ub > safe_depth)
00590 safe_depth = safe_depth_ub;
00591 if (wn == wn_outer)
00592 break;
00593 }
00594 for (wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
00595 if (WN_opcode(wn) != OPC_DO_LOOP)
00596 continue;
00597 if (!Do_Loop_Is_Good(wn) || Do_Loop_Has_Exits(wn))
00598 break;
00599 if (Do_Loop_Depth(wn) < safe_depth)
00600 break;
00601 if (Step_Size(wn) != 1)
00602 break;
00603 wn_new_outer = wn;
00604 }
00605 return wn_new_outer;
00606 }
00607
00608
00609
00610
00611
00612
00613
00614 static INT Choose(INT n,
00615 INT m)
00616 {
00617 FmtAssert(n >= 0 && m >= 0, ("Choose() takes a non-negative arguments"));
00618 return Factorial(n)/(Factorial(m)*Factorial(n-m));
00619 }
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629 static INT Choice(INT n,
00630 INT m,
00631 INT p)
00632 {
00633 INT mm = m;
00634 INT pp = p;
00635 INT result = 0;
00636 FmtAssert(n >= 0 && m >= 0, ("Choice() takes non-negative arguments"));
00637 FmtAssert(p >= 0 && p < Choose(n,m), ("Invalid Choice() index"));
00638 INT digit = 0;
00639 for (INT nn = n; nn >= 1; nn--) {
00640 INT cross = mm == 0 ? 0 : Choose(nn,mm) - Choose(nn-1,mm-1);
00641 digit = mm == 0 ? 0 : pp < cross ? 0 : 1;
00642 pp -= digit * cross;
00643 result = (result << 1) + digit;
00644 mm -= digit;
00645 }
00646 return result;
00647 }
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659 static void Permutation_Vector(INT parallel_index,
00660 INT parallel_loop,
00661 INT choice,
00662 INT nloops,
00663 INT permutation[])
00664 {
00665 permutation[parallel_index] = parallel_loop;
00666 INT bv_choice = Choice(nloops - 1, parallel_index, choice);
00667 INT perm_index = parallel_loop == 0 ? 1 : 0;
00668 INT before_index = 0;
00669 INT after_index = parallel_index + 1;
00670 for (INT i = nloops - 2; i >= 0; i--) {
00671 INT before = (bv_choice >> i) & 1;
00672 permutation[before ? before_index++ : after_index++] = perm_index++;
00673 if (perm_index == parallel_loop)
00674 perm_index++;
00675 }
00676 Is_True(Is_Permutation_Vector(permutation, nloops),
00677 ("Not a permutation vector"));
00678 }
00679
00680
00681
00682
00683
00684
00685
00686
00687 extern BOOL SNL_Legal_Perm_Deps(SNL_DEP_MATRIX* sdm_body,
00688 INT permutation[],
00689 INT nloops)
00690 {
00691 if (sdm_body == NULL)
00692 return FALSE;
00693
00694 FmtAssert(sdm_body->Nloops() == nloops,
00695 ("Permutation length and dep matrix length do not match"));
00696
00697 for (INT d = 0; d < sdm_body->Ndep(); d++) {
00698 for (INT i = 0; i < sdm_body->Nloops(); i++) {
00699 SNL_DEP dep = (*sdm_body)(d, permutation[i]);
00700 if (dep.Unbounded_Min() || dep.Min() < 0)
00701 return FALSE;
00702 else if (dep.Min() > 0)
00703 break;
00704 }
00705 }
00706 return TRUE;
00707 }
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 extern BOOL General_Permutation(WN* wn_outer,
00718 INT permutation[],
00719 INT nloops)
00720 {
00721 WN* wn_new_outer = General_Kernel(wn_outer, nloops);
00722 if (wn_new_outer == NULL)
00723 return FALSE;
00724 INT last = Do_Loop_Depth(wn_new_outer) - Do_Loop_Depth(wn_outer);
00725 for (INT i = 0; i < last; i++)
00726 if (permutation[i] != i)
00727 return FALSE;
00728 return TRUE;
00729 }
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739 extern BOOL Invariant_Permutation(WN* wn_outer,
00740 INT permutation[],
00741 INT nloops)
00742 {
00743 if (!General_Permutation(wn_outer, permutation, nloops))
00744 return FALSE;
00745 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00746 DOLOOP_STACK stack(&LNO_local_pool);
00747 Build_Doloop_Stack(wn_inner, &stack);
00748 INT offset = Do_Loop_Depth(wn_outer);
00749 for (INT i = 0; i < nloops; i++) {
00750 for (INT j = i + 1; j < nloops; j++) {
00751 if (permutation[i] > permutation[j] && !SNL_Is_Invariant(&stack,
00752 offset + permutation[j], offset + permutation[i]))
00753 return FALSE;
00754 }
00755 }
00756 return TRUE;
00757 }
00758
00759
00760
00761
00762
00763
00764
00765
00766 extern BOOL Fully_Permutable_Permutation(WN* wn_outer,
00767 INT nloops)
00768 {
00769 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00770 INT i;
00771 for (i = 0; i < nloops; i++)
00772 permutation[i] = i;
00773 if (!General_Permutation(wn_outer, permutation, nloops))
00774 return FALSE;
00775 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00776 DOLOOP_STACK stack(&LNO_local_pool);
00777 Build_Doloop_Stack(wn_inner, &stack);
00778 INT inner_depth = Do_Loop_Depth(wn_inner);
00779 for (i = 2; i <= nloops; i++) {
00780 INT d = inner_depth + 1 - i;
00781 for (INT dd = d + 1; dd <= inner_depth; dd++)
00782 if (!SNL_Is_Invariant(&stack, d, dd))
00783 return FALSE;
00784 }
00785 return TRUE;
00786 }
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798 static BOOL SNL_Perm_Retained_Section(INT section_depth,
00799 INT permutation[],
00800 INT nloops)
00801 {
00802 INT last = -1;
00803 for (INT first = 0; first < nloops; first = last + 1) {
00804 last = Permutation_Last(first, permutation, nloops);
00805 if (last == section_depth)
00806 return TRUE;
00807 if (last > section_depth)
00808 return FALSE;
00809 }
00810 return FALSE;
00811 }
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821 static BOOL Is_Perfectly_Nested(WN* wn_outer,
00822 INT permutation[],
00823 INT nloops,
00824 INT depth)
00825 {
00826 INT outer_depth = Do_Loop_Depth(wn_outer);
00827 INT inner_depth = outer_depth + nloops - 1;
00828 FmtAssert(depth >= outer_depth && depth <= inner_depth - 1,
00829 ("Is_Perfectly_Nested: Test depth outside allowed range"));
00830 WN* wn_level = SNL_Get_Inner_Snl_Loop(wn_outer, depth - outer_depth + 1);
00831 return !SNL_Perm_Retained_Section(depth - outer_depth, permutation, nloops)
00832 || WN_opcode(WN_first(WN_do_body(wn_level))) == OPC_DO_LOOP
00833 && WN_first(WN_do_body(wn_level)) == WN_last(WN_do_body(wn_level));
00834 }
00835
00836
00837
00838
00839
00840
00841
00842 static BOOL SNL_Perm_Retained_Section(INT section_depth,
00843 INT permutation[],
00844 INT nloops,
00845 INT sd_split_depth)
00846 {
00847 if (sd_split_depth == -1)
00848 return SNL_Perm_Retained_Section(section_depth, permutation, nloops);
00849 return section_depth >= sd_split_depth
00850 && SNL_Perm_Retained_Section(section_depth, permutation, nloops);
00851 }
00852
00853
00854
00855
00856
00857
00858
00859
00860 static BOOL SNL_Dir_Cannot_Interchange(WN* wn_outer,
00861 INT permutation[],
00862 INT nloops)
00863 {
00864 if (!LNO_Interchange)
00865 return TRUE;
00866 INT outer_depth = Do_Loop_Depth(wn_outer);
00867 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00868 DOLOOP_STACK stack(&LNO_local_pool);
00869 Build_Doloop_Stack(wn_inner, &stack);
00870 for (INT i = outer_depth; i < outer_depth + nloops; i++) {
00871 WN* wn_loop = stack.Bottom_nth(i);
00872 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00873 if (dli_loop->Cannot_Interchange
00874 && permutation[i - outer_depth] != i - outer_depth)
00875 return TRUE;
00876 }
00877 return FALSE;
00878 }
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889 extern SNL_DEP_MATRIX** Inv_Dep_Info(WN* wn_outer,
00890 INT nloops,
00891 BOOL check_privates,
00892 BOOL definitely)
00893 {
00894 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00895 REDUCTION_MANAGER* rm = red_manager;
00896
00897
00898 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00899 DOLOOP_STACK stack(&LNO_local_pool);
00900 Build_Doloop_Stack(wn_inner, &stack);
00901 INT v_hash_table_size = MAX(MIN(dg->Get_Vertex_Count(), 512), 5);
00902 HASH_TABLE<WN*,INT> priv_table(v_hash_table_size, &LNO_local_pool);
00903 if (check_privates) {
00904 LWN_ITER* itr = LWN_WALK_TreeIter(wn_outer);
00905 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
00906 WN* wn = itr->wn;
00907 OPERATOR opr = WN_operator(wn);
00908 if (opr != OPR_ILOAD && opr != OPR_ISTORE && opr != OPR_LDID
00909 && opr != OPR_STID)
00910 continue;
00911 VINDEX16 v = dg->Get_Vertex(wn);
00912 if (v == 0 && (opr == OPR_LDID || opr == OPR_STID))
00913 continue;
00914 INT i;
00915 for (i = Do_Loop_Depth(wn_outer); i < stack.Elements(); i++) {
00916 WN* wn_loop = stack.Bottom_nth(i);
00917 if (Is_Privatizable_With_Context(wn_loop, wn, definitely))
00918 break;
00919 }
00920 if (i < stack.Elements())
00921 priv_table.Enter(wn, 1);
00922 }
00923 }
00924
00925
00926 INT i;
00927 for (i = 0; i < stack.Elements(); i++)
00928 if (Do_Loop_Is_Good(stack.Bottom_nth(i)) &&
00929 !Do_Loop_Has_Exits(stack.Bottom_nth(i)))
00930 break;
00931 INT nbad = i;
00932 INT outer_depth = Do_Loop_Depth(wn_outer);
00933 INT inner_depth = Do_Loop_Depth(wn_inner);
00934 SNL_DEP_INFO** sdi_list = CXX_NEW_ARRAY(SNL_DEP_INFO*, inner_depth
00935 - outer_depth + 1, &LNO_local_pool);
00936 for (i = outer_depth; i <= inner_depth; i++)
00937 sdi_list[i - outer_depth] = CXX_NEW(SNL_DEP_INFO(outer_depth - nbad,
00938 i - outer_depth + 1, nbad, stack, &LNO_local_pool), &LNO_local_pool);
00939 EINDEX16 e = 0;
00940 INT e_hash_table_size = MIN(dg->Get_Edge_Count(), 512);
00941 HASH_TABLE<EINDEX16,INT> edge_table(e_hash_table_size, &LNO_local_pool);
00942 LWN_ITER* itr = LWN_WALK_TreeIter(wn_outer);
00943 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
00944 WN* wn = itr->wn;
00945 INT wn_source_depth = Loop_Depth(wn);
00946 REDUCTION_TYPE rt_source = rm != NULL ? rm->Which_Reduction(wn) : RED_NONE;
00947 OPERATOR opr = WN_operator(wn);
00948 if (opr != OPR_ILOAD && opr != OPR_ISTORE && opr != OPR_LDID
00949 && opr != OPR_STID)
00950 continue;
00951 BOOL Is_Wn_Array = opr == OPR_ILOAD || opr == OPR_ISTORE;
00952 VINDEX16 v = dg->Get_Vertex(wn);
00953 if (v == 0
00954 #ifndef KEY
00955 && (opr == OPR_LDID || opr == OPR_STID)
00956 #endif
00957 )
00958 continue;
00959 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
00960 if (edge_table.Find(e))
00961 continue;
00962 edge_table.Enter(e, 1);
00963 WN* wn_sink = dg->Get_Wn(dg->Get_Sink(e));
00964 OPERATOR opr_sink = WN_operator(wn_sink);
00965 BOOL Is_Wn_Sink_Array = opr_sink == OPR_ILOAD || opr_sink == OPR_ISTORE;
00966 INT wn_sink_depth = Loop_Depth(wn_sink);
00967 REDUCTION_TYPE rt_sink = rm != NULL ? rm->Which_Reduction(wn_sink)
00968 : RED_NONE;
00969 if (!Wn_Is_Inside(wn_sink, wn_outer))
00970 continue;
00971 INT list_index = wn_source_depth < wn_sink_depth
00972 ? wn_source_depth - outer_depth : wn_sink_depth - outer_depth;
00973 if (list_index > nloops - 1)
00974 list_index = nloops - 1;
00975 if (!priv_table.Find(wn)
00976 && (rt_source == RED_NONE || rt_source != rt_sink)) {
00977 if (!sdi_list[list_index]->All_Stars())
00978 sdi_list[list_index]->Enter(dg->Depv_Array(e), e, TRUE);
00979 }
00980 }
00981 }
00982
00983
00984 SNL_DEP_MATRIX** sdm_list = CXX_NEW_ARRAY(SNL_DEP_MATRIX*, inner_depth
00985 - outer_depth + 1, &LNO_local_pool);
00986 for (i = 0; i < inner_depth - outer_depth + 1; i++) {
00987 if (sdi_list[i]->All_Stars()) {
00988 sdm_list[i] = NULL;
00989 } else {
00990 SNL_DEP_MATRIX* sdm_local = CXX_NEW(SNL_DEP_MATRIX(*sdi_list[i],
00991 &LNO_local_pool), &LNO_local_pool);
00992 sdm_list[i] = sdm_local;
00993 }
00994 }
00995 return sdm_list;
00996 }
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006 static INT Invariant_Red_Depth(WN* wn_array,
01007 WN* wn_outer,
01008 INT permutation[],
01009 INT nloops)
01010 {
01011 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01012 REDUCTION_MANAGER* rm = red_manager;
01013 FmtAssert(rm != NULL, ("Test requires reduction manager"));
01014
01015 INT outer_depth = Do_Loop_Depth(wn_outer);
01016 INT inner_depth = Loop_Depth(wn_array);
01017 INT invariant_depth = outer_depth;
01018 ACCESS_ARRAY *aa = (ACCESS_ARRAY *) WN_MAP_Get(LNO_Info_Map, wn_array);
01019 for (INT i = 0; i < aa->Num_Vec(); i++) {
01020 ACCESS_VECTOR* av = aa->Dim(i);
01021 if (av->Too_Messy)
01022 return outer_depth + nloops;
01023 if (inner_depth >= outer_depth + nloops) {
01024 INT j;
01025 for (j = inner_depth; j >= outer_depth + nloops; j--)
01026 if (av->Loop_Coeff(j) != 0)
01027 break;
01028 if (j + 1 > invariant_depth)
01029 invariant_depth = j + 1;
01030 }
01031 INT j;
01032 for (j = outer_depth + nloops - 1; j >= outer_depth; j--)
01033 if (av->Loop_Coeff(outer_depth + permutation[j - outer_depth]) != 0)
01034 break;
01035 if (j + 1 > invariant_depth)
01036 invariant_depth = j + 1;
01037 }
01038 WN* wn_red = LWN_Get_Parent(wn_array);
01039 EINDEX16 e = 0;
01040 VINDEX16 v = dg->Get_Vertex(wn_red);
01041 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
01042 WN* wn_source = dg->Get_Wn(dg->Get_Source(e));
01043 if (rm->Which_Reduction(wn_source) == rm->Which_Reduction(wn_red))
01044 continue;
01045 WN* wn_common = LNO_Common_Loop(wn_red, wn_source);
01046 if (wn_common == NULL)
01047 continue;
01048 DEPV_ARRAY* dv = dg->Depv_Array(e);
01049 INT lcd_depth = dv->Loop_Carrying_Dependence();
01050 if (lcd_depth + 1 > invariant_depth)
01051 invariant_depth = lcd_depth == -1 ? Do_Loop_Depth(wn_common) + 1
01052 : lcd_depth + 1;
01053 }
01054 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
01055 WN* wn_sink = dg->Get_Wn(dg->Get_Sink(e));
01056 if (rm->Which_Reduction(wn_sink) == rm->Which_Reduction(wn_red))
01057 continue;
01058 WN* wn_common = LNO_Common_Loop(wn_red, wn_sink);
01059 if (wn_common == NULL)
01060 continue;
01061 DEPV_ARRAY* dv = dg->Depv_Array(e);
01062 INT lcd_depth = dv->Loop_Carrying_Dependence();
01063 if (lcd_depth + 1 > invariant_depth)
01064 invariant_depth = lcd_depth == -1 ? Do_Loop_Depth(wn_common) + 1
01065 : lcd_depth + 1;
01066 }
01067 return invariant_depth;
01068 }
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079 static SNL_DEP_MATRIX** Red_Dep_Info(WN* wn_outer,
01080 INT permutation[],
01081 INT nloops,
01082 INT parallel_depth,
01083 BOOL check_privates,
01084 BOOL definitely)
01085 {
01086 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01087 REDUCTION_MANAGER* rm = red_manager;
01088
01089
01090 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01091 DOLOOP_STACK stack(&LNO_local_pool);
01092 Build_Doloop_Stack(wn_inner, &stack);
01093 INT v_hash_table_size = MAX(MIN(dg->Get_Vertex_Count(), 512), 5);
01094 HASH_TABLE<WN*,INT> priv_table(v_hash_table_size, &LNO_local_pool);
01095 if (check_privates) {
01096 LWN_ITER* itr = LWN_WALK_TreeIter(wn_outer);
01097 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01098 WN* wn = itr->wn;
01099 OPERATOR opr = WN_operator(wn);
01100 if (opr != OPR_ILOAD && opr != OPR_ISTORE && opr != OPR_LDID
01101 && opr != OPR_STID)
01102 continue;
01103 VINDEX16 v = dg->Get_Vertex(wn);
01104 if (v == 0 && (opr == OPR_LDID || opr == OPR_STID))
01105 continue;
01106 INT i;
01107 for (i = Do_Loop_Depth(wn_outer); i < stack.Elements(); i++) {
01108 WN* wn_loop = stack.Bottom_nth(i);
01109 if (Is_Privatizable_With_Context(wn_loop, wn, definitely))
01110 break;
01111 }
01112 if (i < stack.Elements())
01113 priv_table.Enter(wn, 1);
01114 }
01115 }
01116
01117
01118 INT i;
01119 for (i = 0; i < stack.Elements(); i++)
01120 if (Do_Loop_Is_Good(stack.Bottom_nth(i)) &&
01121 !Do_Loop_Has_Exits(stack.Bottom_nth(i)))
01122 break;
01123 INT nbad = i;
01124 INT outer_depth = Do_Loop_Depth(wn_outer);
01125 INT inner_depth = Do_Loop_Depth(wn_inner);
01126 SNL_DEP_INFO** sdi_list = CXX_NEW_ARRAY(SNL_DEP_INFO*, inner_depth
01127 - outer_depth + 1, &LNO_local_pool);
01128 for (i = outer_depth; i <= inner_depth; i++)
01129 sdi_list[i - outer_depth] = CXX_NEW(SNL_DEP_INFO(outer_depth - nbad,
01130 i - outer_depth + 1, nbad, stack, &LNO_local_pool), &LNO_local_pool);
01131 EINDEX16 e = 0;
01132 INT e_hash_table_size = MIN(dg->Get_Edge_Count(), 512);
01133 HASH_TABLE<EINDEX16,INT> edge_table(e_hash_table_size, &LNO_local_pool);
01134 LWN_ITER* itr = LWN_WALK_TreeIter(wn_outer);
01135 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01136 WN* wn = itr->wn;
01137 if (priv_table.Find(wn))
01138 continue;
01139 INT wn_source_depth = Loop_Depth(wn);
01140 REDUCTION_TYPE rt_source = rm != NULL ? rm->Which_Reduction(wn) : RED_NONE;
01141 if (rt_source == RED_NONE)
01142 continue;
01143 OPERATOR opr = WN_operator(wn);
01144 if (opr != OPR_ILOAD && opr != OPR_ISTORE)
01145 continue;
01146 WN* wn_array = opr == OPR_ILOAD ? WN_kid0(wn) : WN_kid1(wn);
01147 if (Invariant_Red_Depth(wn_array, wn_outer, permutation, nloops)
01148 <= parallel_depth)
01149 continue;
01150 VINDEX16 v = dg->Get_Vertex(wn);
01151 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
01152 if (edge_table.Find(e))
01153 continue;
01154 edge_table.Enter(e, 1);
01155 WN* wn_sink = dg->Get_Wn(dg->Get_Sink(e));
01156 OPERATOR opr_sink = WN_operator(wn_sink);
01157 if (opr_sink != OPR_ILOAD && opr_sink != OPR_ISTORE)
01158 continue;
01159 INT wn_sink_depth = Loop_Depth(wn_sink);
01160 REDUCTION_TYPE rt_sink = rm != NULL ? rm->Which_Reduction(wn_sink)
01161 : RED_NONE;
01162 if (rt_sink == RED_NONE)
01163 continue;
01164 if (rt_source != rt_sink)
01165 continue;
01166 if (!Wn_Is_Inside(wn_sink, wn_outer))
01167 continue;
01168 INT list_index = wn_source_depth < wn_sink_depth
01169 ? wn_source_depth - outer_depth : wn_sink_depth - outer_depth;
01170 if (list_index > nloops - 1)
01171 list_index = nloops - 1;
01172 if (!sdi_list[list_index]->All_Stars())
01173 sdi_list[list_index]->Enter(dg->Depv_Array(e), e, TRUE);
01174 }
01175 }
01176
01177
01178 SNL_DEP_MATRIX** sdm_list = CXX_NEW_ARRAY(SNL_DEP_MATRIX*, inner_depth
01179 - outer_depth + 1, &LNO_local_pool);
01180 for (i = 0; i < inner_depth - outer_depth + 1; i++) {
01181 if (sdi_list[i]->All_Stars()) {
01182 sdm_list[i] = NULL;
01183 } else {
01184 SNL_DEP_MATRIX* sdm_local = CXX_NEW(SNL_DEP_MATRIX(*sdi_list[i],
01185 &LNO_local_pool), &LNO_local_pool);
01186 sdm_list[i] = sdm_local;
01187 }
01188 }
01189 return sdm_list;
01190 }
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213 static INTERCHANGE_TYPE Is_Legal_Permutation(WN* wn_outer,
01214 INT permutation[],
01215 INT nloops,
01216 SX_INFO* sx_info,
01217 SD_INFO* sd_info,
01218 SNL_DEP_MATRIX** sdm_inv,
01219 INT* sd_split_depth,
01220 BOOL is_fully_permutable)
01221 {
01222
01223 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
01224 ? Parallel_Debug_Level : 0;
01225
01226
01227 INT i;
01228 for (i = 0; i < nloops; i++)
01229 if (permutation[i] != i)
01230 break;
01231 if (i == nloops)
01232 return is_fully_permutable ? INT_PERMUTABLE : INT_INVARIANT;
01233
01234 if (SNL_Dir_Cannot_Interchange(wn_outer, permutation, nloops)) {
01235 if (parallel_debug_level >= 1)
01236 fprintf(stdout, " Directive prevents interchange.\n");
01237 return INT_NONE;
01238 }
01239
01240
01241 SE_STATUS se_result = SNL_Is_Scalar_Expandable(wn_outer, permutation,
01242 nloops, sx_info, FALSE);
01243 if (se_result == SE_FALSE) {
01244 if (parallel_debug_level >= 1)
01245 fprintf(stdout, " Could not scalar expand.\n");
01246 return INT_NONE;
01247 }
01248
01249
01250 INT outer_depth = Do_Loop_Depth(wn_outer);
01251 *sd_split_depth = -1;
01252 if (se_result == SE_MAYBE) {
01253 *sd_split_depth = SNL_Bad_Scalars_Are_Distributable(wn_outer,
01254 permutation, nloops, sx_info, sd_info);
01255 if (*sd_split_depth == outer_depth + nloops) {
01256 if (parallel_debug_level >= 1)
01257 fprintf(stdout, " Could not remove unexpandable scalars.\n");
01258 return INT_NONE;
01259 }
01260 }
01261
01262
01263 if (!SNL_Permutation_Is_Distributable(wn_outer, permutation, nloops)) {
01264 if (parallel_debug_level >= 1)
01265 fprintf(stdout, " Could not distribute.\n");
01266 return INT_NONE;
01267 }
01268
01269
01270 for (i = 0; i < nloops; i++) {
01271 if (SNL_Perm_Retained_Section(i, permutation, nloops, *sd_split_depth)
01272 && !SNL_Legal_Perm_Deps(sdm_inv[i], permutation, i + 1)) {
01273 if (parallel_debug_level >= 1)
01274 fprintf(stdout, " Loop-carried dependence.\n");
01275 return INT_NONE;
01276 }
01277 }
01278
01279
01280 if (is_fully_permutable)
01281 return INT_PERMUTABLE;
01282 if (Invariant_Permutation(wn_outer, permutation, nloops))
01283 return INT_INVARIANT;
01284 if (General_Permutation(wn_outer, permutation, nloops))
01285 return INT_GENERAL;
01286 return INT_NONE;
01287 }
01288
01289
01290
01291
01292
01293
01294
01295 static void Print_Permutation_Vector(FILE* fp,
01296 INT permutation[],
01297 INT nloops,
01298 INT parallel_depth,
01299 BOOL is_doacross)
01300 {
01301 fprintf(fp, "Testing permutation [");
01302 if (!is_doacross)
01303 for (INT l = 0; l < nloops; l++) {
01304 fprintf(fp, "%d%s", permutation[l], l == parallel_depth ? "-P" : "");
01305 if (l < nloops - 1)
01306 fprintf(fp, ",");
01307 }
01308 else
01309 for (INT l = 0; l < nloops; l++) {
01310 fprintf(fp, "%d%s", permutation[l], l == parallel_depth ? "-X" : "");
01311 if (l < nloops - 1)
01312 fprintf(fp, ",");
01313 }
01314 fprintf(fp, "]\n");
01315 }
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329 static INTERCHANGE_TYPE Is_Legal_Permutation_Class(WN* wn_outer,
01330 INT permutation[],
01331 INT nloops,
01332 INT parallel_depth,
01333 SX_INFO* sx_info,
01334 SD_INFO* sd_info,
01335 SNL_DEP_MATRIX** sdm_inv,
01336 INT* sd_split_depth,
01337 BOOL print_message,
01338 BOOL is_fully_permutable)
01339 {
01340 INT outer_depth = Do_Loop_Depth(wn_outer);
01341 INT parallel_index = parallel_depth - outer_depth;
01342 INT aloops = parallel_index;
01343 INT bloops = nloops - parallel_index - 1;
01344 INT* aperm = CXX_NEW_ARRAY(INT, aloops, &LNO_local_pool);
01345 INT* bperm = CXX_NEW_ARRAY(INT, bloops, &LNO_local_pool);
01346 INT* nperm = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01347 nperm[parallel_index] = permutation[parallel_index];
01348 INT i;
01349 for (i = 0; i < Factorial(aloops); i++) {
01350 Permutation(i, aloops, aperm);
01351 INT j;
01352 for (j = 0; j < aloops; j++)
01353 nperm[j] = permutation[aperm[j]];
01354 for (j = 0; j < Factorial(bloops); j++) {
01355 Permutation(j, bloops, bperm);
01356 for (INT k = 0; k < bloops; k++)
01357 nperm[k + aloops + 1] = permutation[bperm[k] + aloops + 1];
01358 if (print_message) {
01359 Print_Permutation_Vector(stdout, nperm, nloops, parallel_index, FALSE);
01360 Print_Permutation_Vector(TFile, nperm, nloops, parallel_index, FALSE);
01361 }
01362 INTERCHANGE_TYPE int_type = Is_Legal_Permutation(wn_outer, nperm,
01363 nloops, sx_info, sd_info, sdm_inv, sd_split_depth,
01364 is_fully_permutable);
01365 if (int_type != INT_NONE) {
01366 for (INT l = 0; l < nloops; l++)
01367 permutation[l] = nperm[l];
01368 return int_type;
01369 }
01370 }
01371 }
01372 return INT_NONE;
01373 }
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390 static WN* Parallel_Interchange(WN* wn_outer,
01391 INT permutation[],
01392 INT nloops,
01393 SD_INFO* sd_info,
01394 SX_INFO* sx_info,
01395 INTERCHANGE_TYPE int_type,
01396 INT sd_split_depth,
01397 INT split_depth,
01398 DOLOOP_STACK* stack)
01399 {
01400 INT outer_depth = Do_Loop_Depth(wn_outer);
01401 if (split_depth == outer_depth && Identity_Permutation(permutation, nloops))
01402 return wn_outer;
01403
01404 BOOL sinv = int_type == INT_PERMUTABLE;
01405 BOOL inv = int_type == INT_PERMUTABLE || int_type == INT_INVARIANT;
01406 BOOL sd_ignore_illegals = sd_split_depth > 0 && sd_split_depth
01407 < outer_depth + nloops;
01408 BOOL ignore_illegals = split_depth > 0 && split_depth
01409 < outer_depth + nloops;
01410 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01411 SNL_Scalar_Expand_For_Splitting(wn_outer, wn_inner, sd_split_depth,
01412 &sx_info->Plist, &sd_info->Plist, sinv, sd_ignore_illegals, FALSE);
01413 SNL_Scalar_Expand(wn_outer, wn_inner, permutation, nloops,
01414 sx_info, sinv, sd_ignore_illegals, FALSE);
01415 SNL_Scalar_Expand_For_Splitting(wn_outer, wn_inner, split_depth,
01416 &sx_info->Plist, &sd_info->Plist, sinv, ignore_illegals, FALSE);
01417 SNL_Distribute_By_Splitting(wn_outer, wn_inner, nloops, sd_split_depth,
01418 stack);
01419 SNL_Distribute_For_Permutation(wn_outer, wn_inner, permutation, nloops,
01420 stack);
01421 SNL_Distribute_By_Splitting(wn_outer, wn_inner, nloops, split_depth,
01422 stack);
01423 WN* wn_new_outer = SNL_Permute_Loops(wn_outer, wn_inner, permutation,
01424 nloops, inv, FALSE);
01425 return wn_new_outer;
01426 }
01427
01428
01429
01430
01431
01432
01433
01434
01435
01436
01437
01438
01439
01440
01441
01442
01443
01444 static INT Splittable(WN* wn_outer,
01445 INT split_depth,
01446 INT nloops,
01447 SX_INFO* sx_info,
01448 SD_INFO* sd_info,
01449 BOOL is_privatizable)
01450 {
01451 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01452 DOLOOP_STACK stack(&LNO_local_pool);
01453 Build_Doloop_Stack(wn_inner, &stack);
01454
01455 INT outer_depth = Do_Loop_Depth(wn_outer);
01456 INT best_split_depth = split_depth;
01457 if (!is_privatizable || split_depth > outer_depth) {
01458 SE_STATUS se_status = SNL_Is_Scalar_Expandable(wn_outer, NULL, nloops,
01459 sx_info, FALSE);
01460 if (se_status == SE_FALSE)
01461 return outer_depth + nloops;
01462 if (se_status == SE_MAYBE) {
01463 INT upper_range = sd_info->Distribution_Range(outer_depth, sx_info);
01464 if (upper_range + 1 > best_split_depth)
01465 best_split_depth = upper_range + 1;
01466 if (best_split_depth >= outer_depth + nloops)
01467 return outer_depth + nloops;
01468 }
01469 }
01470 if (best_split_depth <= outer_depth)
01471 return -1;
01472 WN* wn_split = stack.Bottom_nth(best_split_depth);
01473 if (!SNL_Is_Distributable(wn_outer, wn_outer, wn_split, TRUE))
01474 return outer_depth + nloops;
01475 if (!SNL_Is_Distributable(wn_outer, wn_outer, wn_split, FALSE))
01476 return outer_depth + nloops;
01477 return best_split_depth;
01478 }
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508 static INT Parallelizable_At_Depth(WN* wn_outer,
01509 INT nloops,
01510 INT permutation[],
01511 SNL_DEP_MATRIX** sdm_body,
01512 BOOL sdm_scl[],
01513 SX_INFO* sx_info,
01514 SD_INFO* sd_info,
01515 INT sd_split_depth,
01516 INT parallel_depth)
01517 {
01518 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01519 DOLOOP_STACK stack(&LNO_local_pool);
01520 Build_Doloop_Stack(wn_inner, &stack);
01521 INT outer_depth = Do_Loop_Depth(wn_outer);
01522 BOOL need_test = FALSE;
01523 WN* wn_parallel_loop = stack.Bottom_nth(parallel_depth);
01524 DO_LOOP_INFO* dli_parallel = Get_Do_Loop_Info(wn_parallel_loop);
01525 if (dli_parallel->Has_Exits)
01526 return outer_depth + nloops;
01527 BOOL is_privatizable = !sdm_scl[permutation[parallel_depth - outer_depth]];
01528 for (INT k = nloops - 1; k >= parallel_depth - outer_depth; k--) {
01529 if (!SNL_Perm_Retained_Section(k, permutation, nloops, sd_split_depth))
01530 continue;
01531 if (sdm_body[k] == NULL) {
01532 if (k == nloops - 1)
01533 return outer_depth + nloops;
01534 INT split_depth = outer_depth + k + 1;
01535 return Splittable(wn_outer, split_depth, nloops, sx_info, sd_info,
01536 is_privatizable);
01537 }
01538 for (INT i = 0; i < sdm_body[k]->Ndep(); i++) {
01539 for (INT j = 0; j <= parallel_depth - outer_depth; j++) {
01540 SNL_DEP dep = (*sdm_body[k])(i, permutation[j]);
01541 if (j < parallel_depth - outer_depth) {
01542 if (dep.Moreless == SNL_DEP::SNL_DEP_PLUS
01543 || dep.Moreless == SNL_DEP::SNL_DEP_EXACT && dep.Distance > 0)
01544 break;
01545 } else {
01546 FmtAssert(j == parallel_depth - outer_depth,
01547 ("Index out of range"));
01548 if (dep.Moreless == SNL_DEP::SNL_DEP_EXACT && dep.Distance == 0)
01549 break;
01550 if (k == nloops - 1)
01551 return outer_depth + nloops;
01552 INT split_depth = outer_depth + k + 1;
01553 return Splittable(wn_outer, split_depth, nloops, sx_info, sd_info,
01554 is_privatizable);
01555 }
01556 }
01557 }
01558 }
01559 return Splittable(wn_outer, -1, nloops, sx_info, sd_info, is_privatizable);
01560 }
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581 static INT Parallelizable(WN* wn_outer,
01582 INT permutation[],
01583 INT nloops,
01584 INT parallel_depth,
01585 SNL_DEP_MATRIX** sdm_inv,
01586 BOOL sdm_scl[],
01587 SX_INFO* sx_info,
01588 SD_INFO* sd_info,
01589 INT sd_split_depth)
01590 {
01591 SNL_DEP_MATRIX** sdm_red = Red_Dep_Info(wn_outer, permutation, nloops,
01592 parallel_depth, TRUE, FALSE);
01593 INT inv_split_depth = Parallelizable_At_Depth(wn_outer, nloops,
01594 permutation, sdm_inv, sdm_scl, sx_info, sd_info, sd_split_depth,
01595 parallel_depth);
01596 INT red_split_depth = Parallelizable_At_Depth(wn_outer, nloops,
01597 permutation, sdm_red, sdm_scl, sx_info, sd_info, sd_split_depth,
01598 parallel_depth);
01599 INT split_depth = inv_split_depth > red_split_depth ? inv_split_depth :
01600 red_split_depth;
01601 return split_depth;
01602 }
01603
01604
01605
01606
01607
01608
01609
01610
01611 static BOOL* Scl_Dep_Info(WN* wn_outer,
01612 INT nloops)
01613 {
01614 BOOL* scl_list = CXX_NEW_ARRAY(BOOL, nloops, &LNO_local_pool);
01615 for (INT i = 0; i < nloops; i++)
01616 scl_list[i] = FALSE;
01617 INT outer_depth = Do_Loop_Depth(wn_outer);
01618 LWN_ITER* itr = LWN_WALK_TreeIter(wn_outer);
01619 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01620 WN* wn_scalar = itr->wn;
01621 OPERATOR opr_scalar = WN_operator(wn_scalar);
01622 if (opr_scalar != OPR_LDID && opr_scalar != OPR_STID)
01623 continue;
01624 for (WN* wn = wn_scalar; wn != NULL; wn = LWN_Get_Parent(wn)) {
01625 if (WN_opcode(wn) == OPC_DO_LOOP) {
01626 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01627 #ifdef KEY //bug 12049: we are only interested in the chunk of 'nloops'
01628
01629 if(dli->Depth < outer_depth + nloops)
01630 #endif
01631 if (dli->ARA_Info->Is_Problem_Scalar(wn_scalar))
01632 scl_list[dli->Depth - outer_depth] = TRUE;
01633 }
01634 if (wn == wn_outer)
01635 break;
01636 }
01637 }
01638 return scl_list;
01639 }
01640
01641
01642
01643
01644
01645
01646
01647 static WN* Parallel_Loop(PARALLEL_INFO* parallel_info,
01648 DOLOOP_STACK* loop_stack)
01649 {
01650 if (parallel_info->Parallel_Depth() < 0)
01651 return NULL;
01652 INT i;
01653 for (i = 0; i < loop_stack->Elements(); i++) {
01654 WN* wn_loop = loop_stack->Bottom_nth(i);
01655 if (Do_Loop_Depth(wn_loop) == parallel_info->Parallel_Depth())
01656 break;
01657 }
01658 WN* wn_parallel = loop_stack->Bottom_nth(i);
01659 return wn_parallel;
01660 }
01661
01662
01663
01664
01665
01666
01667
01668
01669 static void Print_Parallel_Loop(FILE* fp,
01670 PARALLEL_INFO* parallel_info,
01671 DOLOOP_STACK* loop_stack)
01672 {
01673 WN* wn_parallel = Parallel_Loop(parallel_info, loop_stack);
01674 if (wn_parallel == NULL)
01675 return;
01676 INT nloops = parallel_info->Nloops();
01677 INT inner_depth = loop_stack->Elements() - 1;
01678 INT outer_depth = inner_depth - nloops + 1;
01679 fprintf(fp, "Auto Parallelizing Loop %s at %d ",
01680 WB_Whirl_Symbol(wn_parallel), (INT) WN_linenum(wn_parallel));
01681 fprintf(fp, "using (");
01682 for (INT i = 0; i < parallel_info->Nloops(); i++) {
01683 fprintf(fp, "%d%s", parallel_info->Permutation(i),
01684 parallel_info->Parallel_Depth() == i + outer_depth ?
01685 (parallel_info->Is_Doacross() ? "-X" : "-P") : "");;
01686 if (i < parallel_info->Nloops() - 1)
01687 fprintf(fp, ",");
01688 }
01689 fprintf(fp, ")\n");
01690 }
01691
01692
01693
01694
01695
01696
01697
01698
01699 extern BOOL Outermore_Parallel_Construct(WN* wn_loop,
01700 BOOL check_suggested)
01701 {
01702 if (Do_Loop_Is_Mp(wn_loop))
01703 return FALSE;
01704
01705 for (WN* wn = LWN_Get_Parent(wn_loop); wn != NULL; wn = LWN_Get_Parent(wn)) {
01706 if (WN_opcode(wn) == OPC_DO_LOOP) {
01707 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01708 if (check_suggested && dli->Suggested_Parallel)
01709 return TRUE;
01710 }
01711 if (Is_Mp_Region(wn))
01712 return TRUE;
01713 }
01714 return FALSE;
01715 }
01716
01717
01718
01719
01720
01721
01722
01723
01724 extern BOOL Outermore_Parallel_Construct_Or_Lego_Loop(WN* wn_loop)
01725 {
01726 if (Do_Loop_Is_Mp(wn_loop))
01727 return FALSE;
01728
01729 for (WN* wn = LWN_Get_Parent(wn_loop); wn != NULL; wn = LWN_Get_Parent(wn)) {
01730 if (WN_opcode(wn) == OPC_DO_LOOP) {
01731 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01732 if (dli->Is_Outer_Lego_Tile || dli->Is_Inner_Lego_Tile)
01733 return TRUE;
01734 if (dli->Suggested_Parallel)
01735 return TRUE;
01736 }
01737 if (Is_Mp_Region(wn))
01738 return TRUE;
01739 }
01740 return FALSE;
01741 }
01742
01743
01744
01745
01746
01747
01748
01749
01750 extern BOOL Innermore_Parallel_Loop(WN* wn_loop,
01751 BOOL check_suggested)
01752 {
01753 LWN_ITER* itr = LWN_WALK_TreeIter(wn_loop);
01754 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01755 WN* wn = itr->wn;
01756 if (WN_opcode(wn) == OPC_DO_LOOP) {
01757 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01758 if (check_suggested && dli->Suggested_Parallel)
01759 return TRUE;
01760 if (Do_Loop_Is_Mp(wn))
01761 return TRUE;
01762 }
01763 }
01764 return FALSE;
01765 }
01766
01767
01768
01769
01770
01771
01772
01773
01774 extern BOOL Innermore_Parallel_Or_Lego_Loop(WN* wn_loop)
01775 {
01776 LWN_ITER* itr = LWN_WALK_TreeIter(wn_loop);
01777 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01778 WN* wn = itr->wn;
01779 if (WN_opcode(wn) == OPC_DO_LOOP) {
01780 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01781 if (dli->Is_Outer_Lego_Tile || dli->Is_Inner_Lego_Tile)
01782 return TRUE;
01783 if (Do_Loop_Is_Mp(wn))
01784 return TRUE;
01785 }
01786 }
01787 return FALSE;
01788 }
01789
01790
01791
01792
01793
01794
01795
01796
01797 static BOOL SNL_All_Parallelizable(WN* wn_outer,
01798 INT nloops)
01799 {
01800 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01801 for (WN* wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
01802 if (WN_opcode(wn) == OPC_DO_LOOP) {
01803 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01804 if (!dli->Parallelizable)
01805 return FALSE;
01806 if (wn == wn_outer)
01807 return TRUE;
01808 }
01809 }
01810 return FALSE;
01811 }
01812
01813
01814
01815
01816
01817
01818
01819
01820
01821
01822
01823 static BOOL Parallel_Directive_Class(WN* wn_outer_loop,
01824 INT nloops,
01825 PAR_DIR_TYPE par_dir[])
01826 {
01827 INT outer_depth = Do_Loop_Depth(wn_outer_loop);
01828 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer_loop, nloops);
01829 DOLOOP_STACK stack(&LNO_local_pool);
01830 Build_Doloop_Stack(wn_inner, &stack);
01831 BOOL has_prefer_parallel = FALSE;
01832 for (INT i = 0; i < nloops; i++) {
01833 WN* wn_loop = stack.Bottom_nth(outer_depth + i);
01834 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01835 if (dli->Pragma_Cannot_Concurrentize)
01836 par_dir[i] = PD_NO_CONCURRENT;
01837 else if (dli->Pragma_Prefer_Concurrentize)
01838 par_dir[i] = PD_PREFER_CONCURRENT;
01839 else
01840 par_dir[i] = PD_NONE;
01841 if (dli->Pragma_Prefer_Concurrentize)
01842 has_prefer_parallel = TRUE;
01843 }
01844 return has_prefer_parallel;
01845 }
01846
01847
01848
01849
01850
01851
01852
01853
01854 static INT SNL_Inner_Exit_Count(WN* wn_outer,
01855 INT nloops)
01856 {
01857 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01858 INT loop_count = 0;
01859 INT exit_count = 0;
01860 for (WN* wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
01861 if (WN_operator(wn) == OPR_DO_LOOP) {
01862 loop_count++;
01863 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01864 if (dli->Has_Exits)
01865 exit_count = loop_count;
01866 }
01867 if (wn == wn_outer)
01868 break;
01869 }
01870 return exit_count;
01871 }
01872
01873
01874
01875
01876
01877
01878
01879
01880 static void SNL_Auto_Parallelization(WN* wn_outer,
01881 INT nloops,
01882 BOOL mark_only)
01883 {
01884
01885 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
01886 ? Parallel_Debug_Level : 0;
01887
01888
01889 if (Outermore_Parallel_Construct_Or_Lego_Loop(wn_outer)
01890 || Innermore_Parallel_Or_Lego_Loop(wn_outer))
01891 return;
01892
01893
01894 WN* wn_new_outer = Minimal_Kernel(wn_outer, nloops);
01895 if (wn_new_outer == NULL)
01896 return;
01897 INT new_nloops =
01898 nloops - (Do_Loop_Depth(wn_new_outer) - Do_Loop_Depth(wn_outer));
01899
01900
01901 new_nloops -= SNL_Inner_Exit_Count(wn_new_outer, new_nloops);
01902 if (new_nloops == 0)
01903 return;
01904
01905
01906 if (new_nloops > MAX_PARALLEL_NLOOPS) {
01907 INT outer_depth = Do_Loop_Depth(wn_new_outer);
01908 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, new_nloops);
01909 DOLOOP_STACK stack(&LNO_local_pool);
01910 Build_Doloop_Stack(wn_inner, &stack);
01911 INT extra_nloops = new_nloops;
01912 for (INT i = 0; i < new_nloops; i += MAX_PARALLEL_NLOOPS) {
01913 WN* wn_local_outer = stack.Bottom_nth(outer_depth + i);
01914 INT local_nloops = extra_nloops >= MAX_PARALLEL_NLOOPS
01915 ? MAX_PARALLEL_NLOOPS : extra_nloops;
01916 SNL_Auto_Parallelization(wn_local_outer, local_nloops, mark_only);
01917 extra_nloops -= MAX_PARALLEL_NLOOPS;
01918 }
01919 return;
01920 }
01921
01922
01923 INT* permutation = CXX_NEW_ARRAY(INT, new_nloops, &LNO_local_pool);
01924 PARALLEL_INFO* pi_best = mark_only
01925 ? NULL : CXX_NEW(PARALLEL_INFO(new_nloops), &LNO_local_pool);
01926 PARALLEL_INFO* pi_best_pref = mark_only
01927 ? NULL : CXX_NEW(PARALLEL_INFO(new_nloops), &LNO_local_pool);
01928
01929
01930 ARA_LOOP_INFO *ara_root =
01931 CXX_NEW(ARA_LOOP_INFO(wn_new_outer, NULL, TRUE), &ARA_memory_pool);
01932 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_new_outer);
01933 ARA_Initialize_Loops(wn_new_outer, ara_root);
01934 dli->ARA_Info->Walk_Loop();
01935
01936
01937 SX_INFO sx_info(&LNO_local_pool);
01938 sx_info.Make_Sx_Info(wn_new_outer, new_nloops, TRUE);
01939 SX_PLIST* plist = &sx_info.Plist;
01940 SD_INFO sd_info(&LNO_local_pool);
01941 sd_info.Make_Sd_Info(wn_new_outer, new_nloops);
01942 SD_PLIST* sd_plist = &sd_info.Plist;
01943
01944
01945 BOOL *sdm_scl = Scl_Dep_Info(wn_new_outer, new_nloops);
01946 SNL_DEP_MATRIX** sdm_inv = Inv_Dep_Info(wn_new_outer, new_nloops, TRUE,
01947 FALSE);
01948
01949
01950 if (sdm_inv[new_nloops - 1] == NULL) {
01951 ARA_Cleanup(wn_new_outer);
01952 return;
01953 }
01954
01955
01956 double machine_cycles = 0.0;
01957 double work_estimate = 0.0;
01958 double min_parallel_cycles = 0.0;
01959 INT outer_depth = Do_Loop_Depth(wn_new_outer);
01960 BOOL is_fully_permutable = Fully_Permutable_Permutation(wn_new_outer,
01961 new_nloops);
01962 SNL_DEP_MATRIX** sdm_inv_np = Inv_Dep_Info(wn_new_outer, new_nloops,
01963 is_fully_permutable, TRUE);
01964 PAR_DIR_TYPE* par_directive = CXX_NEW_ARRAY(PAR_DIR_TYPE, new_nloops,
01965 &LNO_local_pool);
01966 BOOL par_pref = Parallel_Directive_Class(wn_new_outer, new_nloops,
01967 par_directive);
01968 INT i;
01969 for (i = outer_depth; i < outer_depth + new_nloops; i++) {
01970 INT ii = i - outer_depth;
01971 if (!mark_only) {
01972 machine_cycles = SNL_Machine_Cost(wn_new_outer, new_nloops, i, plist,
01973 &work_estimate, TRUE);
01974 if (work_estimate == 0.0)
01975 DevWarn("Work Estimate for loop %s at %d is 0",
01976 WB_Whirl_Symbol(wn_new_outer), (INT) WN_linenum(wn_new_outer));
01977 min_parallel_cycles = SNL_Min_Parallel_Overhead_Cost(wn_new_outer,
01978 new_nloops, i);
01979 if (!par_pref && machine_cycles + min_parallel_cycles >= pi_best->Cost())
01980 break;
01981 }
01982
01983 for (INT j = 0; j < new_nloops; j++) {
01984 if (par_directive[j] == PD_NO_CONCURRENT)
01985 continue;
01986
01987 for (INT k = 0; k < Choose(new_nloops - 1, ii); k++) {
01988 Permutation_Vector(ii, j, k, new_nloops, permutation);
01989 INT sd_split_depth = -1;
01990 INTERCHANGE_TYPE int_type = Is_Legal_Permutation_Class(wn_new_outer,
01991 permutation, new_nloops, i, &sx_info, &sd_info, sdm_inv_np,
01992 &sd_split_depth, !mark_only && parallel_debug_level >= 2,
01993 is_fully_permutable);
01994 if (int_type == INT_NONE)
01995 continue;
01996 if (mark_only) {
01997 Mark_Parallelizable_Loop(wn_new_outer, permutation, new_nloops,
01998 i, sdm_inv, sdm_scl, &sx_info, &sd_info,sd_split_depth);
01999 if (SNL_All_Parallelizable(wn_new_outer, new_nloops)) {
02000 ARA_Cleanup(wn_new_outer);
02001 return;
02002 }
02003 } else {
02004 PARALLEL_INFO* pi = CXX_NEW(PARALLEL_INFO(wn_new_outer, permutation,
02005 new_nloops, i, int_type, sdm_inv, sdm_scl, &sx_info, &sd_info,
02006 sd_split_depth, machine_cycles, work_estimate), &LNO_local_pool);
02007 if (pi->Cost() < pi_best->Cost())
02008 *pi_best = *pi;
02009 if (par_directive[j] == PD_PREFER_CONCURRENT
02010 && pi->Cost() < pi_best_pref->Cost())
02011 *pi_best_pref = *pi;
02012 }
02013 }
02014 }
02015 }
02016 if (pi_best_pref != NULL && pi_best_pref->Parallel_Depth() >= 0)
02017 *pi_best = *pi_best_pref;
02018
02019
02020 if (!mark_only && pi_best->Parallel_Depth() >= 0) {
02021 DOLOOP_STACK dist_stack(&LNO_local_pool);
02022 DOLOOP_STACK loop_stack(&LNO_local_pool);
02023 WN* wn_new_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, new_nloops);
02024 Build_Doloop_Stack(wn_new_inner, &loop_stack);
02025 WN* wn_kernel = Parallel_Interchange(wn_new_outer, pi_best->Permutation(),
02026 new_nloops, &sd_info, &sx_info, pi_best->Int_Type(),
02027 pi_best->Sd_Split_Depth(), pi_best->Split_Depth(), &dist_stack);
02028 WN* wn_parallel = Parallel_Loop(pi_best, &loop_stack);
02029 DO_LOOP_INFO* dli_parallel = Get_Do_Loop_Info(wn_parallel);
02030 dli_parallel->Suggested_Parallel = TRUE;
02031 dli_parallel->Work_Estimate = pi_best->Work_Estimate();
02032 if (pi_best->Is_Doacross()) {
02033 if (Check_Doacross_Sync_Coverage(wn_parallel,
02034 pi_best->Sync_Distances())) {
02035 dli_parallel->Is_Doacross = TRUE;
02036 dli_parallel->Doacross_Tile_Size = pi_best->Doacross_Tile_Size();
02037 dli_parallel->Sync_Distances[0]= (pi_best->Sync_Distances())[0];
02038 dli_parallel->Sync_Distances[1]= (pi_best->Sync_Distances())[1];
02039 dli_parallel->Doacross_Overhead= pi_best->Doacross_Overhead();
02040
02041 WN* parent_loop=LWN_Get_Parent(LWN_Get_Parent(wn_parallel));
02042
02043 } else {
02044
02045 dli_parallel->Suggested_Parallel = FALSE;
02046 dli_parallel->Work_Estimate = 0;
02047 }
02048 }
02049
02050 ARA_Cleanup(wn_new_outer);
02051
02052 for (i = 0; i < dist_stack.Elements(); i++) {
02053 WN* wn_loop = dist_stack.Bottom_nth(i);
02054 SNL_Auto_Parallelization(wn_loop, SNL_Loop_Count(wn_loop), mark_only);
02055 }
02056 if (LNO_Verbose || parallel_debug_level >= 1) {
02057 Print_Parallel_Loop(stdout, pi_best, &loop_stack);
02058 Print_Parallel_Loop(TFile, pi_best, &loop_stack);
02059 }
02060 if (LNO_Tlog || Get_Trace(TP_PTRACE1, TP_PTRACE1_PARALLEL)) {
02061 ap_tlog_info(pi_best, &loop_stack, "");
02062 }
02063 } else {
02064 ARA_Cleanup(wn_new_outer);
02065 }
02066 }
02067
02068
02069
02070
02071
02072
02073
02074 static void Check_Suggested_Parallel(WN* wn_tree)
02075 {
02076 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
02077 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
02078 if (dli->Suggested_Parallel && !Do_Loop_Is_Mp(wn_tree))
02079 DevWarn("Did NOT auto-parallelize suggested loop %s at %d",
02080 WB_Whirl_Symbol(wn_tree), (INT) WN_linenum(wn_tree));
02081 }
02082
02083 if (WN_opcode(wn_tree) == OPC_BLOCK) {
02084 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02085 Check_Suggested_Parallel(wn);
02086 } else {
02087 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02088 Check_Suggested_Parallel(WN_kid(wn_tree, i));
02089 }
02090 }
02091
02092
02093
02094
02095
02096
02097
02098 extern void Mark_Auto_Parallelizable_Loops(WN* func_nd)
02099 {
02100 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
02101 ? Parallel_Debug_Level : 0;
02102
02103 if (parallel_debug_level >= 1) {
02104 fprintf(stdout, "### Marking Auto-Parallel-Loops (Begin)\n");
02105 fprintf(TFile, "### Marking Auto-Parallel Loops (Begin)\n");
02106 }
02107
02108 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02109 DU_MANAGER* du = Du_Mgr;
02110
02111 FIZ_FUSE_INFO* ffi=
02112 CXX_NEW(FIZ_FUSE_INFO(&LNO_local_pool), &LNO_local_pool);
02113 ffi->Build(func_nd, TRUE);
02114
02115 for (INT i = 0; i < ffi->Num_Snl(); i++) {
02116 if (ffi->Get_Type(i) == Invalid || ffi->Get_Type(i) == Non_SNL)
02117 continue;
02118 WN* wn_outer_loop = ffi->Get_Wn(i);
02119 INT nloops = ffi->Get_Depth(i);
02120 SNL_Upper_Bound_Standardize(wn_outer_loop, nloops);
02121 Hoist_Bounds_One_Level(wn_outer_loop);
02122 SNL_Auto_Parallelization(wn_outer_loop, nloops, TRUE);
02123 }
02124
02125 if (LNO_Verbose || parallel_debug_level >= 1) {
02126 fprintf(stdout, "### Marking Auto-Parallel-Loops (End)\n");
02127 fprintf(TFile, "### Marking Auto-Parallel Loops (End)\n");
02128 }
02129 }
02130
02131
02132
02133
02134
02135
02136
02137 static void Mark_Critical_Section_Loops_Traverse(WN* wn_tree,
02138 INT critical_count)
02139 {
02140 if (WN_opcode(wn_tree) == OPC_BLOCK) {
02141 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn)) {
02142 if (WN_opcode(wn) == OPC_PRAGMA
02143 && WN_pragma(wn) == WN_PRAGMA_CRITICAL_SECTION_BEGIN) {
02144 critical_count++;
02145 } else if (WN_opcode(wn) == OPC_PRAGMA
02146 && WN_pragma(wn) == WN_PRAGMA_CRITICAL_SECTION_END) {
02147 critical_count--;
02148 } else {
02149 Mark_Critical_Section_Loops_Traverse(wn, critical_count);
02150 }
02151 }
02152 } else {
02153 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
02154 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
02155 if (critical_count > 0)
02156 dli->Inside_Critical_Section = TRUE;
02157 }
02158 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02159 Mark_Critical_Section_Loops_Traverse(WN_kid(wn_tree, i), critical_count);
02160 }
02161 }
02162
02163
02164
02165
02166
02167
02168
02169 extern void Mark_Critical_Section_Loops(WN* func_nd)
02170 {
02171 Mark_Critical_Section_Loops_Traverse(func_nd, 0);
02172 }
02173
02174
02175
02176
02177
02178
02179
02180 static void Mark_Threadprivate_Loops_Traverse(WN* wn_tree)
02181 {
02182 if (WN_operator(wn_tree) == OPR_BLOCK) {
02183 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02184 Mark_Threadprivate_Loops_Traverse(wn);
02185 } else {
02186 if (OPERATOR_has_sym(WN_operator(wn_tree)) && WN_st(wn_tree) != NULL
02187 && (ST_base(WN_st(wn_tree)) != WN_st(wn_tree)
02188 && ST_sclass(ST_base(WN_st(wn_tree))) == SCLASS_COMMON
02189 && ST_is_thread_private(ST_base(WN_st(wn_tree)))
02190 #ifdef KEY
02191
02192
02193
02194
02195
02196
02197
02198 || strncmp(ST_name(WN_st(wn_tree)), "__ppthd_common_", 15) == 0
02199 #endif
02200 || ST_is_thread_private(WN_st(wn_tree)))) {
02201 for (WN* wn = wn_tree; wn != NULL; wn = LWN_Get_Parent(wn)) {
02202 if (WN_operator(wn) == OPR_DO_LOOP) {
02203 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02204 dli->Has_Threadprivate = TRUE;
02205 }
02206 }
02207 }
02208 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02209 Mark_Threadprivate_Loops_Traverse(WN_kid(wn_tree, i));
02210 }
02211 }
02212
02213
02214
02215
02216
02217
02218
02219 extern void Mark_Threadprivate_Loops(WN* func_nd)
02220 {
02221 if (Run_autopar)
02222 Mark_Threadprivate_Loops_Traverse(func_nd);
02223 }
02224
02225
02226
02227
02228
02229
02230 extern void IPA_LNO_Evaluate_Call_Infos_Traverse(WN* wn_tree)
02231 {
02232 if (WN_operator(wn_tree) == OPR_CALL && Has_Call_Info(wn_tree))
02233 Get_Call_Info(wn_tree)->Evaluate();
02234
02235 if (WN_operator(wn_tree) == OPR_BLOCK) {
02236 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02237 IPA_LNO_Evaluate_Call_Infos_Traverse(wn);
02238 } else {
02239 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02240 IPA_LNO_Evaluate_Call_Infos_Traverse(WN_kid(wn_tree, i));
02241 }
02242 }
02243
02244
02245
02246
02247
02248
02249 extern void IPA_LNO_Evaluate_Call_Infos(WN* func_nd)
02250 {
02251 if (!LNO_IPA_Enabled)
02252 return;
02253 IPA_LNO_Evaluate_Call_Infos_Traverse(func_nd);
02254 }
02255
02256
02257
02258
02259
02260
02261 extern void IPA_LNO_Unevaluate_Call_Infos_Traverse(WN* wn_tree)
02262 {
02263 if (WN_operator(wn_tree) == OPR_CALL && Has_Call_Info(wn_tree))
02264 Get_Call_Info(wn_tree)->Unevaluate();
02265
02266 if (WN_operator(wn_tree) == OPR_BLOCK) {
02267 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02268 IPA_LNO_Unevaluate_Call_Infos_Traverse(wn);
02269 } else {
02270 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02271 IPA_LNO_Unevaluate_Call_Infos_Traverse(WN_kid(wn_tree, i));
02272 }
02273 }
02274
02275
02276
02277
02278
02279
02280 extern void IPA_LNO_Unevaluate_Call_Infos(WN* func_nd)
02281 {
02282 if (!LNO_IPA_Enabled)
02283 return;
02284 IPA_LNO_Unevaluate_Call_Infos_Traverse(func_nd);
02285 }
02286
02287
02288
02289
02290
02291
02292
02293 extern void Auto_Parallelization(PU_Info* current_pu,
02294 WN* func_nd)
02295 {
02296 #ifdef KEY
02297 static INT pu_num = -1;
02298 pu_num ++;
02299 if (pu_num > LNO_Apo_Skip_After ||
02300 pu_num < LNO_Apo_Skip_Before ||
02301 pu_num == LNO_Apo_Skip_Equal)
02302 return;
02303
02304 Last_Apo_Loop_Id = 0;
02305 #endif
02306
02307 extern BOOL running_cross_loop_analysis;
02308
02309 if (!(Run_autopar && LNO_Run_AP > 0)
02310 || Get_Trace(TP_LNOPT2, TT_LNO_NO_AUTO_PARALLEL)) {
02311 Annotate_For_Mp_Lowering(current_pu, func_nd);
02312 if (Run_prompf) {
02313 Print_Prompf_Transaction_Log(FALSE);
02314 Print_Prompf_Parallelization_Log(func_nd);
02315 Print_Prompf_Doacross_Log(current_pu, func_nd, FALSE);
02316 Print_Prompf_Parallel_Region_Log(current_pu, func_nd, FALSE);
02317 Print_Prompf_Nest_Log(func_nd, FALSE);
02318 }
02319 if (LNO_Prompl)
02320 Print_Prompl_Msgs(current_pu, func_nd);
02321 return;
02322 }
02323
02324
02325 #ifdef KEY //Bug 9731: update array access information before apo
02326
02327 LNO_Build_Access(func_nd, &LNO_default_pool);
02328 #endif
02329
02330 MEM_POOL_Push(&LNO_local_pool);
02331 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
02332 ? Parallel_Debug_Level : 0;
02333
02334
02335 if (parallel_debug_level >= 1) {
02336 fprintf(stdout, "### Auto-parallelization (Begin)\n");
02337 fprintf(TFile, "### Auto-parallelization (Begin)\n");
02338 }
02339
02340 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02341 DU_MANAGER* du = Du_Mgr;
02342
02343 if (LNO_Cross_Loop) {
02344 Cross_Loop_Cache_Analysis(current_pu, func_nd);
02345 }
02346
02347 FIZ_FUSE_INFO* ffi=
02348 CXX_NEW(FIZ_FUSE_INFO(&LNO_local_pool), &LNO_local_pool);
02349 ffi->Build(func_nd, TRUE);
02350
02351 for (INT i = 0; i < ffi->Num_Snl(); i++) {
02352 if (ffi->Get_Type(i) == Invalid || ffi->Get_Type(i) == Non_SNL)
02353 continue;
02354 WN* wn_outer_loop = ffi->Get_Wn(i);
02355 INT nloops = ffi->Get_Depth(i);
02356 SNL_Upper_Bound_Standardize(wn_outer_loop, nloops);
02357 Hoist_Bounds_One_Level(wn_outer_loop);
02358 SNL_Auto_Parallelization(wn_outer_loop, nloops, FALSE);
02359 }
02360
02361 Perform_ARA_and_Parallelization(current_pu, func_nd);
02362 Check_Suggested_Parallel(func_nd);
02363
02364 if (LNO_Verbose || parallel_debug_level >= 1) {
02365 fprintf(stdout, "### Auto-parallelization (End)\n");
02366 fprintf(TFile, "### Auto-parallelization (End)\n");
02367 }
02368 MEM_POOL_Pop(&LNO_local_pool);
02369 }
02370
02371
02372
02373
02374
02375
02376
02377
02378
02379
02380
02381
02382
02383
02384
02385
02386
02387
02388
02389
02390
02391
02392 static double Doacross_Cost(WN* wn_outer,
02393 INT permutation[],
02394 INT nloops,
02395 INT parallel_depth,
02396 SNL_DEP_MATRIX** sdm_inv,
02397 BOOL sdm_scl[],
02398 SX_INFO* sx_info,
02399 SD_INFO* sd_info,
02400 INT sd_split_depth,
02401 double machine_cycles,
02402 double *cache_cycles_per_iter,
02403 double work_estimate,
02404 INT* doacross_tile_size_p,
02405 INT sync_distances[],
02406 INT* doacross_overhead_p)
02407 {
02408 *cache_cycles_per_iter = 0.0;
02409
02410 INT outer_depth=Do_Loop_Depth(wn_outer);
02411
02412
02413 if (parallel_depth < outer_depth)
02414 return (double)DBL_MAX;
02415
02416 if (parallel_depth >= outer_depth+nloops-1)
02417 return DBL_MAX;
02418
02419
02420
02421 if (!Is_Perfectly_Nested(wn_outer, permutation, nloops, parallel_depth))
02422 return DBL_MAX;
02423
02424 MEM_POOL_Push(&LNO_local_pool);
02425
02426
02427
02428 DOLOOP_STACK* loop_stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02429 &LNO_local_pool);
02430 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
02431 Build_Doloop_Stack(wn_inner, loop_stack);
02432 INT outer=MIN(outer_depth+permutation[parallel_depth-outer_depth],
02433 outer_depth+permutation[parallel_depth-outer_depth+1]);
02434 INT inner=MAX(outer_depth+permutation[parallel_depth-outer_depth],
02435 outer_depth+permutation[parallel_depth-outer_depth+1]);
02436 if (!SNL_Is_Invariant(loop_stack, outer, inner)) {
02437 MEM_POOL_Pop(&LNO_local_pool);
02438 return DBL_MAX;
02439 }
02440
02441
02442
02443 SNL_DEP_MATRIX** sdm_red = Red_Dep_Info(wn_outer, permutation, nloops,
02444 parallel_depth, TRUE, FALSE);
02445 INT red_split_depth = Parallelizable_At_Depth(wn_outer, nloops,
02446 permutation, sdm_red, sdm_scl, sx_info, sd_info, sd_split_depth,
02447 parallel_depth);
02448
02449 MEM_POOL_Pop(&LNO_local_pool);
02450
02451
02452 if (red_split_depth == Do_Loop_Depth(wn_outer) + nloops)
02453 return DBL_MAX;
02454
02455 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
02456 ? Parallel_Debug_Level : 0;
02457
02458 if (parallel_debug_level >= 2)
02459 Print_Permutation_Vector(stdout,permutation,nloops,parallel_depth,TRUE);
02460
02461
02462
02463 BOOL *retained=CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
02464 for (INT i=0; i<nloops; i++)
02465 retained[i]=SNL_Perm_Retained_Section(i,permutation,nloops);
02466 Compute_Sync_Distances(wn_outer,nloops,permutation,parallel_depth,
02467 sdm_inv,retained,sync_distances);
02468
02469
02470
02471 INT doacross_tile_size=
02472 Get_Doacross_Tile_Size(sync_distances,wn_outer, permutation, nloops,
02473 parallel_depth,NOMINAL_PROCS,work_estimate);
02474 (*doacross_tile_size_p) = doacross_tile_size;
02475
02476
02477
02478 double doall_cycle;
02479 {
02480 MEM_POOL_Push(&LNO_local_pool);
02481 PAR_STAT::id_count = 0;
02482 PAR_STAT* ps = CXX_NEW(PAR_STAT(wn_outer, nloops, &LNO_local_pool),
02483 &LNO_local_pool);
02484 ps = ps->Parallel_Interchange(wn_outer, permutation, nloops,
02485 parallel_depth, sd_split_depth, red_split_depth);
02486 doall_cycle = ps->Cycle_Count(wn_outer, permutation, nloops,
02487 parallel_depth, &sx_info->Plist, red_split_depth, machine_cycles,
02488 cache_cycles_per_iter, TRUE);
02489 MEM_POOL_Pop(&LNO_local_pool);
02490 }
02491
02492
02493 double doacross_delay_cycle=
02494 Compute_Doacross_Delay_Cycle(wn_outer, permutation, parallel_depth,
02495 NOMINAL_PROCS, doacross_tile_size,
02496 sync_distances, machine_cycles);
02497
02498 double doacross_sync_cycle=
02499 Compute_Doacross_Sync_Cycle(wn_outer, permutation, parallel_depth,
02500 doacross_tile_size, sync_distances);
02501
02502
02503
02504 double cost;
02505 if (doacross_delay_cycle == DBL_MAX)
02506 cost = DBL_MAX;
02507 else
02508 cost = doall_cycle + doacross_delay_cycle + doacross_sync_cycle;
02509
02510 (*doacross_overhead_p) = int(doacross_delay_cycle + doacross_sync_cycle);
02511 if (parallel_debug_level >= 2) {
02512 printf(" sync vectors = ");
02513 if (sync_distances[0]!= NULL_DIST)
02514 printf("(%d -1) ",sync_distances[0]);
02515 if (sync_distances[1]!= NULL_DIST)
02516 printf("(%d 1)",sync_distances[1]);
02517 printf("\n");
02518 if (doacross_delay_cycle == DBL_MAX) {
02519 printf(" delay cycles = inf\n");
02520 printf(" sync cycles = inf\n");
02521 printf(" *doacross cycles = inf\n");
02522 } else {
02523 printf(" delay cycles = %13.2f\n", doacross_delay_cycle);
02524 printf(" sync cycles = %13.2f\n", doacross_sync_cycle);
02525 printf(" *doacross cycles = %13.2f\n", cost);
02526 }
02527 }
02528
02529 return cost;
02530 }
02531
02532
02533
02534
02535
02536
02537
02538
02539
02540
02541
02542 extern BOOL Is_Privatizable_With_Context(
02543 WN* loop,
02544 WN* wn,
02545 BOOL definitely)
02546 {
02547
02548 ARA_LOOP_INFO* ara_loop_info=Get_Do_Loop_Info(loop)->ARA_Info;
02549 if (!ara_loop_info)
02550 return FALSE;
02551 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02552 if (ara_loop_info->Is_Privatizable(wn,definitely)) {
02553 VINDEX16 v=dg->Get_Vertex(wn);
02554 if (v==0)
02555 return FALSE;
02556 EINDEX16 e;
02557 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
02558 WN* wn_sink = dg->Get_Wn(dg->Get_Sink(e));
02559 if (!Wn_Is_Inside(wn_sink,loop))
02560 continue;
02561 if (ara_loop_info->Is_Privatizable(wn_sink,definitely))
02562 continue;
02563 else {
02564 return FALSE;
02565 }
02566 }
02567 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
02568 WN* wn_source = dg->Get_Wn(dg->Get_Source(e));
02569 if (!Wn_Is_Inside(wn_source,loop))
02570 continue;
02571 if (ara_loop_info->Is_Privatizable(wn_source,definitely))
02572 continue;
02573 else {
02574 return FALSE;
02575 }
02576 }
02577 return TRUE;
02578 } else
02579 return FALSE;
02580 }
02581
02582
02583 #include "cross_parallel.cxx"