00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #define __STDC_LIMIT_MACROS
00043 #include <stdint.h>
00044 #ifdef USE_PCH
00045 #include "lno_pch.h"
00046 #endif // USE_PCH
00047 #pragma hdrstop
00048
00049 #include <sys/types.h>
00050 #include <alloca.h>
00051 #include <ctype.h>
00052 #include <limits.h>
00053
00054 #include "pu_info.h"
00055 #include "snl.h"
00056 #include "lnopt_main.h"
00057 #include "config_targ.h"
00058 #include "lwn_util.h"
00059 #include "lnoutils.h"
00060 #include "cxx_graph.h"
00061 #include "opt_du.h"
00062 #include "opt_alias_interface.h"
00063 #include "wintrinsic.h"
00064 #include "scalar_expand.h"
00065
00066 #include "strtab.h"
00067 #include "dvector.h"
00068 #include "small_trips.h"
00069
00070 #include "lego_gen.h"
00071 #include "lego_opts.h"
00072 #include "lego_util.h"
00073 #include "lego_pragma.h"
00074
00075 #include "ir_reader.h"
00076 #include "permute.h"
00077 #include "snl_utils.h"
00078 #include "move.h"
00079 #include "debug.h"
00080 #include "tlog.h"
00081 #include "wn_pragmas.h"
00082 #include "wintrinsic.h"
00083 #include "soe.h"
00084 #include "cond.h"
00085 #include "parmodel.h"
00086 #include "prompf.h"
00087 #include "anl_driver.h"
00088 #include "doacross.h"
00089 #include "ff_utils.h"
00090 #include "parallel.h"
00091 #include "split_tiles.h"
00092 #include "wn_mp.h"
00093 #include "fb_whirl.h"
00094
00095 #pragma weak New_Construct_Id
00096
00097 #define LEGO_BOGUS_VALUE 100
00098 #define LEGO_NAME_LENGTH 256
00099 #define TLOG_STRING_LENGTH 1000
00100
00101 enum LMT_VALUE {LMT_LEGO, LMT_MP, LMT_LEGO_MP};
00102
00103 static void Lego_Tile_Traverse(WN* wn_tree, BOOL LNO_Ozero);
00104 static void Mp_Tile_Traverse(WN* wn_tree);
00105 static void Lego_Mp_Tile_Traverse(WN* wn_tree, BOOL LNO_Ozero);
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 static void Lego_Build_DU_For_Ldids(WN* wn_ldid,
00116 SYMBOL sym_ldid,
00117 WN* wn_def,
00118 WN* loop,
00119 BOOL do_alias)
00120 {
00121 DU_MANAGER* du = Du_Mgr;
00122 if (WN_operator(wn_ldid) == OPR_LDID
00123 && SYMBOL(wn_ldid) == sym_ldid) {
00124 du->Add_Def_Use(wn_def, wn_ldid);
00125 DEF_LIST *def_list = du->Ud_Get_Def(wn_ldid);
00126 def_list->Set_loop_stmt(loop);
00127 if (do_alias)
00128 Duplicate_alias_info(Alias_Mgr, wn_def, wn_ldid);
00129 }
00130
00131 if (WN_opcode(wn_ldid) == OPC_BLOCK) {
00132 for (WN* wn = WN_first(wn_ldid); wn != NULL; wn = WN_next(wn))
00133 Lego_Build_DU_For_Ldids(wn, sym_ldid, wn_def, loop, do_alias);
00134 } else {
00135 for (INT i = 0; i < WN_kid_count(wn_ldid); i++)
00136 Lego_Build_DU_For_Ldids(WN_kid(wn_ldid, i), sym_ldid, wn_def, loop,
00137 do_alias);
00138 }
00139 }
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149 static void Processor_Update_Inner_Tile(WN* wn_bound,
00150 WN* loop,
00151 WN* outer_loop,
00152 SYMBOL* new_lb,
00153 SYMBOL* new_ub,
00154 SYMBOL* new_step,
00155 SYMBOL* pid)
00156 {
00157 DU_MANAGER* du = Du_Mgr;
00158
00159
00160 DOLOOP_STACK lb_stack(&LNO_local_pool);
00161 Lego_Find_Nodes(OPR_STID, new_lb, wn_bound, &lb_stack);
00162 DOLOOP_STACK ub_stack(&LNO_local_pool);
00163 Lego_Find_Nodes(OPR_STID, new_ub, wn_bound, &ub_stack);
00164 DOLOOP_STACK step_stack(&LNO_local_pool);
00165 if (new_step != NULL)
00166 Lego_Find_Nodes(OPR_STID, new_step, wn_bound, &step_stack);
00167 FmtAssert(lb_stack.Elements() > 0 && ub_stack.Elements() > 0,
00168 ("Could not find STIDs to lower and/or upper bounds."));
00169
00170
00171
00172 WN* wnn = NULL;
00173 for (WN* wn = WN_first(wn_bound); wn != NULL; wn = wnn) {
00174 wnn = WN_next(wn);
00175 LWN_Extract_From_Block(wn);
00176 LWN_Insert_Block_Before(LWN_Get_Parent(loop), loop, wn);
00177 Lego_Build_DU_For_Ldids(wn, *pid, WN_start(outer_loop), outer_loop,
00178 TRUE);
00179 Lego_Build_DU_For_Ldids(wn, *pid, WN_step(outer_loop), outer_loop,
00180 FALSE);
00181 }
00182 LWN_Delete_Tree(wn_bound);
00183
00184
00185
00186 WN* wn_stid_lb = lb_stack.Bottom_nth(0);
00187 SYMBOL sym_stid_lb(wn_stid_lb);
00188 WN* wn_stid_ub = ub_stack.Bottom_nth(0);
00189 SYMBOL sym_stid_ub(wn_stid_ub);
00190 TYPE_ID wtype_lb = sym_stid_lb.Type;
00191 TYPE_ID wtype_ub = sym_stid_ub.Type;
00192 OPCODE opld_lb = OPCODE_make_op(OPR_LDID, wtype_lb, wtype_lb);
00193 OPCODE opld_ub = OPCODE_make_op(OPR_LDID, wtype_ub, wtype_ub);
00194 WN* lb_ldid = LWN_CreateLdid(opld_lb, wn_stid_lb);
00195 INT i;
00196 for (i = 0; i < lb_stack.Elements(); i++)
00197 du->Add_Def_Use(lb_stack.Bottom_nth(i), lb_ldid);
00198 Replace_Wnexp_With_Exp_Copy(WN_kid0(WN_start(loop)), lb_ldid, du);
00199 LWN_Delete_Tree(lb_ldid);
00200 WN* ub_ldid = LWN_CreateLdid(opld_ub, wn_stid_ub);
00201 for (i = 0; i < ub_stack.Elements(); i++)
00202 du->Add_Def_Use(ub_stack.Bottom_nth(i), ub_ldid);
00203 Replace_Wnexp_With_Exp_Copy(UBexp(WN_end(loop)), ub_ldid, du);
00204 LWN_Delete_Tree(ub_ldid);
00205 if (new_step != NULL) {
00206 WN* wn_stid_st = step_stack.Bottom_nth(0);
00207 SYMBOL index = SYMBOL(WN_start(loop));
00208 WN* step_add = WN_kid0(WN_step(loop));
00209 WN* step_exp = WN_operator(WN_kid0(step_add)) == OPR_LDID
00210 && SYMBOL(WN_kid0(step_add)) == index
00211 ? WN_kid1(step_add) : WN_kid0(step_add);
00212 if (WN_operator(WN_kid0(wn_stid_st)) == OPR_INTCONST) {
00213 Replace_Wnexp_With_Exp_Copy(step_exp, WN_kid0(wn_stid_st), du);
00214 LWN_Delete_Tree(wn_stid_st);
00215 } else {
00216 SYMBOL sym_stid_st(wn_stid_st);
00217 TYPE_ID wtype_st = sym_stid_st.Type;
00218 OPCODE opld_st = OPCODE_make_op(OPR_LDID, wtype_st, wtype_st);
00219 WN* step_ldid = LWN_CreateLdid(opld_st, wn_stid_st);
00220 for (i = 0; i < step_stack.Elements(); i++)
00221 du->Add_Def_Use(step_stack.Bottom_nth(i), step_ldid);
00222 Replace_Wnexp_With_Exp_Copy(step_exp, step_ldid, du);
00223 LWN_Delete_Tree(step_ldid);
00224 }
00225 }
00226 }
00227
00228
00229
00230
00231
00232
00233
00234 static BOOL Is_Versioned_Mp_Region(WN* wn_region)
00235 {
00236 if (LWN_Get_Parent(wn_region) != NULL
00237 && LWN_Get_Parent(LWN_Get_Parent(wn_region)) != NULL
00238 && WN_opcode(LWN_Get_Parent(LWN_Get_Parent(wn_region))) == OPC_IF
00239 && WN_Is_If_MpVersion(LWN_Get_Parent(LWN_Get_Parent(wn_region))))
00240 return TRUE;
00241 return FALSE;
00242 }
00243
00244
00245
00246
00247
00248
00249
00250 extern BOOL Is_Versioned_Mp_Loop(WN* wn_loop)
00251 {
00252 for (WN* wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn))
00253 if (WN_operator(wn) == OPR_REGION
00254 && Is_Versioned_Mp_Region(wn))
00255 return TRUE;
00256 return FALSE;
00257 }
00258
00259
00260
00261
00262
00263
00264
00265
00266 static void Processor_Update_Outer_Tile(WN* outer_loop,
00267 WN* loop,
00268 SYMBOL* pid,
00269 BOOL lego_tile,
00270 BOOL negative_stride)
00271 {
00272
00273 DU_MANAGER* du = Du_Mgr;
00274 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
00275 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00276 if (dli->Mp_Info != NULL) {
00277 dli_outer->Mp_Info = dli->Mp_Info;
00278 dli->Mp_Info = NULL;
00279 }
00280
00281
00282
00283 TYPE_ID wtype = Do_Wtype(loop);
00284 WN* wn_zero = LWN_Make_Icon(Promote_Type(wtype), 0);
00285 if (negative_stride)
00286 Replace_Wnexp_With_Exp_Copy(UBexp(WN_end(outer_loop)), wn_zero, du);
00287 else
00288 Replace_Wnexp_With_Exp_Copy(WN_kid0(WN_start(outer_loop)), wn_zero, du);
00289 LWN_Delete_Tree(wn_zero);
00290 SYMBOL outer_index = SYMBOL(WN_start(outer_loop));
00291 WN* outer_step_add = WN_kid0(WN_step(outer_loop));
00292 WN* outer_step_exp = WN_operator(WN_kid0(outer_step_add))
00293 == OPR_LDID && SYMBOL(WN_kid0(outer_step_add)) == outer_index
00294 ? WN_kid1(outer_step_add) : WN_kid0(outer_step_add);
00295 WN* wn_stride = LWN_Make_Icon(Promote_Type(wtype), negative_stride ? -1 : 1);
00296 Replace_Wnexp_With_Exp_Copy(outer_step_exp, wn_stride, du);
00297 LWN_Delete_Tree(wn_stride);
00298 DOLOOP_STACK pid_stack(&LNO_local_pool);
00299 Lego_Find_Nodes(OPR_LDID, pid, WN_do_body(outer_loop), &pid_stack);
00300 INT i;
00301 for (i = 0; i < pid_stack.Elements(); i++) {
00302 WN* wn_use = pid_stack.Bottom_nth(i);
00303 du->Add_Def_Use(WN_start(outer_loop), wn_use);
00304 du->Add_Def_Use(WN_step(outer_loop), wn_use);
00305 }
00306 for (i = 0; i < pid_stack.Elements(); i++) {
00307 WN* wn_use = pid_stack.Bottom_nth(i);
00308 DEF_LIST *def_list = du->Ud_Get_Def(wn_use);
00309 def_list->Set_loop_stmt(outer_loop);
00310 }
00311 WN* num_threads = NULL;
00312 if (lego_tile)
00313 if (dli->Lego_Info->Dynamic_Affinity()) {
00314 WN* wn_numthreads_code = NULL;
00315 WN* wn = 0;
00316 for (wn = outer_loop; wn != NULL; wn = LWN_Get_Parent(wn)) {
00317 if (WN_opcode(wn) == OPC_DO_LOOP) {
00318 DO_LOOP_INFO* dli_wn = Get_Do_Loop_Info(wn);
00319 if (dli_wn->Mp_Info != NULL && dli_wn->Mp_Info->Nest_Index() == 0)
00320 break;
00321 }
00322 }
00323 FmtAssert(wn != NULL, ("Could not find outermost doacross loop"));
00324 WN* wn_outer_doacross = wn;
00325 num_threads = Numprocs(dli->Lego_Info->Array()->St(),
00326 dli->Lego_Info->Dim_Num(),
00327 Do_Depth(wn_outer_doacross) > 0,
00328 &wn_numthreads_code);
00329 WN* wnn = NULL;
00330 for (wn = WN_first(wn_numthreads_code); wn != NULL; wn = wnn) {
00331 wnn = WN_next(wn);
00332 LWN_Extract_From_Block(wn);
00333 LWN_Insert_Block_Before(LWN_Get_Parent(wn_outer_doacross),
00334 wn_outer_doacross, wn);
00335 LWN_Copy_Linenumber(wn_outer_doacross,wn);
00336
00337
00338
00339
00340 if (WN_opcode(wn) == OPC_IF) {
00341 IF_INFO *ii=CXX_NEW (IF_INFO(&LNO_default_pool,
00342 Find_SCF_Inside(wn, OPC_DO_LOOP) != NULL,
00343 Find_SCF_Inside(wn, OPC_REGION) != NULL), &LNO_default_pool);
00344 WN_MAP_Set(LNO_Info_Map,wn,(void *)ii);
00345 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00346 &LNO_local_pool);
00347 Build_Doloop_Stack(wn, stack);
00348 LNO_Build_If_Access(wn, stack);
00349 CXX_DELETE(stack, &LNO_local_pool);
00350 }
00351 }
00352 LWN_Delete_Tree(wn_numthreads_code);
00353 } else {
00354 num_threads = Get_Numthreads_Ldid(dli->Lego_Info);
00355 }
00356 else if (dli_outer->Mp_Info->Is_Pdo()) {
00357 if (Is_Versioned_Mp_Loop(outer_loop))
00358 num_threads = Get_Runtime_Cur_Numthreads_Ldid();
00359 else
00360 num_threads = Get_Runtime_Cur_Numthreads_Func(outer_loop);
00361 } else
00362 num_threads = Get_Frozen_Numthreads_Ldid(outer_loop);
00363 TYPE_ID desc = WN_rtype(num_threads);
00364 OPCODE subop = OPCODE_make_op(OPR_SUB, desc, MTYPE_V);
00365 WN* wn_one = LWN_Make_Icon(Promote_Type(wtype), 1);
00366 WN* wn_ub = LWN_CreateExp2(subop, num_threads, wn_one);
00367 WN* wn1;
00368 WN* wn_parent;
00369 if (negative_stride)
00370 wn1 = WN_kid0(WN_start(outer_loop));
00371 else
00372 wn1 = UBexp(WN_end(outer_loop));
00373 wn_parent = LWN_Get_Parent(wn1);
00374 if (WN_desc(wn_parent) != WN_rtype(wn_ub))
00375 wn_ub = LWN_Integer_Cast(wn_ub, WN_desc(wn_parent), WN_rtype(wn_ub));
00376 for (i=0; i<WN_kid_count(wn_parent); i++)
00377 if (WN_kid(wn_parent,i)==wn1) {
00378 WN_kid(wn_parent, i) = wn_ub;
00379 LWN_Set_Parent(wn_ub, wn_parent);
00380 LWN_Delete_Tree(wn1);
00381 break;
00382 }
00383 if (negative_stride) {
00384 OPCODE op = WN_opcode(WN_end(outer_loop));
00385 OPCODE op_inv = OPCODE_make_op(OPR_GE, OPCODE_rtype(op), OPCODE_desc(op));
00386 WN_set_opcode(WN_end(outer_loop), op_inv);
00387 }
00388 }
00389
00390
00391
00392
00393
00394
00395
00396 static BOOL Has_Calls(WN* wn_tree)
00397 {
00398 LWN_ITER* itr = LWN_WALK_TreeIter(wn_tree);
00399 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
00400 WN* wn = itr->wn;
00401 if (OPCODE_is_call(WN_opcode(wn)))
00402 return TRUE;
00403 }
00404 return FALSE;
00405 }
00406
00407
00408
00409
00410
00411
00412
00413 static BOOL Has_Live_Out_Index_Variable(WN* wn_loop)
00414 {
00415 USE_LIST *use_list = Du_Mgr->Du_Get_Use(WN_start(wn_loop));
00416 if (use_list == NULL || use_list->Incomplete())
00417 return TRUE;
00418 USE_LIST_ITER iter(use_list);
00419 const DU_NODE* node = NULL;
00420 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00421 WN* wn_use = node->Wn();
00422 if (!Wn_Is_Inside(wn_use, wn_loop))
00423 return TRUE;
00424 }
00425 return FALSE;
00426 }
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437 static BOOL Index_Variable_Is_Last_Local(WN* wn_loop)
00438 {
00439 if (!Do_Loop_Is_Mp(wn_loop))
00440 return FALSE;
00441 WN* wn_start = WN_start(wn_loop);
00442 if (!Has_Live_Out_Index_Variable(wn_loop))
00443 return FALSE;
00444 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
00445 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
00446 SYMBOL sym_index(WN_index(wn_loop));
00447 for (WN* wn = wn_first; wn != NULL; wn = WN_next(wn)) {
00448 if ((WN_opcode(wn) == OPC_PRAGMA || WN_opcode(wn) == OPC_XPRAGMA)
00449 && WN_pragma(wn) == WN_PRAGMA_LASTLOCAL
00450 && WN_st(wn) == sym_index.St()
00451 && WN_pragma_arg1(wn) == sym_index.WN_Offset())
00452 return TRUE;
00453 }
00454 return FALSE;
00455 }
00456
00457
00458
00459
00460
00461
00462
00463 static WN* Initialize_Doacross_Last_Local_Index(WN* wn_loop)
00464 {
00465 WN* wn_start = WN_start(wn_loop);
00466 WN* wn_copy = LWN_Copy_Tree(wn_start, TRUE, LNO_Info_Map);
00467 LWN_Copy_Def_Use(WN_kid0(wn_start), WN_kid0(wn_copy), Du_Mgr);
00468 Copy_alias_info(Alias_Mgr, wn_start, wn_copy);
00469 USE_LIST *ul_start = Du_Mgr->Du_Get_Use(wn_start);
00470 if (ul_start != NULL) {
00471 if (ul_start->Incomplete())
00472 Du_Mgr->Du_Set_Incomplete(wn_copy);
00473 USE_LIST_ITER iter(ul_start);
00474 const DU_NODE* node = NULL;
00475 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00476 WN* wn_use = node->Wn();
00477 if (!Wn_Is_Inside(wn_use, wn_loop))
00478 Du_Mgr->Add_Def_Use(wn_copy, wn_use);
00479 }
00480 }
00481 LWN_Insert_Block_Before(LWN_Get_Parent(wn_loop), wn_loop, wn_copy);
00482 return wn_copy;
00483 }
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497 static WN* Initialize_Pdo_Last_Local_Index(WN* wn_loop)
00498 {
00499
00500 WN* wn_trip_count = Trip_Count(wn_loop);
00501 TYPE_ID index_type = Promote_Type(Do_Wtype((WN *) wn_loop));
00502 WN* wn_zero = LWN_Make_Icon(index_type, 0);
00503 OPCODE op_lt = OPCODE_make_op(OPR_LT, Boolean_type, index_type);
00504 WN* wn_trip_test = LWN_CreateExp2(op_lt, wn_trip_count, wn_zero);
00505 WN* wn_if = LWN_CreateIf(wn_trip_test, WN_CreateBlock(), WN_CreateBlock());
00506 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
00507 LWN_Insert_Block_Before(LWN_Get_Parent(wn_region), wn_region, wn_if);
00508 WN_Set_Linenum(wn_if, WN_Get_Linenum(wn_loop));
00509 IF_INFO* ii_trip =
00510 CXX_NEW(IF_INFO(&LNO_default_pool, FALSE, FALSE), &LNO_default_pool);
00511 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii_trip);
00512 DOLOOP_STACK stk(&LNO_local_pool);
00513 Build_Doloop_Stack(wn_if, &stk);
00514 LNO_Build_If_Access(wn_if, &stk);
00515
00516
00517 PREG_NUM rreg1, rreg2;
00518 OPCODE op_call = OPCODE_make_op(OPR_CALL, MTYPE_I4, MTYPE_V);
00519 WN* wn_call = WN_Create(op_call, 0);
00520 WN_Set_Linenum(wn_call, WN_Get_Linenum(wn_loop));
00521 WN_st_idx(wn_call) = ST_st_idx(distr_st_entries[mp_my_threadnum]);
00522 Set_Runtime_Call_Side_Effects(wn_call);
00523 LWN_Insert_Block_Before(WN_then(wn_if), NULL, wn_call);
00524 ST* rst = Find_Return_Registers (Pointer_type, &rreg1, &rreg2);
00525 FmtAssert(rreg1 != 0 && rreg2 == 0,
00526 ("Initialize_Pdo_Last_Local_Index: Bad MTYPE_I4 return regs"));
00527 SYMBOL preg = Create_Preg_Symbol ("$myid", MTYPE_I4);
00528 WN* wn_myid = WN_CreateLdid (OPCODE_make_op(OPR_LDID, MTYPE_I4, MTYPE_I4),
00529 rreg1, rst, Be_Type_Tbl(MTYPE_I4));
00530 Create_alias(Alias_Mgr, wn_myid);
00531 Du_Mgr->Add_Def_Use(wn_call, wn_myid);
00532 OPCODE op_eq = OPCODE_make_op(OPR_EQ, Boolean_type, MTYPE_I4);
00533 WN* wn_zero_copy = LWN_Make_Icon(MTYPE_I4, 0);
00534 WN* wn_thread_test = LWN_CreateExp2(op_eq, wn_myid, wn_zero_copy);
00535 WN* wn_thread_if = LWN_CreateIf(wn_thread_test, WN_CreateBlock(),
00536 WN_CreateBlock());
00537 LWN_Insert_Block_After(LWN_Get_Parent(wn_call), wn_call, wn_thread_if);
00538 WN_Set_Linenum(wn_thread_if, WN_Get_Linenum(wn_loop));
00539 IF_INFO* ii_thread =
00540 CXX_NEW(IF_INFO(&LNO_default_pool, FALSE, FALSE), &LNO_default_pool);
00541 WN_MAP_Set(LNO_Info_Map, wn_thread_if, (void *) ii_thread);
00542 DOLOOP_STACK stk_thread(&LNO_local_pool);
00543 Build_Doloop_Stack(wn_thread_if, &stk_thread);
00544 LNO_Build_If_Access(wn_thread_if, &stk_thread);
00545
00546
00547 WN* wn_init = Initialize_Doacross_Last_Local_Index(wn_loop);
00548 LWN_Extract_From_Block(wn_init);
00549 LWN_Insert_Block_Before(WN_then(wn_thread_if), NULL, wn_init);
00550 return wn_if;
00551 }
00552
00553
00554
00555
00556
00557
00558 static void Initialize_Last_Local_Index(WN* wn_loop)
00559 {
00560 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00561 FmtAssert(dli_loop->Mp_Info != NULL,
00562 ("Initialize_Last_Local_Index: Expecting an MP loop"));
00563 if (dli_loop->Mp_Info->Is_Pdo())
00564 Initialize_Pdo_Last_Local_Index(wn_loop);
00565 else
00566 Initialize_Doacross_Last_Local_Index(wn_loop);
00567 }
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584 static WN* Processor_2D_Tile_Loop(WN* loop,
00585 MEM_POOL *pool,
00586 BOOL lego_tile)
00587 {
00588
00589 WN* wn_root = NULL;
00590 DU_MANAGER* du = Du_Mgr;
00591 for (WN* wn = loop; wn != NULL; wn_root = wn, wn = LWN_Get_Parent(wn));
00592 FmtAssert(wn_root != NULL, ("Could not find program tree root."));
00593
00594 if (Index_Variable_Is_Last_Local(loop))
00595 Initialize_Last_Local_Index(loop);
00596
00597
00598 INT tiling_key = Get_New_Lego_Mp_Tile_Key();
00599
00600
00601 SYMBOL* new_lb = NULL;
00602 SYMBOL* new_ub = NULL;
00603 SYMBOL* new_step = NULL;
00604 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
00605 SYMBOL oldsym(WN_index(loop));
00606 INT required_length = strlen(oldsym.Name()) + 10;
00607 char* Str_Buf = CXX_NEW_ARRAY(char, required_length, &LNO_local_pool);
00608 if (lego_tile)
00609 sprintf(Str_Buf, "$dsmtile0%s", oldsym.Name());
00610 else
00611 sprintf(Str_Buf, "$datile0%s", oldsym.Name());
00612 SYMBOL* pid = CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, Do_Wtype(loop))),
00613 &LNO_default_pool);
00614 if (lego_tile)
00615 dli->Lego_Info->Set_Pid0(pid);
00616 else
00617 dli->Mp_Info->Set_Pid0(pid);
00618 DOLOOP_STACK stack(&LNO_local_pool);
00619 Build_Doloop_Stack(loop, &stack);
00620 if (Bound_Is_Too_Messy(dli->LB))
00621 Hoist_Lower_Bound(loop, &stack, &LNO_default_pool);
00622 if (Bound_Is_Too_Messy(dli->UB))
00623 Hoist_Upper_Bound(loop, &stack, &LNO_default_pool);
00624 WN* wn_bound = Generate_Bounds(loop, &new_lb, &new_ub, &new_step);
00625 FmtAssert(new_lb != NULL && new_ub != NULL,
00626 ("Did not generate new lower and/or upper bound"));
00627
00628
00629 WN* outer_loop = NULL;
00630 SNL_INV_CACHE_BLOCK_REASON reason = lego_tile ? SNL_INV_LEGO_TILE :
00631 SNL_INV_MP_TILE;
00632 INT64 old_est_iters = dli->Est_Num_Iterations;
00633 outer_loop = Tile_Loop(loop, LEGO_BOGUS_VALUE, 0, reason, pid, pool);
00634 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00635 dli_outer->Est_Num_Iterations = NOMINAL_PROCS;
00636 dli->Est_Num_Iterations = old_est_iters;
00637 dli->Tile_Size = 0;
00638 if (Has_Calls(wn_bound)) {
00639 DO_LOOP_INFO* dli = Get_Do_Loop_Info(outer_loop);
00640 dli->Has_Calls = TRUE;
00641 }
00642
00643
00644
00645 Processor_Update_Inner_Tile(wn_bound, loop, outer_loop, new_lb, new_ub,
00646 new_step, pid);
00647
00648
00649
00650 Processor_Update_Outer_Tile(outer_loop, loop, pid, lego_tile,
00651 dli->Lego_Info != NULL && dli->Lego_Info->Stride() < 0);
00652
00653
00654 dli_outer->Is_Processor_Tile = TRUE;
00655 if (lego_tile) {
00656 dli_outer->Is_Outer_Lego_Tile = TRUE;
00657 dli->Is_Inner_Lego_Tile = TRUE;
00658 }
00659 dli_outer->Lego_Mp_Key_Lower = tiling_key;
00660 dli_outer->Lego_Mp_Key_Upper = tiling_key;
00661 dli_outer->Lego_Mp_Key_Depth = 0;
00662 dli_outer->Suggested_Parallel = dli->Suggested_Parallel;
00663 dli->Lego_Mp_Key_Lower = tiling_key;
00664 dli->Lego_Mp_Key_Upper = tiling_key;
00665 dli->Lego_Mp_Key_Depth = 1;
00666 dli->Suggested_Parallel = FALSE;
00667
00668
00669 dli_outer->No_Fission = TRUE;
00670 dli_outer->No_Fusion = TRUE;
00671 dli_outer->Cannot_Interchange = TRUE;
00672 dli_outer->Cannot_Block = TRUE;
00673 dli_outer->Required_Unroll = 1;
00674 dli_outer->Pragma_Cannot_Concurrentize = dli->Pragma_Cannot_Concurrentize;
00675 dli_outer->Inside_Critical_Section = dli->Inside_Critical_Section;
00676 dli_outer->Has_Threadprivate = dli->Has_Threadprivate;
00677 dli_outer->Serial_Version_of_Concurrent_Loop
00678 = dli->Serial_Version_of_Concurrent_Loop;
00679
00680
00681 DOLOOP_STACK dostack(&LNO_local_pool);
00682 Build_Doloop_Stack(LWN_Get_Parent(outer_loop), &dostack);
00683 LNO_Build_Access(outer_loop, &dostack, &LNO_default_pool);
00684 BOOL negative_stride = dli->Lego_Info && dli->Lego_Info->Stride() < 0;
00685 Hoist_Iload_Ldid_Upper_Bound_One_Level(outer_loop,negative_stride);
00686
00687 if (Cur_PU_Feedback) {
00688 LNO_FB_MP_Tile(outer_loop, NOMINAL_PROCS, loop);
00689 }
00690
00691
00692 if (LNO_Verbose) {
00693 fprintf(stdout, "2D Tile (%s) -> (%s,%s)\n", WB_Whirl_Symbol(loop),
00694 WB_Whirl_Symbol(outer_loop), WB_Whirl_Symbol(loop));
00695 fprintf(TFile, "2D Tile (%s) -> (%s,%s)\n", WB_Whirl_Symbol(loop),
00696 WB_Whirl_Symbol(outer_loop), WB_Whirl_Symbol(loop));
00697 }
00698 if (LNO_Tlog) {
00699 INT required_length = strlen(WB_Whirl_Symbol(loop)) + 3;
00700 char* tlog_instring = CXX_NEW_ARRAY(char, required_length,
00701 &LNO_local_pool);
00702 required_length = strlen(WB_Whirl_Symbol(outer_loop))
00703 + strlen(WB_Whirl_Symbol(loop)) + 4;
00704 char* tlog_outstring = CXX_NEW_ARRAY(char, required_length,
00705 &LNO_local_pool);
00706 sprintf(tlog_instring, "(%s)", (char *) WB_Whirl_Symbol(loop));
00707 sprintf(tlog_outstring, "(%s,%s)", (char *) WB_Whirl_Symbol(outer_loop),
00708 (char *) WB_Whirl_Symbol(loop));
00709 Generate_Tlog("LNO", "lego_mp_tile", Srcpos_To_Line(WN_linenum(loop)),
00710 (char *) WB_Whirl_Symbol(loop), tlog_instring, tlog_outstring, "");
00711 }
00712
00713 return outer_loop;
00714 }
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735 static WN* Processor_3D_Tile_Loop(WN* loop,
00736 MEM_POOL *pool,
00737 BOOL lego_tile)
00738 {
00739
00740 WN* wn_root = NULL;
00741 for (WN* wn = loop; wn != NULL; wn_root = wn, wn = LWN_Get_Parent(wn));
00742 FmtAssert(wn_root != NULL, ("Could not find program tree root."));
00743
00744
00745 INT tiling_key = Get_New_Lego_Mp_Tile_Key();
00746
00747
00748 SYMBOL* new_out_lb = NULL;
00749 SYMBOL* new_out_ub = NULL;
00750 SYMBOL* new_out_step = NULL;
00751 SYMBOL* new_in_lb = NULL;
00752 SYMBOL* new_in_ub = NULL;
00753 SYMBOL* new_in_step = NULL;
00754 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
00755 SYMBOL oldsym(WN_index(loop));
00756 SYMBOL* pid_in = NULL;
00757 SYMBOL* pid_out = NULL;
00758 INT required_length = strlen(oldsym.Name()) + strlen("$dsmtile0") + 1;
00759 char* Str_Buf = CXX_NEW_ARRAY(char, required_length, &LNO_local_pool);
00760 if (lego_tile) {
00761 sprintf(Str_Buf, "$dsmtile0%s", oldsym.Name());
00762 pid_out = CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, Do_Wtype(loop))),
00763 &LNO_default_pool);
00764 dli->Lego_Info->Set_Pid0(pid_out);
00765 sprintf(Str_Buf, "$dsmtile1%s", oldsym.Name());
00766 pid_in = CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, Do_Wtype(loop))),
00767 &LNO_default_pool);
00768 dli->Lego_Info->Set_Pid1(pid_in);
00769 } else {
00770 sprintf(Str_Buf, "$da_tile0%s", oldsym.Name());
00771 pid_out = CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, Do_Wtype(loop))),
00772 &LNO_default_pool);
00773 dli->Mp_Info->Set_Pid0(pid_out);
00774 sprintf(Str_Buf, "$da_tile1%s", oldsym.Name());
00775 pid_in = CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, Do_Wtype(loop))),
00776 &LNO_default_pool);
00777 dli->Mp_Info->Set_Pid1(pid_in);
00778 }
00779 DOLOOP_STACK stack(&LNO_local_pool);
00780 Build_Doloop_Stack(loop, &stack);
00781 if (Bound_Is_Too_Messy(dli->LB))
00782 Hoist_Lower_Bound(loop, &stack, &LNO_default_pool);
00783 if (Bound_Is_Too_Messy(dli->UB))
00784 Hoist_Upper_Bound(loop, &stack, &LNO_default_pool);
00785 WN* wn_out_bound = Generate_Bounds(loop, &new_out_lb, &new_out_ub,
00786 &new_out_step, 0);
00787 FmtAssert(new_out_lb != NULL && new_out_ub != NULL,
00788 ("Did not generate new outer lower and/or upper bound"));
00789 WN* wn_in_bound = Generate_Bounds(loop, &new_in_lb, &new_in_ub,
00790 &new_in_step, 1);
00791 FmtAssert(new_in_lb != NULL && new_in_ub != NULL,
00792 ("Did not generate new inner lower and/or upper bound"));
00793
00794
00795 WN* outer_loop = NULL;
00796 SNL_INV_CACHE_BLOCK_REASON reason = lego_tile ? SNL_INV_LEGO_TILE :
00797 SNL_INV_MP_TILE;
00798 INT64 old_est_iters = dli->Est_Num_Iterations;
00799 outer_loop = Tile_Loop(loop, LEGO_BOGUS_VALUE, 0, reason, pid_out, pool);
00800 WN* inner_loop = NULL;
00801 inner_loop = Tile_Loop(loop, LEGO_BOGUS_VALUE/2, 0, reason, pid_in,
00802 pool);
00803 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00804 dli_outer->Est_Num_Iterations = NOMINAL_PROCS;
00805 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(inner_loop);
00806 dli_inner->Est_Num_Iterations = NOMINAL_PROCS;
00807 dli->Est_Num_Iterations = old_est_iters;
00808 dli->Tile_Size = 0;
00809
00810
00811
00812 Processor_Update_Inner_Tile(wn_out_bound, inner_loop, outer_loop,
00813 new_out_lb, new_out_ub, new_out_step, pid_out);
00814 Processor_Update_Inner_Tile(wn_in_bound, loop, inner_loop,
00815 new_in_lb, new_in_ub, new_in_step, pid_in);
00816
00817
00818
00819 Processor_Update_Outer_Tile(outer_loop, loop, pid_out, lego_tile,
00820 dli->Lego_Info != NULL && dli->Lego_Info->Stride() < 0);
00821
00822
00823 dli_outer->Is_Processor_Tile = TRUE;
00824 if (lego_tile) {
00825 dli_outer->Is_Outer_Lego_Tile = TRUE;
00826 dli_inner->Is_Outer_Lego_Tile = TRUE;
00827 dli->Is_Inner_Lego_Tile = TRUE;
00828 }
00829 dli_outer->Lego_Mp_Key_Depth = 0;
00830 dli_outer->Lego_Mp_Key_Lower = tiling_key;
00831 dli_outer->Lego_Mp_Key_Upper = tiling_key;
00832 dli_outer->Suggested_Parallel = dli->Suggested_Parallel;
00833 dli_inner->Lego_Mp_Key_Depth = 1;
00834 dli_inner->Lego_Mp_Key_Lower = tiling_key;
00835 dli_inner->Lego_Mp_Key_Upper = tiling_key;
00836 dli->Lego_Mp_Key_Depth = 2;
00837 dli->Lego_Mp_Key_Lower = tiling_key;
00838 dli->Lego_Mp_Key_Upper = tiling_key;
00839 dli->Suggested_Parallel = FALSE;
00840
00841 dli_outer->No_Fission = TRUE;
00842 dli_outer->No_Fusion = TRUE;
00843 dli_outer->Cannot_Interchange = TRUE;
00844 dli_outer->Cannot_Block = TRUE;
00845 dli_outer->Required_Unroll = 1;
00846 dli_outer->Pragma_Cannot_Concurrentize = dli->Pragma_Cannot_Concurrentize;
00847 dli_outer->Inside_Critical_Section = dli->Inside_Critical_Section;
00848 dli_outer->Has_Threadprivate = dli->Has_Threadprivate;
00849 dli_outer->Serial_Version_of_Concurrent_Loop
00850 = dli->Serial_Version_of_Concurrent_Loop;
00851 dli_inner->No_Fission = TRUE;
00852 dli_inner->No_Fusion = TRUE;
00853 dli_inner->Cannot_Interchange = TRUE;
00854 dli_inner->Cannot_Block = TRUE;
00855 dli_inner->Required_Unroll = 1;
00856 dli_inner->Pragma_Cannot_Concurrentize = dli->Pragma_Cannot_Concurrentize;
00857 dli_inner->Has_Threadprivate = dli->Has_Threadprivate;
00858 dli_inner->Serial_Version_of_Concurrent_Loop
00859 = dli->Serial_Version_of_Concurrent_Loop;
00860
00861
00862 DOLOOP_STACK dostack(&LNO_local_pool);
00863 Build_Doloop_Stack(LWN_Get_Parent(outer_loop), &dostack);
00864 LNO_Build_Access(outer_loop, &dostack, &LNO_default_pool);
00865 BOOL negative_stride = dli->Lego_Info && dli->Lego_Info->Stride() < 0;
00866 Hoist_Iload_Ldid_Upper_Bound_One_Level(outer_loop,negative_stride);
00867
00868 if (Cur_PU_Feedback) {
00869 LNO_FB_MP_Tile(inner_loop, 1, loop);
00870 LNO_FB_MP_Tile(outer_loop, 1, inner_loop);
00871 }
00872
00873
00874 if (LNO_Verbose) {
00875 fprintf(stdout, "3D Tile (%s) -> (%s,%s,%s)\n", WB_Whirl_Symbol(loop),
00876 WB_Whirl_Symbol(outer_loop), WB_Whirl_Symbol(inner_loop),
00877 WB_Whirl_Symbol(loop));
00878 fprintf(TFile, "3D Tile (%s) -> (%s,%s,%s)\n", WB_Whirl_Symbol(loop),
00879 WB_Whirl_Symbol(outer_loop), WB_Whirl_Symbol(inner_loop),
00880 WB_Whirl_Symbol(loop));
00881 }
00882 if (LNO_Tlog) {
00883 INT required_length = strlen(WB_Whirl_Symbol(loop)) + 3;
00884 char* tlog_instring = CXX_NEW_ARRAY(char, required_length,
00885 &LNO_local_pool);
00886 required_length = 5 + strlen(WB_Whirl_Symbol(outer_loop))
00887 + strlen(WB_Whirl_Symbol(inner_loop)) + strlen(WB_Whirl_Symbol(loop));
00888 char* tlog_outstring = CXX_NEW_ARRAY(char, required_length,
00889 &LNO_local_pool);
00890 sprintf(tlog_instring, "(%s)", WB_Whirl_Symbol(loop));
00891 sprintf(tlog_outstring, "(%s,%s,%s)", WB_Whirl_Symbol(outer_loop),
00892 WB_Whirl_Symbol(inner_loop), WB_Whirl_Symbol(loop));
00893 Generate_Tlog("LNO", "lego_mp_tile", Srcpos_To_Line(WN_linenum(loop)),
00894 (char *) WB_Whirl_Symbol(loop), tlog_instring, tlog_outstring, "");
00895 }
00896
00897 return outer_loop;
00898 }
00899
00900
00901
00902
00903
00904
00905
00906
00907 extern WN* Lego_Tile_Single_Loop(WN* loop,
00908 MEM_POOL *pool)
00909 {
00910 Is_True(Loop_Bounds_Simple(loop),
00911 ("Lego tiling cannot generate code for loop %s with complex bounds",
00912 ST_name(WN_st(WN_index(loop)))));
00913 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
00914 LEGO_INFO* lego_info = dli->Lego_Info;
00915 Is_True(lego_info, ("Generate_Bounds passed empty LEGO_INFO"));
00916 SYMBOL *array_sym = lego_info->Array();
00917 Is_True(array_sym != NULL,
00918 ("Lego_Tile_Single_Loop: Could not find array sym for loop %s",
00919 ST_name(WN_st(WN_index(loop)))));
00920 if (lego_info->Dynamic_Affinity())
00921 return Processor_3D_Tile_Loop(loop, pool, TRUE);
00922 DISTR_ARRAY *dact = Lookup_DACT(array_sym->St());
00923 DISTR_INFO *dinfo = dact->Dinfo();
00924 Is_True(dact != NULL,
00925 ("Lego_Tile_Single_Loop: No DACT for array %s in LEGO_INFO",
00926 ST_name(array_sym->St())));
00927 INT curr_dim = lego_info->Dim_Num();
00928 INT num_dim = dinfo->Num_Dim();
00929 Is_True((curr_dim >= 0) && (curr_dim < num_dim),
00930 ("Lego_Tile_Single_Loop: Bad dimension (%d) in LEGO_INFO, 0..%d expected",
00931 curr_dim, num_dim-1));
00932 switch (dact->Get_Dim(curr_dim)->Distr_Type()) {
00933 case DISTRIBUTE_BLOCK:
00934 return Processor_2D_Tile_Loop(loop, pool, TRUE);
00935 case DISTRIBUTE_CYCLIC_CONST:
00936 if (dact->Get_Dim(curr_dim)->Chunk_Const_Val() == 1)
00937 return Processor_2D_Tile_Loop(loop, pool, TRUE);
00938 else
00939 return Processor_3D_Tile_Loop(loop, pool, TRUE);
00940 case DISTRIBUTE_CYCLIC_EXPR:
00941 return Processor_3D_Tile_Loop(loop, pool, TRUE);
00942 case DISTRIBUTE_STAR:
00943 return NULL;
00944 }
00945 return NULL;
00946 }
00947
00948
00949
00950
00951
00952
00953
00954
00955 static WN* Create_Array_Load(ST* st_array,
00956 TYPE_ID mtype,
00957 INT index,
00958 INT element_size,
00959 INT element_count)
00960 {
00961 TY_IDX ty = Be_Type_Tbl(mtype);
00962 TY_IDX ty_ptr = Make_Pointer_Type(Be_Type_Tbl(mtype));
00963 TY_IDX arr_ty_ptr = Make_Pointer_Type(ST_type(st_array));
00964 OPCODE op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
00965 WN* wn_lda = WN_CreateLda(op_lda, 0, arr_ty_ptr, st_array);
00966 WN* wn_size = LWN_Make_Icon(mtype, element_count);
00967 WN* wn_index = LWN_Make_Icon(mtype, index);
00968 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
00969 WN* wn_array = WN_Create(op_array, 3);
00970 WN_element_size(wn_array) = element_size;
00971 WN_array_base(wn_array) = wn_lda;
00972 WN_array_index(wn_array, 0) = wn_index;
00973 WN_array_dim(wn_array, 0) = wn_size;
00974 LWN_Parentize(wn_array);
00975 OPCODE op_iload = OPCODE_make_op(OPR_ILOAD, mtype, mtype);
00976 WN* wn_iload = LWN_CreateIload(op_iload, 0, ty, ty_ptr, wn_array);
00977 Create_lda_array_alias(Alias_Mgr, wn_lda, wn_iload);
00978 return wn_iload;
00979 }
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989 extern WN* Create_Array_Store(ST* st_array,
00990 TYPE_ID mtype,
00991 INT index,
00992 INT element_size,
00993 INT element_count,
00994 WN* wn_value)
00995 {
00996 TY_IDX ty = Be_Type_Tbl(mtype);
00997 TY_IDX ty_ptr = Make_Pointer_Type(Be_Type_Tbl(mtype));
00998 TY_IDX arr_ty_ptr = Make_Pointer_Type(ST_type(st_array));
00999 OPCODE op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
01000 WN* wn_lda = WN_CreateLda(op_lda, 0, arr_ty_ptr, st_array);
01001 WN* wn_size = LWN_Make_Icon(mtype, element_count);
01002 WN* wn_index = LWN_Make_Icon(mtype, index);
01003 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
01004 WN* wn_array = WN_Create(op_array, 3);
01005 WN_element_size(wn_array) = element_size;
01006 WN_array_base(wn_array) = wn_lda;
01007 WN_array_index(wn_array, 0) = wn_index;
01008 WN_array_dim(wn_array, 0) = wn_size;
01009 LWN_Parentize(wn_array);
01010 OPCODE op_istore = OPCODE_make_op(OPR_ISTORE, MTYPE_V, mtype);
01011 WN* wn_istore = LWN_CreateIstore(op_istore, 0, ty_ptr, wn_value, wn_array);
01012 Create_lda_array_alias(Alias_Mgr, wn_lda, wn_istore);
01013 return wn_istore;
01014 }
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024 static void Mp_Layout_Load_Pids(WN* wn_loop,
01025 INT tile_count)
01026 {
01027 char Str_Buf[256];
01028 for (INT i = 0; i < tile_count; i++) {
01029 WN* wn_load_loop = SNL_Get_Inner_Snl_Loop(wn_loop, i + 1);
01030 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_load_loop);
01031 TYPE_ID type = Do_Wtype((WN*) wn_load_loop);
01032 sprintf(Str_Buf, "$layout_pid%d", WN_map_id(wn_load_loop));
01033 SYMBOL* sym_pid =
01034 CXX_NEW(SYMBOL(Create_Preg_Symbol(Str_Buf, type)), &LNO_default_pool);
01035 dli->Mp_Info->Set_Nest_Layout(sym_pid);
01036 }
01037 }
01038
01039
01040
01041
01042
01043
01044
01045
01046 static void Mp_Layout_Copy_Out_Layout(WN* wn_outer_loop,
01047 INT tile_count,
01048 ST* st_layout,
01049 WN* wn_bounds_code,
01050 STACK<WN*>* dep_stack)
01051 {
01052 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01053 INT64 linenum = WN_Get_Linenum(wn_outer_loop);
01054 WN* wn_inner_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop, tile_count);
01055 DOLOOP_STACK stack(&LNO_local_pool);
01056 Build_Doloop_Stack(wn_inner_loop, &stack);
01057 TY_IDX ty_i8 = Be_Type_Tbl(MTYPE_I8);
01058 TY_IDX ty_i8_ptr = Make_Pointer_Type(Be_Type_Tbl(MTYPE_I8));
01059 INT i;
01060 for (i = 0; i < stack.Elements(); i++)
01061 if (stack.Bottom_nth(i) == wn_outer_loop)
01062 break;
01063 for (INT j = 0; i < stack.Elements(); j++, i++) {
01064 WN* wn_loop = stack.Bottom_nth(i);
01065 WN* wn_stid = WN_start(wn_loop);
01066 WN* wn_iload = Create_Array_Load(st_layout, MTYPE_I8, j, 8, tile_count);
01067 if (Do_Depth(wn_outer_loop) > 0)
01068 dg->Add_Vertex(wn_iload);
01069 dep_stack->Push(wn_iload);
01070 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01071 SYMBOL* sym_stid = dli->Mp_Info->Nest_Layout();
01072 WN* wn_iload_cast = wn_iload;
01073 if (sym_stid->Type != WN_rtype(wn_iload))
01074 wn_iload_cast = LWN_Integer_Casts(wn_iload, sym_stid->Type,
01075 WN_rtype(wn_iload));
01076 WN* wn_new_stid = AWN_StidIntoSym(sym_stid, wn_iload_cast);
01077 WN_Set_Linenum(wn_new_stid, linenum);
01078 LWN_Insert_Block_Before(wn_bounds_code, NULL, wn_new_stid);
01079 }
01080 }
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091 static WN* Mp_Layout_Call(WN* wn_loop,
01092 INT tile_count,
01093 ST* st_onto,
01094 ST* st_layout,
01095 WN* wn_bounds_code)
01096 {
01097 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01098 INT64 linenum = WN_Get_Linenum(wn_loop);
01099 WN_Set_Linenum(wn_bounds_code, linenum);
01100 TY_IDX ty_i8 = Be_Type_Tbl(MTYPE_I8);
01101 TY_IDX ty_i8_ptr = Make_Pointer_Type(Be_Type_Tbl(MTYPE_I8));
01102
01103
01104 OPCODE op_call = OPCODE_make_op(OPR_CALL, MTYPE_V, MTYPE_V);
01105 WN* wn_call = WN_Create(op_call, 4);
01106 WN_st_idx(wn_call) = ST_st_idx(distr_st_entries[Processor_Layout]);
01107 WN_Set_Call_Parm_Mod(wn_call);
01108 WN_Set_Call_Parm_Ref(wn_call);
01109 WN_Set_Linenum(wn_call, linenum);
01110 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01111 if (Do_Depth(wn_loop) > 0)
01112 dg->Add_Vertex(wn_call);
01113
01114
01115 WN* wn_num_threads = dli->Mp_Info->Is_Pdo()
01116 ? (Is_Versioned_Mp_Loop(wn_loop))
01117 ? Get_Runtime_Cur_Numthreads_Ldid()
01118 : Get_Runtime_Cur_Numthreads_Func(wn_loop)
01119 : Get_Frozen_Numthreads_Ldid(wn_loop);
01120 TYPE_ID type = WN_rtype(wn_num_threads);
01121 if (type != MTYPE_I8)
01122 wn_num_threads = LWN_Integer_Casts(wn_num_threads, MTYPE_I8, type);
01123 WN* wn_parm = WN_CreateParm(MTYPE_I8, wn_num_threads, Be_Type_Tbl(MTYPE_I8),
01124 WN_PARM_BY_VALUE);
01125 LWN_Set_Parent(wn_num_threads, wn_parm);
01126 WN_kid(wn_call, 0) = wn_parm;
01127 LWN_Set_Parent(wn_parm, wn_call);
01128
01129
01130 WN* wn_tile_count = LWN_Make_Icon(MTYPE_I8, tile_count);
01131 type = WN_rtype(wn_tile_count);
01132 if (type != MTYPE_I8)
01133 wn_tile_count = LWN_Integer_Casts(wn_tile_count, MTYPE_I8, type);
01134 wn_parm = WN_CreateParm(MTYPE_I8, wn_tile_count, Be_Type_Tbl(MTYPE_I8),
01135 WN_PARM_BY_VALUE);
01136 LWN_Set_Parent(wn_tile_count, wn_parm);
01137 WN_kid(wn_call, 1) = wn_parm;
01138 LWN_Set_Parent(wn_parm, wn_call);
01139
01140
01141 OPCODE op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
01142 WN* wn_onto = WN_CreateLda(op_lda, 0, ty_i8_ptr, st_onto);
01143 #ifdef _NEW_SYMTAB
01144 Clear_ST_addr_not_passed(st_onto);
01145 #else
01146 Set_ST_addr_taken_passed(st_onto);
01147 #endif
01148 wn_parm = WN_CreateParm(Pointer_type, wn_onto, ty_i8_ptr,
01149 WN_PARM_BY_REFERENCE);
01150 LWN_Set_Parent(wn_onto, wn_parm);
01151 WN_kid(wn_call, 2) = wn_parm;
01152 LWN_Set_Parent(wn_parm, wn_call);
01153
01154
01155 op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
01156 WN* wn_layout = WN_CreateLda(op_lda, 0, ty_i8_ptr, st_layout);
01157 #ifdef _NEW_SYMTAB
01158 Clear_ST_addr_not_passed(st_layout);
01159 #else
01160 Set_ST_addr_taken_passed(st_layout);
01161 #endif
01162 wn_parm = WN_CreateParm(Pointer_type, wn_layout, ty_i8_ptr,
01163 WN_PARM_BY_REFERENCE);
01164 LWN_Set_Parent(wn_layout, wn_parm);
01165 WN_kid(wn_call, 3) = wn_parm;
01166 LWN_Set_Parent(wn_parm, wn_call);
01167
01168
01169 LWN_Insert_Block_Before(wn_bounds_code, WN_first(wn_bounds_code), wn_call);
01170 return wn_call;
01171 }
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181 static void Mp_Layout_Copy_In_Onto(WN* wn_loop,
01182 INT tile_count,
01183 ST* st_onto,
01184 WN* wn_bounds_code,
01185 STACK<WN*>* dep_stack)
01186 {
01187 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01188 INT* onto_consts = CXX_NEW_ARRAY(INT, tile_count, &LNO_local_pool);
01189 INT onto_count = 0;
01190 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
01191 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
01192 for (WN* wn = wn_first; wn != NULL; wn = WN_next(wn)) {
01193 if (WN_opcode(wn) == OPC_XPRAGMA && WN_pragma(wn) == WN_PRAGMA_ONTO) {
01194 FmtAssert(WN_operator(WN_kid0(wn)) == OPR_INTCONST,
01195 ("Parameter to ONTO clause is not constant."));
01196 onto_consts[onto_count++] = WN_const_val(WN_kid0(wn));
01197 }
01198 }
01199 FmtAssert(onto_count == 0 || onto_count == tile_count,
01200 ("Wrong number of onto pragmas in region"));
01201 for (INT i = tile_count - 1; i >= 0; i--) {
01202 WN* wn_value = onto_count == 0 ? LWN_Make_Icon(MTYPE_I8, 0)
01203 : LWN_Make_Icon(MTYPE_I8, onto_consts[i]);
01204 WN* wn_istore = Create_Array_Store(st_onto, MTYPE_I8, i, 8,
01205 tile_count, wn_value);
01206 LWN_Insert_Block_Before(wn_bounds_code, WN_first(wn_bounds_code),
01207 wn_istore);
01208 if (Do_Depth(wn_loop) > 0)
01209 dg->Add_Vertex(wn_istore);
01210 dep_stack->Push(wn_istore);
01211 }
01212 }
01213
01214
01215
01216
01217
01218
01219
01220
01221 static void Mp_Insert_Bounds_Code(WN* wn_loop,
01222 WN* wn_bounds_code,
01223 BOOL inside)
01224 {
01225
01226 if (inside) {
01227 WN* wnn = NULL;
01228 WN* wn = 0;
01229 for (wn = WN_first(wn_bounds_code); wn != NULL;
01230 wnn = wn, wn = WN_next(wn));
01231 for (wn = wnn; wn != NULL; wn = wnn) {
01232 wnn = WN_prev(wn);
01233 LWN_Extract_From_Block(wn);
01234 LWN_Insert_Block_Before(WN_do_body(wn_loop),
01235 WN_first(WN_do_body(wn_loop)), wn);
01236 }
01237 } else {
01238 WN* wnn = NULL;
01239 for (WN* wn = WN_first(wn_bounds_code); wn != NULL; wn = wnn) {
01240 wnn = WN_next(wn);
01241 LWN_Extract_From_Block(wn);
01242 LWN_Insert_Block_Before(LWN_Get_Parent(wn_loop), wn_loop, wn);
01243 }
01244 }
01245 LWN_Delete_Tree(wn_bounds_code);
01246 }
01247
01248
01249
01250
01251
01252
01253
01254 static void Mp_Fix_Deps(STACK<WN*>* dep_stack)
01255 {
01256 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01257 for (INT i = 0; i < dep_stack->Elements(); i++) {
01258 WN* wn_inode = dep_stack->Bottom_nth(i);
01259 OPERATOR opr = WN_operator(wn_inode);
01260 WN* wn_array = opr == OPR_ILOAD ? WN_kid0(wn_inode) : WN_kid1(wn_inode);
01261 DOLOOP_STACK st_stack(&LNO_local_pool);
01262 Build_Doloop_Stack(wn_inode, &st_stack);
01263 LNO_Build_Access_Array(wn_array, &st_stack, &LNO_default_pool);
01264 if (opr == OPR_ISTORE && Do_Depth(wn_inode) > 0) {
01265 if (!dg->Add_Edge(wn_inode, &st_stack, wn_inode, &st_stack, FALSE))
01266 LNO_Erase_Dg_From_Here_In(wn_inode, dg);
01267 }
01268 }
01269 }
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280 static void Mp_Fix_Ref_Array_Aliases(WN* wn_call,
01281 WN* wn_bounds_code,
01282 ST* st_alias)
01283 {
01284 WN* wn = NULL;
01285 LWN_ITER* itr = LWN_WALK_TreeIter(wn_bounds_code);
01286 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01287 if (WN_operator(itr->wn) == OPR_LDA
01288 && WN_st(itr->wn) == st_alias
01289 && WN_operator(LWN_Get_Parent(itr->wn)) != OPR_PARM) {
01290 wn = itr->wn;
01291 break;
01292 }
01293 }
01294 FmtAssert(wn != NULL,
01295 ("Mp_Fix_Ref_Array_Aliases: Could not find LDA of array node"));
01296 WN* wn_inode = LWN_Get_Parent(LWN_Get_Parent(wn));
01297 FmtAssert(wn_inode != NULL && (WN_operator(wn_inode)
01298 == OPR_ILOAD || WN_operator(wn_inode) == OPR_ISTORE),
01299 ("Mp_Fix_Ref_Array_Aliases: Could not find array node"));
01300 wn = NULL;
01301 itr = LWN_WALK_TreeIter(wn_call);
01302 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01303 if (WN_operator(itr->wn) == OPR_LDA
01304 && WN_st(itr->wn) == st_alias
01305 && WN_operator(LWN_Get_Parent(itr->wn)) == OPR_PARM) {
01306 wn = itr->wn;
01307 break;
01308 }
01309 }
01310 FmtAssert(wn != NULL,
01311 ("Mp_Fix_Ref_Array_Aliases: Could not find LDA of PARM node"));
01312 WN* wn_parm = LWN_Get_Parent(wn);
01313 FmtAssert(wn_parm != NULL && WN_operator(wn_parm) == OPR_PARM,
01314 ("Mp_Fix_Ref_Array_Aliases: Could not find PARM node"));
01315 Copy_alias_info(Alias_Mgr, wn_inode, wn_parm);
01316 }
01317
01318
01319
01320
01321
01322
01323
01324 static BOOL Is_Orphaned_Pdo(WN* wn_loop)
01325 {
01326 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01327 if (dli->Mp_Info == NULL)
01328 return FALSE;
01329 if (!dli->Mp_Info->Is_Pdo())
01330 return FALSE;
01331 for (WN* wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn)) {
01332 if (WN_operator(wn) == OPR_REGION) {
01333 WN* wn_first = WN_first(WN_region_pragmas(wn));
01334 if (wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA
01335 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN)
01336 return FALSE;
01337 }
01338 }
01339 return TRUE;
01340 }
01341
01342
01343
01344
01345
01346
01347
01348
01349 static void Mp_Localize_Onto_and_Layout(WN* wn_loop,
01350 ST* st_onto,
01351 ST* st_layout)
01352 {
01353 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01354 if (dli->Mp_Info->Is_Pdo() && !Is_Orphaned_Pdo(wn_loop)) {
01355 WN* wn = 0;
01356 for (wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn)) {
01357 if (WN_opcode(wn) == OPC_REGION) {
01358 WN* wn_first = WN_first(WN_region_pragmas(wn));
01359 if (wn_first != NULL
01360 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN)
01361 break;
01362 }
01363 }
01364 FmtAssert(wn != NULL, ("Mp_Layout_Code: Could not find parallel region"));
01365 Add_Pragma_To_MP_Region(wn, st_layout, 0, WN_PRAGMA_LOCAL);
01366 Add_Pragma_To_MP_Region(wn, st_onto, 0, WN_PRAGMA_LOCAL);
01367 }
01368 }
01369
01370
01371
01372
01373
01374
01375
01376
01377 static void Mp_Layout_Code(WN* wn_loop,
01378 INT tile_count,
01379 ST** st_onto,
01380 ST** st_layout)
01381 {
01382 char Str_Buf[256];
01383 STACK<WN*> dep_stack(&LNO_local_pool);
01384 sprintf(Str_Buf, "onto%d", WN_map_id(wn_loop));
01385 *st_onto = Create_Local_Array_ST(Str_Buf, Be_Type_Tbl(MTYPE_I8),
01386 tile_count);
01387 sprintf(Str_Buf, "layout%d", WN_map_id(wn_loop));
01388 *st_layout = Create_Local_Array_ST(Str_Buf, Be_Type_Tbl(MTYPE_I8),
01389 tile_count);
01390 WN* wn_bounds_code = WN_CreateBlock();
01391 Mp_Layout_Load_Pids(wn_loop, tile_count);
01392 Mp_Layout_Copy_Out_Layout(wn_loop, tile_count, *st_layout, wn_bounds_code,
01393 &dep_stack);
01394 WN* wn_call = Mp_Layout_Call(wn_loop, tile_count, *st_onto, *st_layout,
01395 wn_bounds_code);
01396 Mp_Layout_Copy_In_Onto(wn_loop, tile_count, *st_onto, wn_bounds_code,
01397 &dep_stack);
01398 Mp_Fix_Ref_Array_Aliases(wn_call, wn_bounds_code, *st_onto);
01399 Mp_Fix_Ref_Array_Aliases(wn_call, wn_bounds_code, *st_layout);
01400 Mp_Insert_Bounds_Code(wn_loop, wn_bounds_code, FALSE);
01401 Mp_Fix_Deps(&dep_stack);
01402 Mp_Localize_Onto_and_Layout(wn_loop, *st_onto, *st_layout);
01403 }
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413 static void Mp_Layout_Lego_Layout(WN* wn_outer_loop,
01414 INT tile_count,
01415 ST* st_layout,
01416 WN* wn_bounds_code,
01417 STACK<WN*>* dep_stack)
01418 {
01419 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01420 INT64 linenum = WN_Get_Linenum(wn_outer_loop);
01421 WN* wn_inner_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop, tile_count);
01422 DOLOOP_STACK stack(&LNO_local_pool);
01423 Build_Doloop_Stack(wn_inner_loop, &stack);
01424 TY_IDX ty_i8 = Be_Type_Tbl(MTYPE_I8);
01425 TY_IDX ty_i8_ptr = Make_Pointer_Type(Be_Type_Tbl(MTYPE_I8));
01426 INT outer_depth = Do_Loop_Depth(wn_outer_loop);
01427 for (INT i = outer_depth; i < stack.Elements(); i++) {
01428 WN* wn_loop = stack.Bottom_nth(i);
01429 WN* wn_stid = WN_start(wn_loop);
01430 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01431 WN* wn_ldid = NULL;
01432 if (dli->Lego_Info->Dynamic_Affinity()) {
01433 WN* wn_numthreads_code = NULL;
01434 wn_ldid = Numprocs(dli->Lego_Info->Array()->St(),
01435 dli->Lego_Info->Dim_Num(),
01436 Do_Depth(wn_outer_loop) > 0,
01437 &wn_numthreads_code);
01438 WN* wnn = NULL;
01439 for (WN* wn = WN_first(wn_numthreads_code); wn != NULL; wn = wnn) {
01440 wnn = WN_next(wn);
01441 LWN_Extract_From_Block(wn);
01442 LWN_Insert_Block_Before(LWN_Get_Parent(wn_outer_loop),
01443 wn_outer_loop, wn);
01444 LWN_Copy_Linenumber(wn_outer_loop,wn);
01445
01446
01447
01448
01449 if (WN_opcode(wn) == OPC_IF) {
01450 IF_INFO *ii=CXX_NEW (IF_INFO(&LNO_default_pool,
01451 Find_SCF_Inside(wn, OPC_DO_LOOP) != NULL,
01452 Find_SCF_Inside(wn, OPC_REGION) != NULL), &LNO_default_pool);
01453 WN_MAP_Set(LNO_Info_Map,wn,(void *)ii);
01454 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01455 &LNO_local_pool);
01456 Build_Doloop_Stack(wn, stack);
01457 LNO_Build_If_Access(wn, stack);
01458 CXX_DELETE(stack, &LNO_local_pool);
01459 }
01460 }
01461 LWN_Delete_Tree(wn_numthreads_code);
01462 } else {
01463 wn_ldid = Get_Numthreads_Ldid(dli->Lego_Info);
01464 }
01465 WN* wn_istore = Create_Array_Store(st_layout, MTYPE_I8, i - outer_depth,
01466 8, tile_count, wn_ldid);
01467 if (Do_Depth(wn_outer_loop) > 0)
01468 dg->Add_Vertex(wn_istore);
01469 dep_stack->Push(wn_istore);
01470 WN_Set_Linenum(wn_istore, linenum);
01471 LWN_Insert_Block_Before(wn_bounds_code, NULL, wn_istore);
01472 }
01473 }
01474
01475
01476
01477
01478
01479
01480
01481
01482 static void Lego_Layout_Code(WN* wn_loop,
01483 INT tile_count,
01484 ST** st_layout)
01485 {
01486 char Str_Buf[256];
01487 STACK<WN*> dep_stack(&LNO_local_pool);
01488 sprintf(Str_Buf, "layout%d", WN_map_id(wn_loop));
01489 *st_layout = Create_Local_Array_ST(Str_Buf, Be_Type_Tbl(MTYPE_I8),
01490 tile_count);
01491 WN* wn_bounds_code = WN_CreateBlock();
01492 Mp_Layout_Lego_Layout(wn_loop, tile_count, *st_layout, wn_bounds_code,
01493 &dep_stack);
01494 Mp_Insert_Bounds_Code(wn_loop, wn_bounds_code, FALSE);
01495 Mp_Fix_Deps(&dep_stack);
01496 }
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506 extern BOOL Add_Condition(COND_BOUNDS_INFO* info,
01507 WN* wn_cond,
01508 WN* wn_if)
01509 {
01510 if (Redundant_Condition(info, wn_cond, wn_if))
01511 return FALSE;
01512 WN* wn_total_cond = WN_if_test(wn_if);
01513 OPCODE op_cand = OPCODE_make_op(OPR_CAND, Boolean_type, MTYPE_V);
01514 wn_total_cond = LWN_CreateExp2(op_cand, wn_total_cond, wn_cond);
01515 WN_if_test(wn_if) = wn_total_cond;
01516 LWN_Parentize(wn_if);
01517 DOLOOP_STACK stack2(&LNO_local_pool);
01518 Build_Doloop_Stack(wn_if, &stack2);
01519 LNO_Build_If_Access(wn_if, &stack2);
01520 return TRUE;
01521 }
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531 static void Prune_Redundant_Trues(WN* wn_cond)
01532 {
01533 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01534 DU_MANAGER* du = Du_Mgr;
01535 for (INT i = 0; i < WN_kid_count(wn_cond); i++)
01536 Prune_Redundant_Trues(WN_kid(wn_cond, i));
01537
01538 if (WN_operator(wn_cond) == OPR_CAND) {
01539 WN* wn_original = NULL;
01540 if (WN_operator(WN_kid0(wn_cond)) == OPR_INTCONST
01541 && WN_const_val(WN_kid0(wn_cond)) == 1) {
01542 wn_original = WN_kid1(wn_cond);
01543 } else if (WN_operator(WN_kid1(wn_cond)) == OPR_INTCONST
01544 && WN_const_val(WN_kid1(wn_cond)) == 1) {
01545 wn_original = WN_kid0(wn_cond);
01546 }
01547 if (wn_original != NULL) {
01548 WN* wn_copy = LWN_Copy_Tree(wn_original);
01549 LWN_Copy_Def_Use(wn_original, wn_copy, du);
01550 dg->Add_Deps_To_Copy_Block(wn_original, wn_copy, FALSE);
01551 WN* wn_old_cond = wn_cond;
01552 WN* wn_parent = LWN_Get_Parent(wn_cond);
01553 INT i;
01554 for (i = 0; i < WN_kid_count(wn_parent); i++)
01555 if (WN_kid(wn_parent, i) == wn_cond)
01556 break;
01557 WN_kid(wn_parent, i) = wn_copy;
01558 LWN_Set_Parent(wn_copy, wn_parent);
01559 LWN_Delete_Tree(wn_old_cond);
01560 }
01561 }
01562 }
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572 static BOOL Mp_Retained_Pragma(WN* wn_pragma,
01573 BOOL auto_parallel)
01574 {
01575 FmtAssert(WN_opcode(wn_pragma) == OPC_PRAGMA
01576 || WN_opcode(wn_pragma) == OPC_XPRAGMA,
01577 ("Mp_Retained_Pragma: Argument not a pragma node"));
01578 switch (WN_pragma(wn_pragma)) {
01579 case WN_PRAGMA_CRITICAL_SECTION_BEGIN:
01580 case WN_PRAGMA_CRITICAL_SECTION_END:
01581 case WN_PRAGMA_BARRIER:
01582 case WN_PRAGMA_ENTER_GATE:
01583 case WN_PRAGMA_EXIT_GATE:
01584 case WN_PRAGMA_INDEPENDENT_BEGIN:
01585 case WN_PRAGMA_INDEPENDENT_END:
01586 return TRUE;
01587 case WN_PRAGMA_SINGLE_PROCESS_BEGIN:
01588 case WN_PRAGMA_SINGLE_PROCESS_END:
01589 return !auto_parallel;
01590 default:
01591 return FALSE;
01592 }
01593 }
01594
01595
01596
01597
01598
01599
01600
01601
01602 static BOOL Mp_Basic_Parallel_Construct(WN* wn_region)
01603 {
01604 if (WN_opcode(wn_region) == OPC_REGION) {
01605 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
01606 if (wn_first == NULL)
01607 return FALSE;
01608 if (WN_opcode(wn_first) == OPC_PRAGMA) {
01609 switch(WN_pragma(wn_first)) {
01610 case WN_PRAGMA_PARALLEL_BEGIN:
01611 case WN_PRAGMA_DOACROSS:
01612 case WN_PRAGMA_PARALLEL_DO:
01613 return TRUE;
01614 }
01615 }
01616 }
01617 return FALSE;
01618 }
01619
01620
01621
01622
01623
01624
01625
01626
01627 static void Mp_Disable_Opts_On_Internal_Regions(WN* wn_tree,
01628 INT parallel_level)
01629 {
01630 if (WN_opcode(wn_tree) == OPC_REGION) {
01631 if (Mp_Basic_Parallel_Construct(wn_tree)) {
01632 for (INT i = 0; i < WN_kid_count(wn_tree); i++) {
01633 Mp_Disable_Opts_On_Internal_Regions(WN_kid(wn_tree, i),
01634 Mp_Basic_Parallel_Construct(wn_tree)
01635 ? parallel_level + 1 : parallel_level);
01636 }
01637 }
01638 }
01639
01640 if (WN_opcode(wn_tree) == OPC_DO_LOOP && parallel_level >= 1
01641 && Do_Loop_Is_Mp(wn_tree)) {
01642 DO_LOOP_INFO* dli_tree = Get_Do_Loop_Info(wn_tree);
01643 dli_tree->Mp_Info->Disable_Plowering();
01644 }
01645
01646 if (WN_opcode(wn_tree) == OPC_BLOCK) {
01647 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
01648 Mp_Disable_Opts_On_Internal_Regions(wn, parallel_level);
01649 } else {
01650 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
01651 Mp_Disable_Opts_On_Internal_Regions(WN_kid(wn_tree, i),
01652 parallel_level);
01653 }
01654 }
01655
01656
01657
01658
01659
01660
01661
01662
01663 static void Mp_Extract_Parallel_Directives(WN* wn_tree,
01664 BOOL auto_parallel,
01665 INT parallel_level)
01666 {
01667 if (WN_opcode(wn_tree) == OPC_REGION) {
01668 if (Mp_Basic_Parallel_Construct(wn_tree)) {
01669 if (parallel_level >= 1)
01670 return;
01671 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
01672 Mp_Extract_Parallel_Directives(WN_kid(wn_tree, i), auto_parallel,
01673 parallel_level + 1);
01674 } else if (Is_Mp_Region(wn_tree)) {
01675 WN* wnn = NULL;
01676 BOOL has_retained_pragma = FALSE;
01677 WN* wn_first = WN_first(WN_region_pragmas(wn_tree));
01678 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
01679 wnn = WN_next(wn);
01680 if (Mp_Retained_Pragma(wn, auto_parallel)) {
01681 has_retained_pragma = TRUE;
01682 } else {
01683 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01684 INT old_id = WN_MAP32_Get(Prompf_Id_Map, wn);
01685 if (old_id != 0)
01686 Prompf_Info->Elimination(old_id);
01687 }
01688 LWN_Extract_From_Block(wn);
01689 LWN_Delete_Tree(wn);
01690 }
01691 }
01692 if (!has_retained_pragma) {
01693 WN* wn_new_tree = WN_first(WN_region_body(wn_tree));
01694 WN* wnn = NULL;
01695 for (WN* wn = wn_new_tree; wn != NULL; wn = wnn) {
01696 wnn = WN_next(wn);
01697 LWN_Extract_From_Block(wn);
01698 LWN_Insert_Block_Before(LWN_Get_Parent(wn_tree), wn_tree, wn);
01699 Mp_Extract_Parallel_Directives(wn, auto_parallel, parallel_level);
01700 }
01701 LWN_Extract_From_Block(wn_tree);
01702 LWN_Delete_Tree(wn_tree);
01703 return;
01704 }
01705 }
01706 }
01707
01708 if ((WN_opcode(wn_tree) == OPC_PRAGMA || WN_opcode(wn_tree) == OPC_XPRAGMA)
01709 && (WN_pragmas[WN_pragma(wn_tree)].users & PUSER_MP)
01710 && !Mp_Retained_Pragma(wn_tree, auto_parallel)) {
01711 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01712 INT old_id = WN_MAP32_Get(Prompf_Id_Map, wn_tree);
01713 if (old_id != 0)
01714 Prompf_Info->Elimination(old_id);
01715 }
01716 LWN_Extract_From_Block(wn_tree);
01717 LWN_Delete_Tree(wn_tree);
01718 return;
01719 }
01720
01721 if (WN_opcode(wn_tree) == OPC_BLOCK) {
01722 WN* wnn = NULL;
01723 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = wnn) {
01724 wnn = WN_next(wn);
01725 Mp_Extract_Parallel_Directives(wn, auto_parallel, parallel_level);
01726 }
01727 } else {
01728 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
01729 Mp_Extract_Parallel_Directives(WN_kid(wn_tree, i), auto_parallel,
01730 parallel_level);
01731 }
01732 }
01733
01734
01735
01736
01737
01738
01739
01740
01741
01742 static BOOL Mp_Retained_Region(WN* wn_region,
01743 BOOL auto_parallel)
01744 {
01745 FmtAssert(WN_opcode(wn_region) == OPC_REGION,
01746 ("Mp_Retained_Region(): Expecting a REGION node"));
01747 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
01748 if (WN_opcode(wn_first) == OPC_PRAGMA) {
01749 switch(WN_pragma(wn_first)) {
01750 case WN_PRAGMA_SINGLE_PROCESS_BEGIN:
01751 return !auto_parallel;
01752 default:
01753 return TRUE;
01754 }
01755 }
01756 return TRUE;
01757 }
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767 static void Prompf_Mp_Version(WN* wn_orig,
01768 WN* wn_copy,
01769 BOOL auto_parallel)
01770 {
01771 LWN_ITER* itr1 = LWN_WALK_TreeIter(wn_orig);
01772 LWN_ITER* itr2 = LWN_WALK_TreeIter(wn_copy);
01773 STACK<INT> stack1(&LNO_local_pool);
01774 STACK<INT> stack2(&LNO_local_pool);
01775 STACK<PROMPF_ID_TYPE> stack3(&LNO_local_pool);
01776 for (; itr1 != NULL; itr1 = LWN_WALK_TreeNext(itr1)) {
01777 WN* wn1 = itr1->wn;
01778 WN* wn2 = itr2->wn;
01779 FmtAssert(WN_opcode(wn1) == WN_opcode(wn2),
01780 ("Prompf_Mp_Version: Corresponding nodes do not match"));
01781 INT old_id = WN_MAP32_Get(Prompf_Id_Map, wn1);
01782 if (old_id != 0 && (WN_opcode(wn1) == OPC_DO_LOOP
01783 || WN_opcode(wn1) == OPC_REGION
01784 && Mp_Retained_Region(wn1, auto_parallel)
01785 || (WN_opcode(wn1) == OPC_PRAGMA || WN_opcode(wn1) == OPC_XPRAGMA)
01786 && Mp_Retained_Pragma(wn1, auto_parallel))) {
01787 BOOL is_first = FALSE;
01788 WN* wn_region = NULL;
01789 for (WN* wn = wn1; wn != NULL; wn = LWN_Get_Parent(wn)) {
01790 if (WN_opcode(wn) == OPC_REGION) {
01791 wn_region = wn;
01792 break;
01793 }
01794 }
01795 PROMPF_ID_TYPE id_type = Prompf_Id_Type(wn1, wn_region, &is_first);
01796 INT new_id = 0;
01797 INT i;
01798 for (i = 0; i < stack1.Elements(); i++)
01799 if (stack1.Bottom_nth(i) == old_id)
01800 break;
01801 new_id = i < stack1.Elements() ? stack2.Bottom_nth(i) :
01802 New_Construct_Id();
01803 WN_MAP32_Set(Prompf_Id_Map, wn2, new_id);
01804 if (i == stack1.Elements()) {
01805 stack1.Push(old_id);
01806 stack2.Push(new_id);
01807 stack3.Push(id_type);
01808 }
01809 }
01810 itr2 = LWN_WALK_TreeNext(itr2);
01811 }
01812 INT nloops = stack1.Elements();
01813 if (nloops > 0) {
01814 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01815 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01816 PROMPF_ID_TYPE* id_types = CXX_NEW_ARRAY(PROMPF_ID_TYPE, nloops,
01817 &LNO_local_pool);
01818 for (INT i = 0; i < nloops; i++) {
01819 old_ids[i] = stack1.Bottom_nth(i);
01820 new_ids[i] = stack2.Bottom_nth(i);
01821 id_types[i] = stack3.Bottom_nth(i);
01822 }
01823 Prompf_Info->Mp_Version(old_ids, new_ids, id_types, nloops);
01824 }
01825 }
01826
01827
01828
01829
01830
01831
01832 static WN* Mp_Trip_Count(WN* wn_loop)
01833 {
01834 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
01835 INT nloops = dli_loop->Mp_Info->Nest_Total();
01836 if (!Fully_Permutable_Permutation(wn_loop, nloops))
01837 return NULL;
01838 WN* wn_trip = NULL;
01839 for (WN* wn = wn_loop; wn != NULL; wn = Next_SNL_Loop(wn)) {
01840 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01841 FmtAssert(dli->Mp_Info != NULL, ("Mp_Trip_Count: Expecting MP loop"));
01842 WN* wn_local_trip = Trip_Count(wn);
01843 if (wn_local_trip == NULL) {
01844 LWN_Delete_Tree(wn_trip);
01845 return NULL;
01846 }
01847 if (wn_trip == NULL) {
01848 wn_trip = wn_local_trip;
01849 } else {
01850 TYPE_ID type = Max_Wtype(WN_rtype(wn_trip), WN_rtype(wn_local_trip));
01851 wn_trip = AWN_Mpy(type, wn_trip, wn_local_trip);
01852 }
01853 if (dli->Mp_Info->Nest_Index() + 1 == dli->Mp_Info->Nest_Total())
01854 break;
01855 }
01856 return wn_trip;
01857 }
01858
01859
01860
01861
01862
01863
01864
01865
01866 extern BOOL Mp_Want_Version_Loop(WN* wn_loop,
01867 BOOL test_already)
01868 {
01869 if (!LNO_Version_Mp_Loops)
01870 return FALSE;
01871
01872 if (test_already && Is_Versioned_Mp_Loop(wn_loop))
01873 return FALSE;
01874
01875 INT construct_count = 0;
01876 for (WN* wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn))
01877 if (Mp_Basic_Parallel_Construct(wn))
01878 construct_count++;
01879 if (construct_count > 1)
01880 return FALSE;
01881
01882 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01883 if (dli->Mp_Info->Is_Pdo() && !Is_Orphaned_Pdo(wn_loop))
01884 return FALSE;
01885
01886
01887
01888 if (WN_prev(wn_loop) && !Is_Orphaned_Pdo(wn_loop))
01889 return FALSE;
01890
01891
01892 return TRUE;
01893 }
01894
01895
01896
01897
01898
01899
01900 static WN* Mp_Version_Loop(WN* wn_loop)
01901 {
01902 DU_MANAGER* du = Du_Mgr;
01903 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01904 REDUCTION_MANAGER* rm = red_manager;
01905 if (!Mp_Want_Version_Loop(wn_loop, TRUE))
01906 return NULL;
01907
01908
01909 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01910 BOOL is_auto_parallel = dli->Auto_Parallelized;
01911 BOOL is_pdo = dli->Mp_Info->Is_Pdo();
01912 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
01913 BOOL is_omp = WN_pragma_omp(WN_first(WN_region_pragmas(wn_region)));
01914 INT nest_total = dli->Mp_Info->Nest_Total();
01915
01916
01917 TYPE_ID type = Promote_Type(Do_Wtype((WN *) wn_loop));
01918 WN* wn_block = LWN_Get_Parent(wn_loop);
01919 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
01920 FmtAssert(wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA,
01921 ("Mp_Version_Loop: Missing PRAGMA in MP region"));
01922 switch (WN_pragma(wn_first)) {
01923 case WN_PRAGMA_DOACROSS:
01924 case WN_PRAGMA_PARALLEL_DO:
01925 case WN_PRAGMA_PDO_BEGIN:
01926 break;
01927 default:
01928 FmtAssert(FALSE, ("Mp_Version_Loop: Not an MP loop"));
01929 }
01930 WN* wn_parent = LWN_Get_Parent(wn_region);
01931 WN_MAP version_map = WN_MAP_Create(&LNO_local_pool);
01932 WN* wn_copy = LWN_Copy_Tree(wn_block, TRUE, LNO_Info_Map, TRUE, version_map);
01933 BOOL all_internal = WN_Rename_Duplicate_Labels(wn_block, wn_copy,
01934 Current_Func_Node, &LNO_local_pool);
01935 Is_True(all_internal, ("external labels renamed"));
01936
01937 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
01938 Prompf_Mp_Version(wn_block, wn_copy, is_auto_parallel);
01939
01940
01941 WN* wn_array[2];
01942 wn_array[0] = wn_block;
01943 wn_array[1] = wn_copy;
01944 Unrolled_DU_Update(wn_array, 2, Do_Loop_Depth(wn_loop) - 1, TRUE, FALSE);
01945 dg->Versioned_Dependences_Update(wn_block, wn_copy, Do_Loop_Depth(wn_loop),
01946 version_map);
01947 WN_MAP_Delete(version_map);
01948 if (rm != NULL)
01949 rm->Unroll_Update(wn_array, 2);
01950
01951
01952 INT nest_count = 0;
01953 LWN_ITER* itr = LWN_WALK_TreeIter(wn_copy);
01954 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01955 if (WN_opcode(itr->wn) == OPC_DO_LOOP) {
01956 WN* wn_loop_copy = itr->wn;
01957 DO_LOOP_INFO* dli_copy = Get_Do_Loop_Info(wn_loop_copy);
01958 WN* wn_delete_loop = wn_loop_copy;
01959 DO_LOOP_INFO* dli_delete = Get_Do_Loop_Info(wn_delete_loop);
01960 CXX_DELETE(dli_delete->Mp_Info, &LNO_default_pool);
01961 dli_delete->Mp_Info = NULL;
01962 dli_delete->Serial_Version_of_Concurrent_Loop = TRUE;
01963 if (dli_delete->Lego_Info != NULL) {
01964 DISTR_ARRAY* dact =
01965 Lookup_DACT(dli_delete->Lego_Info->Array()->St());
01966 if (dact == NULL || !dact->Dinfo()->IsReshaped()) {
01967 CXX_DELETE(dli_delete->Lego_Info, LEGO_pool);
01968 dli_delete->Lego_Info = NULL;
01969 }
01970 }
01971 if (++nest_count >= nest_total)
01972 break;
01973 }
01974 }
01975 WN* wn_prev = WN_prev(wn_region);
01976 LWN_Extract_From_Block(wn_region);
01977
01978
01979 WN* wn_total_cond = LWN_Make_Icon(Boolean_type, 1);
01980 WN* wn_if = LWN_CreateIf(wn_total_cond, WN_CreateBlock(), wn_copy);
01981 LWN_Insert_Block_After(WN_then(wn_if), NULL, wn_region);
01982 WN_Set_Linenum(wn_if, WN_Get_Linenum(wn_loop));
01983 IF_INFO *ii =
01984 CXX_NEW(IF_INFO(&LNO_default_pool, TRUE, TRUE), &LNO_default_pool);
01985 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii);
01986 WN_Set_If_MpVersion(wn_if);
01987 DOLOOP_STACK *stack = CXX_NEW(DOLOOP_STACK(&LNO_default_pool),
01988 &LNO_default_pool);
01989 Build_Doloop_Stack(wn_if, stack);
01990 LNO_Build_If_Access(wn_if, stack);
01991 LWN_Insert_Block_After(wn_parent, wn_prev, wn_if);
01992 Mp_Disable_Opts_On_Internal_Regions(wn_block, 0);
01993 Mp_Extract_Parallel_Directives(wn_copy, is_auto_parallel, 1);
01994 if (is_omp && !is_auto_parallel) {
01995
01996
01997
01998
01999
02000
02001
02002
02003
02004
02005
02006 }
02007
02008
02009 COND_BOUNDS_INFO *info =
02010 CXX_NEW(COND_BOUNDS_INFO(&LNO_local_pool), &LNO_local_pool);
02011 info->Collect_Outer_Info(wn_parent);
02012 WN* wnn = NULL;
02013 wn_first = WN_first(WN_region_pragmas(wn_region));
02014 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
02015 wnn = WN_next(wn);
02016 if (WN_opcode(wn) == OPC_XPRAGMA && WN_pragma(wn) == WN_PRAGMA_IF) {
02017 WN* wn_cond = LWN_Copy_Tree(WN_kid0(wn));
02018 LWN_Copy_Def_Use(WN_kid0(wn), wn_cond, du);
02019 dg->Add_Deps_To_Copy_Block(WN_kid0(wn), wn_cond, FALSE);
02020 if (!Add_Condition(info, wn_cond, wn_if))
02021 LWN_Delete_Tree(wn_cond);
02022 LWN_Extract_From_Block(wn);
02023 LWN_Delete_Tree(wn);
02024 }
02025 }
02026
02027
02028 WN* wn_trip_count = Mp_Trip_Count(wn_loop);
02029 if (wn_trip_count != NULL) {
02030 TYPE_ID index_type = Promote_Type(Do_Wtype((WN *) wn_loop));
02031 WN* wn_one = LWN_Make_Icon(index_type, 1);
02032 OPCODE op_gt = OPCODE_make_op(OPR_GT, Boolean_type, index_type);
02033 WN* wn_trip_test = LWN_CreateExp2(op_gt, wn_trip_count, wn_one);
02034 if (!Add_Condition(info, wn_trip_test, wn_if))
02035 LWN_Delete_Tree(wn_trip_test);
02036 }
02037
02038
02039 OPCODE op_intrinsic = OPCODE_make_op(OPR_INTRINSIC_OP, type, MTYPE_V);
02040 WN* wn_not_parallel = NULL;
02041 if (is_pdo) {
02042 WN* wn_intrinsic = WN_Create_Intrinsic(op_intrinsic,
02043 INTRN_OMP_DO_WORKSHARING, 0, NULL);
02044 LWN_Parentize(wn_intrinsic);
02045 wn_not_parallel = wn_intrinsic;
02046 } else {
02047 WN* wn_intrinsic = WN_Create_Intrinsic(op_intrinsic,
02048 INTRN_MP_IN_PARALLEL_REGION, 0, NULL);
02049 LWN_Parentize(wn_intrinsic);
02050 OPCODE op_lnot = OPCODE_make_op(OPR_LNOT, Boolean_type, MTYPE_V);
02051 wn_not_parallel = LWN_CreateExp1(op_lnot, wn_intrinsic);
02052 }
02053 Add_Condition(info, wn_not_parallel, wn_if);
02054 Prune_Redundant_Trues(WN_if_test(wn_if));
02055
02056 if (Cur_PU_Feedback) {
02057 Update_Guarded_Do_FB(wn_if, wn_loop, Cur_PU_Feedback);
02058 }
02059
02060 return wn_copy;
02061 }
02062
02063
02064
02065
02066
02067
02068
02069
02070 static BOOL Mp_Want_Version_Parallel_Region(WN* wn_region,
02071 BOOL test_already)
02072 {
02073
02074 if (!LNO_Version_Mp_Loops)
02075 return FALSE;
02076
02077 if (test_already && Is_Versioned_Mp_Region(wn_region))
02078 return FALSE;
02079
02080 INT construct_count = 0;
02081 for (WN* wn = wn_region; wn != NULL; wn = LWN_Get_Parent(wn))
02082 if (Mp_Basic_Parallel_Construct(wn))
02083 construct_count++;
02084 if (construct_count > 1)
02085 return FALSE;
02086
02087
02088
02089 WN* pragmas=WN_region_pragmas(wn_region);
02090 WN* next_wn=WN_first(pragmas);
02091 while (next_wn) {
02092 if (WN_opcode(next_wn)==OPC_PRAGMA)
02093 if ((WN_PRAGMA_ID)WN_pragma(next_wn)==WN_PRAGMA_SYNC_DOACROSS) {
02094 return FALSE;
02095 }
02096 next_wn=WN_next(next_wn);
02097 }
02098
02099
02100 return TRUE;
02101 }
02102
02103
02104
02105
02106
02107
02108
02109 static void Mp_Delete_Outer_Mp_Lego_Info(WN* wn_tree)
02110 {
02111 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
02112 DO_LOOP_INFO* dli_tree = Get_Do_Loop_Info(wn_tree);
02113 CXX_DELETE(dli_tree->Mp_Info, &LNO_default_pool);
02114 dli_tree->Mp_Info = NULL;
02115 dli_tree->Serial_Version_of_Concurrent_Loop = TRUE;
02116 if (dli_tree->Lego_Info != NULL) {
02117 DISTR_ARRAY* dact = Lookup_DACT(dli_tree->Lego_Info->Array()->St());
02118 if (dact == NULL || !dact->Dinfo()->IsReshaped()) {
02119 CXX_DELETE(dli_tree->Lego_Info, LEGO_pool);
02120 dli_tree->Lego_Info = NULL;
02121 }
02122 }
02123 }
02124
02125 if (Mp_Basic_Parallel_Construct(wn_tree))
02126 return;
02127
02128 if (WN_opcode(wn_tree) == OPC_BLOCK) {
02129 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02130 Mp_Delete_Outer_Mp_Lego_Info(wn);
02131 } else {
02132 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02133 Mp_Delete_Outer_Mp_Lego_Info(WN_kid(wn_tree, i));
02134 }
02135 }
02136
02137
02138
02139
02140
02141
02142
02143 static WN* Mp_Version_Parallel_Region(WN* wn_region)
02144 {
02145 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02146 DU_MANAGER* du = Du_Mgr;
02147 REDUCTION_MANAGER* rm = red_manager;
02148
02149 if (!Mp_Want_Version_Parallel_Region(wn_region, TRUE))
02150 return NULL;
02151
02152
02153 REGION_INFO* rgi = Get_Region_Info(wn_region);
02154 BOOL is_auto_parallel = rgi != NULL && rgi->Auto_Parallelized();
02155 BOOL is_omp = WN_pragma_omp(WN_first(WN_region_pragmas(wn_region)));
02156
02157
02158 WN* wn_block = WN_region_body(wn_region);
02159 WN* wn_parent = LWN_Get_Parent(wn_region);
02160 WN* wn_prev = WN_prev(wn_region);
02161 WN_MAP version_map = WN_MAP_Create(&LNO_local_pool);
02162 WN* wn_copy = LWN_Copy_Tree(wn_block, TRUE, LNO_Info_Map, TRUE,
02163 version_map);
02164 BOOL all_internal = WN_Rename_Duplicate_Labels(wn_block, wn_copy,
02165 Current_Func_Node, &LNO_local_pool);
02166 Is_True(all_internal, ("external labels renamed"));
02167
02168 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
02169 Prompf_Mp_Version(wn_block, wn_copy, is_auto_parallel);
02170
02171
02172 WN* wn_array[2];
02173 wn_array[0] = wn_block;
02174 wn_array[1] = wn_copy;
02175 Unrolled_DU_Update(wn_array, 2, Do_Depth(wn_region), TRUE, FALSE);
02176 dg->Versioned_Dependences_Update(wn_block, wn_copy, Do_Depth(wn_region) + 1,
02177 version_map);
02178 WN_MAP_Delete(version_map);
02179 if (rm != NULL)
02180 rm->Unroll_Update(wn_array, 2);
02181
02182
02183 Mp_Delete_Outer_Mp_Lego_Info(wn_copy);
02184 LWN_ITER* itr = LWN_WALK_TreeIter(wn_copy);
02185 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02186 if (WN_opcode(itr->wn) == OPC_DO_LOOP) {
02187 }
02188 }
02189 LWN_Extract_From_Block(wn_region);
02190
02191
02192 WN* wn_total_cond = LWN_Make_Icon(Boolean_type, 1);
02193 WN* wn_if = LWN_CreateIf(wn_total_cond, WN_CreateBlock(), wn_copy);
02194 LWN_Insert_Block_After(WN_then(wn_if), NULL, wn_region);
02195 WN_Set_Linenum(wn_if, WN_Get_Linenum(wn_region));
02196 BOOL has_do = Find_SCF_Inside(wn_region, OPC_DO_LOOP) != NULL;
02197 IF_INFO *ii =
02198 CXX_NEW(IF_INFO(&LNO_default_pool, has_do, TRUE), &LNO_default_pool);
02199 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii);
02200 WN_Set_If_MpVersion(wn_if);
02201 DOLOOP_STACK *stack = CXX_NEW(DOLOOP_STACK(&LNO_default_pool),
02202 &LNO_default_pool);
02203 Build_Doloop_Stack(wn_if, stack);
02204 LNO_Build_If_Access(wn_if, stack);
02205 LWN_Insert_Block_After(wn_parent, wn_prev, wn_if);
02206 Mp_Disable_Opts_On_Internal_Regions(wn_block, 0);
02207 Mp_Extract_Parallel_Directives(wn_copy, is_auto_parallel, 1);
02208 if (is_omp && !is_auto_parallel) {
02209
02210
02211
02212
02213
02214
02215
02216
02217
02218
02219
02220
02221 }
02222
02223
02224 COND_BOUNDS_INFO *info =
02225 CXX_NEW(COND_BOUNDS_INFO(&LNO_local_pool), &LNO_local_pool);
02226 info->Collect_Outer_Info(wn_parent);
02227 WN* wnn = NULL;
02228 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
02229 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
02230 wnn = WN_next(wn);
02231 if (WN_opcode(wn) == OPC_XPRAGMA && WN_pragma(wn) == WN_PRAGMA_IF) {
02232 WN* wn_cond = LWN_Copy_Tree(WN_kid0(wn));
02233 LWN_Copy_Def_Use(WN_kid0(wn), wn_cond, du);
02234 dg->Add_Deps_To_Copy_Block(WN_kid0(wn), wn_cond, FALSE);
02235 if (!Add_Condition(info, wn_cond, wn_if))
02236 LWN_Delete_Tree(wn_cond);
02237 LWN_Extract_From_Block(wn);
02238 LWN_Delete_Tree(wn);
02239 }
02240 }
02241
02242 if (is_auto_parallel) {
02243 WN* wn_loop = Find_SCF_Inside(wn_region, OPC_DO_LOOP);
02244 WN* wn_trip_count = Trip_Count(wn_loop);
02245 TYPE_ID index_type = Promote_Type(Do_Wtype((WN *) wn_loop));
02246 WN* wn_one = LWN_Make_Icon(index_type, 1);
02247 OPCODE op_gt = OPCODE_make_op(OPR_GT, Boolean_type, index_type);
02248 WN* wn_trip_test = LWN_CreateExp2(op_gt, wn_trip_count, wn_one);
02249 if (!Add_Condition(info, wn_trip_test, wn_if))
02250 LWN_Delete_Tree(wn_trip_test);
02251 }
02252
02253
02254 OPCODE op_intrinsic = OPCODE_make_op(OPR_INTRINSIC_OP, MTYPE_I4, MTYPE_V);
02255 WN* wn_intrinsic = WN_Create_Intrinsic(op_intrinsic,
02256 INTRN_MP_IN_PARALLEL_REGION, 0, NULL);
02257 LWN_Parentize(wn_intrinsic);
02258 OPCODE op_lnot = OPCODE_make_op(OPR_LNOT, Boolean_type, MTYPE_V);
02259 WN* wn_not_parallel = LWN_CreateExp1(op_lnot, wn_intrinsic);
02260 Add_Condition(info, wn_not_parallel, wn_if);
02261 Prune_Redundant_Trues(WN_if_test(wn_if));
02262 return wn_copy;
02263 }
02264
02265
02266
02267
02268
02269
02270
02271 static WN* Innermost_Doacross_Nest_Loop(WN* wn_outer)
02272 {
02273 WN* wn_inner = wn_outer;
02274 DO_LOOP_INFO* dli = NULL;
02275 WN* wn = 0;
02276 for (wn = wn_outer; wn != NULL; wn = LWN_Get_Parent(wn)) {
02277 if (WN_opcode(wn) == OPC_DO_LOOP) {
02278 dli = Get_Do_Loop_Info(wn);
02279 if (dli->Lego_Mp_Key_Depth == 0)
02280 break;
02281 }
02282 }
02283 FmtAssert(wn != NULL,
02284 ("Innermost_Doacross_Nest_Loop: Could not find level 0 loop"));
02285 INT lower = dli->Lego_Mp_Key_Lower;
02286 INT upper = dli->Lego_Mp_Key_Upper;
02287 for (wn = wn_outer; wn != NULL; wn = Next_SNL_Loop(wn)) {
02288 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02289 if (dli->Lego_Mp_Key_Lower == 0 || dli->Lego_Mp_Key_Lower < lower
02290 || dli->Lego_Mp_Key_Upper > upper)
02291 break;
02292 wn_inner = wn;
02293 }
02294 return wn_inner;
02295 }
02296
02297
02298
02299
02300
02301
02302
02303 static void Prompf_Tile(WN* wn_outer,
02304 BOOL is_mp)
02305 {
02306 INT new_ids[SNL_MAX_LOOPS];
02307 WN* wn_loops[SNL_MAX_LOOPS];
02308 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(wn_outer);
02309 if (dli_outer->Lego_Mp_Key_Lower == 0)
02310 return;
02311 WN* wn_inner = Innermost_Doacross_Nest_Loop(wn_outer);
02312 wn_loops[0] = wn_inner;
02313 INT old_id = WN_MAP32_Get(Prompf_Id_Map, wn_inner);
02314 INT i = 0;
02315 for (WN* wn = LWN_Get_Parent(wn_inner); wn != NULL; wn = LWN_Get_Parent(wn)) {
02316 if (WN_opcode(wn) == OPC_DO_LOOP) {
02317 wn_loops[i + 1] = wn;
02318 new_ids[i++] = New_Construct_Id();
02319 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02320 if (dli->Lego_Mp_Key_Depth == 0)
02321 break;
02322 }
02323 }
02324 INT nloops = i + 1;
02325 WN_MAP32_Set(Prompf_Id_Map, wn_loops[nloops - 1], old_id);
02326 for (i = 0; i < nloops - 1; i++)
02327 WN_MAP32_Set(Prompf_Id_Map, wn_loops[i], new_ids[nloops - 2 - i]);
02328 if (is_mp)
02329 if (dli_outer->Is_Doacross)
02330 Prompf_Info->Doacross_Outer_Tile(old_id, new_ids[0]);
02331 else
02332 Prompf_Info->Mp_Tile(old_id, new_ids, nloops - 1);
02333 else
02334 Prompf_Info->Dsm_Tile(old_id, new_ids, nloops - 1);
02335 }
02336
02337
02338
02339
02340
02341
02342
02343 static void Prompf_Nested_Tile(WN* wn_outer)
02344 {
02345 WN* wn_inner = Innermost_Doacross_Nest_Loop(wn_outer);
02346 DOLOOP_STACK stack(&PROMPF_pool);
02347 Build_Doloop_Stack(wn_inner, &stack);
02348 INT outer_depth = Do_Loop_Depth(wn_outer);
02349 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(wn_outer);
02350 INT lower_key = dli_outer->Lego_Mp_Key_Lower;
02351 INT upper_key = dli_outer->Lego_Mp_Key_Upper;
02352 INT nloops = upper_key - lower_key + 1;
02353 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02354 for (INT i = lower_key; i <= upper_key; i++) {
02355 INT old_id = 0;
02356 for (WN* wn = wn_inner; wn != wn_outer; wn = LWN_Get_Parent(wn)) {
02357 if (WN_opcode(wn) == OPC_DO_LOOP) {
02358 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02359 if (dli->Lego_Mp_Key_Lower == i) {
02360 if (old_id == 0) {
02361 old_id = WN_MAP32_Get(Prompf_Id_Map, wn);
02362 old_ids[i - lower_key] = old_id;
02363 FmtAssert(old_id != 0,
02364 ("Prompf_Nested_Tile: Need a real id on original inner loop"));
02365 } else {
02366 FmtAssert(WN_MAP32_Get(Prompf_Id_Map, wn) == 0,
02367 ("Prompf_Nested_Tile: Middle tile loop already assigned id"));
02368 INT new_id = New_Construct_Id();
02369 WN_MAP32_Set(Prompf_Id_Map, wn, new_id);
02370 Prompf_Info->Donest_Middle_Tile(old_id, new_id);
02371 }
02372 }
02373 }
02374 }
02375 }
02376
02377
02378 INT new_id = New_Construct_Id();
02379 WN_MAP32_Set(Prompf_Id_Map, wn_outer, new_id);
02380 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_outer));
02381 WN_MAP32_Set(Prompf_Id_Map, wn_region, new_id);
02382 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
02383 WN_MAP32_Set(Prompf_Id_Map, wn_first, new_id);
02384 Prompf_Info->Donest_Outer_Tile(old_ids, new_id, nloops);
02385 }
02386
02387
02388
02389
02390
02391
02392
02393
02394 static WN* Mp_Tile_Single_Loop(WN* loop,
02395 BOOL LNO_Ozero,
02396 MEM_POOL *pool)
02397 {
02398 WN* wn_new_loop = loop;
02399 Is_True(Upper_Bound_Standardize(WN_end(loop), TRUE),
02400 ("Tried to MP tile a loop with non-standard upper bound."));
02401 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
02402 if (dli->Lego_Info != NULL)
02403 return Lego_Tile_Single_Loop(loop, pool);
02404
02405 Is_True (dli->Mp_Info, ("Mp_Tile_Single_Loop(): NULL Mp_Info"));
02406 if (LNO_Ozero || !LNO_Pseudo_Lower && dli->Mp_Info->Nest_Total() <= 1
02407 || dli->Mp_Info->Plower_Disabled())
02408 return wn_new_loop;
02409
02410 switch (dli->Mp_Info->Sched_Type()) {
02411 case MP_SCHED_SIMPLE:
02412 wn_new_loop = Processor_2D_Tile_Loop(loop, pool, FALSE);
02413 break;
02414 case MP_SCHED_INTERLEAVE:
02415 wn_new_loop = Processor_3D_Tile_Loop(loop, pool, FALSE);
02416 break;
02417 case MP_SCHED_DYNAMIC:
02418 case MP_SCHED_GSS:
02419 case MP_SCHED_RUNTIME:
02420 default:
02421 DO_LOOP_INFO *dli = Get_Do_Loop_Info(loop);
02422 dli->No_Fission = TRUE;
02423 dli->No_Fusion = TRUE;
02424 dli->Cannot_Interchange = TRUE;
02425 dli->Cannot_Block = TRUE;
02426 dli->Required_Unroll = 1;
02427 break;
02428 }
02429 return wn_new_loop;
02430 }
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440 static void Mp_Coordinate_Copy_Out_Coordinates(WN* wn_outer_loop,
02441 INT tile_count,
02442 ST* st_coordinates,
02443 WN* wn_bounds_code,
02444 STACK<WN*>* dep_stack)
02445 {
02446 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02447 DU_MANAGER* du = Du_Mgr;
02448
02449 INT64 linenum = WN_Get_Linenum(wn_outer_loop);
02450 WN* wn_inner_tile_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop, tile_count);
02451 DOLOOP_STACK stack(&LNO_local_pool);
02452 Build_Doloop_Stack(wn_inner_tile_loop, &stack);
02453 TY_IDX ty_i8 = Be_Type_Tbl(MTYPE_I8);
02454 TY_IDX ty_i8_ptr = Make_Pointer_Type(Be_Type_Tbl(MTYPE_I8));
02455 INT i;
02456 for (i = 0; i < stack.Elements(); i++)
02457 if (stack.Bottom_nth(i) == wn_outer_loop)
02458 break;
02459 for (INT j = 0; i < stack.Elements(); j++, i++) {
02460 WN* wn_loop = stack.Bottom_nth(i);
02461 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_outer_loop);
02462 WN* wn_pattern = WN_start(wn_loop);
02463 WN* wn_iload = Create_Array_Load(st_coordinates, MTYPE_I8,
02464 j, 8, tile_count);
02465 dg->Add_Vertex(wn_iload);
02466 dep_stack->Push(wn_iload);
02467 SYMBOL sym_stid(wn_pattern);
02468 WN* wn_iload_cast = wn_iload;
02469 if (sym_stid.Type != MTYPE_I8)
02470 wn_iload_cast = LWN_Integer_Casts(wn_iload_cast, sym_stid.Type, MTYPE_I8);
02471 WN* wn_new_stid = AWN_StidIntoSym(&sym_stid, wn_iload_cast);
02472 WN_Set_Linenum(wn_new_stid, linenum);
02473 LWN_Insert_Block_Before(wn_bounds_code, NULL, wn_new_stid);
02474 STACK<WN*> nstack(&LNO_local_pool);
02475 Lego_Find_Nodes(OPR_LDID, sym_stid, WN_do_body(wn_loop), &nstack);
02476 for (INT j = 0; j < nstack.Elements(); j++) {
02477 WN* wn_ldid = nstack.Bottom_nth(j);
02478 du->Remove_Use_From_System(wn_ldid);
02479 du->Add_Def_Use(wn_new_stid, wn_ldid);
02480 DEF_LIST *def_list = du->Ud_Get_Def(wn_ldid);
02481 def_list->Set_loop_stmt(NULL);
02482 }
02483 }
02484 Add_Pragma_To_MP_Region(wn_outer_loop, st_coordinates, 0, WN_PRAGMA_LOCAL);
02485 }
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496 static WN* Mp_Coordinate_Call(WN* wn_loop,
02497 INT tile_count,
02498 ST* st_layout,
02499 ST* st_coordinates,
02500 WN* wn_bounds_code)
02501 {
02502 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02503 DU_MANAGER* du = Du_Mgr;
02504
02505 INT64 linenum = WN_Get_Linenum(wn_loop);
02506 WN_Set_Linenum(wn_bounds_code, linenum);
02507 TY_IDX ty_i8 = Be_Type_Tbl(MTYPE_I8);
02508 TY_IDX ty_i8_ptr = Make_Pointer_Type(Be_Type_Tbl(MTYPE_I8));
02509
02510
02511
02512 OPCODE op_call = OPCODE_make_op(OPR_CALL, MTYPE_V, MTYPE_V);
02513 WN* wn_call = WN_Create(op_call, 4);
02514 WN_st_idx(wn_call) = ST_st_idx(distr_st_entries[Processor_Coordinates]);
02515 WN_Set_Call_Parm_Mod(wn_call);
02516 WN_Set_Call_Parm_Ref(wn_call);
02517 WN_Set_Linenum(wn_call, linenum);
02518 dg->Add_Vertex(wn_call);
02519
02520
02521 OPCODE op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
02522 WN* wn_layout = WN_CreateLda(op_lda, 0, ty_i8_ptr, st_layout);
02523 #ifdef _NEW_SYMTAB
02524 Clear_ST_addr_not_passed(st_layout);
02525 #else
02526 Set_ST_addr_taken_passed(st_layout);
02527 #endif
02528 WN* wn_parm = WN_CreateParm(Pointer_type, wn_layout, ty_i8_ptr,
02529 WN_PARM_BY_REFERENCE);
02530 LWN_Set_Parent(wn_layout, wn_parm);
02531 WN_kid(wn_call, 0) = wn_parm;
02532 LWN_Set_Parent(wn_parm, wn_call);
02533
02534
02535 WN* wn_tile_count = LWN_Make_Icon(MTYPE_I8, tile_count);
02536 TYPE_ID type = WN_rtype(wn_tile_count);
02537 if (type != MTYPE_I8)
02538 wn_tile_count = LWN_Integer_Casts(wn_tile_count, MTYPE_I8, type);
02539 wn_parm = WN_CreateParm(MTYPE_I8, wn_tile_count, Be_Type_Tbl(MTYPE_I8),
02540 WN_PARM_BY_VALUE);
02541 LWN_Set_Parent(wn_tile_count, wn_parm);
02542 WN_kid(wn_call, 1) = wn_parm;
02543 LWN_Set_Parent(wn_parm, wn_call);
02544
02545
02546 SYMBOL sym_index(WN_start(wn_loop));
02547 WN* wn_mythreadid = AWN_LdidSym(&sym_index);
02548 type = WN_rtype(wn_mythreadid);
02549 WN* wn_mythreadid_cast = wn_mythreadid;
02550 if (type != MTYPE_I8)
02551 wn_mythreadid_cast = LWN_Integer_Casts(wn_mythreadid, MTYPE_I8, type);
02552 wn_parm = WN_CreateParm(MTYPE_I8, wn_mythreadid_cast, Be_Type_Tbl(MTYPE_I8),
02553 WN_PARM_BY_VALUE);
02554 LWN_Set_Parent(wn_mythreadid_cast, wn_parm);
02555 WN_kid(wn_call, 2) = wn_parm;
02556 LWN_Set_Parent(wn_parm, wn_call);
02557 du->Add_Def_Use(WN_start(wn_loop), wn_mythreadid);
02558 du->Add_Def_Use(WN_step(wn_loop), wn_mythreadid);
02559 DEF_LIST *def_list = du->Ud_Get_Def(wn_mythreadid);
02560 def_list->Set_loop_stmt(wn_loop);
02561
02562
02563 op_lda = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
02564 WN* wn_coordinate = WN_CreateLda(op_lda, 0, ty_i8_ptr, st_coordinates);
02565 #ifdef _NEW_SYMTAB
02566 Clear_ST_addr_not_passed(st_coordinates);
02567 #else
02568 Set_ST_addr_taken_passed(st_coordinates);
02569 #endif
02570 wn_parm = WN_CreateParm(Pointer_type, wn_coordinate, ty_i8_ptr,
02571 WN_PARM_BY_REFERENCE);
02572 LWN_Set_Parent(wn_coordinate, wn_parm);
02573 WN_kid(wn_call, 3) = wn_parm;
02574 LWN_Set_Parent(wn_parm, wn_call);
02575
02576
02577 LWN_Insert_Block_Before(wn_bounds_code, WN_first(wn_bounds_code), wn_call);
02578 return wn_call;
02579 }
02580
02581
02582
02583
02584
02585
02586
02587 static void Mp_Collapse_Coordinates(WN* wn_loop,
02588 INT tile_count,
02589 ST* st_layout,
02590 ST* st_coordinates)
02591 {
02592 STACK<WN*> dep_stack(&LNO_local_pool);
02593 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
02594 WN* wn_bounds_code = WN_CreateBlock();
02595 Mp_Coordinate_Copy_Out_Coordinates(wn_loop, tile_count, st_coordinates,
02596 wn_bounds_code, &dep_stack);
02597 WN* wn_call = Mp_Coordinate_Call(wn_loop, tile_count, st_layout,
02598 st_coordinates, wn_bounds_code);
02599 Mp_Fix_Ref_Array_Aliases(wn_call, LWN_Get_Parent(wn_loop), st_layout);
02600 Mp_Fix_Ref_Array_Aliases(wn_call, wn_bounds_code, st_coordinates);
02601 Mp_Insert_Bounds_Code(wn_loop, wn_bounds_code, TRUE);
02602 Mp_Fix_Deps(&dep_stack);
02603 }
02604
02605
02606
02607
02608
02609
02610 static DIRECTION Direction_Union(DIRECTION dir1,
02611 DIRECTION dir2)
02612 {
02613 DEP dep1 = DEP_SetDirection(dir1);
02614 DEP dep_union = DEP_UnionDirection(dep1, dir2);
02615 return DEP_Direction(dep_union);
02616 }
02617
02618
02619
02620
02621
02622
02623
02624
02625
02626 static DEPV* Depv_Collapse(DEPV* dv,
02627 INT num_dim,
02628 INT start_index,
02629 INT tile_count,
02630 MEM_POOL* pool)
02631 {
02632 FmtAssert(start_index >= 0 && start_index <= num_dim - 1,
02633 ("Bad indexing of DEPV in tile loop collapse"));
02634 FmtAssert(tile_count >= 0 && start_index + tile_count - 1 <= num_dim - 1,
02635 ("Bad indexing of DEPV in tile loop collapse"));
02636 DIRECTION dir_new;
02637 BOOL dir_new_defined = FALSE;
02638 INT i;
02639 for (i = start_index; i <= start_index + tile_count - 1; i++) {
02640 DIRECTION dir = DEP_Direction(DEPV_Dep(dv, i));
02641 if (dir & DIR_POS) {
02642 if (dir_new_defined) {
02643 dir_new = Direction_Union(dir_new, DIR_POS);
02644 } else {
02645 dir_new = DIR_POS;
02646 dir_new_defined = TRUE;
02647 }
02648 }
02649 if (dir & DIR_NEG) {
02650 if (dir_new_defined) {
02651 dir_new = Direction_Union(dir_new, DIR_NEG);
02652 } else {
02653 dir_new = DIR_NEG;
02654 dir_new_defined = TRUE;
02655 }
02656 }
02657 if (!(dir & DIR_EQ))
02658 break;
02659 }
02660 if (i == start_index + tile_count) {
02661 if (dir_new_defined) {
02662 dir_new = Direction_Union(dir_new, DIR_EQ);
02663 } else {
02664 dir_new = DIR_EQ;
02665 }
02666 }
02667 DEPV* dv_new = DEPV_Create(pool, num_dim);
02668 for (i = 0; i < start_index; i++)
02669 DEPV_Dep(dv_new, i) = DEPV_Dep(dv, i);
02670 DEPV_Dep(dv_new, start_index) = DEP_SetDirection(dir_new);
02671 for (i = start_index + 1; i < num_dim; i++)
02672 DEPV_Dep(dv_new, i) = DEPV_Dep(dv, i + tile_count - 1);
02673 return dv_new;
02674 }
02675
02676
02677
02678
02679
02680
02681
02682 static BOOL Depv_Already_On_List(DEPV_LIST* dvl_old,
02683 DEPV* dv_new)
02684 {
02685 DEPV_ITER iter(dvl_old);
02686 for (DEPV_NODE* node=iter.First(); !iter.Is_Empty(); node=iter.Next()) {
02687 DEPV *Depv = node->Depv;
02688 INT i;
02689 for (i = 0; i < dvl_old->Num_Dim(); i++)
02690 if (DEPV_Dep(dv_new, i) != DEPV_Dep(Depv, i))
02691 break;
02692 if (i == dvl_old->Num_Dim())
02693 return TRUE;
02694 }
02695 return FALSE;
02696 }
02697
02698
02699
02700
02701
02702
02703
02704
02705 static void Mp_Collapse_Dependences(WN* wn_loop,
02706 INT tile_count)
02707 {
02708 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02709 INT local_count = 0;
02710 INT bad_mem_count = 0;
02711 for (WN* wn = wn_loop; wn != NULL; wn = Find_Next_Innermost_Do(wn)) {
02712 if (++local_count > tile_count)
02713 break;
02714 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02715 if (dli->Has_Bad_Mem)
02716 bad_mem_count++;
02717 }
02718 FmtAssert(bad_mem_count == 0 || bad_mem_count == tile_count,
02719 ("Should either be no or all bad mem loops."));
02720 LWN_ITER* iter_first = LWN_WALK_TreeIter(WN_do_body(wn_loop));
02721 for (LWN_ITER *iter = iter_first; iter; iter = LWN_WALK_TreeNext(iter)) {
02722 WN* wn = iter->wn;
02723 OPCODE op = WN_opcode(wn);
02724 if (!OPCODE_is_load(op) && !OPCODE_is_store(op) && !OPCODE_is_call(op))
02725 continue;
02726 VINDEX16 v = dg->Get_Vertex(wn);
02727 if (v == 0)
02728 continue;
02729 EINDEX16 e_next = 0;
02730 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = e_next) {
02731 e_next = dg->Get_Next_Out_Edge(e);
02732 VINDEX16 vSink = dg->Get_Sink(e);
02733 WN* wnSink = dg->Get_Wn(vSink);
02734 if (!Wn_Is_Inside(wnSink, wn_loop))
02735 continue;
02736 DEPV_ARRAY* dva = dg->Depv_Array(e);
02737 if (bad_mem_count == tile_count) {
02738 dva->Remove_Unused_Dim(tile_count - 1);
02739 } else {
02740 DEPV_LIST dl_out(dva->Num_Dim() - tile_count + 1,
02741 dva->Num_Unused_Dim(), &LNO_local_pool);
02742 for (INT i = 0; i < dva->Num_Vec(); i++) {
02743 DEPV* dv_old = dva->Depv(i);
02744 INT start_index = Do_Depth(wn_loop) - dva->Num_Unused_Dim();
02745 DEPV* dv_new = Depv_Collapse(dv_old, dva->Num_Dim() - tile_count + 1,
02746 start_index, tile_count, &LNO_local_pool);
02747 if (!Depv_Already_On_List(&dl_out, dv_new))
02748 dl_out.Append(CXX_NEW(DEPV_NODE(dv_new), &LNO_local_pool));
02749 }
02750 DEPV_ARRAY* dva_new = Create_DEPV_ARRAY(&dl_out, dg->Pool());
02751 Delete_DEPV_ARRAY(dva, dg->Pool());
02752 dg->Set_Depv_Array(e, dva_new);
02753 }
02754 }
02755 }
02756 }
02757
02758
02759
02760
02761
02762
02763
02764
02765 static void Mp_Collapse_Loop_Statements(WN* wn_loop,
02766 INT tile_count)
02767 {
02768 DU_MANAGER* du = Du_Mgr;
02769 WN* wn_inner_tile_loop = SNL_Get_Inner_Snl_Loop(wn_loop, tile_count);
02770 DOLOOP_STACK stack(&LNO_local_pool);
02771 Build_Doloop_Stack(wn_inner_tile_loop, &stack);
02772 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(wn_loop));
02773 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02774 WN* wn = itr->wn;
02775 OPERATOR opr = WN_operator(wn);
02776
02777 if (opr != OPR_LDID)
02778 continue;
02779
02780 WN* wnn = 0;
02781 for (wnn = wn; wnn != NULL; wnn = LWN_Get_Parent(wnn))
02782 if (WN_opcode(wnn) == OPC_IO_ITEM)
02783 break;
02784 if (wnn != NULL)
02785 continue;
02786 WN* loop_stmt = du->Ud_Get_Def(wn)->Loop_stmt();
02787 for (INT i = 1; i < stack.Elements(); i++) {
02788 if (loop_stmt == stack.Bottom_nth(i)) {
02789 du->Ud_Get_Def(wn)->Set_loop_stmt(wn_loop);
02790 break;
02791 }
02792 }
02793 }
02794 }
02795
02796
02797
02798
02799
02800
02801
02802 static void Remove_Dead_Single_Defs(WN* wn_exp)
02803 {
02804 DU_MANAGER* du = Du_Mgr;
02805 LWN_ITER* itr = LWN_WALK_TreeIter(wn_exp);
02806 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02807 WN* wn = itr->wn;
02808 if (WN_operator(wn) == OPR_LDID) {
02809 DEF_LIST* def_list = du->Ud_Get_Def(wn);
02810 if (def_list == NULL)
02811 continue;
02812 if (def_list->Incomplete())
02813 continue;
02814 WN* wn_single_def = NULL;
02815 const DU_NODE* node = NULL;
02816 INT i = 0;
02817 DEF_LIST_ITER iter1(def_list);
02818 for (node = iter1.First(); !iter1.Is_Empty(); i++, node = iter1.Next()) {
02819 WN* def = node->Wn();
02820 if (i == 0) {
02821 wn_single_def = def;
02822 } else {
02823 wn_single_def = NULL;
02824 break;
02825 }
02826 }
02827 if (wn_single_def == NULL)
02828 continue;
02829 node = NULL;
02830 i = 0;
02831 USE_LIST* use_list = du->Du_Get_Use(wn_single_def);
02832 if (use_list == NULL)
02833 continue;
02834 if (use_list->Incomplete())
02835 continue;
02836 USE_LIST_ITER iter2(use_list);
02837 for (node = iter2.First(); !iter2.Is_Empty(); i++, node = iter2.Next()) {
02838 if (i > 0) {
02839 wn_single_def = NULL;
02840 break;
02841 }
02842 }
02843 if (wn_single_def == NULL)
02844 continue;
02845 du->Delete_Def_Use(wn_single_def, wn);
02846 LWN_Delete_Tree(wn_single_def);
02847 }
02848 }
02849 }
02850
02851
02852
02853
02854
02855
02856
02857
02858
02859
02860 static WN* Find_Lego_Base_Exp(WN* wn_exp)
02861 {
02862 LWN_ITER* itr = LWN_WALK_TreeIter(wn_exp);
02863 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02864 WN* wn = itr->wn;
02865 OPERATOR opr = WN_operator(wn);
02866 if (opr == OPR_MPY || opr == OPR_LDID || opr == OPR_ILOAD)
02867 return wn;
02868 }
02869 FmtAssert(FALSE, ("Could not find base of lego expression"));
02870 return NULL;
02871 }
02872
02873
02874
02875
02876
02877
02878
02879
02880 static void Mp_Collapse_Loop_Heads(WN* wn_outer_loop,
02881 INT tile_count,
02882 INT tiling_depth[],
02883 BOOL negative_stride)
02884 {
02885 DU_MANAGER* du = Du_Mgr;
02886 INT nloops = 0;
02887 INT i;
02888 for (i = 0; i < tile_count; i++)
02889 nloops += tiling_depth[i];
02890 WN* wn_inner_tile_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop, tile_count);
02891 DOLOOP_STACK stack(&LNO_local_pool);
02892 Build_Doloop_Stack(wn_inner_tile_loop, &stack);
02893 for (i = 0; i < stack.Elements(); i++)
02894 if (stack.Bottom_nth(i) == wn_outer_loop)
02895 break;
02896 DO_LOOP_INFO* dli_outer_loop = Get_Do_Loop_Info(wn_outer_loop);
02897 for (i++; i < stack.Elements(); i++) {
02898 WN* wn_loop = stack.Bottom_nth(i);
02899 WN* wnn = NULL;
02900 for (WN* wn = WN_first(WN_do_body(wn_loop)); wn != NULL; wn = wnn) {
02901 wnn = WN_next(wn);
02902 LWN_Extract_From_Block(wn);
02903 LWN_Insert_Block_Before(LWN_Get_Parent(wn_loop), wn_loop, wn);
02904 }
02905 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
02906 if (dli->Is_Outer_Lego_Tile) {
02907 WN* wn_old_root = negative_stride ? WN_kid0(WN_start(wn_outer_loop))
02908 : UBexp(WN_end(wn_outer_loop));
02909 WN* wn_old_base = Find_Lego_Base_Exp(wn_old_root);
02910 WN* wn_parent = LWN_Get_Parent(wn_old_base);
02911 WN* wn_new_root = negative_stride ? WN_kid0(WN_start(wn_loop))
02912 : UBexp(WN_end(wn_loop));
02913 WN* wn_new_base = Find_Lego_Base_Exp(wn_new_root);
02914 WN* wn_factor = LWN_Copy_Tree(wn_new_base);
02915 LWN_Copy_Def_Use(wn_new_base, wn_factor, du);
02916 WN* wn_mul = AWN_Mpy(Promote_Type(Do_Wtype(wn_outer_loop)),
02917 wn_factor, wn_old_base);
02918 WN_kid0(wn_parent) = wn_mul;
02919 LWN_Parentize(wn_parent);
02920 }
02921 Remove_Dead_Single_Defs(WN_end(wn_loop));
02922 LWN_Extract_From_Block(wn_loop);
02923 LWN_Delete_Tree(wn_loop);
02924 dli_outer_loop->Lego_Mp_Key_Upper++;
02925 }
02926 wn_inner_tile_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop,
02927 nloops - (tile_count - 1));
02928 DOLOOP_STACK depth_stack(&LNO_local_pool);
02929 Build_Doloop_Stack(wn_inner_tile_loop, &depth_stack);
02930 for (i = 0; i < stack.Elements(); i++)
02931 if (depth_stack.Bottom_nth(i) == wn_outer_loop)
02932 break;
02933 for (i++; i < depth_stack.Elements(); i++) {
02934 DO_LOOP_INFO* dli = Get_Do_Loop_Info(depth_stack.Bottom_nth(i));
02935 dli->Depth -= tile_count - 1;
02936 }
02937 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(wn_inner_tile_loop));
02938 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02939 WN* wn = itr->wn;
02940 if (WN_opcode(wn) == OPC_DO_LOOP) {
02941 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02942 dli->Depth -= tile_count - 1;
02943 }
02944 }
02945 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(wn_outer_loop);
02946 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_outer_loop));
02947 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
02948 FmtAssert(wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA,
02949 ("Mp_Collapse_Loop_Heads: Cannot find doacross/pdo pragma"));
02950 FmtAssert(wn_first != NULL && WN_pragma(wn_first) == WN_PRAGMA_DOACROSS
02951 || WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_DO
02952 || WN_pragma(wn_first) == WN_PRAGMA_PDO_BEGIN,
02953 ("Mp_Collapse_Loop_Heads: Cannot find doacross/pdo pragma"));
02954 WN_pragma_arg2(wn_first) = 1;
02955 dli_outer->Mp_Info->Set_Nest_Total(1);
02956 }
02957
02958
02959
02960
02961
02962
02963
02964 static void Mp_Collapse_Cleanup(WN* wn_loop)
02965 {
02966
02967 char Str_Buf[256];
02968 DU_MANAGER* du = Du_Mgr;
02969 TYPE_ID wtype = WN_desc(WN_start(wn_loop));
02970 SYMBOL sym_old(WN_st(WN_index(wn_loop)), WN_offset(WN_index(wn_loop)),
02971 wtype);
02972 sprintf(Str_Buf, "$danest%d", WN_map_id(wn_loop));
02973 SYMBOL sym_new = Create_Preg_Symbol(Str_Buf, wtype);
02974 Replace_Symbol(WN_index(wn_loop), sym_old, sym_new, NULL, NULL);
02975 Replace_Symbol(WN_start(wn_loop), sym_old, sym_new, NULL, NULL);
02976 Replace_Symbol(WN_end(wn_loop), sym_old, sym_new, NULL, NULL);
02977 Replace_Symbol(WN_step(wn_loop), sym_old, sym_new, NULL, NULL);
02978 USE_LIST *use_list = du->Du_Get_Use(WN_start(wn_loop));
02979 USE_LIST_ITER iter(use_list);
02980 const DU_NODE* node = NULL;
02981 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
02982 WN* wn_use = node->Wn();
02983 Replace_Symbol(wn_use, sym_old, sym_new, NULL, NULL);
02984 }
02985
02986
02987
02988
02989
02990
02991
02992
02993
02994
02995
02996
02997
02998
02999
03000
03001
03002 DOLOOP_STACK rebuild_stack(&LNO_local_pool);
03003 Build_Doloop_Stack(LWN_Get_Parent(wn_loop), &rebuild_stack);
03004 LNO_Build_Access(wn_loop, &rebuild_stack, &LNO_default_pool);
03005 }
03006
03007
03008
03009
03010
03011
03012
03013
03014
03015 static void Mp_Collapse_Tile_Loops(WN* wn_loop,
03016 INT tile_count,
03017 INT tiling_depth[],
03018 ST* st_layout,
03019 BOOL negative_stride)
03020 {
03021 char Str_Buf[256];
03022 ST* st_coordinates = NULL;
03023 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03024 sprintf(Str_Buf, "indices%d", WN_map_id(wn_loop));
03025 st_coordinates = Create_Local_Array_ST(Str_Buf, Be_Type_Tbl(MTYPE_I8),
03026 tile_count);
03027 Mp_Collapse_Coordinates(wn_loop, tile_count, st_layout, st_coordinates);
03028 Mp_Collapse_Dependences(wn_loop, tile_count);
03029 Mp_Collapse_Loop_Statements(wn_loop, tile_count);
03030 Mp_Collapse_Loop_Heads(wn_loop, tile_count, tiling_depth, negative_stride);
03031 Mp_Collapse_Cleanup(wn_loop);
03032 }
03033
03034
03035
03036
03037
03038
03039
03040 static void Mp_Nested_Last_Thread(WN* wn_outer_loop,
03041 INT tile_count,
03042 INT tiling_depth[],
03043 DU_MANAGER* du)
03044 {
03045 char Str_Buf[256];
03046 INT nloops = 0;
03047 for (INT i = 0; i < tile_count; i++)
03048 nloops += tiling_depth[i];
03049 INT64 linenum = WN_Get_Linenum(wn_outer_loop);
03050 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_outer_loop));
03051 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
03052 WN* wnn = NULL;
03053 WN* wn_exp = NULL;
03054 WN* wn_lastthread = NULL;
03055 OPCODE opand = OPCODE_make_op(OPR_LAND, Boolean_type, MTYPE_V);
03056 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
03057 wnn = WN_next(wn);
03058 if (WN_opcode(wn) == OPC_PRAGMA && WN_pragma(wn) == WN_PRAGMA_LASTTHREAD) {
03059 SYMBOL sym_lastthread(WN_st(wn), WN_pragma_arg1(wn), MTYPE_I4);
03060 if (wn_exp == NULL)
03061 wn_exp = wn_lastthread;
03062 wn_lastthread = AWN_LdidSym(&sym_lastthread);
03063 WN* wn_stid = Find_Node(sym_lastthread, wn_outer_loop);
03064 if (WN_operator(wn_stid) == OPR_LDA)
03065 wn_stid = LWN_Get_Parent(wn_stid);
03066 du->Add_Def_Use(wn_stid, wn_lastthread);
03067 Copy_alias_info(Alias_Mgr, wn_stid, wn_lastthread);
03068 if (wn_exp != NULL)
03069 wn_exp = LWN_CreateExp2(opand, wn_exp, wn_lastthread);
03070 LWN_Extract_From_Block(wn);
03071 LWN_Delete_Tree(wn);
03072 }
03073 }
03074 FmtAssert(wn_exp != NULL, ("Didn't find any LASTTHREAD pragmas"));
03075 sprintf(Str_Buf, "$da_is_last%d", WN_map_id(wn_outer_loop));
03076 SYMBOL* is_last = CXX_NEW(SYMBOL(Create_Stack_Symbol(Str_Buf, MTYPE_I4)),
03077 &LNO_default_pool);
03078 Add_Pragma_To_MP_Region(wn_outer_loop, is_last->St(),
03079 is_last->WN_Offset(), WN_PRAGMA_LASTTHREAD);
03080 Add_Pragma_To_MP_Region(wn_outer_loop, is_last->St(),
03081 is_last->WN_Offset(), WN_PRAGMA_LOCAL);
03082 wn_exp = AWN_StidIntoSym(is_last, wn_exp);
03083 Create_local_alias(Alias_Mgr, wn_exp);
03084 WN* wn_inner_loop =
03085 SNL_Get_Inner_Snl_Loop(wn_outer_loop, nloops - (tile_count - 1));
03086 LWN_Insert_Block_Before(WN_do_body(wn_inner_loop),
03087 WN_first(WN_do_body(wn_inner_loop)), wn_exp);
03088 Hoist_Statement(wn_exp, Hoistable_Statement(wn_exp, du));
03089 WN_Set_Linenum(wn_exp, linenum);
03090 du->Add_Def_Use(wn_exp, Return_Node(Current_Func_Node));
03091 }
03092
03093
03094
03095
03096
03097
03098
03099 static void Mp_Remove_Onto_Pragmas(WN* wn_loop)
03100 {
03101 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
03102 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
03103 WN* wnn = NULL;
03104 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
03105 wnn = WN_next(wn);
03106 if (WN_opcode(wn) == OPC_XPRAGMA && WN_pragma(wn) == WN_PRAGMA_ONTO) {
03107 LWN_Extract_From_Block(wn);
03108 LWN_Delete_Tree(wn);
03109 }
03110 }
03111 }
03112
03113
03114
03115
03116
03117
03118
03119 static INT Lego_Tiling_Depth(WN* wn_loop)
03120 {
03121 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03122 LEGO_INFO* lego_info = dli->Lego_Info;
03123 if (lego_info->Dynamic_Affinity())
03124 return 3;
03125 SYMBOL *array_sym = lego_info->Array();
03126 FmtAssert(array_sym != NULL, ("No array symbol on lego loop"));
03127 DISTR_ARRAY *dact = Lookup_DACT(array_sym->St());
03128 INT curr_dim = lego_info->Dim_Num();
03129 switch (dact->Get_Dim(curr_dim)->Distr_Type()) {
03130 case DISTRIBUTE_BLOCK:
03131 return 2;
03132 case DISTRIBUTE_CYCLIC_CONST:
03133 return dact->Get_Dim(curr_dim)->Chunk_Const_Val() == 1 ? 2 : 3;
03134 case DISTRIBUTE_CYCLIC_EXPR:
03135 return 3;
03136 default:
03137 FmtAssert(FALSE, ("Do not currently handle this sched type"));
03138 return -1;
03139 }
03140 }
03141
03142
03143
03144
03145
03146
03147
03148 static INT Mp_Tiling_Depth(WN* wn_loop)
03149 {
03150 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03151 if (dli->Lego_Info != NULL)
03152 return Lego_Tiling_Depth(wn_loop);
03153 switch (dli->Mp_Info->Sched_Type()) {
03154 case MP_SCHED_SIMPLE:
03155 return 2;
03156 case MP_SCHED_INTERLEAVE:
03157 return 3;
03158 case MP_SCHED_DYNAMIC:
03159 case MP_SCHED_GSS:
03160 case MP_SCHED_RUNTIME:
03161 default:
03162 FmtAssert(FALSE, ("Mp lowerer should handle these sched types."));
03163 return -1;
03164 }
03165 }
03166
03167
03168
03169
03170
03171
03172
03173 extern void Repair_Bad_Dependences(WN* wn_loop)
03174 {
03175 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03176 LS_IN_LOOP* loop_ls = CXX_NEW(LS_IN_LOOP(wn_loop, dg, &LNO_local_pool, TRUE),
03177 &LNO_local_pool);
03178 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(wn_loop));
03179 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
03180 WN* wn = itr->wn;
03181 VINDEX16 v = dg->Get_Vertex(wn);
03182 if (v == 0)
03183 continue;
03184 DOLOOP_STACK wn_stack(&LNO_local_pool);
03185 Build_Doloop_Stack(wn, &wn_stack);
03186 EINDEX16 e = 0;
03187 DOLOOP_STACK source_stack(&LNO_local_pool);
03188 STACK<WN*> stack_of_source_wn(&LNO_local_pool);
03189 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
03190 if (Is_Lexpos(dg->Depv_Array(e)))
03191 continue;
03192 WN* wn_source = dg->Get_Wn(dg->Get_Source(e));
03193 stack_of_source_wn.Push(wn_source);
03194 }
03195 INT i;
03196 for (i=0; i<stack_of_source_wn.Elements(); i++) {
03197 VINDEX16 v_source=dg->Get_Vertex(stack_of_source_wn.Top_nth(i));
03198 e=dg->Get_Edge(v_source,v);
03199 dg->Delete_Array_Edge(e);
03200 e=dg->Get_Edge(v,v_source);
03201 if (e != 0)
03202 dg->Delete_Array_Edge(e);
03203 }
03204 for (i=0; i<stack_of_source_wn.Elements(); i++) {
03205 WN* wn_source=stack_of_source_wn.Top_nth(i);
03206 Build_Doloop_Stack(wn_source, &source_stack);
03207 if (!dg->Add_Edge(wn_source, &source_stack, wn,
03208 &wn_stack, loop_ls->In(wn_source) < loop_ls->In(wn)))
03209 LNO_Erase_Dg_From_Here_In(wn, dg);
03210 source_stack.Clear();
03211 }
03212 DOLOOP_STACK sink_stack(&LNO_local_pool);
03213 STACK<WN*> stack_of_sink_wn(&LNO_local_pool);
03214 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
03215 if (Is_Lexpos(dg->Depv_Array(e)))
03216 continue;
03217 WN* wn_sink = dg->Get_Wn(dg->Get_Sink(e));
03218 stack_of_sink_wn.Push(wn_sink);
03219 }
03220 for (i=0; i<stack_of_sink_wn.Elements(); i++) {
03221 VINDEX16 v_sink=dg->Get_Vertex(stack_of_sink_wn.Top_nth(i));
03222 e=dg->Get_Edge(v,v_sink);
03223 dg->Delete_Array_Edge(e);
03224 e=dg->Get_Edge(v_sink,v);
03225 if (e != 0)
03226 dg->Delete_Array_Edge(e);
03227 }
03228 for (i=0; i<stack_of_sink_wn.Elements(); i++) {
03229 WN* wn_sink=stack_of_sink_wn.Top_nth(i);
03230 Build_Doloop_Stack(wn_sink, &sink_stack);
03231 if (!dg->Add_Edge(wn, &wn_stack, wn_sink,
03232 &sink_stack, loop_ls->In(wn) < loop_ls->In(wn_sink)))
03233 LNO_Erase_Dg_From_Here_In(wn, dg);
03234 sink_stack.Clear();
03235 }
03236 }
03237 }
03238
03239
03240
03241
03242
03243
03244
03245
03246
03247 static WN* Mp_Permute_Nested_Loops(WN* wn_outer_loop,
03248 INT tile_count,
03249 INT tiling_depth[])
03250 {
03251 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03252
03253 INT* first_loops = CXX_NEW_ARRAY(INT, tile_count, &LNO_local_pool);
03254 INT nloops = 0;
03255 INT i;
03256 for (i = 0; i < tile_count; i++) {
03257 first_loops[i] = nloops;
03258 nloops += tiling_depth[i];
03259 }
03260 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
03261 INT max_tiling_depth = 0;
03262 for (i = 0; i < tile_count; i++)
03263 if (tiling_depth[i] > max_tiling_depth)
03264 max_tiling_depth = tiling_depth[i];
03265 INT pindex = 0;
03266 for (i = 0; i < max_tiling_depth - 1; i++)
03267 for (INT j = 0; j < tile_count; j++)
03268 if (i < tiling_depth[j] - 1)
03269 permutation[pindex++] = first_loops[j] + i;
03270 for (i = 0; i < tile_count; i++)
03271 permutation[pindex++] = first_loops[i] + tiling_depth[i] - 1;
03272 WN* wn_inner_loop = SNL_Get_Inner_Snl_Loop(wn_outer_loop, nloops);
03273 if (SNL_Legal_Permutation(wn_outer_loop, wn_inner_loop,
03274 permutation, nloops)) {
03275 wn_outer_loop = SNL_INV_Permute_Loops(wn_outer_loop, permutation,
03276 nloops, TRUE);
03277 } else {
03278 pindex = 0;
03279 for (i = 0; i < tile_count; i++)
03280 permutation[pindex++] = first_loops[i];
03281 for (i = 0; i < tile_count; i++)
03282 for (INT j = 1; j < tiling_depth[i]; j++)
03283 permutation[pindex++] = first_loops[i] + j;
03284 wn_outer_loop = SNL_INV_Permute_Loops(wn_outer_loop, permutation,
03285 nloops, FALSE);
03286 Repair_Bad_Dependences(wn_outer_loop);
03287 }
03288 return wn_outer_loop;
03289 }
03290
03291
03292
03293
03294
03295
03296
03297
03298 static WN* Mp_Tile_Nested_Loop(WN* loop,
03299 MEM_POOL *pool)
03300 {
03301 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03302 DU_MANAGER* du = Du_Mgr;
03303
03304 ST* st_onto = NULL;
03305 ST* st_layout = NULL;
03306 WN* wn_new_loop = NULL;
03307 WN* wn_outer_loop = NULL;
03308 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
03309 INT tile_count = dli->Mp_Info->Nest_Total();
03310 INT* tiling_depth = CXX_NEW_ARRAY(INT, tile_count, &LNO_local_pool);
03311 WN* wn = loop;
03312 INT i;
03313 for (i = 0; i < tile_count; i++) {
03314 tiling_depth[i] = Mp_Tiling_Depth(wn);
03315 wn = Find_Next_Innermost_Do(wn);
03316 }
03317 BOOL lego_loop = dli->Lego_Info != NULL;
03318 Hoist_And_Sink_For_Nested_Doacross(loop, dg, du);
03319 if (lego_loop)
03320 Lego_Layout_Code(loop, tile_count, &st_layout);
03321 else
03322 Mp_Layout_Code(loop, tile_count, &st_onto, &st_layout);
03323 wn = loop;
03324 WN* wnn = NULL;
03325 for (i = 0; i < tile_count; i++, wn = wnn) {
03326 wnn = Find_Next_Innermost_Do(wn);
03327 wn_new_loop = Mp_Tile_Single_Loop(wn, FALSE, pool);
03328 if (wn_outer_loop == NULL)
03329 wn_outer_loop = wn_new_loop;
03330 }
03331 BOOL negative_stride = lego_loop && dli->Lego_Info->Stride() < 0;
03332 Mp_Permute_Nested_Loops(wn_outer_loop, tile_count, tiling_depth);
03333 Mp_Collapse_Tile_Loops(wn_outer_loop, tile_count, tiling_depth,
03334 st_layout, negative_stride);
03335 Mp_Nested_Last_Thread(wn_outer_loop, tile_count, tiling_depth, du);
03336 Mp_Remove_Onto_Pragmas(wn_outer_loop);
03337 return wn_outer_loop;
03338 }
03339
03340
03341
03342
03343
03344
03345
03346
03347 static void Mp_Extract_Bogus_Do_Across(WN* wn_loop)
03348 {
03349 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
03350 LWN_Extract_From_Block(LWN_Get_Parent(wn_loop), wn_loop);
03351 LWN_Insert_Block_Before(LWN_Get_Parent(wn_region),
03352 wn_region, wn_loop);
03353 LWN_Extract_From_Block(LWN_Get_Parent(wn_region), wn_region);
03354 LWN_Delete_Tree(wn_region);
03355 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03356 CXX_DELETE(dli->Mp_Info, &LNO_default_pool);
03357 dli->Mp_Info = NULL;
03358 }
03359
03360
03361
03362
03363
03364
03365
03366 static void Mp_Extract_Bogus_Do_Acrosses(WN* wn_loop)
03367 {
03368 FmtAssert(WN_opcode(wn_loop) == OPC_DO_LOOP,
03369 ("Root doacross is not a DO loop.\n"));
03370 DO_LOOP_INFO* dli_root = Get_Do_Loop_Info(wn_loop);
03371 FmtAssert(dli_root->Mp_Info != NULL, ("Root doacross is not an MP loop.\n"));
03372 if (dli_root->Mp_Info->Nest_Index() != 0)
03373 return;
03374 DOLOOP_STACK stack_doacross(&LNO_local_pool);
03375 stack_doacross.Clear();
03376 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(wn_loop));
03377 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
03378 WN* wn = itr->wn;
03379 if (WN_opcode(wn) == OPC_DO_LOOP) {
03380 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
03381 if (dli->Mp_Info != NULL && dli->Depth > dli_root->Depth
03382 + dli->Mp_Info->Nest_Index())
03383 stack_doacross.Push(wn);
03384 }
03385 }
03386 for (INT i = stack_doacross.Elements() - 1; i >= 0; i--)
03387 Mp_Extract_Bogus_Do_Across(stack_doacross.Bottom_nth(i));
03388 }
03389
03390
03391
03392
03393
03394
03395
03396 static BOOL Standardize_For_Tiling(WN* wn_loop)
03397 {
03398 BOOL return_value = TRUE;
03399 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03400 if (dli->Mp_Info != NULL) {
03401 INT loop_count = 0;
03402 INT total_count = dli->Mp_Info->Nest_Total();
03403 for (WN* wn = wn_loop; wn != NULL; wn = Find_Next_Innermost_Do(wn)) {
03404 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
03405 FmtAssert(loop_count == dli->Mp_Info->Nest_Index(),
03406 ("Bad indexing within nested doacross"));
03407 FmtAssert(total_count == dli->Mp_Info->Nest_Total(),
03408 ("Bad indexing within nested doacross"));
03409 WN* wn_step = Loop_Step(wn);
03410 if (WN_operator(wn_step) != OPR_INTCONST
03411 || WN_const_val(wn_step) != 1)
03412 return_value = FALSE;
03413 else if (!Upper_Bound_Standardize(WN_end(wn), TRUE))
03414 return_value = FALSE;
03415 if (++loop_count >= total_count)
03416 break;
03417 }
03418 } else {
03419 WN* wn_step = Loop_Step(wn_loop);
03420 if (WN_operator(wn_step) != OPR_INTCONST
03421 || WN_const_val(wn_step) != 1)
03422 return_value = FALSE;
03423 else if (!Upper_Bound_Standardize(WN_end(wn_loop)))
03424 return_value = FALSE;
03425 }
03426 return return_value;
03427 }
03428
03429
03430
03431
03432
03433
03434 static void Mp_Convert_To_Single_Loop(WN* wn_loop)
03435 {
03436 WN* wn = 0;
03437 for (wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn))
03438 if (WN_opcode(wn) == OPC_REGION)
03439 break;
03440 FmtAssert(wn != NULL, ("Could not find region"));
03441 WN* wn_first = WN_first(WN_region_pragmas(wn));
03442 WN* wnn = NULL;
03443 for (wn = wn_first; wn != NULL; wn = wnn) {
03444 wnn = WN_next(wn);
03445 if (WN_opcode(wn) == OPC_XPRAGMA && WN_pragma(wn) == WN_PRAGMA_ONTO) {
03446 LWN_Extract_From_Block(wn);
03447 LWN_Delete_Tree(wn);
03448 }
03449 }
03450 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03451 INT loop_count = 0;
03452 INT total_count = dli->Mp_Info->Nest_Total();
03453 for (wn = wn_loop; wn != NULL; wn = Find_Next_Innermost_Do(wn)) {
03454 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
03455 FmtAssert(loop_count == dli->Mp_Info->Nest_Index(),
03456 ("Bad indexing within nested doacross"));
03457 FmtAssert(total_count == dli->Mp_Info->Nest_Total(),
03458 ("Bad indexing within nested doacross"));
03459 CXX_DELETE(dli->Lego_Info, LEGO_pool);
03460 dli->Lego_Info = NULL;
03461 if (wn != wn_loop) {
03462 CXX_DELETE(dli->Mp_Info, &LNO_default_pool);
03463 dli->Mp_Info = NULL;
03464 }
03465 if (++loop_count >= total_count)
03466 break;
03467 }
03468 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
03469 wn_first = WN_first(WN_region_pragmas(wn_region));
03470 FmtAssert(wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA,
03471 ("Mp_Collapse_Loop_Heads: Cannot find doacross/pdo pragma"));
03472 FmtAssert(wn_first != NULL && WN_pragma(wn_first) == WN_PRAGMA_DOACROSS
03473 || WN_pragma(wn_first) == WN_PRAGMA_PDO_BEGIN,
03474 ("Mp_Collapse_Loop_Heads: Cannot find doacross/pdo pragma"));
03475 WN_pragma_arg2(wn_first) = 1;
03476 dli->Mp_Info->Set_Nest_Total(1);
03477 }
03478
03479
03480
03481
03482
03483
03484
03485 static BOOL SNL_Legal_Tile_Scalars(WN* wn_loop)
03486 {
03487 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03488 DU_MANAGER* du = Du_Mgr;
03489
03490 LWN_ITER* itr = LWN_WALK_TreeIter(wn_loop);
03491 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
03492 WN* wn = itr->wn;
03493 if (WN_operator(wn) == OPR_STID) {
03494 if (dg->Get_Vertex(wn))
03495 return FALSE;
03496 for (WN* wn_tp = wn; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
03497 if (WN_opcode(wn_tp) == OPC_DO_LOOP
03498 && SYMBOL(wn) == SYMBOL(WN_index(wn_tp)))
03499 return TRUE;
03500 USE_LIST *use_list = du->Du_Get_Use(wn);
03501 if (use_list == NULL)
03502 return TRUE;
03503 if (use_list->Incomplete())
03504 return FALSE;
03505 USE_LIST_ITER iter(use_list);
03506 const DU_NODE* node = NULL;
03507 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
03508 WN* wn_use = node->Wn();
03509 WN* wn = NULL;
03510 for (wn = wn_use; wn != NULL; wn = LWN_Get_Parent(wn))
03511 if (wn == wn_loop)
03512 break;
03513 if (wn == NULL)
03514 return FALSE;
03515 }
03516 return TRUE;
03517 } else if (WN_operator(wn) == OPR_LDID) {
03518 if (dg->Get_Vertex(wn))
03519 return FALSE;
03520 for (WN* wn_tp = wn; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
03521 if (WN_opcode(wn_tp) == OPC_DO_LOOP
03522 && SYMBOL(wn) == SYMBOL(WN_index(wn_tp)))
03523 return TRUE;
03524 DEF_LIST *def_list = du->Ud_Get_Def(wn);
03525 if (def_list == NULL)
03526 continue;
03527 if (def_list->Incomplete())
03528 return FALSE;
03529 WN* wn_loop_stmt = def_list->Loop_stmt();
03530 if (wn_loop_stmt == wn_loop)
03531 return FALSE;
03532 return TRUE;
03533 }
03534 }
03535 return TRUE;
03536 }
03537
03538
03539
03540
03541
03542
03543
03544 static BOOL SNL_Depv_Is_LCD(DEPV_ARRAY* depv_array,
03545 INT array_index,
03546 WN* wn_loop)
03547 {
03548 DEPV* depv = depv_array->Depv(array_index);
03549 INT loop_depth = Do_Loop_Depth(wn_loop);
03550 loop_depth -= depv_array->Num_Unused_Dim();
03551 for (INT i = 0; i < loop_depth; i++) {
03552 DIRECTION dir = DEP_Direction(DEPV_Dep(depv, i));
03553 if (dir == DIR_POS)
03554 return FALSE;
03555 }
03556 DIRECTION dir = DEP_Direction(DEPV_Dep(depv, loop_depth));
03557 return (dir != DIR_EQ);
03558 }
03559
03560
03561
03562
03563
03564
03565
03566
03567
03568 static BOOL SNL_Legal_Tile_Arrays(WN* wn_loop,
03569 HASH_TABLE<EINDEX16,INT>* edge_table)
03570 {
03571 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03572 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(wn_loop));
03573 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
03574 WN* wn = itr->wn;
03575 OPERATOR opr = WN_operator(wn);
03576 if (opr == OPR_ILOAD || opr == OPR_ISTORE
03577 || opr == OPR_LDID || opr == OPR_STID) {
03578 VINDEX16 v = dg->Get_Vertex(wn);
03579 if (v == 0 && (opr == OPR_LDID || opr == OPR_STID))
03580 continue;
03581 EINDEX16 e = 0;
03582 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
03583 if (edge_table->Find(e))
03584 continue;
03585 edge_table->Enter(e, 1);
03586 DEPV_ARRAY* depv_array = dg->Depv_Array(e);
03587 for (INT i = 0; i < depv_array->Num_Vec(); i++)
03588 if (SNL_Depv_Is_LCD(depv_array, i, wn_loop))
03589 return FALSE;
03590 }
03591 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
03592 if (edge_table->Find(e))
03593 continue;
03594 edge_table->Enter(e, 1);
03595 DEPV_ARRAY* depv_array = dg->Depv_Array(e);
03596 for (INT i = 0; i < depv_array->Num_Vec(); i++)
03597 if (SNL_Depv_Is_LCD(depv_array, i, wn_loop))
03598 return FALSE;
03599 }
03600 }
03601 }
03602 return TRUE;
03603 }
03604
03605
03606
03607
03608
03609
03610
03611
03612 static BOOL Lego_Tile_Legal(WN* wn_loop)
03613 {
03614 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03615 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03616 LEGO_INFO* lego_info = dli->Lego_Info;
03617 SYMBOL *array_sym = lego_info->Array();
03618 if (array_sym == NULL)
03619 return FALSE;
03620 INT hash_table_size = MIN(dg->Get_Edge_Count(), 512);
03621 HASH_TABLE<EINDEX16,INT> edge_table(hash_table_size, &LNO_local_pool);
03622 if (lego_info->Dynamic_Affinity()) {
03623 if (dli->Has_Bad_Mem)
03624 return FALSE;
03625 if (!SNL_Legal_Tile_Scalars(wn_loop))
03626 return FALSE;
03627 if (!SNL_Legal_Tile_Arrays(wn_loop, &edge_table))
03628 return FALSE;
03629 return TRUE;
03630 }
03631 DISTR_ARRAY *dact = Lookup_DACT(array_sym->St());
03632 INT curr_dim = lego_info->Dim_Num();
03633 switch (dact->Get_Dim(curr_dim)->Distr_Type()) {
03634 case DISTRIBUTE_BLOCK:
03635 return TRUE;
03636 case DISTRIBUTE_CYCLIC_CONST:
03637 case DISTRIBUTE_CYCLIC_EXPR:
03638 {
03639 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03640 if (dli->Has_Bad_Mem)
03641 return FALSE;
03642 if (!SNL_Legal_Tile_Scalars(wn_loop))
03643 return FALSE;
03644 if (!SNL_Legal_Tile_Arrays(wn_loop, &edge_table))
03645 return FALSE;
03646 return TRUE;
03647 }
03648 default:
03649 return FALSE;
03650 }
03651 }
03652
03653
03654
03655
03656
03657
03658
03659 static WN* Lego_Tile_Loop(WN* wn_loop,
03660 MEM_POOL *pool)
03661 {
03662 if (!Standardize_For_Tiling(wn_loop) || !Lego_Tile_Legal(wn_loop)) {
03663 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03664 CXX_DELETE(dli->Lego_Info, LEGO_pool);
03665 dli->Lego_Info = NULL;
03666 return wn_loop;
03667 }
03668 WN* wn_return = Lego_Tile_Single_Loop(wn_loop, pool);
03669 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
03670 Prompf_Tile(wn_return, FALSE);
03671 return wn_return;
03672 }
03673
03674
03675
03676
03677
03678
03679
03680 static BOOL Mp_Want_Freeze_Threads(WN* wn_loop,
03681 BOOL LNO_Ozero)
03682 {
03683 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03684 return !dli->Mp_Info->Is_Pdo() && dli->Lego_Info == NULL
03685 && !dli->Mp_Info->Plower_Disabled()
03686 && (LNO_Pseudo_Lower || dli->Mp_Info->Nest_Total() > 1)
03687 && !(LNO_Ozero && dli->Mp_Info->Nest_Total() == 1);
03688 }
03689
03690
03691
03692
03693
03694
03695
03696 static BOOL Mp_Want_Freeze_Cur_Threads(WN* wn_loop,
03697 BOOL LNO_Ozero)
03698 {
03699 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03700 if (!LNO_Pseudo_Lower && dli->Mp_Info->Nest_Total() > 1)
03701 return FALSE;
03702 if (LNO_Ozero && dli->Mp_Info->Nest_Total() == 1)
03703 return FALSE;
03704 if (dli->Lego_Info != NULL)
03705 return FALSE;
03706 if (dli->Mp_Info->Plower_Disabled())
03707 return FALSE;
03708 if (!dli->Mp_Info->Is_Pdo())
03709 return FALSE;
03710 if (Is_Orphaned_Pdo(wn_loop)) {
03711 if (!Mp_Want_Version_Loop(wn_loop, FALSE))
03712 return TRUE;
03713 } else {
03714 WN* wn = 0;
03715 for (wn = wn_loop; wn != NULL; wn = LWN_Get_Parent(wn)) {
03716 if (WN_operator(wn) == OPR_REGION) {
03717 WN* wn_first = WN_first(WN_region_pragmas(wn));
03718 if (wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA
03719 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN)
03720 break;
03721 }
03722 }
03723 WN* wn_region = wn;
03724 if (!Mp_Want_Version_Parallel_Region(wn_region, FALSE))
03725 return TRUE;
03726 }
03727 return FALSE;
03728 }
03729
03730
03731
03732
03733
03734
03735
03736 static BOOL Mp_Optimize_Interleaved_Loop(WN* wn_loop)
03737 {
03738 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
03739 DU_MANAGER* du = Du_Mgr;
03740
03741 WN* wn_middle = Next_SNL_Loop(wn_loop);
03742 if (wn_middle == NULL)
03743 return FALSE;
03744 WN* wn_inner = Next_SNL_Loop(wn_middle);
03745 if (wn_inner == NULL)
03746 return FALSE;
03747 INT tile_size = 0;
03748 WN* wn_outer = SNL_SPL_Loop_Is_Inner_Tile(wn_inner, &tile_size);
03749 if (wn_outer == NULL)
03750 return FALSE;
03751 SNL_SPL_Split_Inner_Tile_Loop(wn_outer, wn_inner, tile_size, "$ispl_",
03752 FALSE);
03753 WN* wn_first = NULL;
03754 WN* wn_last = NULL;
03755 if (Iterations(wn_inner, &LNO_local_pool) == 1)
03756 Remove_Unity_Trip_Loop(wn_inner, TRUE, &wn_first, &wn_last, dg, du);
03757 return TRUE;
03758 }
03759
03760
03761
03762
03763
03764
03765
03766 static void Mp_Optimize_Interleaved_Loop_Traverse(WN* wn_tree)
03767 {
03768 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
03769 DO_LOOP_INFO* dli_tree = Get_Do_Loop_Info(wn_tree);
03770 if (dli_tree->Mp_Info
03771 && dli_tree->Mp_Info->Sched_Type() == MP_SCHED_INTERLEAVE)
03772 Mp_Optimize_Interleaved_Loop(wn_tree);
03773 }
03774
03775 if (WN_opcode(wn_tree) == OPC_BLOCK) {
03776 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
03777 Mp_Optimize_Interleaved_Loop_Traverse(wn);
03778 } else {
03779 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
03780 Mp_Optimize_Interleaved_Loop_Traverse(WN_kid(wn_tree, i));
03781 }
03782 }
03783
03784
03785
03786
03787
03788
03789
03790 static void Mp_Optimize_Interleaved_Loops(WN* wn_loop)
03791 {
03792 Mp_Optimize_Interleaved_Loop_Traverse(wn_loop);
03793 }
03794
03795
03796
03797
03798
03799
03800
03801
03802
03803 extern WN* Mp_Tile_Loop(WN* wn_loop,
03804 BOOL LNO_Ozero,
03805 MEM_POOL *pool,
03806 LMT_VALUE lmt_traverse)
03807 {
03808 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
03809 if (!Standardize_For_Tiling(wn_loop)) {
03810 if (dli->Mp_Info->Nest_Total() > 1) {
03811 Mp_Compress_Nested_Loop(wn_loop);
03812 Mp_Convert_To_Single_Loop(wn_loop);
03813 }
03814 return wn_loop;
03815 }
03816
03817
03818
03819
03820
03821
03822
03823
03824 WN* wn_return = NULL;
03825 INT nest_depth = dli->Mp_Info->Nest_Total();
03826
03827
03828
03829 BOOL want_freeze_threads = FALSE;
03830 BOOL want_freeze_cur_threads = FALSE;
03831
03832
03833
03834 if (nest_depth > 1) {
03835 Mp_Compress_Nested_Loop(wn_loop);
03836 WN* wn_scalar_loop = Mp_Version_Loop(wn_loop);
03837 if (wn_scalar_loop != NULL) {
03838 switch (lmt_traverse) {
03839 case LMT_LEGO:
03840 Lego_Tile_Traverse(wn_scalar_loop, LNO_Ozero);
03841 break;
03842 case LMT_MP:
03843 Mp_Tile_Traverse(wn_scalar_loop);
03844 break;
03845 case LMT_LEGO_MP:
03846 Lego_Mp_Tile_Traverse(wn_scalar_loop, LNO_Ozero);
03847 break;
03848 }
03849 }
03850 if (want_freeze_threads)
03851 Freeze_Numthreads_Ldid(wn_loop);
03852 if (want_freeze_cur_threads)
03853 Freeze_Cur_Numthreads_Func(wn_loop);
03854 wn_return = Mp_Tile_Nested_Loop(wn_loop, pool);
03855 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
03856 Prompf_Nested_Tile(wn_return);
03857 } else {
03858 WN* wn_scalar_loop = Mp_Version_Loop(wn_loop);
03859 if (wn_scalar_loop != NULL) {
03860 switch (lmt_traverse) {
03861 case LMT_LEGO:
03862 Lego_Tile_Traverse(wn_scalar_loop, LNO_Ozero);
03863 break;
03864 case LMT_MP:
03865 Mp_Tile_Traverse(wn_scalar_loop);
03866 break;
03867 case LMT_LEGO_MP:
03868 Lego_Mp_Tile_Traverse(wn_scalar_loop, LNO_Ozero);
03869 break;
03870 }
03871 }
03872 if (want_freeze_threads)
03873 Freeze_Numthreads_Ldid(wn_loop);
03874 if (want_freeze_cur_threads)
03875 Freeze_Cur_Numthreads_Func(wn_loop);
03876 wn_return = Mp_Tile_Single_Loop(wn_loop, LNO_Ozero, pool);
03877 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
03878 Prompf_Tile(wn_loop, TRUE);
03879 }
03880 Mp_Optimize_Interleaved_Loops(wn_return);
03881 return wn_return;
03882 }
03883
03884
03885
03886
03887
03888
03889
03890 static WN* Traverse_Update(WN* wn_loop)
03891 {
03892 FmtAssert(WN_opcode(wn_loop) == OPC_DO_LOOP,
03893 ("Traverse_Update: Expecting a do loop"));
03894 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
03895 INT lower = dli_loop->Lego_Mp_Key_Lower;
03896 INT upper = dli_loop->Lego_Mp_Key_Upper;
03897 if (lower == 0 || upper == 0)
03898 return WN_do_body(wn_loop);
03899 WN* wn_inner_loop = wn_loop;
03900 INT nloops = SNL_Loop_Count(wn_loop);
03901 for (INT i = 1; i <= nloops; i++) {
03902 WN* wn = SNL_Get_Inner_Snl_Loop(wn_loop, i);
03903 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
03904 if (dli->Lego_Mp_Key_Lower == 0 || dli->Lego_Mp_Key_Upper == 0)
03905 break;
03906 if (dli->Lego_Mp_Key_Lower < lower || dli->Lego_Mp_Key_Upper > upper)
03907 break;
03908 wn_inner_loop = wn;
03909 }
03910 return WN_do_body(wn_inner_loop);
03911 }
03912
03913
03914
03915
03916
03917
03918
03919
03920 static void Lego_Mp_Tile_Traverse(WN* wn_tree,
03921 BOOL LNO_Ozero)
03922 {
03923 DU_MANAGER* du = Du_Mgr;
03924
03925 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
03926 WN* wn_return = NULL;
03927 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
03928 if (dli->Mp_Info != NULL)
03929 wn_return = Mp_Tile_Loop(wn_tree, LNO_Ozero,
03930 &LNO_default_pool, LMT_LEGO_MP);
03931 else if (dli->Lego_Info != NULL && !dli->Is_Inner_Lego_Tile)
03932 wn_return = Lego_Tile_Loop(wn_tree, &LNO_default_pool);
03933 if (wn_return != NULL)
03934 Hoist_Statements(wn_return, du);
03935 if (wn_return != NULL)
03936 wn_tree = Traverse_Update(wn_return);
03937 }
03938
03939 if (WN_opcode(wn_tree) == OPC_REGION) {
03940 WN* wn_first = WN_first(WN_region_pragmas(wn_tree));
03941 if (wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA
03942 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN) {
03943 WN* wn_scalar_region = Mp_Version_Parallel_Region(wn_tree);
03944 if (wn_scalar_region != NULL)
03945 Lego_Mp_Tile_Traverse(wn_scalar_region, LNO_Ozero);
03946 }
03947 }
03948
03949 if (WN_opcode(wn_tree) == OPC_BLOCK) {
03950 WN* wnn = NULL;
03951 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = wnn) {
03952 wnn = WN_next(wn);
03953 Lego_Mp_Tile_Traverse(wn, LNO_Ozero);
03954 }
03955 } else {
03956 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
03957 Lego_Mp_Tile_Traverse(WN_kid(wn_tree, i), LNO_Ozero);
03958 }
03959 }
03960
03961
03962
03963
03964
03965
03966
03967 extern void Lego_Mp_Tile(WN* wn_root,
03968 BOOL LNO_Ozero)
03969 {
03970 Lego_Mp_Tile_Traverse(wn_root, LNO_Ozero);
03971 if (Eliminate_Dead_SCF(wn_root, LWN_Delete_Tree))
03972 Mark_Code(wn_root, FALSE, FALSE);
03973 }
03974
03975
03976
03977
03978
03979
03980
03981 static BOOL Has_Lego_Mp_Loops(WN* wn_tree)
03982 {
03983 LWN_ITER* itr = LWN_WALK_TreeIter(wn_tree);
03984 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
03985 WN* wn = itr->wn;
03986 if (WN_opcode(wn) == OPC_DO_LOOP) {
03987 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
03988 if (dli->Mp_Info != NULL && dli->Lego_Info != NULL)
03989 return TRUE;
03990 }
03991 }
03992 return FALSE;
03993 }
03994
03995
03996
03997
03998
03999
04000
04001
04002 static void Lego_Tile_Traverse(WN* wn_tree,
04003 BOOL LNO_Ozero)
04004 {
04005 DU_MANAGER* du = Du_Mgr;
04006
04007 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
04008 WN* wn_return = NULL;
04009 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
04010 if (dli->Mp_Info != NULL && dli->Lego_Info != NULL)
04011 wn_return = Mp_Tile_Loop(wn_tree, LNO_Ozero,
04012 &LNO_default_pool, LMT_LEGO);
04013 else if (dli->Lego_Info != NULL && !dli->Is_Inner_Lego_Tile)
04014 wn_return = Lego_Tile_Loop(wn_tree, &LNO_default_pool);
04015 if (wn_return != NULL)
04016 Hoist_Statements(wn_return, du);
04017 if (wn_return != NULL)
04018 wn_tree = Traverse_Update(wn_return);
04019 }
04020
04021 if (WN_opcode(wn_tree) == OPC_REGION) {
04022 WN* wn_first = WN_first(WN_region_pragmas(wn_tree));
04023 if (wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA
04024 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN
04025 && Has_Lego_Mp_Loops(wn_tree)) {
04026 WN* wn_scalar_region = Mp_Version_Parallel_Region(wn_tree);
04027 if (wn_scalar_region != NULL)
04028 Lego_Tile_Traverse(wn_scalar_region, LNO_Ozero);
04029 }
04030 }
04031 if (WN_opcode(wn_tree) == OPC_BLOCK) {
04032 WN* wnn = NULL;
04033 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = wnn) {
04034 wnn = WN_next(wn);
04035 Lego_Tile_Traverse(wn, LNO_Ozero);
04036 }
04037 } else {
04038 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
04039 Lego_Tile_Traverse(WN_kid(wn_tree, i), LNO_Ozero);
04040 }
04041 }
04042
04043
04044
04045
04046
04047
04048
04049 extern void Lego_Tile(WN* wn_root,
04050 BOOL LNO_Ozero)
04051 {
04052 Lego_Tile_Traverse(wn_root, LNO_Ozero);
04053 if (Eliminate_Dead_SCF(wn_root, LWN_Delete_Tree))
04054 Mark_Code(wn_root, FALSE, FALSE);
04055 }
04056
04057
04058
04059
04060
04061
04062
04063
04064 static void Mp_Tile_Traverse(WN* wn_tree)
04065 {
04066 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
04067 DU_MANAGER* du = Du_Mgr;
04068 REDUCTION_MANAGER* rm = red_manager;
04069
04070 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
04071 WN* wn_return = NULL;
04072 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
04073 if (dli->Mp_Info != NULL && !dli->Is_Processor_Tile)
04074 wn_return = Mp_Tile_Loop(wn_tree, FALSE, &LNO_default_pool, LMT_MP);
04075 if (wn_return != NULL) {
04076 Hoist_Statements(wn_return, du);
04077 WN* wn_save_tree = LWN_Get_Parent(Traverse_Update(wn_return));
04078 if (dli->Is_Doacross)
04079 Parallelize_Doacross_Loop(
04080 wn_return, wn_tree, dli->Doacross_Tile_Size,
04081 dli->Sync_Distances, dg, du);
04082 wn_tree = WN_do_body(wn_save_tree);
04083 }
04084 }
04085
04086 if (WN_opcode(wn_tree) == OPC_REGION) {
04087 WN* wn_first = WN_first(WN_region_pragmas(wn_tree));
04088 if (wn_first != NULL && WN_opcode(wn_first) == OPC_PRAGMA
04089 && WN_pragma(wn_first) == WN_PRAGMA_PARALLEL_BEGIN) {
04090 WN* wn_scalar_region = Mp_Version_Parallel_Region(wn_tree);
04091 if (wn_scalar_region != NULL)
04092 Mp_Tile_Traverse(wn_scalar_region);
04093 }
04094 }
04095
04096 if (WN_opcode(wn_tree) == OPC_BLOCK) {
04097 WN* wnn = NULL;
04098 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = wnn) {
04099 wnn = WN_next(wn);
04100 Mp_Tile_Traverse(wn);
04101 }
04102 } else {
04103 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
04104 Mp_Tile_Traverse(WN_kid(wn_tree, i));
04105 }
04106 }
04107
04108
04109
04110
04111
04112
04113
04114 extern void Mp_Tile(WN* wn_root)
04115 {
04116
04117
04118
04119 return;
04120 Mp_Tile_Traverse(wn_root);
04121 if (Eliminate_Dead_SCF(wn_root, LWN_Delete_Tree))
04122 Mark_Code(wn_root, FALSE, FALSE);
04123 }
04124