00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00048 #define __STDC_LIMIT_MACROS
00049 #include <stdint.h>
00050 #ifdef USE_PCH
00051 #include "lno_pch.h"
00052 #endif // USE_PCH
00053 #pragma hdrstop
00054
00055 #ifdef _KEEP_RCS_ID
00056
00057 static char *rcs_id = "$Source: be/lno/SCCS/s.scalar_expand.cxx $ $Revision: 1.8 $";
00058 #endif
00059
00060 #include <sys/types.h>
00061 #include <ctype.h>
00062 #include "pu_info.h"
00063 #include "defs.h"
00064 #include "stab.h"
00065 #include "lnopt_main.h"
00066 #include "scalar_expand.h"
00067 #include "snl.h"
00068
00069 #include "const.h"
00070 #include "targ_sim.h"
00071 #include "config_targ.h"
00072 #include "cxx_template.h"
00073 #include "dep_graph.h"
00074 #include "opt_du.h"
00075 #include "cxx_memory.h"
00076 #include "cxx_hash.h"
00077 #include "lwn_util.h"
00078 #include "opt_du.h"
00079 #include "opt_alias_interface.h"
00080 #include "lnoutils.h"
00081 #include "wintrinsic.h"
00082 #include "stab.h"
00083 #include "strtab.h"
00084 #include "debug.h"
00085 #include "lno_bv.h"
00086 #include "sxlist.h"
00087 #include "parallel.h"
00088 #include "soe.h"
00089 #include "cond.h"
00090 #include "snl_utils.h"
00091
00092 static WN* BND_Min_Expr(WN* wn, WN* loops[], INT nloops);
00093 static WN* BND_Max_Expr(WN* wn, WN* loops[], INT nloops);
00094 ST* Find_Return_Registers(TYPE_ID type, PREG_NUM *rreg1, PREG_NUM *rreg2);
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108 static WN* Invariant_Base(WN* wn_base,
00109 WN* wn_loop)
00110 {
00111 DU_MANAGER* du = Du_Mgr;
00112 DEF_LIST *def_list = du->Ud_Get_Def(wn_base);
00113 DEF_LIST_ITER iter(def_list);
00114 const DU_NODE* node = NULL;
00115 INT inside_count = 0;
00116 WN* inside_def = NULL;
00117 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00118 WN* def = node->Wn();
00119 if (Wn_Is_Inside(def, wn_loop)) {
00120 inside_def = def;
00121 if (++inside_count > 1)
00122 FmtAssert(FALSE, ("wn_base is not really invariant wrt wn_loop"));
00123 }
00124 }
00125 return inside_def != NULL ? WN_kid0(inside_def) : wn_base;
00126 }
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137 static WN* SE_Index_Loops(WN* loops[],
00138 INT dimcnt,
00139 WN* tile_loops[],
00140 INT strip_sizes[],
00141 INT nstrips,
00142 WN* index_loops[])
00143 {
00144 INT j = 0;
00145 for (INT i = 0; i < dimcnt; i++)
00146 index_loops[i] = nstrips > 0 && strip_sizes[i] > 0
00147 ? tile_loops[j++] : loops[i];
00148 WN* outerloop = loops[0];
00149 if (nstrips > 0 && Get_Do_Loop_Info(tile_loops[0])->Depth
00150 < Get_Do_Loop_Info(loops[0])->Depth)
00151 outerloop = tile_loops[0];
00152 return outerloop;
00153 }
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 static WN* SE_Lower_Bound(WN* index_loops[],
00167 WN* outerloop,
00168 INT i,
00169 INT dimcnt,
00170 BOOL invariant)
00171 {
00172 WN* lb = NULL;
00173 DU_MANAGER* du = Du_Mgr;
00174 if (invariant) {
00175 WN* wn_iv = Invariant_Base(WN_kid0(WN_start(index_loops[i])), outerloop);
00176 lb = LWN_Copy_Tree(wn_iv);
00177 LWN_Copy_Def_Use(wn_iv, lb, du);
00178 } else {
00179 lb = BND_Min_Expr(WN_kid0(WN_start(index_loops[i])), index_loops, dimcnt);
00180 }
00181 return lb;
00182 }
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195 static WN* SE_Upper_Bound(WN* index_loops[],
00196 WN* outerloop,
00197 INT i,
00198 INT dimcnt,
00199 BOOL invariant)
00200 {
00201 WN* ub = NULL;
00202 DU_MANAGER* du = Du_Mgr;
00203 if (invariant) {
00204 WN* wn_iv = Invariant_Base(SNL_UBexp(WN_end(index_loops[i])), outerloop);
00205 ub = LWN_Copy_Tree(wn_iv);
00206 LWN_Copy_Def_Use(wn_iv, ub, du);
00207 } else {
00208 ub = BND_Max_Expr(SNL_UBexp(WN_end(index_loops[i])), index_loops, dimcnt);
00209 }
00210 return ub;
00211 }
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236 static void SE_Indxs_and_Bounds(WN* loops[],
00237 INT dimcnt,
00238 SYMBOL symbol,
00239 BOOL invariant,
00240 BIT_VECTOR used_loops[],
00241 WN* tile_loops[],
00242 INT strip_sizes[],
00243 INT nstrips,
00244 WN* bounds[],
00245 WN* indxs[],
00246 BOOL has_lcd,
00247 INT* sz_addr,
00248 WN** bsz_addr)
00249 {
00250 DU_MANAGER* du = Du_Mgr;
00251 WN** index_loops = CXX_NEW_ARRAY(WN*, dimcnt, &LNO_local_pool);
00252 WN* outerloop = SE_Index_Loops(loops, dimcnt, tile_loops, strip_sizes,
00253 nstrips, index_loops);
00254
00255 TYPE_ID wtype = symbol.Type;
00256 INT sz = (wtype == MTYPE_I1 || wtype == MTYPE_U1) ? 1 :
00257 (wtype == MTYPE_I2 || wtype == MTYPE_U2) ? 2 :
00258 (wtype == MTYPE_I8 || wtype == MTYPE_U8 ||
00259 wtype == MTYPE_F8 || wtype == MTYPE_C4) ? 8 :
00260 #if defined(TARG_IA64)
00261 (wtype == MTYPE_F10) ? 16 :
00262 #endif
00263 (wtype == MTYPE_I4 || wtype == MTYPE_U4 ||
00264 wtype == MTYPE_F4) ? 4 :
00265 (wtype == MTYPE_F8) ? 8 :
00266 (wtype == MTYPE_F16) ? 16 :
00267 (wtype == MTYPE_FQ || wtype == MTYPE_C8) ? 16 :
00268 (wtype == MTYPE_CQ) ? 32 : 0;
00269 FmtAssert(sz > 0, ("Bad type in scalar expansion"));
00270
00271 TYPE_ID bsztype;
00272 INT i = 0;
00273 while (used_loops && !used_loops->Test(i)) i++;
00274 bsztype = Do_Wtype(loops[i]);
00275 for (i = i+1; i < dimcnt; i++) {
00276 if (!used_loops || used_loops->Test(i)) {
00277 bsztype = Max_Wtype(bsztype, Do_Wtype(loops[i]));
00278 }
00279 }
00280
00281 WN* bsz = LWN_Make_Icon(Promote_Type(bsztype), sz);
00282 OPCODE opmpy = OPCODE_make_op(OPR_MPY, Promote_Type(bsztype), MTYPE_V);
00283 for (i = 0; i < dimcnt; i++) {
00284 if (!used_loops || used_loops->Test(i)) {
00285 INT stripsize = nstrips == 0 ? 0 : strip_sizes[i];
00286 WN* cnt = stripsize > 0 ? LWN_Make_Icon(Promote_Type(bsztype),
00287 stripsize) : NULL;
00288 TYPE_ID dty = WN_desc(WN_start(index_loops[i]));
00289 TYPE_ID rty = Promote_Type(WN_desc(WN_start(index_loops[i])));
00290 OPCODE opsubty = OPCODE_make_op(OPR_SUB, rty, MTYPE_V);
00291 OPCODE opminty = OPCODE_make_op(OPR_MIN, rty, MTYPE_V);
00292 WN* lb = SE_Lower_Bound(index_loops, outerloop, i, dimcnt, invariant);
00293 WN* copy_lb = NULL;
00294 if (stripsize == 0) {
00295 copy_lb = LWN_Copy_Tree(lb);
00296 LWN_Copy_Def_Use(lb, copy_lb, du);
00297 }
00298 WN* ub = SE_Upper_Bound(index_loops, outerloop, i, dimcnt, invariant);
00299 WN* one = LWN_Make_Icon(rty, 1);
00300 WN* rcnt = LWN_CreateExp2(opsubty, ub, LWN_CreateExp2(opsubty, lb, one));
00301 WN* true_cnt = cnt != NULL ? LWN_CreateExp2(opminty, cnt, rcnt) : rcnt;
00302 if (has_lcd) {
00303 WN* wn_one = LWN_Make_Icon(rty, 1);
00304 WN* wn_zero = LWN_Make_Icon(rty, 0);
00305 OPCODE opaddty = OPCODE_make_op(OPR_ADD, rty, MTYPE_V);
00306 OPCODE opmaxty = OPCODE_make_op(OPR_MAX, rty, MTYPE_V);
00307 true_cnt = LWN_CreateExp2(opmaxty, true_cnt, wn_zero);
00308 true_cnt = LWN_CreateExp2(opaddty, true_cnt, wn_one);
00309 }
00310 bounds[i] = LWN_Copy_Tree(true_cnt);
00311 LWN_Copy_Def_Use(true_cnt, bounds[i], du);
00312 OPCODE opldty = OPCODE_make_op(OPR_LDID, rty, dty);
00313 WN* indx_ldid = LWN_CreateLdid(opldty, WN_step(loops[i]));
00314 WN* indx_ldid_lb = stripsize > 0 ?
00315 LWN_CreateLdid(opldty, WN_step(index_loops[i])) : copy_lb;
00316 indxs[i] = LWN_CreateExp2(opsubty, indx_ldid, indx_ldid_lb);
00317 SNL_Add_Du_To_Index_Ldid(loops[i], indx_ldid, du, TRUE);
00318 if (stripsize > 0)
00319 SNL_Add_Du_To_Index_Ldid(index_loops[i], indx_ldid_lb, du, TRUE);
00320 bsz = LWN_CreateExp2(opmpy, bsz,
00321 LWN_Int_Type_Conversion(true_cnt, Promote_Type(bsztype)));
00322 }
00323 }
00324 bsz = LWN_Int_Type_Conversion(bsz, Pointer_type);
00325 *sz_addr = sz;
00326 *bsz_addr = bsz;
00327 }
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348 static void SE_Findxs(WN* loops[],
00349 INT dimcnt,
00350 BOOL invariant,
00351 BIT_VECTOR used_loops[],
00352 WN* tile_loops[],
00353 INT strip_sizes[],
00354 INT nstrips,
00355 WN* findxs[])
00356 {
00357 DU_MANAGER* du = Du_Mgr;
00358 WN** index_loops = CXX_NEW_ARRAY(WN*, dimcnt, &LNO_local_pool);
00359 WN* outerloop = SE_Index_Loops(loops, dimcnt, tile_loops, strip_sizes,
00360 nstrips, index_loops);
00361 for (INT i = 0; i < dimcnt; i++) {
00362 if (!used_loops || used_loops->Test(i)) {
00363 TYPE_ID dty = WN_desc(WN_start(index_loops[i]));
00364 TYPE_ID rty = Promote_Type(WN_desc(WN_start(index_loops[i])));
00365 OPCODE opsubty = OPCODE_make_op(OPR_SUB, rty, MTYPE_V);
00366 INT stripsize = nstrips == 0 ? 0 : strip_sizes[i];
00367 WN* ub = SE_Upper_Bound(index_loops, outerloop, i, dimcnt,
00368 invariant);
00369 OPCODE opldty = OPCODE_make_op(OPR_LDID, rty, dty);
00370 WN* indx_ldid_lb = NULL;
00371 WN* wn_off1 = NULL;
00372 if (stripsize > 0) {
00373 wn_off1 = LWN_CreateLdid(opldty, WN_step(index_loops[i]));
00374 WN* wn_off2 = LWN_Make_Icon(Promote_Type(Do_Wtype(index_loops[i])),
00375 stripsize);
00376 indx_ldid_lb = LWN_CreateExp2(opsubty, wn_off1, wn_off2);
00377 } else {
00378 WN* lb = SE_Lower_Bound(index_loops, outerloop, i, dimcnt,
00379 invariant);
00380 indx_ldid_lb = lb;
00381 }
00382 findxs[i] = LWN_CreateExp2(opsubty, ub, indx_ldid_lb);
00383 if (stripsize > 0) {
00384 SNL_Add_Du_To_Index_Ldid(index_loops[i], indx_ldid_lb, du, TRUE);
00385 DEF_LIST* def_list = Du_Mgr->Ud_Get_Def(wn_off1);
00386 WN* wn_loop = Enclosing_Do_Loop(LWN_Get_Parent(outerloop));
00387 def_list->Set_loop_stmt(wn_loop);
00388 }
00389 }
00390 }
00391 }
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403 static WN* SE_Array(SYMBOL se_ptrsym,
00404 INT sz,
00405 INT dimcnt,
00406 const INT order[],
00407 BIT_VECTOR* used_loops,
00408 WN* bounds[],
00409 WN* indxs[],
00410 BOOL has_lcd,
00411 INT lcd_count)
00412 {
00413 DU_MANAGER* du = Du_Mgr;
00414 INT used_dimcnt = used_loops ? used_loops->Pop_Count() : dimcnt;
00415 OPCODE ldop = OPCODE_make_op(OPR_LDID, Pointer_type, Pointer_type);
00416 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
00417 WN* wn_array = WN_Create(op_array, used_dimcnt+used_dimcnt+1);
00418 WN_element_size(wn_array) = sz;
00419 WN_array_base(wn_array) = WN_CreateLdid(ldop, se_ptrsym.WN_Offset(),
00420 se_ptrsym.St(), ST_type(se_ptrsym.St()));
00421 LWN_Set_Parent(WN_array_base(wn_array), wn_array);
00422 FmtAssert(used_loops == NULL || !has_lcd,
00423 ("SE_Array: Not supporting used_loops and has_lcd at same time"));
00424 INT k = 0;
00425 for (INT i = 0; i < dimcnt; i++) {
00426 if (!used_loops || used_loops->Test(i)) {
00427 INT j = order[i];
00428 WN* wn_index = NULL;
00429 if (lcd_count >= 0 && j > lcd_count) {
00430 wn_index = LWN_Copy_Tree(bounds[j]);
00431 LWN_Copy_Def_Use(bounds[j], wn_index, du);
00432 TYPE_ID rty = Promote_Type(WN_rtype(bounds[j]));
00433 OPCODE subop = OPCODE_make_op(OPR_SUB, rty, MTYPE_V);
00434 WN* wn_one = LWN_Make_Icon(rty, 1);
00435 wn_index = LWN_CreateExp2(subop, wn_index, wn_one);
00436 } else {
00437 wn_index = LWN_Copy_Tree(indxs[j]);
00438 LWN_Copy_Def_Use(indxs[j], wn_index, du);
00439 if (has_lcd) {
00440 TYPE_ID rty = WN_rtype(indxs[j]);
00441 OPCODE addop = OPCODE_make_op(OPR_ADD, rty, MTYPE_V);
00442 WN* wn_one = LWN_Make_Icon(rty, 1);
00443 wn_index = LWN_CreateExp2(addop, wn_index, wn_one);
00444 }
00445 if (lcd_count >= 0 && j == lcd_count) {
00446 TYPE_ID rty = WN_rtype(indxs[j]);
00447 OPCODE subop = OPCODE_make_op(OPR_SUB, rty, MTYPE_V);
00448 WN* wn_one = LWN_Make_Icon(rty, 1);
00449 wn_index = LWN_CreateExp2(subop, wn_index, wn_one);
00450 }
00451 }
00452 WN_array_index(wn_array, k) = wn_index;
00453 WN_array_dim(wn_array, k) = LWN_Copy_Tree(bounds[j]);
00454 LWN_Copy_Def_Use(bounds[j], WN_array_dim(wn_array, k), du);
00455 LWN_Set_Parent(WN_array_index(wn_array, k), wn_array);
00456 LWN_Set_Parent(WN_array_dim(wn_array,k), wn_array);
00457 k++;
00458 }
00459 }
00460 return wn_array;
00461 }
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473 static void SE_Iload(WN* wn,
00474 WN* wn_array,
00475 WN* wn_newstdf,
00476 SYMBOL se_ptrsym,
00477 WN** alias_host)
00478 {
00479 DU_MANAGER* du = Du_Mgr;
00480 SYMBOL symbol(wn);
00481 TYPE_ID wtype = symbol.Type;
00482 #ifdef KEY // bug 7846
00483 OPCODE loadop = OPCODE_make_op(OPR_ILOAD, WN_rtype(wn), wtype);
00484 #else
00485 OPCODE loadop = OPCODE_make_op(OPR_ILOAD, Promote_Type(wtype), wtype);
00486 #endif
00487 TY_IDX wty = Be_Type_Tbl(wtype);
00488 TY_IDX pty = Make_Pointer_Type(Be_Type_Tbl(wtype));
00489 WN* load = LWN_CreateIload(loadop, 0, wty, pty, wn_array);
00490 WN* parent = LWN_Get_Parent(wn);
00491 LWN_Copy_Frequency_Tree(load,wn);
00492 LWN_Set_Parent(load,parent);
00493
00494
00495 if (*alias_host == NULL) {
00496 Create_unique_pointer_alias(Alias_Mgr, se_ptrsym.St(),
00497 WN_array_base(wn_array), load);
00498 *alias_host = load;
00499 Copy_alias_info(Alias_Mgr, WN_array_base(wn_array), wn_newstdf);
00500 } else {
00501 Copy_alias_info(Alias_Mgr, wn_newstdf, WN_array_base(wn_array));
00502 Copy_alias_info(Alias_Mgr, *alias_host, load);
00503 }
00504
00505 if (red_manager) {
00506 REDUCTION_TYPE rtype = red_manager->Which_Reduction(wn);
00507 if (rtype != RED_NONE) {
00508 red_manager->Add_Reduction(load, rtype);
00509 red_manager->Erase(wn);
00510 }
00511 }
00512 du->Add_Def_Use(wn_newstdf, WN_array_base(wn_array));
00513 for (INT k = 0; k < WN_kid_count(parent); k++)
00514 if (WN_kid(parent,k) == wn)
00515 WN_kid(parent,k) = load;
00516 LWN_Delete_Tree(wn);
00517 }
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529 static void SE_Istore(WN* wn,
00530 WN* wn_array,
00531 WN* wn_newstdf,
00532 SYMBOL se_ptrsym,
00533 WN** alias_host)
00534 {
00535 SYMBOL symbol(wn);
00536 DU_MANAGER* du = Du_Mgr;
00537 TYPE_ID wtype = symbol.Type;
00538 OPCODE storeop = OPCODE_make_op(OPR_ISTORE, MTYPE_V, wtype);
00539 TY_IDX pty = Make_Pointer_Type(Be_Type_Tbl(wtype));
00540 FmtAssert(pty, ("null ty pointer for wtype=%d", wtype));
00541 WN* store = LWN_CreateIstore(storeop, 0, pty, WN_kid0(wn), wn_array);
00542
00543
00544 if (*alias_host==NULL) {
00545 Create_unique_pointer_alias(Alias_Mgr, se_ptrsym.St(),
00546 WN_array_base(wn_array), store);
00547 *alias_host = store;
00548 Copy_alias_info(Alias_Mgr, WN_array_base(wn_array), wn_newstdf);
00549 } else {
00550 Copy_alias_info(Alias_Mgr, wn_newstdf, WN_array_base(wn_array));
00551 Copy_alias_info(Alias_Mgr, *alias_host, store);
00552 }
00553 du->Add_Def_Use(wn_newstdf, WN_array_base(wn_array));
00554
00555 WN_kid0(wn) = NULL;
00556 WN_set_kid_count(wn, 0);
00557 WN* block = LWN_Get_Parent(wn);
00558 LWN_Copy_Frequency_Tree(store, wn);
00559 LWN_Insert_Block_Before(block, wn, store);
00560 Is_True(LWN_Get_Parent(wn) == block, ("Very confused?"));
00561 LWN_Set_Parent(store,block);
00562 LWN_Copy_Linenumber(wn,store);
00563 LWN_Copy_Frequency_Tree(store,wn);
00564 LWN_Extract_From_Block(wn);
00565 if (red_manager) {
00566 REDUCTION_TYPE rtype = red_manager->Which_Reduction(wn);
00567 if (rtype != RED_NONE)
00568 red_manager->Add_Reduction(store, rtype);
00569 red_manager->Erase_Node(wn);
00570 }
00571 WN_Delete(wn);
00572 }
00573
00574 static WN* SE_Identity(WN* wn)
00575 {
00576 SYMBOL symbol(wn);
00577 OPCODE opldid = OPCODE_make_op(OPR_LDID, Promote_Type(symbol.Type),
00578 symbol.Type);
00579 OPCODE opstid = OPCODE_make_op(OPR_STID, MTYPE_V, symbol.Type);
00580 WN* wn_ldid = LWN_CreateLdid(opldid, wn);
00581 WN* wn_stid = LWN_CreateStid(opstid, wn, wn_ldid);
00582 return wn_stid;
00583 }
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595 static WN* SE_Final_Value(WN* wn,
00596 WN* guard_tests[],
00597 WN* loops[],
00598 INT dimcnt)
00599 {
00600 DU_MANAGER* du = Du_Mgr;
00601 WN* wn_stid = SE_Identity(wn);
00602 WN* wn_ldid = WN_kid0(wn_stid);
00603 WN* wn_loop = Enclosing_Do_Loop(wn);
00604 INT i;
00605 for (i = 0; i < dimcnt; i++)
00606 if (loops[i] == wn_loop)
00607 break;
00608 FmtAssert(i < dimcnt, ("SE_Final_Value: Could not find SE loop"));
00609 WN* wn_if = guard_tests[i];
00610 LWN_Insert_Block_After(WN_then(wn_if), NULL, wn_stid);
00611 USE_LIST *use_list = du->Du_Get_Use(wn);
00612 USE_LIST_ITER iter(use_list);
00613 const DU_NODE* node = NULL;
00614 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next())
00615 du->Add_Def_Use(wn_stid, node->Wn());
00616 return wn_ldid;
00617 }
00618
00619
00620
00621
00622
00623
00624
00625 static WN* SE_Find_Def(WN* wn_region,
00626 SYMBOL symbol)
00627 {
00628 LWN_ITER* itr = LWN_WALK_TreeIter(wn_region);
00629 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
00630 WN* wn = itr->wn;
00631 if (WN_operator(wn) == OPR_STID
00632 && SYMBOL(wn) == symbol)
00633 return wn;
00634 }
00635 return NULL;
00636 }
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646 static INT SE_Safe_Depth(ACCESS_ARRAY* aa,
00647 INT depth)
00648 {
00649 INT safe_depth = 0;
00650 for (INT dim = 0; dim < aa->Num_Vec(); dim++) {
00651 ACCESS_VECTOR* av = aa->Dim(dim);
00652 if (av->Too_Messy)
00653 return depth + 1;
00654 if (av->Non_Const_Loops() > safe_depth)
00655 safe_depth = av->Non_Const_Loops();
00656 }
00657 return safe_depth;
00658 }
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668 extern SE_STATUS SNL_Is_Scalar_Expandable(WN* wn_outer,
00669 INT permutation[],
00670 INT nloops,
00671 SX_INFO* sx_info,
00672 BOOL full_dist)
00673 {
00674
00675 INT outer_depth = Do_Loop_Depth(wn_outer);
00676 INT kernel_depth = outer_depth;
00677 INT* kernel_permutation = NULL;
00678 INT kernel_nloops = 0;
00679 if (permutation != NULL) {
00680 INT i;
00681 for (i = 0; i < nloops; i++)
00682 if (permutation[i] != i)
00683 break;
00684 INT kernel_first = i;
00685 kernel_nloops = nloops - kernel_first;
00686 kernel_permutation = CXX_NEW_ARRAY(INT, kernel_nloops, &LNO_local_pool);
00687 for (i = kernel_first; i < nloops; i++)
00688 kernel_permutation[i - kernel_first] = permutation[i];
00689 kernel_depth = outer_depth + kernel_first;
00690 }
00691
00692
00693 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00694 DOLOOP_STACK stack(&LNO_local_pool);
00695 Build_Doloop_Stack(wn_inner, &stack);
00696 INT safe_depth = Do_Loop_Depth(wn_outer);
00697 for (WN* wn = wn_inner; wn != NULL; wn = LWN_Get_Parent(wn)) {
00698 if (WN_opcode(wn) != OPC_DO_LOOP)
00699 continue;
00700 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
00701 INT safe_depth_lb = SE_Safe_Depth(dli->LB, dli->Depth);
00702 if (safe_depth_lb > safe_depth)
00703 safe_depth = safe_depth_lb;
00704 INT safe_depth_ub = SE_Safe_Depth(dli->UB, dli->Depth);
00705 if (safe_depth_ub > safe_depth)
00706 safe_depth = safe_depth_ub;
00707 if (wn == wn_outer)
00708 break;
00709 }
00710
00711
00712 SE_STATUS return_value = SE_TRUE;
00713 SX_CONST_PITER ii(&sx_info->Plist);
00714 INT* tkernel_permutation = !full_dist ? kernel_permutation : NULL;
00715 for (const SX_PNODE* n = ii.First(); !ii.Is_Empty(); n = ii.Next()) {
00716 if (n->Transformable(kernel_depth, tkernel_permutation, kernel_nloops)
00717 == SX_PNODE::ILLEGAL)
00718 return_value = SE_MAYBE;
00719 if (n->Transformable(kernel_depth, tkernel_permutation, kernel_nloops)
00720 == SX_PNODE::SE_REQD && safe_depth > kernel_depth)
00721 return SE_FALSE;
00722 }
00723
00724 return return_value;
00725 }
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737 extern INT SNL_Bad_Scalars_Are_Distributable(WN* wn_outer,
00738 INT permutation[],
00739 INT nloops,
00740 SX_INFO* sx_info,
00741 SD_INFO* sd_info)
00742 {
00743
00744 INT i;
00745 for (i = 0; i < nloops; i++)
00746 if (permutation[i] != i)
00747 break;
00748 INT kernel_first = i;
00749 INT outer_depth = Do_Loop_Depth(wn_outer);
00750 INT inner_depth = outer_depth + nloops - 1;
00751 INT kernel_depth = outer_depth + kernel_first;
00752
00753
00754 INT upper_range = sd_info->Distribution_Range(kernel_first, sx_info);
00755 if (upper_range == -1)
00756 return -1;
00757 if (upper_range >= outer_depth + nloops - 1)
00758 return outer_depth + nloops;
00759
00760 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00761 DOLOOP_STACK stack(&LNO_local_pool);
00762 Build_Doloop_Stack(wn_inner, &stack);
00763 WN* wn_bad_inner = stack.Bottom_nth(upper_range + 1);
00764 WN* wn_bad_outer = wn_outer;
00765 if (!SNL_Is_Distributable(wn_bad_outer, wn_bad_outer, wn_bad_inner, TRUE))
00766 return outer_depth + nloops;
00767 if (!SNL_Is_Distributable(wn_bad_outer, wn_bad_outer, wn_bad_inner, FALSE))
00768 return outer_depth + nloops;
00769 return upper_range + 1;
00770 }
00771
00772
00773
00774
00775
00776
00777
00778 static BOOL Is_Index_Variable(WN* wn,
00779 WN* loops[],
00780 INT nloops)
00781 {
00782 INT i;
00783 for (i = 0; i < nloops; i++)
00784 if (SYMBOL(wn) == SYMBOL(WN_index(loops[i])))
00785 break;
00786 return i < nloops ? TRUE : FALSE;
00787 }
00788
00789
00790
00791
00792
00793
00794
00795 static WN* BND_Lower_Bound(WN* wn,
00796 WN* loops[],
00797 INT nloops)
00798 {
00799 INT i;
00800 for (i = 0; i < nloops; i++)
00801 if (SYMBOL(wn) == SYMBOL(WN_index(loops[i])))
00802 break;
00803 WN* loop = loops[i];
00804 WN* lb = WN_kid0(WN_start(loop));
00805 WN* wn_new = BND_Min_Expr(lb, loops, nloops);
00806 return wn_new;
00807 }
00808
00809
00810
00811
00812
00813
00814
00815 static WN* BND_Upper_Bound(WN* wn,
00816 WN* loops[],
00817 INT nloops)
00818 {
00819 INT i;
00820 for (i = 0; i < nloops; i++)
00821 if (SYMBOL(wn) == SYMBOL(WN_index(loops[i])))
00822 break;
00823 WN* loop = loops[i];
00824 WN* ub = SNL_UBexp(WN_end(loop));
00825 WN* wn_new = BND_Max_Expr(ub, loops, nloops);
00826 return wn_new;
00827 }
00828
00829
00830
00831
00832
00833
00834
00835
00836 static BOOL BND_Verify_Expression(WN *wn,
00837 WN* loops[],
00838 INT nloops)
00839 {
00840 OPCODE wn_op = WN_opcode(wn);
00841 OPERATOR wn_oper = OPCODE_operator(wn_op);
00842 switch (wn_oper) {
00843 case OPR_INTCONST:
00844 return TRUE;
00845 case OPR_LDID:
00846 return TRUE;
00847 case OPR_STID:
00848 {
00849 for (INT i = 0; i < nloops; i++)
00850 if (SYMBOL(wn) == SYMBOL(WN_index(loops[i])))
00851 return TRUE;
00852 }
00853 return FALSE;
00854 case OPR_ADD:
00855 case OPR_MPY:
00856 case OPR_MIN:
00857 case OPR_MAX:
00858 case OPR_NEG:
00859 case OPR_SUB:
00860 case OPR_DIV:
00861 return TRUE;
00862 case OPR_INTRINSIC_OP:
00863 {
00864 INT32 intr = WN_intrinsic(wn);
00865 switch (intr) {
00866 case INTRN_I4DIVFLOOR:
00867 case INTRN_U4DIVFLOOR:
00868 case INTRN_I8DIVFLOOR:
00869 case INTRN_U8DIVFLOOR:
00870 case INTRN_I4DIVCEIL:
00871 case INTRN_U4DIVCEIL:
00872 case INTRN_I8DIVCEIL:
00873 case INTRN_U8DIVCEIL:
00874 return TRUE;
00875 default:
00876 return FALSE;
00877 }
00878 }
00879 default:
00880 return FALSE;
00881 }
00882 }
00883
00884
00885
00886
00887
00888
00889
00890
00891 static BOOL BND_Verify_Nest_Bounds(WN* loops[],
00892 INT nloops)
00893 {
00894 for (INT i = 0; i < nloops; i++) {
00895 if (!BND_Verify_Expression(WN_start(loops[i]), loops, nloops))
00896 return FALSE;
00897 if (!BND_Verify_Expression(WN_end(loops[i]), loops, nloops))
00898 return FALSE;
00899 if (!BND_Verify_Expression(WN_step(loops[i]), loops, nloops))
00900 return FALSE;
00901 }
00902 return TRUE;
00903 }
00904
00905
00906
00907
00908
00909
00910
00911
00912 static WN* BND_Min_Expr(WN* wn,
00913 WN* loops[],
00914 INT nloops)
00915 {
00916 OPCODE wn_op = WN_opcode(wn);
00917 OPERATOR wn_oper = OPCODE_operator(wn_op);
00918 switch (wn_oper) {
00919 case OPR_INTCONST:
00920 return LWN_Copy_Tree(wn, TRUE, LNO_Info_Map);
00921 case OPR_LDID:
00922 {
00923 WN* wn_new = NULL;
00924 if (Is_Index_Variable(wn, loops, nloops)) {
00925 wn_new = BND_Lower_Bound(wn, loops, nloops);
00926 } else {
00927 wn_new = LWN_Copy_Tree(wn, TRUE, LNO_Info_Map);
00928 LWN_Copy_Def_Use(wn, wn_new, Du_Mgr);
00929 }
00930 return wn_new;
00931 }
00932 case OPR_ADD:
00933 case OPR_MIN:
00934 case OPR_MAX:
00935 {
00936 TYPE_ID ty_com_rtype = OPCODE_rtype(wn_op);
00937 TYPE_ID ty_com_desc = OPCODE_desc(wn_op);
00938 OPCODE com_opc = OPCODE_make_op(wn_oper, ty_com_rtype, ty_com_desc);
00939 WN* wn_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
00940 WN* wn_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
00941 WN *wn_new = LWN_CreateExp2(com_opc, wn_kid0, wn_kid1);
00942 return wn_new;
00943 }
00944 case OPR_MPY:
00945 {
00946 TYPE_ID ty_mpy_rtype = OPCODE_rtype(wn_op);
00947 TYPE_ID ty_mpy_desc = OPCODE_desc(wn_op);
00948 OPCODE mpy_opc = OPCODE_make_op(wn_oper, ty_mpy_rtype, ty_mpy_desc);
00949 OPCODE cmp_opc = OPCODE_make_op(OPR_MIN, ty_mpy_rtype, ty_mpy_desc);
00950 WN* wn_mpy_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
00951 WN* wn_mpy_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
00952 WN* wn_new_exp0 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
00953 wn_mpy_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
00954 wn_mpy_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
00955 WN* wn_new_exp1 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
00956 WN* wn_new_one = LWN_CreateExp2(cmp_opc, wn_new_exp1, wn_new_exp0);
00957 wn_mpy_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
00958 wn_mpy_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
00959 wn_new_exp0 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
00960 wn_mpy_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
00961 wn_mpy_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
00962 wn_new_exp1 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
00963 WN* wn_new_two = LWN_CreateExp2(cmp_opc, wn_new_exp1, wn_new_exp0);
00964 WN *wn_new = LWN_CreateExp2(cmp_opc, wn_new_one, wn_new_two);
00965 return wn_new;
00966 }
00967 case OPR_NEG:
00968 {
00969 TYPE_ID ty_neg_rtype = OPCODE_rtype(wn_op);
00970 TYPE_ID ty_neg_desc = OPCODE_desc(wn_op);
00971 OPCODE neg_opc = OPCODE_make_op(wn_oper, ty_neg_rtype, ty_neg_desc);
00972 WN* wn_kid = BND_Max_Expr(WN_kid0(wn), loops, nloops);
00973 WN *wn_new = WN_CreateExp1(neg_opc, wn_kid);
00974 return wn_new;
00975 }
00976 case OPR_SUB:
00977 case OPR_DIV:
00978 {
00979 TYPE_ID ty_sub_rtype = OPCODE_rtype(wn_op);
00980 TYPE_ID ty_sub_desc = OPCODE_desc(wn_op);
00981 OPCODE sub_opc = OPCODE_make_op(wn_oper, ty_sub_rtype, ty_sub_desc);
00982 WN *wn_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
00983 WN *wn_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
00984 WN *wn_new = LWN_CreateExp2(sub_opc, wn_kid0, wn_kid1);
00985 return wn_new;
00986 }
00987 case OPR_INTRINSIC_OP:
00988 {
00989 INT32 intr = WN_intrinsic(wn);
00990 switch (intr) {
00991 case INTRN_I4DIVFLOOR:
00992 case INTRN_U4DIVFLOOR:
00993 case INTRN_I8DIVFLOOR:
00994 case INTRN_U8DIVFLOOR:
00995 {
00996 TYPE_ID ty_div_rtype = OPCODE_rtype(wn_op);
00997 TYPE_ID ty_div_desc = OPCODE_desc(wn_op);
00998 OPCODE div_opc = OPCODE_make_op(OPR_DIV, ty_div_rtype, ty_div_desc);
00999 OPCODE sub_opc = OPCODE_make_op(OPR_SUB, ty_div_rtype, ty_div_desc);
01000 WN *wn_kid0 = BND_Min_Expr(WN_kid0(WN_kid0(wn)), loops, nloops);
01001 WN *wn_kid1 = BND_Max_Expr(WN_kid0(WN_kid1(wn)), loops, nloops);
01002 WN* wn_div = LWN_CreateExp2(div_opc, wn_kid0, wn_kid1);
01003 WN* one = LWN_Make_Icon(ty_div_rtype, 1);
01004 WN *wn_new = LWN_CreateExp2(sub_opc, wn_div, one);
01005 return wn_new;
01006 }
01007 case INTRN_I4DIVCEIL:
01008 case INTRN_U4DIVCEIL:
01009 case INTRN_I8DIVCEIL:
01010 case INTRN_U8DIVCEIL:
01011 {
01012 TYPE_ID ty_div_rtype = OPCODE_rtype(wn_op);
01013 TYPE_ID ty_div_desc = OPCODE_desc(wn_op);
01014 OPCODE div_opc = OPCODE_make_op(OPR_DIV, ty_div_rtype, ty_div_desc);
01015 WN *wn_kid0 = BND_Min_Expr(WN_kid0(WN_kid0(wn)), loops, nloops);
01016 WN *wn_kid1 = BND_Max_Expr(WN_kid0(WN_kid1(wn)), loops, nloops);
01017 WN *wn_new = LWN_CreateExp2(div_opc, wn_kid0, wn_kid1);
01018 return wn_new;
01019 }
01020 default:
01021 FmtAssert(0, ("Bounds too complicated for scalar expansion."));
01022 }
01023 }
01024 default:
01025 FmtAssert(0, ("Bounds too complicated for scalar expansion."));
01026 return NULL;
01027 }
01028 }
01029
01030
01031
01032
01033
01034
01035
01036
01037 static WN* BND_Max_Expr(WN* wn,
01038 WN* loops[],
01039 INT nloops)
01040 {
01041 OPCODE wn_op = WN_opcode(wn);
01042 OPERATOR wn_oper = OPCODE_operator(wn_op);
01043 WN* wn_new = NULL;
01044
01045 switch (wn_oper) {
01046 case OPR_INTCONST:
01047 return LWN_Copy_Tree(wn, TRUE, LNO_Info_Map);
01048 case OPR_LDID:
01049 {
01050 if (Is_Index_Variable(wn, loops, nloops)) {
01051 wn_new = BND_Upper_Bound(wn, loops, nloops);
01052 } else {
01053 wn_new = LWN_Copy_Tree(wn, TRUE, LNO_Info_Map);
01054 LWN_Copy_Def_Use(wn, wn_new, Du_Mgr);
01055 }
01056 }
01057 return wn_new;
01058 case OPR_ADD:
01059 case OPR_MIN:
01060 case OPR_MAX:
01061 {
01062 TYPE_ID ty_com_rtype = OPCODE_rtype(wn_op);
01063 TYPE_ID ty_com_desc = OPCODE_desc(wn_op);
01064 OPCODE com_opc = OPCODE_make_op(wn_oper, ty_com_rtype, ty_com_desc);
01065 WN* wn_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
01066 WN* wn_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
01067 wn_new = LWN_CreateExp2(com_opc, wn_kid0, wn_kid1);
01068 }
01069 return wn_new;
01070 case OPR_MPY:
01071 {
01072 TYPE_ID ty_mpy_rtype = OPCODE_rtype(wn_op);
01073 TYPE_ID ty_mpy_desc = OPCODE_desc(wn_op);
01074 OPCODE mpy_opc = OPCODE_make_op(wn_oper, ty_mpy_rtype, ty_mpy_desc);
01075 OPCODE cmp_opc = OPCODE_make_op(OPR_MAX, ty_mpy_rtype, ty_mpy_desc);
01076 WN* wn_mpy_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
01077 WN* wn_mpy_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
01078 WN* wn_new_exp0 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
01079 wn_mpy_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
01080 wn_mpy_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
01081 WN* wn_new_exp1 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
01082 WN* wn_new_one = LWN_CreateExp2(cmp_opc, wn_new_exp1, wn_new_exp0);
01083 wn_mpy_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
01084 wn_mpy_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
01085 wn_new_exp0 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
01086 wn_mpy_kid0 = BND_Min_Expr(WN_kid0(wn), loops, nloops);
01087 wn_mpy_kid1 = BND_Max_Expr(WN_kid1(wn), loops, nloops);
01088 wn_new_exp1 = LWN_CreateExp2(mpy_opc, wn_mpy_kid0, wn_mpy_kid1);
01089 WN* wn_new_two = LWN_CreateExp2(cmp_opc, wn_new_exp1, wn_new_exp0);
01090 wn_new = LWN_CreateExp2(cmp_opc, wn_new_one, wn_new_two);
01091 }
01092 return wn_new;
01093 case OPR_NEG:
01094 {
01095 TYPE_ID ty_neg_rtype = OPCODE_rtype(wn_op);
01096 TYPE_ID ty_neg_desc = OPCODE_desc(wn_op);
01097 OPCODE neg_opc = OPCODE_make_op(wn_oper, ty_neg_rtype, ty_neg_desc);
01098 WN* wn_kid = BND_Min_Expr(WN_kid0(wn), loops, nloops);
01099 wn_new = WN_CreateExp1(neg_opc, wn_kid);
01100 }
01101 return wn_new;
01102 case OPR_SUB:
01103 case OPR_DIV:
01104 {
01105 TYPE_ID ty_sub_rtype = OPCODE_rtype(wn_op);
01106 TYPE_ID ty_sub_desc = OPCODE_desc(wn_op);
01107 OPCODE sub_opc = OPCODE_make_op(wn_oper, ty_sub_rtype, ty_sub_desc);
01108 WN *wn_kid0 = BND_Max_Expr(WN_kid0(wn), loops, nloops);
01109 WN *wn_kid1 = BND_Min_Expr(WN_kid1(wn), loops, nloops);
01110 WN *wn_new = LWN_CreateExp2(sub_opc, wn_kid0, wn_kid1);
01111 return wn_new;
01112 }
01113 case OPR_INTRINSIC_OP:
01114 {
01115 INT32 intr = WN_intrinsic(wn);
01116 switch (intr) {
01117 case INTRN_I4DIVFLOOR:
01118 case INTRN_U4DIVFLOOR:
01119 case INTRN_I8DIVFLOOR:
01120 case INTRN_U8DIVFLOOR:
01121 {
01122 TYPE_ID ty_div_rtype = OPCODE_rtype(wn_op);
01123 TYPE_ID ty_div_desc = OPCODE_desc(wn_op);
01124 OPCODE div_opc = OPCODE_make_op(OPR_DIV, ty_div_rtype, ty_div_desc);
01125 WN *wn_kid0 = BND_Max_Expr(WN_kid0(WN_kid0(wn)), loops, nloops);
01126 WN *wn_kid1 = BND_Min_Expr(WN_kid0(WN_kid1(wn)), loops, nloops);
01127 WN *wn_new = LWN_CreateExp2(div_opc, wn_kid0, wn_kid1);
01128 return wn_new;
01129 }
01130 case INTRN_I4DIVCEIL:
01131 case INTRN_U4DIVCEIL:
01132 case INTRN_I8DIVCEIL:
01133 case INTRN_U8DIVCEIL:
01134 {
01135 TYPE_ID ty_div_rtype = OPCODE_rtype(wn_op);
01136 TYPE_ID ty_div_desc = OPCODE_desc(wn_op);
01137 OPCODE div_opc = OPCODE_make_op(OPR_DIV, ty_div_rtype, ty_div_desc);
01138 OPCODE add_opc = OPCODE_make_op(OPR_ADD, ty_div_rtype, ty_div_desc);
01139 WN *wn_kid0 = BND_Max_Expr(WN_kid0(WN_kid0(wn)), loops, nloops);
01140 WN *wn_kid1 = BND_Min_Expr(WN_kid0(WN_kid1(wn)), loops, nloops);
01141 WN* wn_div = LWN_CreateExp2(div_opc, wn_kid0, wn_kid1);
01142 WN* one = LWN_Make_Icon(ty_div_rtype, 1);
01143 WN *wn_new = LWN_CreateExp2(add_opc, wn_div, one);
01144 return wn_new;
01145 }
01146 default:
01147 FmtAssert(0, ("Bounds too complicated for scalar expansion."));
01148 }
01149 }
01150 #ifdef KEY
01151
01152
01153 case OPR_CVT:
01154 {
01155 TYPE_ID ty_cvt_rtype = OPCODE_rtype(wn_op);
01156 TYPE_ID ty_cvt_desc = OPCODE_desc(wn_op);
01157 OPCODE cvt_opc = OPCODE_make_op(wn_oper, ty_cvt_rtype, ty_cvt_desc);
01158 WN* wn_kid = BND_Max_Expr(WN_kid0(wn), loops, nloops);
01159 wn_new = WN_CreateExp1(cvt_opc, wn_kid);
01160 LWN_Parentize(wn_new);
01161 }
01162 return wn_new;
01163 #endif
01164 default:
01165 FmtAssert(0, ("Bounds too complicated for scalar expansion."));
01166 return NULL;
01167 }
01168 }
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178 static BOOL SE_Easy_Loop(WN* wn_outer_loop)
01179 {
01180 INT nloops = Is_Inner_SNL(wn_outer_loop);
01181 if (nloops == 0)
01182 return FALSE;
01183 if (!Fully_Permutable_Permutation(wn_outer_loop, nloops))
01184 return FALSE;
01185 if (!SNL_Is_Distributable(wn_outer_loop, nloops))
01186 return FALSE;
01187 return TRUE;
01188 }
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200 static WN* SE_New_Outer_Loop(WN* wn_ref,
01201 WN* wn_scalar_ref,
01202 WN* wn_outer_loop)
01203 {
01204 WN* wn_new_outer_loop = NULL;
01205 WN* wn_common = Common_Ancestor(wn_ref, wn_scalar_ref);
01206 for (WN* wn = wn_ref; wn != wn_common; wn = LWN_Get_Parent(wn)) {
01207 if (WN_opcode(wn) == OPC_DO_LOOP) {
01208 if (!SE_Easy_Loop(wn))
01209 break;
01210 wn_new_outer_loop = wn;
01211 }
01212 }
01213 if (wn_new_outer_loop == NULL)
01214 return NULL;
01215 if (wn_outer_loop != NULL && Do_Loop_Depth(wn_new_outer_loop)
01216 <= Do_Loop_Depth(wn_outer_loop))
01217 wn_new_outer_loop = wn_outer_loop;
01218 return wn_new_outer_loop;
01219 }
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229 static INT SE_Prune_Stack_Elements(STACK<WN*>* class_stack,
01230 WN* wn_new_outer_loop,
01231 WN* wn_scalar_ref)
01232 {
01233 STACK<WN*> new_stack(&LNO_local_pool);
01234 INT new_i = -1;
01235 INT j;
01236 for (j = 0; j < class_stack->Elements(); j++) {
01237 WN* wn = class_stack->Bottom_nth(j);
01238 if (Wn_Is_Inside(wn, wn_new_outer_loop))
01239 new_stack.Push(wn);
01240 if (wn == wn_scalar_ref)
01241 new_i = new_stack.Elements();
01242 }
01243 class_stack->Clear();
01244 for (j = 0; j < new_stack.Elements(); j++)
01245 class_stack->Push(new_stack.Bottom_nth(j));
01246 new_stack.Clear();
01247 return new_i - 1;
01248 }
01249
01250
01251
01252
01253
01254
01255
01256 static BOOL Conditionally_Assigned(WN* wn_def,
01257 WN* wn_eq_loop)
01258 {
01259 if (!OPCODE_is_store(WN_opcode(wn_def)))
01260 return FALSE;
01261 for (WN* wn = wn_def; wn != NULL && wn != wn_eq_loop;
01262 wn = LWN_Get_Parent(wn))
01263 if (WN_opcode(wn) == OPC_IF)
01264 return TRUE;
01265 return FALSE;
01266 }
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280 extern STACK<WN*>* Scalar_Equivalence_Class(WN* ref, DU_MANAGER* du,
01281 MEM_POOL* pool, BOOL find_restrict, WN** wn_outer_loop)
01282 {
01283
01284 if (find_restrict)
01285 *wn_outer_loop = NULL;
01286
01287
01288 OPERATOR ref_opr=WN_operator(ref);
01289
01290 if (ref_opr!=OPR_LDID && ref_opr!=OPR_STID)
01291 return (STACK<WN*>*)NULL;
01292
01293 SYMBOL ref_symbol(ref);
01294
01295
01296 STACK<WN*>* class_stack = CXX_NEW(STACK<WN*>(pool), pool);
01297
01298
01299
01300 HASH_TABLE<const WN*,INT> scalar_ref_table(128, pool);
01301
01302 class_stack->Push(ref);
01303 scalar_ref_table.Enter(ref,1);
01304 INT i = 0;
01305 while (i<class_stack->Elements()) {
01306 WN* scalar_ref = class_stack->Bottom_nth(i++);
01307 OPCODE opc = WN_opcode(scalar_ref);
01308 OPERATOR opr = OPCODE_operator(opc);
01309 BOOL potential_read = FALSE;
01310 BOOL potential_write = FALSE;
01311
01312 switch (opr) {
01313 case OPR_LDID:
01314 potential_read = TRUE;
01315 break;
01316 case OPR_STID:
01317 potential_write = TRUE;
01318 break;
01319 default:
01320 if (!find_restrict) {
01321 CXX_DELETE(class_stack, pool);
01322 return (STACK<WN*>*)NULL;
01323 } else {
01324 WN* wn_new_outer_loop = SE_New_Outer_Loop(ref, scalar_ref,
01325 *wn_outer_loop);
01326 if (wn_new_outer_loop == NULL) {
01327 CXX_DELETE(class_stack, pool);
01328 return (STACK<WN*>*)NULL;
01329 }
01330 if (*wn_outer_loop != wn_new_outer_loop) {
01331 *wn_outer_loop = wn_new_outer_loop;
01332 i = SE_Prune_Stack_Elements(class_stack, wn_new_outer_loop,
01333 scalar_ref);
01334 }
01335 continue;
01336 }
01337 }
01338
01339
01340 if (ST_class(WN_st(scalar_ref)) == CLASS_PREG &&
01341 WN_offset(scalar_ref) <= Last_Dedicated_Preg_Offset) {
01342 if (find_restrict)
01343 *wn_outer_loop = NULL;
01344 CXX_DELETE(class_stack, pool);
01345 return (STACK<WN*>*)NULL;
01346 }
01347
01348
01349 SYMBOL sym(scalar_ref);
01350 if (sym!=ref_symbol || sym.Type != ref_symbol.Type) {
01351 if (find_restrict)
01352 *wn_outer_loop = NULL;
01353 CXX_DELETE(class_stack, pool);
01354 return (STACK<WN*>*)NULL;
01355 }
01356
01357 if (potential_read) {
01358 DEF_LIST *def_list=du->Ud_Get_Def(scalar_ref);
01359 if (def_list->Incomplete()) {
01360 if (find_restrict)
01361 *wn_outer_loop = NULL;
01362 CXX_DELETE(class_stack, pool);
01363 return (STACK<WN*>*)NULL;
01364 }
01365 WN* wn_new_outer_loop = NULL;
01366 DEF_LIST_ITER d_iter(def_list);
01367 for (DU_NODE *def_node=(DU_NODE *)d_iter.First(); !d_iter.Is_Empty();
01368 def_node=(DU_NODE *)d_iter.Next()) {
01369 WN* def=def_node->Wn();
01370 if (scalar_ref_table.Find(def)!=1) {
01371 class_stack->Push(def);
01372 scalar_ref_table.Enter(def,1);
01373 }
01374 }
01375 if (red_manager) {
01376 REDUCTION_TYPE red_type=red_manager->Which_Reduction(scalar_ref);
01377 if (red_type != RED_NONE) {
01378 WN* parent = scalar_ref;
01379 while (!OPCODE_is_store(WN_opcode(parent)))
01380 parent=LWN_Get_Parent(parent);
01381 if (scalar_ref_table.Find(parent)!=1) {
01382 class_stack->Push(parent);
01383 scalar_ref_table.Enter(parent,1);
01384 }
01385 }
01386 }
01387 }
01388
01389 if (potential_write) {
01390 USE_LIST *use_list=du->Du_Get_Use(scalar_ref);
01391 if (use_list && use_list->Incomplete()) {
01392 if (find_restrict)
01393 *wn_outer_loop = NULL;
01394 CXX_DELETE(class_stack, pool);
01395 return (STACK<WN*>*)NULL;
01396 }
01397 USE_LIST_ITER u_iter(use_list);
01398 for (DU_NODE *use_node=(DU_NODE *)u_iter.First(); !u_iter.Is_Empty();
01399 use_node=(DU_NODE *)u_iter.Next()) {
01400 WN* use=use_node->Wn();
01401 if (scalar_ref_table.Find(use)!=1) {
01402 class_stack->Push(use);
01403 scalar_ref_table.Enter(use,1);
01404 }
01405 }
01406 if (red_manager) {
01407 REDUCTION_TYPE red_type = red_manager->Which_Reduction(scalar_ref);
01408 if (red_type != RED_NONE) {
01409 WN* wn_load = NULL;
01410 LWN_ITER* itr = LWN_WALK_TreeIter(WN_kid0(scalar_ref));
01411 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
01412 wn_load = itr->wn;
01413 if (OPCODE_has_sym(WN_opcode(wn_load))
01414 && SYMBOL(wn_load) == SYMBOL(scalar_ref)
01415 && red_manager->Which_Reduction(wn_load) == red_type)
01416 break;
01417 }
01418 FmtAssert(wn_load != NULL,
01419 ("Scalar_Equivalence_Class: Could not find reduction load"));
01420 if (scalar_ref_table.Find(wn_load) != 1) {
01421 class_stack->Push(wn_load);
01422 scalar_ref_table.Enter(wn_load, 1);
01423 }
01424 }
01425 }
01426 }
01427 }
01428 return class_stack;
01429 }
01430
01431
01432
01433
01434
01435
01436
01437 extern STACK<WN*>* Scalar_Equivalence_Class(WN* ref, DU_MANAGER* du,
01438 MEM_POOL* pool)
01439 {
01440 return Scalar_Equivalence_Class(ref, du, pool, FALSE, NULL);
01441 }
01442
01443
01444
01445
01446
01447
01448
01449
01450
01451 extern STACK<WN*>* Scalar_Equivalence_Class(WN* ref, DU_MANAGER* du,
01452 MEM_POOL* pool, WN* wn_loop)
01453 {
01454
01455
01456 OPERATOR ref_opr=WN_operator(ref);
01457 FmtAssert(ref_opr == OPR_LDID || ref_opr == OPR_STID,
01458 ("Scalar_Equivalence_Class: Expected ref to be LDID or STID"));
01459
01460 SYMBOL ref_symbol(ref);
01461
01462
01463 STACK<WN*>* class_stack = CXX_NEW(STACK<WN*>(pool), pool);
01464
01465
01466
01467 HASH_TABLE<const WN*,INT> scalar_ref_table(128, pool);
01468
01469 class_stack->Push(ref);
01470 scalar_ref_table.Enter(ref,1);
01471 INT i = 0;
01472
01473 while (i < class_stack->Elements()) {
01474 WN* scalar_ref = class_stack->Bottom_nth(i++);
01475
01476 OPCODE opc = WN_opcode(scalar_ref);
01477 OPERATOR opr = OPCODE_operator(opc);
01478 BOOL potential_read = FALSE;
01479 BOOL potential_write = FALSE;
01480
01481 switch (opr) {
01482 case OPR_LDID:
01483 potential_read = TRUE;
01484 break;
01485 case OPR_STID:
01486 potential_write = TRUE;
01487 break;
01488 default:
01489 FmtAssert(TRUE,
01490 ("Scalar_Equivalence_Class: Expected ref to be LDID or STID"));
01491 }
01492
01493 if (potential_read) {
01494 DEF_LIST *def_list=du->Ud_Get_Def(scalar_ref);
01495 FmtAssert(def_list != NULL && !def_list->Incomplete(),
01496 ("Scalar_Equivalence_Class: Expected complete def list"));
01497 DEF_LIST_ITER d_iter(def_list);
01498 for (DU_NODE *def_node=(DU_NODE *)d_iter.First(); !d_iter.Is_Empty();
01499 def_node=(DU_NODE *)d_iter.Next()) {
01500 WN* def=def_node->Wn();
01501 if (scalar_ref_table.Find(def)!=1 && Wn_Is_Inside(def, wn_loop)) {
01502 class_stack->Push(def);
01503 scalar_ref_table.Enter(def,1);
01504 }
01505 }
01506 if (red_manager) {
01507 REDUCTION_TYPE red_type=red_manager->Which_Reduction(scalar_ref);
01508 if (red_type != RED_NONE) {
01509 WN* parent = scalar_ref;
01510 while (!OPCODE_is_store(WN_opcode(parent)))
01511 parent=LWN_Get_Parent(parent);
01512 if (scalar_ref_table.Find(parent)!=1
01513 && Wn_Is_Inside(parent, wn_loop)) {
01514 class_stack->Push(parent);
01515 scalar_ref_table.Enter(parent,1);
01516 }
01517 }
01518 }
01519 }
01520
01521 if (potential_write) {
01522 USE_LIST *use_list=du->Du_Get_Use(scalar_ref);
01523 FmtAssert(use_list != NULL && !use_list->Incomplete(),
01524 ("Scalar_Equivalence_Class: Expected complete use list"));
01525 USE_LIST_ITER u_iter(use_list);
01526 for (DU_NODE *use_node=(DU_NODE *)u_iter.First(); !u_iter.Is_Empty();
01527 use_node=(DU_NODE *)u_iter.Next()) {
01528 WN* use=use_node->Wn();
01529 if (scalar_ref_table.Find(use)!=1 && Wn_Is_Inside(use, wn_loop)) {
01530 class_stack->Push(use);
01531 scalar_ref_table.Enter(use,1);
01532 }
01533 }
01534 }
01535 }
01536 return class_stack;
01537 }
01538
01539
01540
01541
01542
01543
01544
01545
01546 static WN* IF_Complement(WN* wn_node)
01547 {
01548 WN* wn_if = LWN_Get_Parent(wn_node);
01549 if (WN_operator(wn_if) != OPR_IF)
01550 return NULL;
01551 if (wn_node == WN_then(wn_if))
01552 return WN_else(wn_if);
01553 if (wn_node == WN_else(wn_if))
01554 return WN_then(wn_if);
01555 return NULL;
01556 }
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566 static WN* IF_Branch(WN* wn_node,
01567 WN* wn_def_loop)
01568 {
01569 WN* wnn = NULL;
01570 for (WN* wn = wn_node; wn != NULL; wnn = wn, wn = LWN_Get_Parent(wn)) {
01571 if (wn == wn_def_loop)
01572 return wn_def_loop;
01573 if (WN_operator(wn) == OPR_IF
01574 && (WN_then(wn) == wnn || WN_else(wn) == wnn))
01575 return wnn;
01576 }
01577 return NULL;
01578 }
01579
01580
01581
01582
01583
01584
01585
01586 static BOOL Has_Cutset(STACK<WN*>* stk_equiv,
01587 WN* wn_def_loop)
01588 {
01589 WN** cutset = CXX_NEW_ARRAY(WN*, stk_equiv->Elements(),
01590 &LNO_local_pool);
01591 for (INT i = 0; i < stk_equiv->Elements(); i++) {
01592 WN* wn_node = stk_equiv->Bottom_nth(i);
01593 cutset[i] = WN_operator(wn_node) == OPR_STID
01594 ? IF_Branch(wn_node, wn_def_loop) : NULL;
01595 if (cutset[i] == wn_def_loop)
01596 return TRUE;
01597 }
01598 BOOL change = TRUE;
01599 while (change) {
01600 change = FALSE;
01601 for (INT i = 0; i < stk_equiv->Elements(); i++) {
01602 WN* wn_node = stk_equiv->Bottom_nth(i);
01603 WN* wn_cutset = cutset[i];
01604 if (wn_cutset == NULL)
01605 continue;
01606 WN* wn_complement = IF_Complement(wn_cutset);
01607 FmtAssert(wn_complement != NULL,
01608 ("Has_Cutset: Could not find IF complement"));
01609 INT j;
01610 for (j = 0; j < i; j++)
01611 if (cutset[j] == wn_complement)
01612 break;
01613 if (j < i) {
01614 WN* wn_new = IF_Branch(LWN_Get_Parent(cutset[i]), wn_def_loop);
01615 if (wn_new == wn_def_loop)
01616 return TRUE;
01617 for (INT k = 0; k < stk_equiv->Elements(); k++) {
01618 if (cutset[k] == wn_cutset || cutset[k] == wn_complement)
01619 cutset[k] = wn_new;
01620 }
01621 change = TRUE;
01622 }
01623 }
01624 }
01625 return FALSE;
01626 }
01627
01628
01629
01630
01631
01632
01633
01634
01635
01636
01637
01638 extern SE_RESULT Scalar_Expandable(STACK<WN*>* stk_equiv,
01639 WN* wn_ref,
01640 WN* wn_def_loop,
01641 DU_MANAGER *du,
01642 WN* wn_outer_loop,
01643 WN* wn_eq_loop)
01644 {
01645 SYMBOL sym_ref(wn_ref);
01646
01647 if (!Upper_Bound_Standardize(WN_end(wn_def_loop), TRUE))
01648 return SE_NONE;
01649
01650 if (stk_equiv == NULL)
01651 return SE_NONE;
01652
01653 if (wn_eq_loop != NULL && !Wn_Is_Inside(wn_def_loop, wn_eq_loop))
01654 return SE_NONE;
01655
01656 if (!Wn_Is_Inside(wn_def_loop, wn_outer_loop))
01657 return SE_NONE;
01658
01659 if (!Has_Cutset(stk_equiv, wn_def_loop))
01660 return SE_NONE;
01661
01662 INT stid_count = 0;
01663 BOOL found_lcd = FALSE;
01664 INT inner_ldid_count = 0;
01665 for (INT i = 0; i < stk_equiv->Elements(); i++) {
01666 WN* wn_scalar_ref = stk_equiv->Bottom_nth(i);
01667 OPERATOR opr = WN_operator(wn_scalar_ref);
01668 if (opr != OPR_LDID && opr != OPR_STID)
01669 return SE_NONE;
01670 if (SYMBOL(wn_scalar_ref) != sym_ref)
01671 return SE_NONE;
01672 if (opr == OPR_STID) {
01673 stid_count++;
01674 } else if (opr == OPR_LDID) {
01675 if (!Wn_Is_Inside(wn_scalar_ref, wn_outer_loop)
01676 && wn_eq_loop != wn_outer_loop) {
01677 for (WN* wn = wn_ref; wn != NULL; wn = LWN_Get_Parent(wn)) {
01678 if (WN_opcode(wn) == OPC_DO_LOOP && !SE_Easy_Loop(wn))
01679 return SE_NONE;
01680 if (wn == wn_outer_loop)
01681 break;
01682 }
01683 wn_eq_loop = wn_outer_loop;
01684 i = SE_Prune_Stack_Elements(stk_equiv, wn_eq_loop, wn_scalar_ref);
01685 continue;
01686 }
01687 WN* wn_parent = LWN_Get_Parent(wn_scalar_ref);
01688 if (wn_parent != NULL && WN_operator(wn_parent)
01689 == OPR_ARRAY && WN_array_base(wn_parent) == wn_scalar_ref)
01690 return SE_NONE;
01691 DEF_LIST* def_list = du->Ud_Get_Def(wn_scalar_ref);
01692 WN* wn_loop_stmt = def_list->Loop_stmt();
01693 WN* wn_enclosing_loop = Enclosing_Do_Loop(wn_scalar_ref);
01694 DO_LOOP_INFO* dli_enclosing = NULL;
01695 if (wn_enclosing_loop != NULL) {
01696 dli_enclosing = Get_Do_Loop_Info(wn_enclosing_loop);
01697 if (dli_enclosing->Is_Inner)
01698 inner_ldid_count++;
01699 }
01700 if (wn_loop_stmt != NULL && (wn_loop_stmt == wn_def_loop
01701 || !Wn_Is_Inside(wn_loop_stmt, wn_def_loop))) {
01702 if (Conditionally_Assigned(wn_scalar_ref, wn_outer_loop))
01703 return SE_NONE;
01704 if (dli_enclosing != NULL && dli_enclosing->Is_Inner)
01705 return SE_NONE;
01706 found_lcd = TRUE;
01707 }
01708 }
01709 }
01710 if (found_lcd && (stid_count > 1 || inner_ldid_count == 0))
01711 return SE_NONE;
01712 return (wn_eq_loop != NULL) ? (found_lcd ? SE_HARD_LCD : SE_HARD)
01713 : (found_lcd ? SE_EASY_LCD : SE_EASY);
01714 }
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726 extern SE_RESULT Scalar_Expandable(WN* ref, WN* loop, DU_MANAGER *du)
01727 {
01728
01729 if (!Upper_Bound_Standardize(WN_end(loop), TRUE))
01730 return SE_NONE;
01731
01732 WN* wn_eq_loop = NULL;
01733 MEM_POOL_Push(&LNO_local_pool);
01734 STACK<WN*>* equivalence_class =
01735 Scalar_Equivalence_Class(ref, du, &LNO_local_pool, TRUE, &wn_eq_loop);
01736 SE_RESULT can_expand = Scalar_Expandable(equivalence_class, ref, loop, du,
01737 loop, wn_eq_loop);
01738 CXX_DELETE(equivalence_class, &LNO_local_pool);
01739 MEM_POOL_Pop(&LNO_local_pool);
01740 return can_expand;
01741 }
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766 enum LOOP_RELATIONSHIP {LR_ABOVE=345, LR_BELOW, LR_DESCENDANT};
01767
01768 static void Get_Do_And_Above(WN* ref, WN** loopp, LOOP_RELATIONSHIP* lrp)
01769 {
01770 WN* block_kid = ref;
01771 WN* block = LWN_Get_Parent(block_kid);
01772
01773 while (block && WN_opcode(block) != OPC_DO_LOOP) {
01774 Is_True(LWN_Get_Parent(block_kid) == block, ("Bug"));
01775
01776 if (WN_opcode(block) == OPC_BLOCK) {
01777 WN* wn;
01778
01779
01780 for (wn = WN_prev(block_kid); wn; wn = WN_prev(wn)) {
01781 if (WN_opcode(wn) == OPC_DO_LOOP) {
01782 *loopp = wn;
01783 *lrp = LR_ABOVE;
01784 return;
01785 }
01786 }
01787 for (wn = WN_next(block_kid); wn; wn = WN_next(wn)) {
01788 if (WN_opcode(wn) == OPC_DO_LOOP) {
01789 *loopp = wn;
01790 *lrp = LR_BELOW;
01791 return;
01792 }
01793 }
01794 }
01795 block_kid = block;
01796 block = LWN_Get_Parent(block_kid);
01797 }
01798
01799 *loopp = block;
01800 *lrp = LR_DESCENDANT;
01801 }
01802
01803 BOOL Scalar_Expansion_Not_Necessary(WN* ref, DU_MANAGER* du)
01804 {
01805 MEM_POOL_Push(&LNO_local_pool);
01806
01807 SYMBOL ref_symbol(ref);
01808 WN* wn_eq_loop = NULL;
01809 STACK<WN*>* equivalence_class =
01810 Scalar_Equivalence_Class(ref, du, &LNO_local_pool, TRUE, &wn_eq_loop);
01811 if (wn_eq_loop != NULL) {
01812 MEM_POOL_Pop(&LNO_local_pool);
01813 return FALSE;
01814 }
01815
01816 WN* loop;
01817 LOOP_RELATIONSHIP lr;
01818
01819 Get_Do_And_Above(ref, &loop, &lr);
01820
01821 while (!equivalence_class->Is_Empty()) {
01822 WN* scalar_ref = equivalence_class->Pop();
01823 WN* loop2;
01824 LOOP_RELATIONSHIP lr2;
01825
01826 Get_Do_And_Above(scalar_ref, &loop2, &lr2);
01827 if (loop != loop2 || lr != lr2) {
01828 MEM_POOL_Pop(&LNO_local_pool);
01829 return FALSE;
01830 }
01831 }
01832
01833 MEM_POOL_Pop(&LNO_local_pool);
01834 return TRUE;
01835 }
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861 static void Mark_Calls(WN* wn)
01862 {
01863 for ( ; wn; wn = LWN_Get_Parent(wn)) {
01864 if (WN_opcode(wn) == OPC_DO_LOOP) {
01865 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01866 if (dli) dli->Has_Calls = TRUE;
01867 }
01868 }
01869 }
01870
01871 static INT unique_se_id = 0;
01872
01873 static TY_IDX se_type_array[MTYPE_LAST + 1];
01874
01875 extern void SE_Symbols_For_SE(SYMBOL* ptr_array, const char* pref, INT unique_id,
01876 TYPE_ID mtype)
01877 {
01878 char name[64];
01879 ST *st;
01880
01881 sprintf(name, "$%s%d_%s", pref, unique_id, MTYPE_name(mtype));
01882 st = New_ST(CURRENT_SYMTAB);
01883
01884 TY_IDX ty_base = se_type_array[mtype];
01885 if (ty_base == (TY_IDX) NULL) {
01886 ty_base = Copy_TY(Be_Type_Tbl(mtype));
01887 se_type_array[mtype] = ty_base;
01888 }
01889 TY_IDX ty_idx = Make_Pointer_Type (ty_base);
01890 if (!TY_ptr_as_array(ty_idx)) {
01891 char buf[32];
01892 sprintf (buf, "->%d", TY_IDX_index (ty_base));
01893 TY &ty = New_TY (ty_idx);
01894 TY_Init (ty, Pointer_Size, KIND_POINTER, Pointer_Mtype, Save_Str(buf));
01895 Set_TY_pointed (ty, ty_base);
01896 Set_TY_align (ty_idx, Pointer_Size);
01897 Set_TY_ptr_as_array (ty);
01898 }
01899
01900 ST_Init (st,
01901 Save_Str(name),
01902 CLASS_VAR,
01903 SCLASS_AUTO,
01904 EXPORT_LOCAL,
01905 ty_idx);
01906 Set_ST_is_temp_var(st);
01907 *ptr_array = SYMBOL(st, 0, Pointer_type);
01908
01909 Set_ST_pt_to_unique_mem(st);
01910 Set_ST_pt_to_compiler_generated_mem(st);
01911 }
01912
01913 extern WN* Get_Expansion_Space(SYMBOL se_ptrsym,
01914 WN* bsz,
01915 const char* pref,
01916 INT unique_id,
01917 TYPE_ID wtype,
01918 WN* allocregion,
01919 WN* useregion,
01920 WN* deallocregion)
01921 {
01922 DU_MANAGER* du = Du_Mgr;
01923 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01924
01925 WN* newstdf = NULL;
01926 TY_IDX pty = Make_Pointer_Type(Be_Type_Tbl(wtype));
01927
01928 OPCODE stop = OPCODE_make_op(OPR_STID, MTYPE_V, Pointer_type);
01929 OPCODE ldop = OPCODE_make_op(OPR_LDID, Pointer_type, Pointer_type);
01930 OPCODE subop = OPCODE_make_op(OPR_SUB, Pointer_type, MTYPE_V);
01931 OPCODE addop = OPCODE_make_op(OPR_ADD, Pointer_type, MTYPE_V);
01932
01933 BOOL use_sp = LNO_Use_Malloc == FALSE;
01934 FmtAssert(!Get_Trace(TP_LNOPT, TT_LNO_SE_MALLOC),
01935 ("-ttLNO for malloc is obsolete: use -LNO:use_malloc"));
01936 SYMBOL sp_tmp;
01937 WN* sp_tmp_def;
01938
01939 PREG_NUM rreg1;
01940 PREG_NUM rreg2;
01941
01942
01943
01944
01945
01946
01947 if (use_sp) {
01948
01949 char newstname[32];
01950 sprintf(newstname, "$%s%d__$stk", pref, unique_id);
01951 sp_tmp = Create_Preg_Symbol(newstname, Pointer_type);
01952
01953 OPCODE op = OPCODE_make_op(OPR_INTRINSIC_CALL, Pointer_type, MTYPE_V);
01954 WN* call = WN_Create(op, 0);
01955
01956 WN_intrinsic(call) = Pointer_Size == 8 ?
01957 INTRN_U8READSTACKPOINTER :
01958 INTRN_U4READSTACKPOINTER;
01959 ST* rst = Find_Return_Registers(Pointer_type, &rreg1, &rreg2);
01960 FmtAssert(rreg1 != 0 && rreg2 == 0, ("Bad pointer type ret regs"));
01961
01962 LWN_Copy_Linenumber(allocregion, call);
01963 LWN_Insert_Block_Before(LWN_Get_Parent(allocregion), allocregion, call);
01964 if (Block_Loop_Depth(call) >= 0) {
01965 if (dg && !dg->Add_Vertex(call)) {
01966 LNO_Erase_Dg_From_Here_In(call, dg);
01967 }
01968 }
01969 WN* rreg_ldid = WN_CreateLdid(ldop, rreg1, rst, pty);
01970 Create_alias(Alias_Mgr, rreg_ldid);
01971 du->Add_Def_Use(call, rreg_ldid);
01972 sp_tmp_def = LWN_CreateStid(stop, sp_tmp.WN_Offset(),
01973 sp_tmp.St(), pty, rreg_ldid);
01974 Create_alias(Alias_Mgr, sp_tmp_def);
01975 LWN_Copy_Linenumber(allocregion, sp_tmp_def);
01976 LWN_Copy_Frequency_Tree(sp_tmp_def,allocregion);
01977 LWN_Insert_Block_Before(LWN_Get_Parent(allocregion), allocregion,
01978 sp_tmp_def);
01979 }
01980
01981 OPCODE icallop = OPCODE_make_op(OPR_INTRINSIC_CALL,
01982 Pointer_type, MTYPE_V);
01983 WN* call = WN_Create(icallop, 1);
01984
01985 ST* rst = Find_Return_Registers(Pointer_type, &rreg1, &rreg2);
01986 FmtAssert(rreg1 != 0 && rreg2 == 0, ("Bad pointer type ret regs"));
01987
01988 if (use_sp) {
01989 #ifdef _NEW_SYMTAB
01990 Set_PU_has_alloca(Get_Current_PU());
01991 #else
01992 Set_SYMTAB_has_alloca(Current_Symtab);
01993 #endif
01994 WN_intrinsic(call) =
01995 Pointer_Size == 8 ? INTRN_U8I8ALLOCA : INTRN_U4I4ALLOCA;
01996 }
01997 else {
01998 WN_intrinsic(call) =
01999 Pointer_Size == 8 ? INTRN_U8I8MALLOC : INTRN_U4I4MALLOC;
02000 }
02001 LWN_Copy_Linenumber(allocregion, call);
02002 if (LNO_Use_Parm) {
02003 WN* tmp=WN_CreateParm(MTYPE_U8, bsz, Be_Type_Tbl(MTYPE_U8),
02004 WN_PARM_BY_VALUE);
02005 LWN_Set_Parent(bsz, tmp);
02006 bsz=tmp;
02007 }
02008 WN_kid0(call) = bsz;
02009 LWN_Set_Parent(bsz, call);
02010
02011 LWN_Copy_Frequency_Tree(call, allocregion);
02012
02013 LWN_Insert_Block_Before(LWN_Get_Parent(allocregion), allocregion, call);
02014 if (Block_Loop_Depth(call) >= 0) {
02015 if (dg && !dg->Add_Vertex(call)) {
02016 LNO_Erase_Dg_From_Here_In(call, dg);
02017 }
02018 }
02019 WN* rreg_ldid = WN_CreateLdid(ldop, rreg1, rst, pty);
02020 Create_alias(Alias_Mgr, rreg_ldid);
02021 du->Add_Def_Use(call, rreg_ldid);
02022 newstdf = LWN_CreateStid(stop, se_ptrsym.WN_Offset(),
02023 se_ptrsym.St(), pty, rreg_ldid);
02024 Create_local_alias(Alias_Mgr,newstdf);
02025 LWN_Copy_Linenumber(useregion, newstdf);
02026
02027 LWN_Copy_Frequency_Tree(newstdf, allocregion);
02028
02029 LWN_Insert_Block_Before(LWN_Get_Parent(allocregion), allocregion, newstdf);
02030 Mark_Calls(call);
02031
02032
02033
02034 icallop = OPCODE_make_op(OPR_INTRINSIC_CALL, MTYPE_V, MTYPE_V);
02035 call = WN_Create(icallop, ((use_sp && Alloca_Dealloca_On) ? 2 : 1));
02036 WN* wn_callarg;
02037 if (use_sp) {
02038 WN_intrinsic(call) = Pointer_Size == 8 ?
02039 INTRN_U8I8SETSTACKPOINTER :
02040 INTRN_U4I4SETSTACKPOINTER;
02041 wn_callarg = LWN_CreateLdid(ldop, sp_tmp_def);
02042 du->Ud_Add_Def(wn_callarg, sp_tmp_def);
02043 du->Du_Add_Use(sp_tmp_def, wn_callarg);
02044 }
02045 else {
02046 WN_intrinsic(call) = Pointer_Size == 8 ? INTRN_U8FREE : INTRN_U4FREE;
02047 wn_callarg = WN_CreateLdid(ldop, se_ptrsym.WN_Offset(),
02048 se_ptrsym.St(),pty);
02049 Create_alias(Alias_Mgr, wn_callarg);
02050 du->Ud_Add_Def(wn_callarg, newstdf);
02051 du->Du_Add_Use(newstdf, wn_callarg);
02052 }
02053
02054 if (LNO_Use_Parm) {
02055 WN* tmp=WN_CreateParm(Pointer_type,wn_callarg,pty,WN_PARM_BY_VALUE);
02056 LWN_Set_Parent(wn_callarg, tmp);
02057 wn_callarg=tmp;
02058 }
02059 WN_kid0(call) = wn_callarg;
02060 LWN_Set_Parent(wn_callarg, call);
02061 LWN_Copy_Linenumber(useregion, call);
02062
02063 if (use_sp && Alloca_Dealloca_On) {
02064 WN_kid1(call) = WN_CreateParm(Pointer_type,
02065 WN_CreateLdid(ldop,
02066 se_ptrsym.WN_Offset(),
02067 se_ptrsym.St(), pty),
02068 pty, WN_PARM_BY_VALUE);
02069 LWN_Parentize(call);
02070 du->Add_Def_Use (newstdf, WN_kid0(WN_kid1(call)));
02071 }
02072
02073 LWN_Copy_Frequency_Tree(call, deallocregion);
02074
02075 LWN_Insert_Block_After(LWN_Get_Parent(deallocregion), deallocregion, call);
02076 if (Block_Loop_Depth(call) >= 0) {
02077 if (dg && !dg->Add_Vertex(call)) {
02078 LNO_Erase_Dg_From_Here_In(call, dg);
02079 }
02080 }
02081
02082 return newstdf;
02083
02084 }
02085
02086
02087
02088
02089
02090
02091
02092
02093 static void SE_Assign_Lexcounts(DYN_ARRAY<WN_REFERENCE_INFO>& deflist,
02094 DYN_ARRAY<WN_REFERENCE_INFO>& uselist,
02095 WN* allocregion,
02096 WN* deallocregion,
02097 SYMBOL sym)
02098 {
02099 INT lexcount = 0;
02100 for (WN* wnn = allocregion; wnn != NULL; wnn = WN_next(wnn)) {
02101 LWN_ITER* itr = LWN_WALK_TreeIter(wnn);
02102 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02103 WN* wn = itr->wn;
02104 if (Enclosing_Do_Loop(wn) == NULL)
02105 continue;
02106 if (WN_operator(wn) == OPR_ILOAD
02107 && WN_operator(WN_kid0(wn)) == OPR_ARRAY
02108 && OPCODE_has_sym(WN_opcode(WN_array_base(WN_kid0(wn))))
02109 && SYMBOL(WN_array_base(WN_kid0(wn))) == sym) {
02110 INT i;
02111 for (i = 0; i <= uselist.Lastidx(); i++)
02112 if (uselist[i].wn == wn)
02113 break;
02114 FmtAssert(i <= uselist.Lastidx(),
02115 ("SE_Assign_Lexcounts: Could not find use"));
02116 FmtAssert(uselist[i].lexcount == 0,
02117 ("SE_Assign_Lexcounts: Use already assigned a lexcount"));
02118 uselist[i].lexcount = ++lexcount;
02119 }
02120 if (WN_operator(wn) == OPR_ISTORE
02121 && WN_operator(WN_kid1(wn)) == OPR_ARRAY
02122 && OPCODE_has_sym(WN_opcode(WN_array_base(WN_kid1(wn))))
02123 && SYMBOL(WN_array_base(WN_kid1(wn))) == sym) {
02124 INT i;
02125 for (i = 0; i <= deflist.Lastidx(); i++)
02126 if (deflist[i].wn == wn)
02127 break;
02128 FmtAssert(i <= deflist.Lastidx(),
02129 ("SE_Assign_Lexcounts: Could not find def"));
02130 FmtAssert(deflist[i].lexcount == 0,
02131 ("SE_Assign_Lexcounts: Def already assigned a lexcount"));
02132 deflist[i].lexcount = ++lexcount;
02133 }
02134 }
02135 if (wnn == deallocregion)
02136 break;
02137 }
02138 INT i;
02139 for (i = 0; i <= deflist.Lastidx(); i++)
02140 FmtAssert(deflist[i].lexcount != 0,
02141 ("SE_Assign_Lexcounts: Did not assign a lexcount to def"));
02142 for (i = 0; i <= uselist.Lastidx(); i++)
02143 FmtAssert(uselist[i].lexcount != 0,
02144 ("SE_Assign_Lexcounts: Did not assign a lexcount to use"));
02145 }
02146
02147
02148
02149
02150
02151
02152
02153
02154
02155
02156
02157
02158 extern void SE_Fix_Dependence(DYN_ARRAY<WN_REFERENCE_INFO>& deflist,
02159 DYN_ARRAY<WN_REFERENCE_INFO>& uselist)
02160 {
02161 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02162 for (INT d = 0; d <= deflist.Lastidx(); d++) {
02163 DOLOOP_STACK dstack(&LNO_local_pool);
02164 Build_Doloop_Stack(deflist[d].wn, &dstack);
02165
02166 FmtAssert(WN_operator(WN_kid1(deflist[d].wn)) == OPR_ARRAY,
02167 ("Bad child of deflist[d]"));
02168
02169
02170 for (INT dd = d; dd <= deflist.Lastidx(); dd++) {
02171 FmtAssert(WN_operator(WN_kid1(deflist[dd].wn))
02172 == OPR_ARRAY, ("Bad child of deflist[d]"));
02173 DOLOOP_STACK ddstack(&LNO_local_pool);
02174 Build_Doloop_Stack(deflist[dd].wn, &ddstack);
02175 if (!dg->Add_Edge(deflist[d].wn, &dstack, deflist[dd].wn, &ddstack,
02176 deflist[d].lexcount < deflist[dd].lexcount)) {
02177 LNO_Erase_Dg_From_Here_In(WN_kid1(deflist[d].wn), dg);
02178 LNO_Erase_Dg_From_Here_In(WN_kid1(deflist[dd].wn), dg);
02179 }
02180 }
02181
02182
02183 for (INT uu = 0; uu <= uselist.Lastidx(); uu++) {
02184 FmtAssert(WN_operator(WN_kid0(uselist[uu].wn))
02185 == OPR_ARRAY, ("Bad child of uselist[uu]"));
02186 DOLOOP_STACK uustack(&LNO_local_pool);
02187 Build_Doloop_Stack(uselist[uu].wn, &uustack);
02188 if (!dg->Add_Edge(deflist[d].wn, &dstack, uselist[uu].wn, &uustack,
02189 deflist[d].lexcount < uselist[uu].lexcount)) {
02190 LNO_Erase_Dg_From_Here_In(WN_kid1(deflist[d].wn), dg);
02191 LNO_Erase_Dg_From_Here_In(WN_kid0(uselist[uu].wn), dg);
02192 }
02193 }
02194 }
02195 }
02196
02197
02198
02199
02200
02201
02202
02203
02204
02205
02206
02207
02208
02209
02210
02211
02212
02213
02214
02215
02216 static WN* SE_Wrap_Array(SYMBOL se_ptrsym,
02217 INT sz,
02218 INT wrap_index,
02219 WN* indxs[],
02220 WN* bounds[],
02221 const INT order[],
02222 INT dimcnt,
02223 BOOL is_load)
02224 {
02225 DU_MANAGER* du = Du_Mgr;
02226 OPCODE ldop = OPCODE_make_op(OPR_LDID, Pointer_type, Pointer_type);
02227 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
02228 WN* wn_array = WN_Create(op_array, dimcnt + dimcnt + 1);
02229 WN_element_size(wn_array) = sz;
02230 WN_array_base(wn_array) = WN_CreateLdid(ldop, se_ptrsym.WN_Offset(),
02231 se_ptrsym.St(), ST_type(se_ptrsym.St()));
02232 LWN_Set_Parent(WN_array_base(wn_array), wn_array);
02233 for (INT i = 0; i < dimcnt; i++) {
02234 INT j = order[i];
02235 WN* wn_index = NULL;
02236 if (j < wrap_index || j == wrap_index && !is_load) {
02237 wn_index = LWN_Copy_Tree(indxs[j]);
02238 LWN_Copy_Def_Use(indxs[j], wn_index, du);
02239 TYPE_ID desc = Promote_Type(WN_rtype(indxs[j]));
02240 OPCODE addop = OPCODE_make_op(OPR_ADD, desc, MTYPE_V);
02241 WN* wn_one = LWN_Make_Icon(desc, 1);
02242 wn_index = LWN_CreateExp2(addop, wn_index, wn_one);
02243 } else if (j == wrap_index && is_load) {
02244 wn_index = LWN_Copy_Tree(indxs[j]);
02245 LWN_Copy_Def_Use(indxs[j], wn_index, du);
02246 } else if (j == wrap_index + 1 && !is_load) {
02247 TYPE_ID desc = Promote_Type(WN_rtype(indxs[j]));
02248 wn_index = LWN_Make_Icon(desc, 0);
02249 } else {
02250 wn_index = LWN_Copy_Tree(bounds[j]);
02251 LWN_Copy_Def_Use(bounds[j], wn_index, du);
02252 TYPE_ID desc = Promote_Type(WN_rtype(bounds[j]));
02253 OPCODE subop = OPCODE_make_op(OPR_SUB, desc, MTYPE_V);
02254 WN* wn_one = LWN_Make_Icon(desc, 1);
02255 wn_index = LWN_CreateExp2(subop, wn_index, wn_one);
02256 }
02257 WN_array_index(wn_array, i) = wn_index;
02258 WN* wn_bound = LWN_Copy_Tree(bounds[j]);
02259 LWN_Copy_Def_Use(bounds[j], wn_bound, du);
02260 WN_array_dim(wn_array, i) = wn_bound;
02261 LWN_Set_Parent(WN_array_index(wn_array, i), wn_array);
02262 LWN_Set_Parent(WN_array_dim(wn_array, i), wn_array);
02263 }
02264 return wn_array;
02265 }
02266
02267
02268
02269
02270
02271
02272
02273 static WN* SE_Find_Stid(STACK<WN*>* stk_se)
02274 {
02275 WN* wn_stid = NULL;
02276 for (INT i = 0; i < stk_se->Elements(); i++) {
02277 WN* wn = stk_se->Bottom_nth(i);
02278 if (WN_operator(wn) == OPR_STID
02279 && (wn_stid == NULL || Do_Depth(wn) < Do_Depth(wn_stid)))
02280 wn_stid = wn;
02281 }
02282 FmtAssert(wn_stid != NULL,
02283 ("Scalar_Expand: Could not find OPR_STID def"));
02284 return wn_stid;
02285 }
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303
02304
02305
02306
02307
02308
02309
02310
02311
02312
02313
02314
02315
02316
02317
02318
02319
02320
02321
02322
02323
02324
02325
02326
02327
02328
02329
02330
02331 extern void Scalar_Expand(WN* allocregion,
02332 WN* region,
02333 WN* wn_sym,
02334 const SYMBOL& symbol,
02335 WN** loops,
02336 const INT* order,
02337 INT dimcnt,
02338 BOOL invariant,
02339 BOOL finalize,
02340 BOOL has_lcd,
02341 WN* guard_tests[],
02342 BIT_VECTOR *used_loops,
02343 WN** tile_loops,
02344 INT* stripsizes,
02345 INT nstrips)
02346 {
02347 DU_MANAGER* du = Du_Mgr;
02348 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02349 REDUCTION_MANAGER* rm = red_manager;
02350
02351 FmtAssert(wn_sym == NULL || symbol == SYMBOL(wn_sym),
02352 ("Scalar_Expand: wn_sym and symbol not compatible"));
02353
02354 WN* outerloop = loops[0];
02355 if (nstrips > 0 && Get_Do_Loop_Info(tile_loops[0])->Depth
02356 < Get_Do_Loop_Info(loops[0])->Depth)
02357 outerloop = tile_loops[0];
02358 FmtAssert(WN_opcode(allocregion) == OPC_DO_LOOP &&
02359 WN_opcode(region) == OPC_DO_LOOP &&
02360 allocregion == outerloop,
02361 ("region and allocregion must be a do, or allocregion problem"));
02362
02363 WN* wn_outer_loop = NULL;
02364 STACK<WN*>* stk_se = NULL;
02365 if (wn_sym != NULL) {
02366 stk_se = Scalar_Equivalence_Class(wn_sym, du, &LNO_local_pool, outerloop);
02367 FmtAssert(stk_se != NULL, ("Scalar_Expand: Could not find equiv class"));
02368 } else {
02369 stk_se = CXX_NEW(STACK<WN*>(&LNO_local_pool), &LNO_local_pool);
02370 LWN_ITER* wniter = LWN_WALK_TreeIter(region);
02371 while (wniter) {
02372 WN* wn = wniter->wn;
02373 wniter = LWN_WALK_TreeNext(wniter);
02374 OPERATOR opr = WN_operator(wn);
02375 if ((opr == OPR_LDID || opr == OPR_STID) && symbol == SYMBOL(wn))
02376 stk_se->Push(wn);
02377 }
02378 }
02379
02380
02381
02382 FmtAssert(nstrips <= dimcnt,
02383 ("Can't have more scalar expanded tiles than dimensions."));
02384 unique_se_id++;
02385 if (LNO_Verbose) {
02386 WN* wn_outer = loops[0];
02387 fprintf(stdout, "Scalar expanding %s %s%sin loop %s at %d\n",
02388 symbol.Name(), has_lcd ? "(with lcd) " : "",
02389 finalize ? "(with finalization) " : "",
02390 WB_Whirl_Symbol(wn_outer), (INT) WN_linenum(wn_outer));
02391 fprintf(TFile, "Scalar expanding %s %s%sin loop %s at %d\n",
02392 symbol.Name(), has_lcd ? "(with lcd) " : "",
02393 finalize ? "(with finalization) " : "",
02394 WB_Whirl_Symbol(wn_outer), (INT) WN_linenum(wn_outer));
02395 }
02396
02397
02398
02399
02400 INT sz = 0;
02401 WN* bsz = NULL;
02402 WN** bounds = CXX_NEW_ARRAY(WN*, dimcnt, &LNO_local_pool);
02403 WN** indxs = CXX_NEW_ARRAY(WN*, dimcnt, &LNO_local_pool);
02404 SE_Indxs_and_Bounds(loops, dimcnt, symbol, invariant, used_loops,
02405 tile_loops, stripsizes, nstrips, bounds, indxs, has_lcd, &sz, &bsz);
02406 WN** findxs = NULL;
02407
02408
02409
02410 char se[3]="se";
02411 SYMBOL se_ptrsym;
02412 SE_Symbols_For_SE(&se_ptrsym, se, unique_se_id, symbol.Type);
02413 Update_MP_Local_Var(se_ptrsym.St(), 0, LWN_Get_Parent(region));
02414 WN* deallocregion = guard_tests != NULL && guard_tests[0] != NULL
02415 ? guard_tests[0] : allocregion;
02416 WN* newstdf = Get_Expansion_Space(se_ptrsym, bsz, se, unique_se_id,
02417 symbol.Type, allocregion, loops[0], deallocregion);
02418 Is_True(newstdf,("No memory was allocated for scalar expansion \n"));
02419 OPCODE ldop = OPCODE_make_op(OPR_LDID, Pointer_type, Pointer_type);
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433 MEM_POOL_Push(&LNO_local_pool);
02434 {
02435 DYN_ARRAY<WN_REFERENCE_INFO> deflist(&LNO_local_pool);
02436 DYN_ARRAY<WN_REFERENCE_INFO> uselist(&LNO_local_pool);
02437
02438 WN* alias_host = NULL;
02439 INT lexcount = 0;
02440
02441 if (finalize) {
02442 findxs = CXX_NEW_ARRAY(WN*, dimcnt, &LNO_local_pool);
02443 WN* wn = SE_Find_Stid(stk_se);
02444 WN* wn_ldid = SE_Final_Value(wn, guard_tests, loops, dimcnt);
02445 SE_Findxs(loops, dimcnt, invariant, used_loops, tile_loops, stripsizes,
02446 nstrips, findxs);
02447 WN* wn_array = SE_Array(se_ptrsym, sz, dimcnt, order, used_loops,
02448 bounds, findxs, has_lcd, -1);
02449 SE_Iload(wn_ldid, wn_array, newstdf, se_ptrsym, &alias_host);
02450 if (Block_Loop_Depth(wn_array) >= 0) {
02451 INT idx = uselist.Newidx();
02452 uselist[idx].wn = LWN_Get_Parent(wn_array);
02453 uselist[idx].lexcount = 0;
02454 if (!dg->Add_Vertex(LWN_Get_Parent(wn_array)))
02455 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(wn_array), dg);
02456 }
02457 DOLOOP_STACK do_stack(&LNO_local_pool);
02458 Build_Doloop_Stack(LWN_Get_Parent(wn_array), &do_stack);
02459 LNO_Build_Access(wn_array, &do_stack, &LNO_default_pool);
02460 }
02461
02462 if (has_lcd) {
02463 WN* wn_old_stid = SE_Find_Stid(stk_se);
02464 WN* wn_stid = SE_Identity(wn_old_stid);
02465 WN* wn_ldid = WN_kid0(wn_stid);
02466 if (rm != NULL)
02467 rm->Erase(wn_stid);
02468 INT i;
02469 for (i = 0; i < stk_se->Elements(); i++) {
02470 WN* wn = stk_se->Bottom_nth(i);
02471 if (WN_operator(wn) == OPR_LDID) {
02472 DEF_LIST *def_list = du->Ud_Get_Def(wn);
02473 if (def_list != NULL) {
02474 DEF_LIST_ITER iter(def_list);
02475 const DU_NODE* node = NULL;
02476 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
02477 WN* wn_local_stid = node->Wn();
02478 if (!Wn_Is_Inside(wn_local_stid, outerloop))
02479 du->Add_Def_Use(wn_local_stid, wn_ldid);
02480 }
02481 }
02482 }
02483 }
02484 LWN_Insert_Block_Before(LWN_Get_Parent(loops[0]), loops[0], wn_stid);
02485 WN* wn_init_array = SE_Wrap_Array(se_ptrsym, sz, -1, indxs, bounds,
02486 order, dimcnt, FALSE);
02487 SE_Istore(wn_stid, wn_init_array, newstdf, se_ptrsym, &alias_host);
02488 WN* wn_init_istore = LWN_Get_Parent(wn_init_array);
02489 DOLOOP_STACK do_init_stack(&LNO_local_pool);
02490 Build_Doloop_Stack(LWN_Get_Parent(wn_init_array), &do_init_stack);
02491 LNO_Build_Access(wn_init_array, &do_init_stack, &LNO_default_pool);
02492 if (Block_Loop_Depth(wn_init_array) >= 0) {
02493 INT idx = deflist.Newidx();
02494 deflist[idx].wn = wn_init_istore;
02495 deflist[idx].lexcount = 0;
02496 if (!dg->Add_Vertex(LWN_Get_Parent(wn_init_array)))
02497 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(wn_init_array), dg);
02498 }
02499 for (i = dimcnt - 2; i >= 0; i--) {
02500 WN* wn_stid = SE_Identity(wn_old_stid);
02501 LWN_Insert_Block_After(WN_do_body(loops[i]), NULL, wn_stid);
02502
02503 WN* wn_ldid = WN_kid0(wn_stid);
02504 WN* wn_ldarray = SE_Wrap_Array(se_ptrsym, sz, i, indxs,
02505 bounds, order, dimcnt, TRUE);
02506 SE_Iload(wn_ldid, wn_ldarray, newstdf, se_ptrsym, &alias_host);
02507 WN* wn_wrap_iload = LWN_Get_Parent(wn_ldarray);
02508 if (Block_Loop_Depth(wn_ldarray) >= 0) {
02509 INT idx = uselist.Newidx();
02510 uselist[idx].wn = wn_wrap_iload;
02511 uselist[idx].lexcount = 0;
02512 if (!dg->Add_Vertex(LWN_Get_Parent(wn_ldarray)))
02513 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(wn_ldarray), dg);
02514 }
02515 DOLOOP_STACK do_ldstack(&LNO_local_pool);
02516 Build_Doloop_Stack(LWN_Get_Parent(wn_ldarray), &do_ldstack);
02517 LNO_Build_Access(wn_ldarray, &do_ldstack, &LNO_default_pool);
02518
02519 WN* wn_starray = SE_Wrap_Array(se_ptrsym, sz, i, indxs,
02520 bounds, order, dimcnt, FALSE);
02521 SE_Istore(wn_stid, wn_starray, newstdf, se_ptrsym, &alias_host);
02522 WN* wn_wrap_istore = LWN_Get_Parent(wn_starray);
02523 if (Block_Loop_Depth(wn_starray) >= 0) {
02524 INT idx = deflist.Newidx();
02525 deflist[idx].wn = wn_wrap_istore;
02526 deflist[idx].lexcount = 0;
02527 if (!dg->Add_Vertex(LWN_Get_Parent(wn_starray)))
02528 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(wn_starray), dg);
02529 }
02530 DOLOOP_STACK do_ststack(&LNO_local_pool);
02531 Build_Doloop_Stack(LWN_Get_Parent(wn_starray), &do_ststack);
02532 LNO_Build_Access(wn_starray, &do_ststack, &LNO_default_pool);
02533 }
02534 }
02535
02536 INT i;
02537 for (i = 0; i < stk_se->Elements(); i++) {
02538 WN* wn = stk_se->Bottom_nth(i);
02539 OPERATOR opr = WN_operator(wn);
02540
02541
02542
02543
02544 WN* wn_array = NULL;
02545 if (opr == OPR_LDID) {
02546 DEF_LIST* def_list = du->Ud_Get_Def(wn);
02547 wn_array = SE_Array(se_ptrsym, sz, dimcnt, order, used_loops,
02548 bounds, indxs, has_lcd, def_list->Loop_stmt() == NULL
02549 ? -1 : Do_Depth(wn) - Do_Loop_Depth(outerloop));
02550 SE_Iload(wn, wn_array, newstdf, se_ptrsym, &alias_host);
02551 INT idx = uselist.Newidx();
02552 uselist[idx].wn = LWN_Get_Parent(wn_array);
02553 uselist[idx].lexcount = 0;
02554 } else {
02555 wn_array = SE_Array(se_ptrsym, sz, dimcnt, order, used_loops,
02556 bounds, indxs, has_lcd, -1);
02557 SE_Istore(wn, wn_array, newstdf, se_ptrsym, &alias_host);
02558 INT idx = deflist.Newidx();
02559 deflist[idx].wn = LWN_Get_Parent(wn_array);
02560 deflist[idx].lexcount = 0;
02561 }
02562
02563
02564
02565
02566 DOLOOP_STACK do_stack(&LNO_local_pool);
02567 Build_Doloop_Stack(LWN_Get_Parent(wn_array), &do_stack);
02568 LNO_Build_Access(wn_array, &do_stack, &LNO_default_pool);
02569 if (!dg->Add_Vertex(LWN_Get_Parent(wn_array)))
02570 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(wn_array), dg);
02571 }
02572
02573
02574 SE_Assign_Lexcounts(deflist, uselist, newstdf, deallocregion,
02575 se_ptrsym);
02576 SE_Fix_Dependence(deflist, uselist);
02577
02578
02579 for (i = 0; i < dimcnt; i++) {
02580 if (!used_loops || used_loops->Test(i)) {
02581 LWN_Delete_Tree(bounds[i]);
02582 LWN_Delete_Tree(indxs[i]);
02583 if (finalize)
02584 LWN_Delete_Tree(findxs[i]);
02585 }
02586 }
02587 CXX_DELETE_ARRAY(bounds, &LNO_local_pool);
02588 CXX_DELETE_ARRAY(indxs, &LNO_local_pool);
02589 if (finalize)
02590 CXX_DELETE_ARRAY(findxs, &LNO_local_pool);
02591 }
02592
02593 MEM_POOL_Pop(&LNO_local_pool);
02594 }
02595
02596
02597
02598
02599
02600
02601
02602
02603
02604
02605 static void SE_Permutation_To_Order(const IMAT* u,
02606 const SNL_TILE_INFO* t,
02607 INT* order,
02608 INT nloops, INT subloops)
02609 {
02610 if (u == NULL) {
02611 for (INT i = 0; i < nloops; i++)
02612 order[i] = i;
02613 }
02614 else {
02615
02616
02617
02618 INT seen_used[SNL_MAX_LOOPS];
02619 INT i;
02620 for (i = 0; i < nloops; i++)
02621 seen_used[i] = 0;
02622
02623 for (i = 0; i < nloops; i++) {
02624 INT j;
02625 for (j = 0; j < nloops; j++) {
02626 INT first_non_zero = -1;
02627 if ((*u)(i,j)) {
02628 if (seen_used[j] != 2) {
02629 seen_used[j] = 2;
02630 order[i] = j;
02631 break;
02632 }
02633 seen_used[j] = 1;
02634 }
02635 }
02636 if (j == nloops) {
02637
02638
02639 INT j;
02640 for (j = 0; j < nloops; j++) {
02641 if (seen_used[j] == 1) {
02642 seen_used[j] = 2;
02643 order[i] = j;
02644 break;
02645 }
02646 }
02647 FmtAssert(j != nloops, ("SE_Permutation_To_Order: impossible"));
02648 }
02649 }
02650 }
02651
02652 Is_True(Is_Permutation_Vector(order, nloops),
02653 ("After applying permutation matrix, not a permutation vector."));
02654
02655 if (t != NULL && t->Strips() > 0) {
02656 FmtAssert(t->Rectangular(), ("TODO OK: generate conservative order"));
02657 INT missing_tiles = nloops - t->Nloops();
02658
02659
02660 INT mxiloop = 0;
02661 INT i;
02662 for (i = 0; i < t->Strips(); i++)
02663 if (mxiloop < t->Iloop(i))
02664 mxiloop = t->Iloop(i);
02665 INT mxptr = missing_tiles;
02666 for (i = 1+missing_tiles; i <= mxiloop+missing_tiles; i++)
02667 if (order[i] > order[mxptr])
02668 mxptr = i;
02669 if (mxptr != missing_tiles) {
02670 INT tmp = order[mxptr];
02671 order[mxptr] = order[missing_tiles];
02672 order[missing_tiles] = tmp;
02673 }
02674 }
02675
02676 Is_True(Is_Permutation_Vector(order, nloops),
02677 ("After cache tiling, not a permutation vector."));
02678
02679
02680
02681
02682 INT neworder[SNL_MAX_LOOPS];
02683 INT k;
02684 for (k = 0; k < subloops; k++)
02685 neworder[k] = 0;
02686 for (k = 0; k < subloops; k++) {
02687 INT minindex = 0;
02688 for (INT kk = 1; kk < subloops; kk++) {
02689 if (order[minindex] > order[kk])
02690 minindex = kk;
02691 }
02692 neworder[minindex] = k;
02693 order[minindex] = nloops;
02694 }
02695
02696 for (k = 0; k < subloops; k++)
02697 order[k] = neworder[k];
02698
02699 Is_True(Is_Permutation_Vector(order, subloops),
02700 ("After subloop selection, not a permutation vector."));
02701 }
02702
02703
02704
02705
02706
02707
02708
02709
02710
02711
02712
02713 extern void SNL_GEN_Scalar_Expand(WN* wn_outer,
02714 IMAT* unimodular,
02715 SNL_TILE_INFO* ti,
02716 INT nloops,
02717 SX_PLIST* plist,
02718 INT split_depth,
02719 SD_PLIST* sd_plist,
02720 BOOL ignore_illegal,
02721 BOOL full_dist)
02722 {
02723 if (nloops == 0)
02724 return;
02725
02726 DU_MANAGER* du = Du_Mgr;
02727 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02728 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
02729 INT inner_depth = Do_Loop_Depth(wn_inner);
02730 INT first_in_stack = inner_depth - nloops + 1;
02731 DOLOOP_STACK stack(&LNO_local_pool);
02732 Build_Doloop_Stack(wn_inner, &stack);
02733
02734 INT* permutation = Unimodular_To_Permutation(unimodular);
02735
02736 INT outer_depth = Do_Loop_Depth(wn_outer);
02737 INT guard_depth = SE_Guard_Depth(wn_outer, permutation, nloops, plist,
02738 split_depth, sd_plist, ignore_illegal, full_dist);
02739 INT guard_loops = guard_depth - outer_depth + 1;
02740 WN** guard_tests = guard_depth == -1
02741 ? NULL : CXX_NEW_ARRAY(WN*, guard_loops, &LNO_local_pool);
02742 SE_Guard_Tests(wn_outer, nloops, guard_tests, guard_depth);
02743
02744 SX_PITER ii(plist);
02745 SX_PNODE* nnext = NULL;
02746 INT outer = Do_Loop_Depth(wn_outer);
02747
02748 INT* tpermutation = !full_dist ? permutation : NULL;
02749 INT has_lcd = FALSE;
02750 for (SX_PNODE* n = ii.First(); n; n = nnext) {
02751 nnext = ii.Next();
02752
02753 SNL_DEBUG1(3, "SNL_General_Distribution() consider expanding %s",
02754 n->Symbol().Name());
02755 SX_PNODE::STATUS status;
02756 status = n->Transformable(outer, tpermutation, nloops);
02757 if (split_depth != -1 && status != SX_PNODE::ILLEGAL) {
02758 SD_PNODE* sdn = sd_plist->Find(n->Symbol());
02759 INT innermost_depth = sdn->Innermost_Depth();
02760 status = n->Splittable(split_depth, innermost_depth);
02761 }
02762 if (status == SX_PNODE::SE_NOT_REQD)
02763 continue;
02764 if (ignore_illegal && status == SX_PNODE::ILLEGAL)
02765 continue;
02766 FmtAssert(status == SX_PNODE::SE_REQD,
02767 ("Bug: can't expand scalar %s", n->Symbol().Name()));
02768
02769 INT order[LNO_MAX_DO_LOOP_DEPTH];
02770 WN* loops[LNO_MAX_DO_LOOP_DEPTH];
02771
02772 for (INT lp = first_in_stack; lp <= Do_Loop_Depth(wn_inner); lp++)
02773 loops[lp-first_in_stack] = stack.Bottom_nth(lp);
02774 INT dimcnt = n->Expansion_Depth()+1-first_in_stack;
02775 SE_Permutation_To_Order(unimodular, ti, order, nloops, dimcnt);
02776
02777 Scalar_Expand(stack.Bottom_nth(first_in_stack),
02778 stack.Bottom_nth(n->Expansion_Depth()), n->Wn_Symbol(), n->Symbol(),
02779 loops, order, dimcnt, FALSE, n->Finalize(), n->Lcd_Depth() != -1,
02780 guard_tests);
02781 plist->Remove(n);
02782 }
02783 }
02784
02785
02786
02787
02788
02789
02790
02791
02792
02793
02794
02795 extern void SNL_GEN_Scalar_Expand(WN* wn_outer,
02796 INT permutation[],
02797 INT nloops,
02798 SX_PLIST* plist,
02799 INT split_depth,
02800 SD_PLIST* sd_plist,
02801 BOOL ignore_illegal,
02802 BOOL full_dist)
02803 {
02804 if (nloops == 0)
02805 return;
02806
02807 SNL_TILE_INFO* ti = NULL;
02808 IMAT* unimodular = Permutation_To_Unimodular(permutation, nloops);
02809 SNL_GEN_Scalar_Expand(wn_outer, unimodular, ti, nloops, plist,
02810 split_depth, sd_plist, ignore_illegal, full_dist);
02811 }
02812
02813
02814
02815
02816
02817
02818
02819
02820
02821
02822 extern INT SE_Guard_Depth(WN* wn_outer,
02823 INT permutation[],
02824 INT nloops,
02825 SX_PLIST* plist,
02826 INT split_depth,
02827 SD_PLIST* sd_plist,
02828 BOOL ignore_illegal,
02829 BOOL full_dist)
02830 {
02831 SX_PITER ii(plist);
02832 SX_PNODE* sxnn = NULL;
02833 INT outer_depth = Do_Loop_Depth(wn_outer);
02834 INT guard_depth = -1;
02835 INT* tpermutation = !full_dist ? permutation : NULL;
02836 for (SX_PNODE* sxn = ii.First(); !ii.Is_Empty(); sxn = sxnn) {
02837 sxnn = ii.Next();
02838 SNL_DEBUG1(3, "SE_Guard_Depth() consider expanding %s\n",
02839 sxn->Symbol().Name());
02840 SX_PNODE::STATUS status;
02841 status = sxn->Transformable(outer_depth, tpermutation, nloops);
02842 if (split_depth != -1 && status != SX_PNODE::ILLEGAL) {
02843 SD_PNODE* sdn = sd_plist->Find(sxn->Symbol());
02844 INT innermost_depth = sdn->Innermost_Depth();
02845 status = sxn->Splittable(split_depth, innermost_depth);
02846 }
02847 if (status == SX_PNODE::SE_NOT_REQD)
02848 continue;
02849 if (ignore_illegal && status == SX_PNODE::ILLEGAL)
02850 continue;
02851 FmtAssert(status == SX_PNODE::SE_REQD,
02852 (": can't expand scalar %s", sxn->Symbol().Name()));
02853 if (sxn->Finalize() && sxn->Expansion_Depth() > guard_depth)
02854 guard_depth = sxn->Expansion_Depth();
02855 }
02856 return guard_depth;
02857 }
02858
02859
02860
02861
02862
02863
02864
02865
02866
02867 extern void SE_Guard_Tests(WN* wn_outer,
02868 INT nloops,
02869 WN* guard_tests[],
02870 INT guard_depth)
02871 {
02872 if (guard_depth == -1)
02873 return;
02874 WN* wn_this_if = NULL;
02875 INT outer_depth = Do_Loop_Depth(wn_outer);
02876 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
02877 DOLOOP_STACK stack(&LNO_local_pool);
02878 Build_Doloop_Stack(wn_inner, &stack);
02879 COND_BOUNDS_INFO *cond_info =
02880 CXX_NEW(COND_BOUNDS_INFO(&LNO_local_pool), &LNO_local_pool);
02881 cond_info->Collect_Outer_Info(LWN_Get_Parent(wn_outer));
02882 INT i;
02883 for (i = outer_depth; i <= guard_depth; i++) {
02884 WN* wn_loop = stack.Bottom_nth(i);
02885 WN* wn_test = LWN_Copy_Tree(WN_end(wn_loop), TRUE, LNO_Info_Map);
02886 LWN_Copy_Def_Use(WN_end(wn_loop), wn_test, Du_Mgr);
02887 Replace_Ldid_With_Exp_Copy(SYMBOL(WN_start(wn_loop)), wn_test,
02888 WN_kid0(WN_start(wn_loop)), Du_Mgr);
02889 if (wn_this_if != NULL
02890 && Redundant_Condition(cond_info, wn_test, wn_this_if)) {
02891 guard_tests[i - outer_depth] = wn_this_if;
02892 LWN_Delete_Tree(wn_test);
02893 continue;
02894 }
02895 WN* wn_new_then = WN_CreateBlock();
02896 WN* wn_new_else = WN_CreateBlock();
02897 WN* wn_new_if = LWN_CreateIf(wn_test, wn_new_then, wn_new_else);
02898 IF_INFO* ii = CXX_NEW(IF_INFO(&LNO_default_pool, FALSE, FALSE),
02899 &LNO_default_pool);
02900 WN_MAP_Set(LNO_Info_Map, wn_new_if, (void *) ii);
02901 if (wn_this_if == NULL) {
02902 LWN_Insert_Block_After(LWN_Get_Parent(wn_outer), wn_outer, wn_new_if);
02903 } else {
02904 WN* wn_block = WN_then(wn_this_if);
02905 LWN_Insert_Block_After(wn_block, NULL, wn_new_if);
02906 }
02907 DOLOOP_STACK if_stack(&LNO_local_pool);
02908 Build_Doloop_Stack(wn_new_if, &if_stack);
02909 LNO_Build_If_Access(wn_new_if, &if_stack);
02910 guard_tests[i - outer_depth] = wn_new_if;
02911 wn_this_if = wn_new_if;
02912 }
02913 for (i = guard_depth + 1; i < nloops; i++)
02914 guard_tests[i] = NULL;
02915 }
02916
02917
02918
02919
02920
02921
02922
02923
02924
02925
02926
02927 extern void SNL_INV_Scalar_Expand(WN* wn_outer,
02928 INT permutation[],
02929 INT nloops,
02930 SX_PLIST* plist,
02931 INT split_depth,
02932 SD_PLIST* sd_plist,
02933 BOOL ignore_illegal,
02934 BOOL full_dist)
02935 {
02936 if (nloops == 0)
02937 return;
02938
02939 INT outer_depth = Do_Loop_Depth(wn_outer);
02940 INT guard_depth = SE_Guard_Depth(wn_outer, permutation, nloops, plist,
02941 split_depth, sd_plist, ignore_illegal, full_dist);
02942 INT guard_loops = guard_depth - outer_depth + 1;
02943 WN** guard_tests = guard_depth == -1
02944 ? NULL : CXX_NEW_ARRAY(WN*, guard_loops, &LNO_local_pool);
02945 SE_Guard_Tests(wn_outer, nloops, guard_tests, guard_depth);
02946
02947 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
02948 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
02949 DOLOOP_STACK stack(&LNO_local_pool);
02950 Build_Doloop_Stack(wn_inner, &stack);
02951 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02952 DU_MANAGER* du = Du_Mgr;
02953 SX_PITER ii(plist);
02954 SX_PNODE* nnext = NULL;
02955 INT outer = Do_Loop_Depth(wn_outer);
02956
02957 BOOL has_lcd = FALSE;
02958 INT* tpermutation = !full_dist ? permutation : NULL;
02959 for (SX_PNODE* n = ii.First(); !ii.Is_Empty(); n = nnext) {
02960 nnext = ii.Next();
02961
02962 SNL_DEBUG1(3, "SNL_INV_Scalar_Expand() consider expanding %s\n",
02963 n->Symbol().Name());
02964
02965 SX_PNODE::STATUS status;
02966 status = n->Transformable(outer, tpermutation, nloops);
02967 if (split_depth != -1 && status != SX_PNODE::ILLEGAL) {
02968 SD_PNODE* sdn = sd_plist->Find(n->Symbol());
02969 INT innermost_depth = sdn->Innermost_Depth();
02970 status = n->Splittable(split_depth, innermost_depth);
02971 }
02972 if (status == SX_PNODE::SE_NOT_REQD)
02973 continue;
02974 if (ignore_illegal && status == SX_PNODE::ILLEGAL)
02975 continue;
02976 FmtAssert(status == SX_PNODE::SE_REQD,
02977 ("Bug: can't expand scalar %s", n->Symbol().Name()));
02978
02979
02980
02981
02982
02983 WN* loops[SNL_MAX_LOOPS];
02984 INT order[SNL_MAX_LOOPS];
02985
02986 INT lp;
02987 for (lp = 0; lp <= n->Expansion_Depth()-first_in_stack; lp++) {
02988 loops[lp] = stack.Bottom_nth(first_in_stack+lp);
02989 order[lp] = lp;
02990 }
02991
02992 if (permutation) {
02993 for (INT i = 0; i < lp; i++) {
02994 INT jsmall = -1;
02995 for (INT j = 0; j < lp; j++) {
02996 BOOL ok = TRUE;
02997 for (INT ii = 0; ii < i; ii++)
02998 if (order[ii] == j)
02999 ok = FALSE;
03000 if (ok && (jsmall == -1 || permutation[jsmall] > permutation[j]))
03001 jsmall = j;
03002 }
03003 order[i] = jsmall;
03004 }
03005 }
03006
03007 Scalar_Expand(stack.Bottom_nth(first_in_stack),
03008 stack.Bottom_nth(n->Expansion_Depth()), n->Wn_Symbol(), n->Symbol(),
03009 loops, order, n->Expansion_Depth()+1-first_in_stack, TRUE,
03010 n->Finalize(), n->Lcd_Depth() != -1, guard_tests);
03011 plist->Remove(n);
03012 }
03013 }
03014
03015
03016
03017
03018
03019
03020
03021
03022
03023
03024
03025 extern void SNL_Scalar_Expand(WN* wn_outer,
03026 WN* wn_inner,
03027 INT permutation[],
03028 INT nloops,
03029 SX_INFO* sx_info,
03030 BOOL invariant,
03031 BOOL ignore_illegal,
03032 BOOL full_dist)
03033 {
03034 INT outer_depth = Do_Loop_Depth(wn_outer);
03035 INT i;
03036 for (i = 0; i < nloops; i++)
03037 if (permutation[i] != i)
03038 break;
03039 if (i == nloops)
03040 return;
03041 DOLOOP_STACK stack(&LNO_local_pool);
03042 Build_Doloop_Stack(wn_inner, &stack);
03043 WN* wn_se_outer = stack.Bottom_nth(outer_depth + i);
03044 INT se_outer_depth = Do_Loop_Depth(wn_se_outer);
03045 INT nnloops = nloops - (se_outer_depth - outer_depth);
03046 INT* ppermutation = CXX_NEW_ARRAY(INT, nnloops, &LNO_local_pool);
03047 for (i = 0; i < nnloops; i++)
03048 ppermutation[i] = permutation[nloops - nnloops + i] - (nloops - nnloops);
03049 if (invariant)
03050 SNL_INV_Scalar_Expand(wn_se_outer, ppermutation, nnloops,
03051 &sx_info->Plist, -1, NULL, ignore_illegal, full_dist);
03052 else
03053 SNL_GEN_Scalar_Expand(wn_se_outer, ppermutation, nnloops,
03054 &sx_info->Plist, -1, NULL, ignore_illegal, full_dist);
03055 }
03056
03057
03058
03059
03060
03061
03062
03063
03064
03065
03066 extern INT Split_Sx_Depth(WN* wn_outer,
03067 INT nloops,
03068 SX_PLIST* plist,
03069 INT split_depth)
03070 {
03071 INT outer_depth = Do_Loop_Depth(wn_outer);
03072 if (split_depth <= outer_depth)
03073 return -1;
03074 INT split_sx_depth = outer_depth + nloops;
03075 if (plist == NULL)
03076 return split_sx_depth;
03077 SX_CONST_PITER ii(plist);
03078 for (const SX_PNODE* n = ii.First(); !ii.Is_Empty(); n = ii.Next()) {
03079 if (n->Outer_Se_Reqd() == n->Outer_Se_Not_Reqd())
03080 continue;
03081 if (n->Outer_Se_Reqd() >= split_depth)
03082 continue;
03083 if (n->Outer_Se_Reqd() < split_sx_depth)
03084 split_sx_depth = n->Outer_Se_Reqd();
03085 }
03086 return split_sx_depth;
03087 }
03088
03089
03090
03091
03092
03093
03094
03095
03096
03097
03098
03099
03100
03101
03102 extern void SNL_Scalar_Expand_For_Splitting(WN* wn_outer,
03103 WN* wn_inner,
03104 INT split_depth,
03105 SX_PLIST* plist,
03106 SD_PLIST* sd_plist,
03107 BOOL invariant,
03108 BOOL ignore_illegal,
03109 BOOL full_dist)
03110 {
03111 if (split_depth <= 0)
03112 return;
03113 INT outer_depth = Do_Loop_Depth(wn_outer);
03114 INT inner_depth = Do_Loop_Depth(wn_inner);
03115 INT nloops = inner_depth - outer_depth + 1;
03116 DOLOOP_STACK stack(&LNO_local_pool);
03117 Build_Doloop_Stack(wn_inner, &stack);
03118 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
03119 INT i;
03120 for (i = 0; i < nloops; i++)
03121 permutation[i] = i;
03122 INT souter_depth = Split_Sx_Depth(wn_outer, nloops, plist, split_depth);
03123 if (souter_depth == outer_depth + nloops)
03124 return;
03125 INT snloops = nloops + outer_depth - souter_depth;
03126 INT* spermutation = CXX_NEW_ARRAY(INT, snloops, &LNO_local_pool);
03127 for (i = 0; i < snloops; i++)
03128 spermutation[i] = permutation[i + nloops - snloops] - nloops + snloops;
03129 Is_True(Is_Permutation_Vector(spermutation, snloops),
03130 ("Bad permutation vector in SNL_Split_Scalar_Expand"));
03131 WN* wn_souter = stack.Bottom_nth(outer_depth + nloops - snloops);
03132 if (invariant)
03133 SNL_INV_Scalar_Expand(wn_souter, spermutation, snloops, plist,
03134 split_depth, sd_plist, ignore_illegal, full_dist);
03135 else
03136 SNL_GEN_Scalar_Expand(wn_souter, spermutation, snloops, plist,
03137 split_depth, sd_plist, ignore_illegal, full_dist);
03138 }
03139