00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00050 #define __STDC_LIMIT_MACROS
00051 #include <stdint.h>
00052 #ifdef USE_PCH
00053 #include "lno_pch.h"
00054 #endif // USE_PCH
00055 #pragma hdrstop
00056
00057 #define snl_CXX "snl.cxx"
00058 const static char *rcs_id = snl_CXX "$Revision: 1.9 $";
00059
00060 #include <sys/types.h>
00061 #include <alloca.h>
00062 #include "snl.h"
00063 #include "snl_xbounds.h"
00064 #include "config_targ.h"
00065 #include "lwn_util.h"
00066 #include "lnoutils.h"
00067 #include "cxx_graph.h"
00068 #include "opt_du.h"
00069 #include "opt_alias_interface.h"
00070 #include "wintrinsic.h"
00071 #include "scalar_expand.h"
00072 #include "strtab.h"
00073 #include "dvector.h"
00074 #include "lnopt_main.h"
00075 #include "fb_whirl.h"
00076 #include "move.h"
00077 #include "small_trips.h"
00078 #include "sxlimit.h"
00079 #include "ir_reader.h"
00080 #include "sxlist.h"
00081 #include "debug.h"
00082 #include "permute.h"
00083 #include "tile.h"
00084 #include "prompf.h"
00085 #include "anl_driver.h"
00086 #include "cond.h"
00087 #include "wind_down.h"
00088 #include "ff_utils.h"
00089 #include "ipa_lno_util.h"
00090
00091
00092
00093
00094
00095 struct SNL_NEWINFO {
00096 VINDEX16 Vold;
00097 INT Lex;
00098 SNL_NEWINFO(VINDEX16 v, INT l) : Vold(v), Lex(l) {}
00099 SNL_NEWINFO(INT l) : Vold(0), Lex(l) {FmtAssert(l == 0, ("Bad newinfo"));}
00100 SNL_NEWINFO() : Vold(0), Lex(0) {}
00101 };
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 static BOOL Build_New_To_Old(WN* orig,
00116 WN* copy,
00117 ARRAY_DIRECTED_GRAPH16* dg,
00118 HASH_TABLE<VINDEX16, SNL_NEWINFO>* new2old,
00119 INT& lex)
00120 {
00121 FmtAssert(orig != NULL && copy != NULL, ("Should catch at higher level."));
00122 FmtAssert(WN_opcode(orig) != OPCODE_UNKNOWN &&
00123 WN_opcode(copy) != OPCODE_UNKNOWN,
00124 ("Should catch at higher level."));
00125 if (OPCODE_is_load(WN_opcode(orig)) || OPCODE_is_store(WN_opcode(orig)) ||
00126 OPCODE_is_call(WN_opcode(orig))) {
00127 VINDEX16 origv = dg->Get_Vertex(orig);
00128 if (origv) {
00129 VINDEX16 newv = dg->Add_Vertex(copy);
00130 if (!newv)
00131 return 0;
00132 new2old->Enter(newv, SNL_NEWINFO(origv,++lex));
00133 }
00134 }
00135
00136 if (WN_opcode(orig) == OPC_BLOCK) {
00137 WN *orig_kid = WN_first(orig);
00138 WN *copy_kid = WN_first(copy);
00139 while (orig_kid != NULL && WN_opcode(orig_kid) == OPC_DO_LOOP
00140 && (copy == NULL || WN_opcode(orig_kid) != WN_opcode(copy)))
00141 orig_kid = WN_next(orig_kid);
00142 while (orig_kid != NULL) {
00143 if (!Build_New_To_Old(orig_kid, copy_kid, dg, new2old, lex))
00144 return 0;
00145 orig_kid = WN_next(orig_kid);
00146 copy_kid = WN_next(copy_kid);
00147 while (orig_kid != NULL && WN_opcode(orig_kid) == OPC_DO_LOOP
00148 && (copy == NULL || WN_opcode(orig_kid) != WN_opcode(copy)))
00149 orig_kid = WN_next(orig_kid);
00150 }
00151 } else {
00152 for (INT kidno=0; kidno<WN_kid_count(orig); kidno++) {
00153 if (!Build_New_To_Old(WN_kid(orig,kidno), WN_kid(copy,kidno), dg,
00154 new2old, lex))
00155 return 0;
00156 }
00157 }
00158 return 1;
00159 }
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169 extern void SNL_Peel_Iteration(WN* wn,
00170 BOOL first_iter)
00171 {
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00197 DU_MANAGER* du = Du_Mgr;
00198 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
00199
00200
00201 WN* wn_total_cond = LWN_Make_Icon(Boolean_type, 1);
00202 WN* wn_if = LWN_CreateIf(wn_total_cond, WN_CreateBlock(), WN_CreateBlock());
00203 if (first_iter)
00204 LWN_Insert_Block_Before(LWN_Get_Parent(wn), wn, wn_if);
00205 else
00206 LWN_Insert_Block_After(LWN_Get_Parent(wn), wn, wn_if);
00207 WN_Set_Linenum(wn_if, WN_Get_Linenum(wn));
00208 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn, 2);
00209 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(wn_inner);
00210 IF_INFO *ii =
00211 CXX_NEW(IF_INFO(&LNO_default_pool, FALSE, FALSE), &LNO_default_pool);
00212 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii);
00213 DOLOOP_STACK *if_stack = CXX_NEW(DOLOOP_STACK(&LNO_default_pool),
00214 &LNO_default_pool);
00215 Build_Doloop_Stack(wn_if, if_stack);
00216 LNO_Build_If_Access(wn_if, if_stack);
00217 COND_BOUNDS_INFO *info =
00218 CXX_NEW(COND_BOUNDS_INFO(&LNO_local_pool), &LNO_local_pool);
00219 info->Collect_Outer_Info(LWN_Get_Parent(wn_if));
00220 WN* wn_test = LWN_Copy_Tree(WN_end(wn), TRUE, LNO_Info_Map);
00221 LWN_Copy_Def_Use(WN_end(wn), wn_test, Du_Mgr);
00222 Replace_Ldid_With_Exp_Copy(SYMBOL(WN_start(wn)), wn_test,
00223 WN_kid0(WN_start(wn)), Du_Mgr);
00224 if (Redundant_Condition(info, wn_test, wn_if)) {
00225 LWN_Delete_Tree(wn_if);
00226 wn_if = NULL;
00227 } else {
00228 Replace_Wnexp_With_Exp_Copy(WN_if_test(wn_if), wn_test, du);
00229 }
00230 LWN_Delete_Tree(wn_test);
00231
00232 WN* newblock = LWN_Copy_Tree(WN_do_body(wn), TRUE, LNO_Info_Map);
00233 LWN_Set_Frequency_Tree(newblock,1);
00234 LWN_Adjust_Frequency_Tree(WN_do_body(wn), -1);
00235 LWN_Adjust_Frequency(WN_step(wn), -1);
00236
00237 WN* tmp_unrolls[2];
00238 tmp_unrolls[0] = WN_do_body(wn);
00239 tmp_unrolls[1] = newblock;
00240
00241 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00242 STACK<WN*> st_old(&PROMPF_pool);
00243 STACK<WN*> st_new(&PROMPF_pool);
00244 Prompf_Assign_Ids(WN_do_body(wn), newblock, &st_old, &st_new, FALSE);
00245 INT nloops = st_old.Elements();
00246 if (nloops >= 0) {
00247 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00248 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00249 for (INT i = 0; i < nloops; i++) {
00250 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
00251 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
00252 }
00253 if (first_iter)
00254 Prompf_Info->Pre_Peel(old_ids, new_ids, nloops);
00255 else
00256 Prompf_Info->Post_Peel(old_ids, new_ids, nloops);
00257 CXX_DELETE_ARRAY(old_ids, &LNO_local_pool);
00258 CXX_DELETE_ARRAY(new_ids, &LNO_local_pool);
00259 }
00260 }
00261
00262 if (red_manager != NULL) red_manager->Unroll_Update(tmp_unrolls, 2);
00263
00264 Unrolled_DU_Update(tmp_unrolls, 2, dli->Depth, TRUE, FALSE);
00265 WN *w;
00266 for (w = WN_first(newblock); w; w = WN_next(w)) {
00267 if (WN_opcode(w) == OPC_DO_LOOP)
00268 LWN_Delete_Tree(LWN_Extract_From_Block(w));
00269 }
00270
00271
00272
00273
00274 if (first_iter) {
00275 Replace_Ldid_With_Exp_Copy(WN_index(wn), newblock,
00276 WN_kid0(WN_start(wn)), du);
00277 Increase_By(WN_kid0(WN_start(wn)), 1, WN_start(wn), 0);
00278 Is_True(dli->LB->Num_Vec() == 1, ("Bug in Peel_Iteration()"));
00279 dli->LB->Dim(0)->Const_Offset++;
00280 }
00281 else {
00282 Upper_Bound_Standardize(WN_end(wn));
00283 Replace_Ldid_With_Exp_Copy(WN_index(wn), newblock,
00284 SNL_UBexp(WN_end(wn)), du);
00285 Increase_By(SNL_UBexp(WN_end(wn)), -1, WN_end(wn));
00286 Is_True(dli->UB->Num_Vec() == 1, ("Bug in Peel_Iteration()"));
00287 dli->UB->Dim(0)->Const_Offset--;
00288 }
00289 if (dli->Est_Num_Iterations > 0)
00290 dli->Est_Num_Iterations--;
00291
00292
00293
00294
00295
00296 if (Get_Do_Loop_Info(wn)->Depth == 0) {
00297 WN* first = WN_first(newblock);
00298 WN* last = WN_last(newblock);
00299 if (wn_if != NULL) {
00300 LWN_Insert_Block_After(WN_then(wn_if), NULL, newblock);
00301 } else {
00302 if (first_iter)
00303 LWN_Insert_Block_Before(LWN_Get_Parent(wn), wn, newblock);
00304 else
00305 LWN_Insert_Block_After(LWN_Get_Parent(wn), wn, newblock);
00306 }
00307 WN* wn_next = NULL;
00308 for (WN* wn_temp = first; wn_temp != NULL; wn_temp = wn_next) {
00309 wn_next = WN_next(wn_temp);
00310 Remove_Redundant_Stids(wn_temp, du);
00311 if (wn_temp == last)
00312 break;
00313 }
00314 return;
00315 }
00316
00317 MEM_POOL_Push(&SNL_local_pool);
00318
00319 BOOL disaster = FALSE;
00320 INT lex = 0;
00321
00322 typedef HASH_TABLE<VINDEX16, SNL_NEWINFO> VSNL_HASH_TABLE;
00323 VSNL_HASH_TABLE
00324 *new2old = CXX_NEW(VSNL_HASH_TABLE(43,&SNL_local_pool),&SNL_local_pool);
00325 HASH_TABLE_ITER<VINDEX16,SNL_NEWINFO> iter(new2old);
00326 WN* first = WN_first(newblock);
00327 WN* last = WN_last(newblock);
00328 if (!Build_New_To_Old(WN_do_body(wn), newblock, dg, new2old, lex)) {
00329 disaster = TRUE;
00330 goto disaster;
00331 }
00332
00333 {
00334 if (wn_if != NULL) {
00335 LWN_Insert_Block_After(WN_then(wn_if), NULL, newblock);
00336 } else {
00337 if (first_iter)
00338 LWN_Insert_Block_Before(LWN_Get_Parent(wn), wn, newblock);
00339 else
00340 LWN_Insert_Block_After(LWN_Get_Parent(wn), wn, newblock);
00341 }
00342
00343 for (w = first; w; w = WN_next(w)) {
00344 DOLOOP_STACK dostack(&LNO_local_pool);
00345 Build_Doloop_Stack(LWN_Get_Parent(wn), &dostack);
00346 LNO_Build_Access(w, &dostack, &LNO_default_pool);
00347 if (w == last)
00348 break;
00349 }
00350 FmtAssert(w == last, ("Bug in Peel_iteration()"));
00351
00352
00353
00354
00355 INT count = 0;
00356
00357 VINDEX16 vnew;
00358 SNL_NEWINFO newinfo;
00359 while (iter.Step(&vnew, &newinfo)) {
00360 HASH_TABLE<WN*,INT> old_nodes(MIN(dg->Get_Edge_Count(), 512),
00361 &SNL_local_pool);
00362 INT lex = newinfo.Lex;
00363 VINDEX16 vold = newinfo.Vold;
00364 Is_True(vold > 0, ("Missing vold vertex"));
00365 WN* wnnew = dg->Get_Wn(vnew);
00366 DOLOOP_STACK stack(&SNL_local_pool);
00367 Build_Doloop_Stack(wnnew, &stack);
00368
00369
00370
00371 EINDEX16 e;
00372 for (e = dg->Get_Out_Edge(vold); e; e = dg->Get_Next_Out_Edge(e)) {
00373 VINDEX16 vsink = dg->Get_Sink(e);
00374 Is_True(vsink, ("Sink has null vertex"));
00375
00376 if (new2old->Find(vsink).Vold != 0)
00377 continue;
00378
00379 WN* own = dg->Get_Wn(vsink);
00380 old_nodes.Enter(own, 1);
00381 DOLOOP_STACK ostack(&SNL_local_pool);
00382 Build_Doloop_Stack(own, &ostack);
00383
00384 count++;
00385 BOOL ok = dg->Add_Edge( wnnew, &stack, own, &ostack, first_iter);
00386 if (!ok) {
00387 LNO_Erase_Dg_From_Here_In(wnnew, dg);
00388 LNO_Erase_Dg_From_Here_In(own, dg);
00389 disaster = TRUE;
00390 goto disaster;
00391 }
00392 }
00393
00394
00395
00396 for (e = dg->Get_In_Edge(vold); e; e = dg->Get_Next_In_Edge(e)) {
00397 VINDEX16 vsource = dg->Get_Source(e);
00398 Is_True(vsource, ("Source has null vertex"));
00399
00400 if (new2old->Find(vsource).Vold != 0)
00401 continue;
00402
00403 WN* own = dg->Get_Wn(vsource);
00404 if (old_nodes.Find(own) != 0)
00405 continue;
00406 DOLOOP_STACK ostack(&SNL_local_pool);
00407 Build_Doloop_Stack(own, &ostack);
00408
00409 count++;
00410 BOOL ok = dg->Add_Edge( wnnew, &stack, own, &ostack, first_iter);
00411 if (!ok) {
00412 LNO_Erase_Dg_From_Here_In(wnnew, dg);
00413 LNO_Erase_Dg_From_Here_In(own, dg);
00414 disaster = TRUE;
00415 goto disaster;
00416 }
00417 }
00418 }
00419
00420
00421
00422 {
00423 HASH_TABLE_ITER<VINDEX16,SNL_NEWINFO> iter(new2old);
00424 VINDEX16 vnew;
00425 SNL_NEWINFO newinfo;
00426 while (iter.Step(&vnew, &newinfo)) {
00427 INT lex = newinfo.Lex;
00428 WN* wnnew = dg->Get_Wn(vnew);
00429 if (OPCODE_is_load(WN_opcode(wnnew)))
00430 continue;
00431
00432 DOLOOP_STACK stack(&SNL_local_pool);
00433 Build_Doloop_Stack(wnnew, &stack);
00434
00435 HASH_TABLE_ITER<VINDEX16,SNL_NEWINFO> iter2(new2old);
00436 VINDEX16 vnew2;
00437 SNL_NEWINFO newinfo2;
00438 while (iter2.Step(&vnew2, &newinfo2)) {
00439 INT lex2 = newinfo2.Lex;
00440 WN* wnnew2 = dg->Get_Wn(vnew2);
00441 BOOL is_load = OPCODE_is_load(WN_opcode(wnnew2));
00442
00443 if (!is_load && lex < lex2)
00444 continue;
00445
00446 count++;
00447 BOOL ok = dg->Add_Edge( wnnew, &stack, wnnew2, &stack, lex < lex2);
00448 if (!ok) {
00449 LNO_Erase_Dg_From_Here_In(WN_kid1(wnnew), dg);
00450 LNO_Erase_Dg_From_Here_In(WN_kid(wnnew2,is_load?0:1), dg);
00451 disaster = TRUE;
00452 goto disaster;
00453 }
00454 }
00455 }
00456 }
00457
00458 SNL_DEBUG1(1, "Peeling added %d edges\n", count);
00459 }
00460
00461 disaster:
00462 if (disaster) {
00463 SNL_DEBUG0(0, "Ran out of edges in peeling");
00464 HASH_TABLE_ITER<VINDEX16,SNL_NEWINFO> iter(new2old);
00465 VINDEX16 v;
00466 SNL_NEWINFO dummy;
00467 while (iter.Step(&v, &dummy)) {
00468 EINDEX16 e;
00469 EINDEX16 enext = 0;
00470 for (e = dg->Get_In_Edge(v); e; e = enext) {
00471 enext = dg->Get_Next_In_Edge(e);
00472 dg->Delete_Array_Edge(e);
00473 }
00474 for (e = dg->Get_Out_Edge(v); e; e = enext) {
00475 enext = dg->Get_Next_Out_Edge(e);
00476 dg->Delete_Array_Edge(e);
00477 }
00478 dg->Delete_Vertex(v);
00479 }
00480 }
00481
00482 WN* wn_next = NULL;
00483 #ifdef KEY
00484
00485
00486
00487 for (WN* wn_temp = first; wn_temp; wn_temp = wn_next) {
00488 #else
00489 for (WN* wn_temp = first; ; wn_temp = wn_next) {
00490 #endif
00491 wn_next = WN_next(wn_temp);
00492 Remove_Redundant_Stids(wn_temp, du);
00493 if (wn_temp == last)
00494 break;
00495 }
00496 MEM_POOL_Pop(&SNL_local_pool);
00497 }
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510 static void Remove_Privatizable_DU_Copy_Arcs(WN* bodies[],
00511 INT nbodies,
00512 const SX_INFO* priv)
00513 {
00514 if (priv->Plist.Is_Empty())
00515 return;
00516
00517
00518
00519 HASH_TABLE<WN*,INT> ht(97, &SNL_local_pool);
00520
00521 for (INT i = 1; i < nbodies; i++) {
00522 for (LWN_ITER *iter = LWN_WALK_TreeIter(bodies[i]);
00523 iter; iter = LWN_WALK_TreeNext(iter)) {
00524 WN* wn = iter->wn;
00525 OPERATOR opr = WN_operator(wn);
00526 if (opr == OPR_LDID || opr == OPR_STID)
00527 ht.Enter(wn, i);
00528 }
00529 }
00530
00531
00532
00533
00534 for (LWN_ITER *iter = LWN_WALK_TreeIter(bodies[0]);
00535 iter; iter = LWN_WALK_TreeNext(iter)) {
00536 WN* wn = iter->wn;
00537 OPERATOR opr = WN_operator(wn);
00538 if (opr == OPR_LDID && priv->Find(SYMBOL(wn))) {
00539 DEF_LIST* deflist = Du_Mgr->Ud_Get_Def(wn);
00540 DEF_LIST_ITER iter(deflist);
00541 for(const DU_NODE *n=iter.First(); !iter.Is_Empty(); n=iter.Next()) {
00542 WN *def = n->Wn();
00543 if (ht.Find(def))
00544 Du_Mgr->Delete_Def_Use(def, wn);
00545 }
00546 }
00547 else if (opr == OPR_STID && priv->Find(SYMBOL(wn))) {
00548 USE_LIST* uselist = Du_Mgr->Du_Get_Use(wn);
00549 USE_LIST_ITER iter(uselist);
00550 for(const DU_NODE *n=iter.First(); !iter.Is_Empty(); n=iter.Next()) {
00551 WN *use = n->Wn();
00552 if (ht.Find(use))
00553 Du_Mgr->Delete_Def_Use(wn, use);
00554 }
00555 }
00556 }
00557 }
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568 extern SNL_REGION SNL_GEN_Protect_Nest_With_Conditionals(const SNL_NEST_INFO*
00569 ni,
00570 BOOL* failed)
00571 {
00572 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00573 *failed = FALSE;
00574 INT r;
00575 const SYSTEM_OF_EQUATIONS* conds = &ni->Bi()->Conditionals();
00576 INT outerdepth = ni->Depth_Inner() - ni->Nloops_General() + 1;
00577 WN* outerloop = ni->Dostack().Bottom_nth(outerdepth);
00578
00579 if (conds->Num_Le_Constraints() == 0 && conds->Num_Eq_Constraints() == 0)
00580 return SNL_REGION(outerloop, outerloop);
00581
00582 WN* untransformable_nest = LWN_Copy_Tree(outerloop, TRUE, LNO_Info_Map);
00583
00584 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00585 STACK<WN*> st_old(&PROMPF_pool);
00586 STACK<WN*> st_new(&PROMPF_pool);
00587 Prompf_Assign_Ids(outerloop, untransformable_nest, &st_old, &st_new, FALSE);
00588 INT nloops = st_old.Elements();
00589 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00590 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00591 for (INT i = 0; i < nloops; i++) {
00592 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
00593 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
00594 }
00595 Prompf_Info->General_Version(old_ids, new_ids, nloops);
00596 }
00597
00598 WN* tmp_unrolls[2];
00599 tmp_unrolls[0] = outerloop;
00600 tmp_unrolls[1] = untransformable_nest;
00601 if (red_manager) red_manager->Unroll_Update(tmp_unrolls, 2);
00602 Unrolled_DU_Update(tmp_unrolls, 2, outerdepth-1, TRUE, FALSE);
00603
00604
00605
00606
00607
00608 Remove_Privatizable_DU_Copy_Arcs(tmp_unrolls, 2,
00609 &ni->Privatizability_Info());
00610
00611 if (!dg->Add_Deps_To_Copy_Block(outerloop, untransformable_nest)) {
00612 *failed = TRUE;
00613 SNL_DEBUG0(0, "Add_Deps_To_Copy_Block failed");
00614 LNO_Erase_Dg_From_Here_In(untransformable_nest, dg);
00615 LNO_Erase_Dg_From_Here_In(outerloop, dg);
00616 LWN_Delete_Tree(untransformable_nest);
00617 return SNL_REGION(outerloop, outerloop);
00618 }
00619
00620
00621
00622 OPCODE andop = OPCODE_make_op(OPR_BAND, Boolean_type, MTYPE_V);
00623
00624 WN* tree = NULL;
00625 for (r = 0; r < conds->Num_Eq_Constraints(); r++) {
00626 WN* expr = generate_tree_from_bounds_info_row(&conds->Aeq()(r,0),
00627 conds->Beq()[r],
00628 FALSE,
00629 &ni->Bi()->Var_Info());
00630 tree = tree ? LWN_CreateExp2(andop, tree, expr) : expr;
00631 }
00632
00633 for (r = 0; r < conds->Num_Le_Constraints(); r++) {
00634 WN* expr = generate_tree_from_bounds_info_row(&conds->Ale()(r,0),
00635 conds->Ble()[r],
00636 TRUE,
00637 &ni->Bi()->Var_Info());
00638 tree = tree ? LWN_CreateExp2(andop, tree, expr) : expr;
00639 }
00640
00641 Is_True(tree, ("SNL_Protect_Nest_With_Conditionals: bug"));
00642
00643
00644
00645 WN* parent = LWN_Get_Parent(outerloop);
00646 WN* prev = WN_prev(outerloop);
00647
00648 LWN_Extract_From_Block(parent, outerloop);
00649 WN* wnif = LWN_CreateIf(tree, WN_CreateBlock(), WN_CreateBlock());
00650 LWN_Copy_Frequency(wnif, outerloop);
00651 LWN_Copy_Linenumber(outerloop, wnif);
00652 LWN_Insert_Block_After(WN_then(wnif), NULL, outerloop);
00653 LWN_Insert_Block_After(WN_else(wnif), NULL, untransformable_nest);
00654 LWN_Insert_Block_After(parent, prev, wnif);
00655
00656
00657 BOOL has_regions=Find_SCF_Inside(wnif,OPC_REGION)!=NULL;
00658 IF_INFO *ii = CXX_NEW(IF_INFO(&LNO_default_pool,TRUE,has_regions),
00659 &LNO_default_pool);
00660 WN_MAP_Set(LNO_Info_Map,wnif,(void *)ii);
00661 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00662 &LNO_local_pool);
00663 Build_Doloop_Stack(wnif, stack);
00664 LNO_Build_If_Access(wnif, stack);
00665 CXX_DELETE(stack, &LNO_local_pool);
00666
00667 Renumber_Loops(WN_else(wnif), WN_else(wnif), dg);
00668
00669 return SNL_REGION(wnif, wnif);
00670 }
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682 extern SNL_REGION SNL_GEN_Distribution(WN* wn_outer,
00683 IMAT* unimodular,
00684 SNL_TILE_INFO* ti,
00685 INT nloops,
00686 BOOL find_split_depth,
00687 SX_PLIST* plist,
00688 BOOL above_is_distributable,
00689 BOOL below_is_distributable)
00690 {
00691
00692 SNL_GEN_Scalar_Expand(wn_outer, unimodular, ti, nloops, plist,
00693 -1, NULL, FALSE, TRUE);
00694
00695
00696 WN* wn_new_first = NULL;
00697 WN* wn_new_last = NULL;
00698 INT split_depth = find_split_depth ? Split_Depth(wn_outer, nloops) : -1;
00699 SNL_GEN_Distribute(wn_outer, split_depth, nloops, above_is_distributable,
00700 below_is_distributable, &wn_new_first, &wn_new_last);
00701
00702
00703 SNL_REGION region(wn_outer, wn_outer);
00704 if (wn_new_first != NULL)
00705 region.First = wn_new_first;
00706 if (wn_new_last != NULL)
00707 region.Last = wn_new_last;
00708 if (!Valid_SNL_Region(region))
00709 DevWarn("SNL_General_Distribution: Invalid SNL_REGION [0x%p,0x%p]"
00710 , region.First, region.Last);
00711 return region;
00712 }
00713
00714
00715
00716
00717
00718
00719
00720
00721
00722
00723
00724 static INT Which_Loop_Bound(const mINT32* row, INT from, INT nloops)
00725 {
00726 for (INT r = from + nloops -1; r >= from; r--)
00727 if (row[r])
00728 return r;
00729 return -1;
00730 }
00731
00732
00733
00734
00735
00736
00737
00738
00739 static void Row_Echelon(SYSTEM_OF_EQUATIONS* s,
00740 INT first,
00741 INT n)
00742 {
00743 INT r;
00744 INT i;
00745 BOOL inconsistent;
00746 BOOL ok;
00747
00748 Is_True(n <= SNL_MAX_LOOPS,
00749 ("loops nested too deeply: %d > %d", n, SNL_MAX_LOOPS));
00750
00751 ok = s->Copy_To_Work();
00752 FmtAssert(ok, ("Work array for system of equations too small"));
00753
00754 for (i = n+first-1; i >= first+1; i--) {
00755 ok = SYSTEM_OF_EQUATIONS::Project(i, &inconsistent);
00756 FmtAssert(ok, ("Projection failed!"));
00757 FmtAssert(!inconsistent, ("Projection can't be inconsistent!"));
00758 s->Add_Work_Le_Non_Simple_Redundant();
00759 }
00760
00761
00762
00763
00764 ok = s->Copy_To_Work();
00765 FmtAssert(ok, ("Work array for system of equations too small"));
00766 ok = s->Sub_In_Equal(&inconsistent);
00767 FmtAssert(ok, ("Sub_In_Equals failed"));
00768 FmtAssert(!inconsistent, ("Sub_In_Equal can't be inconsistent!"));
00769 s->Reset_To(0, 0, s->Num_Vars());
00770 s->Add_Work_Le();
00771
00772 INT rows = s->Num_Le_Constraints();
00773 INT cols = s->Num_Vars();
00774
00775 INT* which = CXX_NEW_ARRAY(INT, rows+1, &LNO_local_pool);
00776 BOOL* flag = CXX_NEW_ARRAY(BOOL, rows+1, &LNO_local_pool);
00777
00778 for (r = 0; r < rows; r++)
00779 which[r] = Which_Loop_Bound(&s->Ale()(r,0), first, n);
00780 which[rows] = n + first;
00781
00782 s->Sort_Le(which, FALSE);
00783
00784
00785
00786 s->Take_Gcds();
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798 SYSTEM_OF_EQUATIONS nsys(0, 0, cols, &LNO_local_pool);
00799
00800 for (r = 0; which[r] == -1; r++)
00801 nsys.Add_Le(&s->Ale()(r,0), s->Ble()[r]);
00802
00803 for (i = first; i < n+first; i++) {
00804 INT nle = nsys.Num_Le_Constraints();
00805 INT rle = r;
00806
00807
00808
00809
00810 INT pbounds = 0;
00811 INT nbounds = 0;
00812
00813 for (r = rle; which[r] == i; r++) {
00814 BOOL p = s->Ale()(r, i) > 0;
00815 if ((p && pbounds == 0) || (!p && nbounds == 0)) {
00816 flag[r] = TRUE;
00817 nsys.Add_Le(&s->Ale()(r,0), s->Ble()[r]);
00818 }
00819 else
00820 flag[r] = nsys.Add_Le_Non_Redundant(&s->Ale()(r,0), s->Ble()[r]);
00821 if (flag[r])
00822 p ? pbounds++ : nbounds++;
00823 }
00824 FmtAssert(pbounds > 0,
00825 ("Missing upper bounds expression r=%d rle=%d i=%d", r, rle, i));
00826 FmtAssert(nbounds > 0,
00827 ("Missing lower bounds expression r=%d rle=%d i=%d", r, rle, i));
00828
00829 if (pbounds == 1 && nbounds == 1)
00830 continue;
00831
00832
00833
00834
00835 nsys.Reset_To(nle, 0, cols);
00836
00837 pbounds = 0;
00838 nbounds = 0;
00839
00840 for (INT rr = r-1; rr >= rle; rr--) {
00841 if (flag[rr] == FALSE)
00842 continue;
00843
00844 BOOL p = s->Ale()(rr, i) > 0;
00845 if ((p && pbounds == 0) || (!p && nbounds == 0))
00846 nsys.Add_Le(&s->Ale()(rr,0), s->Ble()[rr]);
00847 else
00848 flag[rr] = nsys.Add_Le_Non_Redundant(&s->Ale()(rr,0), s->Ble()[rr]);
00849 if (flag[rr])
00850 p ? pbounds++ : nbounds++;
00851 }
00852 FmtAssert(pbounds > 0 && nbounds > 0, ("Missing bounds expression"));
00853 }
00854
00855 s->Reset_To(0, 0, s->Num_Vars());
00856 s->Add_Soe(&nsys);
00857
00858 CXX_DELETE_ARRAY(which, &LNO_local_pool);
00859 CXX_DELETE_ARRAY(flag, &LNO_local_pool);
00860 }
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878 static void Rewrite_Bounds(SNL_TRANS_INDEX_DATA* td,
00879 const SYSTEM_OF_EQUATIONS* soe,
00880 WN* tlb[],
00881 WN* tub[],
00882 WN* tstep[],
00883 DOLOOP_STACK* stack,
00884 INT first_in_stack,
00885 INT code)
00886 {
00887 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00888
00889
00890
00891
00892
00893 INT d;
00894
00895 WN* lb[SNL_MAX_LOOPS];
00896 WN* ub[SNL_MAX_LOOPS];
00897 WN* iloop[SNL_MAX_LOOPS];
00898
00899 INT tt_nloops = code == 2 ? td->t_nloops : 0;
00900 INT ii_nloops = code == 2 ? 0 : td->i_nloops;
00901 INT x_nloops = code == 2 ? tt_nloops : ii_nloops;
00902
00903
00904
00905 for (d = 0; d < x_nloops; d++) {
00906 lb[d] = NULL;
00907 ub[d] = NULL;
00908 iloop[d] = (code != 2) ? stack->Bottom_nth(first_in_stack+d) : NULL;
00909 }
00910
00911 mINT32* trow = CXX_NEW_ARRAY(mINT32, soe->Num_Vars(), &LNO_local_pool);
00912 mINT64 tcon;
00913
00914
00915
00916 for (d = 0; d < x_nloops; d++) {
00917
00918
00919
00920 SYMBOL symbol = (code == 2) ? td->tdata[d].symbol :
00921 td->idata[d].post_symbol;
00922 WN* alias_wn = (code == 2) ? td->tdata[d].alias_wn :
00923 td->idata[d].post_alias_wn;
00924
00925
00926
00927 for (INT r = 0 ; r < soe->Num_Le_Constraints(); r++) {
00928 INT from = code == 1 ? td->t_nloops : 0;
00929 INT l = Which_Loop_Bound(&soe->Ale()(r,0), from, x_nloops);
00930 if (l - from != d)
00931 continue;
00932
00933 BOOL is_lb = soe->Ale()(r,l) < 0;
00934 for (INT i = 0; i < soe->Num_Vars(); i++)
00935 trow[i] = is_lb ? soe->Ale()(r,i) : -soe->Ale()(r,i);
00936 tcon = is_lb ? -soe->Ble()[r] : soe->Ble()[r];
00937
00938
00939
00940
00941
00942 INT coef = -trow[l];
00943 Is_True(coef > 0, ("Bug in Rewrite_Bounds()"));
00944 trow[l] = 0;
00945
00946 if (coef > 1) {
00947
00948 INT g1 = Gcd(trow, soe->Num_Vars());
00949 INT g = INT(Gcd(Gcd(INT64(g1), tcon),INT64(coef)));
00950 if (g > 1) {
00951 for (INT c = 0; c < soe->Num_Vars(); c++)
00952 trow[c] /= g;
00953 tcon /= g;
00954 coef /= g;
00955 }
00956 }
00957 INT part = UCTILE_I | UCTILE_O;
00958 if (code != 0)
00959 part |= UCTILE_T;
00960 WN* wn = generate_tree_from_row(trow, td, tcon, symbol.Type, part);
00961 FmtAssert(wn, ("Unable to generate tree from row!"));
00962
00963 if (coef > 1) {
00964 WN* denominator = LWN_Make_Icon(symbol.Type, coef);
00965 if (is_lb)
00966 wn = LWN_CreateDivceil(symbol.Type, wn, denominator);
00967 else
00968 wn = LWN_CreateDivfloor(symbol.Type, wn, denominator);
00969 }
00970
00971 WN** b = is_lb ? &lb[d] : &ub[d];
00972 if (*b == NULL)
00973 *b = wn;
00974 else {
00975 OPERATOR opr = is_lb ? OPR_MAX : OPR_MIN;
00976 OPCODE opc = OPCODE_make_op(opr, symbol.Type, MTYPE_V);
00977 *b = LWN_CreateExp2(opc, *b, wn);
00978 }
00979 }
00980
00981 FmtAssert(lb[d], ("Index %d has no lower bound <code=%d>", d, code));
00982 FmtAssert(ub[d], ("Index %d has no upper bound <code=%d>", d, code));
00983 }
00984
00985 if (code != 2) {
00986 for (d = 0; d < x_nloops; d++) {
00987 SYMBOL symbol = td->idata[d].post_symbol;
00988 WN* alias_wn = td->idata[d].post_alias_wn;
00989 WN* indx = WN_index(iloop[d]);
00990 if (SYMBOL(indx) == symbol)
00991 continue;
00992
00993 Replace_Symbol(iloop[d], SYMBOL(indx), symbol, alias_wn, iloop[d]);
00994 WN_st_idx(indx) = ST_st_idx(symbol.St());
00995 WN_offset(indx) = symbol.WN_Offset();
00996 }
00997 }
00998
00999 for (d = 0; d < x_nloops; d++) {
01000 SYMBOL symbol = (code == 2) ? td->tdata[d].symbol :
01001 td->idata[d].post_symbol;
01002 WN* alias_wn = (code == 2) ? td->tdata[d].alias_wn :
01003 td->idata[d].post_alias_wn;
01004
01005
01006
01007 OPCODE opadd = OPCODE_make_op(OPR_ADD, symbol.Type, MTYPE_V);
01008 OPCODE opst = OPCODE_make_op(OPR_STID, MTYPE_V, symbol.Type);
01009 OPCODE opld = OPCODE_make_op(OPR_LDID, symbol.Type, symbol.Type);
01010 OPCODE ople = OPCODE_make_op(OPR_LE, Boolean_type, symbol.Type);
01011 TY_IDX ty = Be_Type_Tbl(symbol.Type);
01012
01013 if (code == 1) {
01014
01015
01016
01017
01018
01019
01020
01021
01022 if (d > 0 &&
01023 (WN_prev_executable(iloop[d]) || WN_next_executable(iloop[d]))) {
01024
01025
01026
01027
01028
01029
01030
01031
01032 Is_True(!WN_prev(iloop[d]) || !WN_next(iloop[d]),
01033 ("Cannot handle two sided tiling -- shouldn't happen here"));
01034
01035 BOOL doing_above = WN_prev_executable(iloop[d]) != NULL;
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093 WN** lldid = CXX_NEW_ARRAY(WN*, x_nloops, &LNO_local_pool);
01094
01095
01096
01097 WN* s_block = (doing_above) ?
01098 LWN_Create_Block_From_Stmts_Above(iloop[d]) :
01099 LWN_Create_Block_From_Stmts_Below(iloop[d]);
01100
01101 WN* current_body = WN_CreateBlock();
01102
01103
01104
01105
01106
01107 for (INT ii = d; ii < x_nloops; ii++) {
01108 char name1[32];
01109 char name2[32];
01110 char name3[32];
01111
01112 sprintf(name1, "$imperf_%c%d_%d", doing_above ? 'L' : 'U', d, ii);
01113 sprintf(name2, "$imperf_Lnew%d_%d", d, ii);
01114 sprintf(name3, "$imperf_Unew%d_%d", d, ii);
01115
01116 TYPE_ID type = Do_Wtype(iloop[ii]);
01117
01118 OPCODE opldid = OPCODE_make_op(OPR_LDID, type, type);
01119 OPCODE opstid = OPCODE_make_op(OPR_STID, MTYPE_V, type);
01120 OPCODE opeq = OPCODE_make_op(OPR_EQ, Boolean_type, type);
01121 OPCODE ople = OPCODE_make_op(OPR_LE, Boolean_type, type);
01122 OPCODE opand = OPCODE_make_op(OPR_LAND, Boolean_type, MTYPE_V);
01123
01124 SYMBOL lii = Create_Preg_Symbol(name1, type);
01125 SYMBOL lpii = Create_Preg_Symbol(name2, type);
01126 SYMBOL upii = Create_Preg_Symbol(name3, type);
01127
01128 WN* lstid = LWN_CreateStid(opstid, lii.WN_Offset(), lii.St(), ty,
01129 SNL_Copy_Exp((doing_above)?
01130 WN_kid0(WN_start(iloop[ii])):
01131 SNL_UBexp(WN_end(iloop[ii]))));
01132
01133 WN* lpstid = LWN_CreateStid(opstid, lpii.WN_Offset(),
01134 lpii.St(), ty,
01135 SNL_Copy_Exp(lb[ii]));
01136 WN* upstid = LWN_CreateStid(opstid, upii.WN_Offset(),
01137 upii.St(), ty,
01138 SNL_Copy_Exp(ub[ii]));
01139
01140 lldid[ii] = LWN_CreateLdid(opldid, lstid);
01141 WN* lupldid = doing_above ? LWN_CreateLdid(opldid, lpstid)
01142 : LWN_CreateLdid(opldid, upstid);
01143 WN* lpldid = LWN_CreateLdid(opldid, lpstid);
01144 WN* upldid = LWN_CreateLdid(opldid, upstid);
01145
01146 if (doing_above)
01147 Du_Mgr->Add_Def_Use(lpstid, lupldid);
01148 else
01149 Du_Mgr->Add_Def_Use(upstid, lupldid);
01150 Du_Mgr->Add_Def_Use(upstid, upldid);
01151 Du_Mgr->Add_Def_Use(lpstid, lpldid);
01152 Du_Mgr->Add_Def_Use(lstid, lldid[ii]);
01153
01154 Du_Mgr->Ud_Get_Def(lupldid)->Set_loop_stmt(NULL);
01155 Du_Mgr->Ud_Get_Def(upldid)->Set_loop_stmt(NULL);
01156 Du_Mgr->Ud_Get_Def(lpldid)->Set_loop_stmt(NULL);
01157 Du_Mgr->Ud_Get_Def(lldid[ii])->Set_loop_stmt(NULL);
01158
01159
01160
01161 for (INT jj = d; jj < ii; jj++) {
01162 SYMBOL idx(WN_index(iloop[jj]));
01163 Replace_Ldid_With_Exp_Copy(idx, lstid, lldid[jj], Du_Mgr);
01164 Replace_Ldid_With_Exp_Copy(idx, lpstid, lldid[jj], Du_Mgr);
01165 Replace_Ldid_With_Exp_Copy(idx, upstid, lldid[jj], Du_Mgr);
01166 }
01167
01168 LWN_Insert_Block_After(current_body, NULL, lstid);
01169 LWN_Insert_Block_After(current_body, lstid, lpstid);
01170 LWN_Insert_Block_After(current_body, lpstid, upstid);
01171
01172 WN* wif_tst = LWN_CreateExp2(opand,
01173 LWN_CreateExp2(opeq, lupldid, lldid[ii]),
01174 LWN_CreateExp2(ople, lpldid, upldid));
01175
01176 WN* wif = LWN_CreateIf(wif_tst, WN_CreateBlock(), WN_CreateBlock());
01177 LWN_Insert_Block_Before(current_body, NULL, wif);
01178
01179
01180
01181
01182 Fix_Do_Du_Info(current_body, NULL, TRUE, iloop[ii], 0);
01183
01184 if (ii == d) {
01185 WN* iloopd_prnt = LWN_Get_Parent(iloop[d]);
01186 if (doing_above)
01187 LWN_Insert_Block_Before(iloopd_prnt, iloop[d], current_body);
01188 else
01189 LWN_Insert_Block_After(iloopd_prnt, iloop[d], current_body);
01190 }
01191
01192
01193 BOOL has_regions=Find_SCF_Inside(wif,OPC_REGION)!=NULL;
01194 IF_INFO *ifi = CXX_NEW(IF_INFO(&LNO_default_pool, FALSE, has_regions),
01195 &LNO_default_pool);
01196 WN_MAP_Set(LNO_Info_Map, wif, (void *)ifi);
01197 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01198 &LNO_local_pool);
01199 Build_Doloop_Stack(wif, stack);
01200 LNO_Build_If_Access(wif, stack);
01201 CXX_DELETE(stack, &LNO_local_pool);
01202
01203 current_body = WN_then(wif);
01204 }
01205
01206 LWN_Insert_Block_After(current_body, NULL, s_block);
01207
01208 CXX_DELETE_ARRAY(lldid, &LNO_local_pool);
01209 }
01210 }
01211
01212
01213
01214 if (code == 2) {
01215 Is_True(tlb[d] == NULL, ("Surprising tile lb"));
01216 Is_True(tub[d] == NULL, ("Surprising tile ub"));
01217 Is_True(tstep[d] == NULL, ("Surprising tile step"));
01218
01219
01220
01221
01222
01223 lb[d] = LWN_CreateStid(opst, symbol.WN_Offset(), symbol.St(),
01224 ty, lb[d]);
01225
01226
01227
01228
01229 ub[d] = LWN_CreateExp2(ople, WN_CreateLdid(opld, symbol.WN_Offset(),
01230 symbol.St(), ty), ub[d]);
01231
01232 tlb[d] = lb[d];
01233 tub[d] = ub[d];
01234 tstep[d] = LWN_CreateStid(opst, symbol.WN_Offset(), symbol.St(), ty,
01235 LWN_CreateExp2(opadd,
01236 WN_CreateLdid(opld, symbol.WN_Offset(),
01237 symbol.St(), ty),
01238 LWN_Make_Icon(symbol.Type, 1)));
01239
01240 LWN_Copy_Linenumber(stack->Bottom_nth(first_in_stack), tlb[d]);
01241 LWN_Copy_Linenumber(stack->Bottom_nth(first_in_stack), tub[d]);
01242 LWN_Copy_Linenumber(stack->Bottom_nth(first_in_stack), tstep[d]);
01243 }
01244 else {
01245 LWN_Delete_Tree(WN_kid0(WN_start(iloop[d])));
01246 WN_kid0(WN_start(iloop[d])) = lb[d];
01247 LWN_Set_Parent(WN_kid0(WN_start(iloop[d])), WN_start(iloop[d]));
01248
01249 LWN_Delete_Tree(SNL_UBexp(WN_end(iloop[d])));
01250 SNL_UBexp(WN_end(iloop[d])) = ub[d];
01251 LWN_Set_Parent(SNL_UBexp(WN_end(iloop[d])), WN_end(iloop[d]));
01252
01253 Fix_Do_Du_Info(iloop[d], td, FALSE, NULL, 1);
01254
01255 Is_True(Step_Size(iloop[d]) == 1, ("Step wasn't unity"));
01256 }
01257 }
01258 }
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268 static INT UT_Body_Exp(WN* wn, SNL_TRANS_INDEX_DATA* td)
01269 {
01270 if (WN_operator(wn) == OPR_LDID) {
01271 for (INT d = 0; d < td->i_nloops; d++) {
01272 if (SYMBOL(wn) == td->idata[d].post_symbol) {
01273 FmtAssert(td->idata[d].newcode, ("Missing newcode"));
01274 (void) Replace_Wnexp_With_Exp_Copy(wn, td->idata[d].newcode,
01275 Du_Mgr);
01276 return td->idata[d].max_used_depth;
01277 }
01278 }
01279 return -1;
01280 }
01281 else {
01282 INT rv = -1;
01283
01284 for (INT kid = 0; kid < WN_kid_count(wn); kid++) {
01285 INT nrv = UT_Body_Exp(WN_kid(wn,kid), td);
01286 if (rv < nrv)
01287 rv = nrv;
01288 }
01289 return rv;
01290 }
01291 }
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303 static void UT_Body_Innermost(WN* body,
01304 SNL_TRANS_INDEX_DATA* td)
01305 {
01306 for(WN* wn = WN_first(body); wn; wn = WN_next(wn)) {
01307 switch (WN_opcode(wn)) {
01308 case OPC_IF:
01309 UT_Body_Exp(WN_if_test(wn), td);
01310 UT_Body_Innermost(WN_then(wn), td);
01311 UT_Body_Innermost(WN_else(wn), td);
01312 break;
01313 case OPC_DO_LOOP:
01314 UT_Body_Exp(WN_start(wn), td);
01315 UT_Body_Exp(WN_end(wn), td);
01316 UT_Body_Exp(WN_step(wn), td);
01317 UT_Body_Innermost(WN_do_body(wn), td);
01318 break;
01319 case OPC_DO_WHILE:
01320 case OPC_WHILE_DO:
01321 UT_Body_Exp(WN_while_test(wn), td);
01322 UT_Body_Innermost(WN_while_body(wn), td);
01323 break;
01324 #ifdef KEY //bug 11817: handle regions under loop body
01325 case OPC_REGION:
01326 UT_Body_Innermost(WN_region_pragmas(wn),td);
01327 UT_Body_Innermost(WN_region_exits(wn),td);
01328 UT_Body_Innermost(WN_region_body(wn),td);
01329 break;
01330 #endif
01331 default:
01332 UT_Body_Exp(wn, td);
01333 break;
01334 }
01335 }
01336 }
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355 static void UT_Generate_Imperfect_If_Code(DOLOOP_STACK* stack,
01356 INT first_in_stack,
01357 INT d1,
01358 INT d2,
01359 BOOL above_do,
01360 SNL_TRANS_INDEX_DATA* td)
01361 {
01362 FmtAssert(0, ("TODO: imperfect interchange not implemented. "
01363 "(e.g. DU updating.)"));
01364
01365 Is_True(d1 < d2, ("imperfect_if_code() doesn't have enough work"));
01366
01367 WN* loop[SNL_MAX_LOOPS];
01368
01369 for (INT i = 0; i < d2; i++)
01370 loop[i] = stack->Bottom_nth(first_in_stack+i);
01371
01372
01373
01374
01375
01376
01377
01378
01379 WN* oldblock = LWN_Get_Parent(loop[d1]);
01380 WN* block = above_do ?
01381 LWN_Create_Block_From_Stmts_Above(loop[d1]) :
01382 LWN_Create_Block_From_Stmts_Below(loop[d1]);
01383
01384
01385
01386
01387 OPCODE opand = OPCODE_make_op(OPR_LAND, Boolean_type, MTYPE_V);
01388 WN* header = NULL;
01389
01390 for (INT d = d1; d < d2; d++) {
01391 WN* bnd = above_do ? td->idata[d].lbtest : td->idata[d].ubtest;
01392 WN* dup = LWN_Copy_Tree(bnd, TRUE, LNO_Info_Map);
01393 header = header ? LWN_CreateExp2(opand, header, dup) : dup;
01394 }
01395
01396 WN* wn_if = LWN_CreateIf(header, block, WN_CreateBlock());
01397 LWN_Copy_Linenumber(loop[d2-1], wn_if);
01398
01399
01400
01401 LWN_Copy_Frequency_Tree(wn_if, WN_do_body(WN_first(loop[d2-1])));
01402 if (above_do)
01403 LWN_Insert_Block_After(WN_do_body(loop[d2-1]), NULL, wn_if);
01404 else
01405 LWN_Insert_Block_Before(WN_do_body(loop[d2-1]), NULL, wn_if);
01406 LWN_Set_Parent(wn_if, WN_do_body(loop[d2-1]));
01407
01408
01409 BOOL hasdo = !Get_Do_Loop_Info(loop[d2-1])->Is_Inner;
01410 BOOL has_regions = Find_SCF_Inside(wn_if,OPC_REGION)!=NULL;
01411 IF_INFO *ii = CXX_NEW(IF_INFO(&LNO_default_pool,hasdo,has_regions),
01412 &LNO_default_pool);
01413 WN_MAP_Set(LNO_Info_Map,wn_if,(void *)ii);
01414 DOLOOP_STACK* stackx = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01415 &LNO_local_pool);
01416 Build_Doloop_Stack(wn_if, stackx);
01417 LNO_Build_If_Access(wn_if, stackx);
01418 CXX_DELETE(stackx, &LNO_local_pool);
01419 }
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431 static void UT_Body_Imperfect(WN* body,
01432 DOLOOP_STACK* stack,
01433 INT first_in_stack,
01434 INT d,
01435 INT moveto[],
01436 SNL_TRANS_INDEX_DATA* td)
01437 {
01438 Is_True(WN_opcode(body) == OPC_BLOCK, ("trans_ut_body_imp gets non-block"));
01439
01440 WN* do_seen = NULL;
01441
01442 for (WN* wn = WN_first(body); wn; wn = WN_next(wn)) {
01443 switch (WN_opcode(wn)) {
01444 case OPC_IF:
01445 UT_Body_Exp(WN_if_test(wn), td);
01446 UT_Body_Imperfect(WN_then(wn), stack, first_in_stack, -1, moveto, td);
01447 UT_Body_Imperfect(WN_else(wn), stack, first_in_stack, -1, moveto, td);
01448 break;
01449 case OPC_DO_LOOP:
01450 Is_True(d >= 0, ("DO inside IF in SNL"));
01451 Is_True(do_seen == NULL, ("multiple DOs in SNL"));
01452 Is_True(stack->Bottom_nth(first_in_stack+d) == wn,
01453 ("loop confusion in UT_Body_Imperfect"));
01454
01455 do_seen = wn;
01456 if (d+1 < td->i_nloops)
01457 UT_Body_Imperfect(WN_do_body(wn), stack, first_in_stack,
01458 d+1, moveto, td);
01459 break;
01460 case OPC_DO_WHILE:
01461 case OPC_WHILE_DO:
01462 FmtAssert(0, ("while loops can't exist within SNL"));
01463 break;
01464 default:
01465 UT_Body_Exp(wn, td);
01466 break;
01467 }
01468 }
01469
01470
01471
01472 Is_True(d >= 1 && moveto[d] <= SNL_MAX_LOOPS,
01473 ("Problem with d=%d, moveto[d]=%d", d, moveto[d]));
01474
01475 if (moveto[d] > d && do_seen && WN_prev(do_seen))
01476 UT_Generate_Imperfect_If_Code(stack, first_in_stack, d,
01477 moveto[d], TRUE, td);
01478 if (moveto[d] > d && do_seen && WN_next(do_seen))
01479 UT_Generate_Imperfect_If_Code(stack, first_in_stack, d, moveto[d],
01480 FALSE, td);
01481 }
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507 extern SNL_REGION SNL_GEN_U_Ctiling(WN* wn_outer,
01508 INT nloops,
01509 IMAT* u,
01510 SNL_TILE_INFO* t,
01511 SNL_BOUNDS_INFO* bi,
01512 SX_PLIST* plist,
01513 EST_REGISTER_USAGE est_register_usage,
01514 BOOL warn_lexneg,
01515 BOOL from_genperm)
01516 {
01517 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01518 DU_MANAGER* du = Du_Mgr;
01519
01520 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01521 DOLOOP_STACK stack(&LNO_local_pool);
01522 Build_Doloop_Stack(wn_inner, &stack);
01523 INT first_in_stack = Do_Loop_Depth(wn_outer);
01524
01525 Is_True(u != NULL || t != NULL,
01526 ("SNL_GEN_U_Ctiling() with no transformation"));
01527
01528 MEM_POOL_Push(&LNO_local_pool);
01529
01530 MEM_POOL* old_dflt_pool = IMAT::Set_Default_Pool(&LNO_local_pool);
01531 FmtAssert(nloops == (t != NULL ? t->KHT().Cols() : u->Rows()),
01532 ("Old and new interfaces match"));
01533 SNL_REGION region(stack.Bottom_nth(first_in_stack),
01534 stack.Bottom_nth(first_in_stack));
01535
01536
01537 const IMAT* kht = t ? &t->KHT() : NULL;
01538 IMAT* uinv = NULL;
01539
01540 if (u) {
01541 uinv = CXX_NEW(IMAT(&LNO_local_pool), &LNO_local_pool);
01542 *uinv = u->Inv();
01543 }
01544
01545
01546
01547 SNL_TRANS_INDEX_DATA *td =
01548 CXX_NEW(SNL_TRANS_INDEX_DATA(u, uinv, kht, bi, &stack, first_in_stack,
01549 t, &LNO_local_pool), &LNO_local_pool);
01550
01551
01552
01553 if (u) {
01554
01555
01556 if (!from_genperm && Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01557 WN* wn_loop = SNL_Get_Inner_Snl_Loop(wn_outer,
01558 SNL_Loop_Count(wn_outer) - u->Rows() + 1);
01559 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01560 INT i = 0;
01561 for (WN* wn = wn_loop; wn != NULL; wn = Next_SNL_Loop(wn))
01562 old_ids[i++] = WN_MAP32_Get(Prompf_Id_Map, wn);
01563 FmtAssert(i == u->Rows() && u->Rows() == u->Cols(),
01564 ("SNL_GEN_U_Ctiling: Wrong number of loops in SNL"));
01565 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01566 permutation = Unimodular_To_Permutation(u);
01567 if (permutation != NULL) {
01568 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01569 for (i = 0; i < nloops; i++)
01570 new_ids[i] = old_ids[permutation[i]];
01571 Prompf_Info->Interchange(old_ids, new_ids, nloops);
01572 } else {
01573 FmtAssert(TRUE,
01574 ("SNL_GEN_U_Ctiling: Could not find permutation for U matrix"));
01575 }
01576 }
01577
01578
01579 const IMAT uinvold(*uinv, &LNO_local_pool);
01580 uinv->D_Add_Identity_Rows_and_Cols(bi->Bounds().Num_Vars() - nloops);
01581 bi->Bounds().Ale() *= *uinv;
01582 if (bi->Bounds().Num_Eq_Constraints() > 0)
01583 bi->Bounds().Aeq() *= *uinv;
01584
01585
01586 Row_Echelon(&bi->Bounds(), 0, nloops);
01587 Rewrite_Bounds(td, &bi->Bounds(), NULL, NULL, NULL,
01588 &stack, first_in_stack, 0);
01589 WN* inner = stack.Bottom_nth(first_in_stack+nloops-1);
01590 for (INT ii = 0; ii < nloops; ii++) {
01591 WN* loop = stack.Bottom_nth(first_in_stack+ii);
01592 Fix_Do_Du_Info(td->idata[ii].newcode, td, FALSE, inner, 0);
01593 }
01594 UT_Body_Innermost(WN_do_body(inner), td);
01595
01596
01597
01598 BOOL imperfect = FALSE;
01599 for (INT l = 1; l < nloops && imperfect == FALSE; l++) {
01600 WN* wn = stack.Bottom_nth(first_in_stack + l);
01601 if (WN_prev_executable(wn) || WN_next_executable(wn))
01602 imperfect = TRUE;
01603 }
01604
01605 if (imperfect) {
01606 INT moveto[SNL_MAX_LOOPS];
01607 for (INT i = 1; i < nloops; i++) {
01608 INT j;
01609 for (j = 0; j < nloops; j++)
01610 if ((*u)(i-1,j))
01611 moveto[i] = j+1;
01612 for (j = 1; j < i; j++)
01613 if (moveto[j] > moveto[i])
01614 moveto[i] = moveto[j];
01615 Is_True(moveto[i] >= i, ("Bug with moveto (%d)", i));
01616 }
01617
01618 UT_Body_Imperfect(WN_do_body(stack.Bottom_nth(first_in_stack)),
01619 &stack, first_in_stack, 1, moveto, td);
01620 }
01621
01622 BOOL utransformation_invalid = FALSE;
01623
01624 DOLOOP_STACK shortstack(&LNO_local_pool);
01625 Build_Doloop_Stack(LWN_Get_Parent(stack.Bottom_nth(first_in_stack)),
01626 &shortstack);
01627 LNO_Build_Access(stack.Bottom_nth(first_in_stack),
01628 &shortstack, &LNO_default_pool);
01629
01630
01631
01632 for (INT i = first_in_stack; i < first_in_stack + nloops; i++) {
01633 WN* loop = stack.Bottom_nth(i);
01634 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
01635 dli->Set_Est_Num_Iterations(&stack);
01636 dli->Is_Ivdep = FALSE;
01637 dli->Is_Concurrent_Call = FALSE;
01638 dli->Concurrent_Directive = FALSE;
01639 }
01640
01641
01642
01643 Fix_Do_Du_Info(stack.Bottom_nth(first_in_stack),
01644 NULL, TRUE, NULL, nloops);
01645
01646
01647
01648
01649
01650
01651 SNL_Expand_Reduction_Deps(stack.Bottom_nth(first_in_stack));
01652
01653
01654
01655
01656
01657
01658
01659
01660
01661
01662
01663
01664 SNL_Rebuild_Access_Arrays(stack.Bottom_nth(first_in_stack));
01665 LS_IN_LOOP loop_ls(stack.Bottom_nth(first_in_stack), dg,
01666 &LNO_local_pool);
01667 INT curdepth = loop_ls.Good_Depth;
01668 INT maxdepth = loop_ls.Good_Depth + nloops;
01669
01670
01671
01672
01673 HASH_TABLE<EINDEX16,INT>
01674 edge_table(MIN(dg->Get_Edge_Count(),512),&LNO_local_pool);
01675
01676
01677
01678 WN* awn;
01679 LS_IN_LOOP_ITER ali1(&loop_ls);
01680 while (awn = ali1.Step()) {
01681 Is_True(loop_ls.In(awn) > 0, ("Bug in ls_in_loop_iter"));
01682 DOLOOP_STACK astack(&LNO_local_pool);
01683 Build_Doloop_Stack(awn, &astack);
01684 VINDEX16 v = dg->Get_Vertex(awn);
01685 if (v == 0)
01686 continue;
01687
01688 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = dg->Get_Next_Out_Edge(e)) {
01689 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
01690 if (bwn && loop_ls.In(bwn))
01691 edge_table.Enter(e, 1);
01692 }
01693 }
01694
01695 LS_IN_LOOP_ITER ali(&loop_ls);
01696 while (awn = ali.Step()) {
01697 INT alex = loop_ls.In(awn);
01698 DOLOOP_STACK astack(&LNO_local_pool);
01699 Build_Doloop_Stack(awn, &astack);
01700 Is_True(alex > 0, ("Bug in ls_in_loop_iter"));
01701 VINDEX16 v = dg->Get_Vertex(awn);
01702 if (v == 0)
01703 continue;
01704
01705 EINDEX16 enext = 0;
01706 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
01707 enext = dg->Get_Next_Out_Edge(e);
01708 if (edge_table.Find(e) != 1)
01709 continue;
01710
01711 INT components = dg->Depv_Array(e)->Num_Dim();
01712 FmtAssert(components >= curdepth,
01713 ("edge=%d components=%d curdepth=%d",
01714 e, components, curdepth));
01715
01716 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
01717 INT blex = loop_ls.In(bwn);
01718
01719 if (components >= maxdepth) {
01720
01721
01722
01723 DEP olddep[SNL_MAX_LOOPS];
01724 for (INT i = 0; i < dg->Depv_Array(e)->Num_Vec(); i++) {
01725 DEPV* d = dg->Depv_Array(e)->Depv(i);
01726 INT j;
01727 for (j = 0; j < components - curdepth; j++)
01728 olddep[j] = DEPV_Dep(d, j+curdepth);
01729 for (j = 0; j < components - curdepth; j++) {
01730 if (j >= u->Rows())
01731 continue;
01732 SNL_DEP dd;
01733 for (INT k = 0; k < nloops; k++)
01734 dd = dd + (*u)(j,k) * SNL_DEP(olddep[k]);
01735 DEPV_Dep(d, j+curdepth) = dd.Dep();
01736 }
01737 }
01738 if (SNL_Test_Reduction_Lexneg(e, awn, bwn, alex, blex)) {
01739 utransformation_invalid = TRUE;
01740 SNL_DEBUG1(1, "general permutation edge=%d illegal?!\n", e);
01741 }
01742 edge_table.Remove(e);
01743 }
01744 }
01745 }
01746
01747
01748
01749
01750 LS_IN_LOOP_ITER ali2(&loop_ls);
01751 while (awn = ali2.Step()) {
01752 INT alex = loop_ls.In(awn);
01753 DOLOOP_STACK astack(&LNO_local_pool);
01754 Build_Doloop_Stack(awn, &astack);
01755 Is_True(alex > 0, ("Bug in ls_in_loop_iter"));
01756 VINDEX16 v = dg->Get_Vertex(awn);
01757 if (v == 0)
01758 continue;
01759
01760 EINDEX16 enext = 0;
01761 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
01762 enext = dg->Get_Next_Out_Edge(e);
01763 if (edge_table.Find(e) != 1)
01764 continue;
01765
01766
01767 EINDEX16 econj = dg->Get_Edge(dg->Get_Sink(e), dg->Get_Source(e));
01768
01769
01770 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
01771 INT blex = loop_ls.In(bwn);
01772 Is_True(blex > 0, ("Bug in ls_in_loop_iter"));
01773 if (econj && e != econj && edge_table.Find(econj)) {
01774 if (enext == econj)
01775 enext = dg->Get_Next_Out_Edge(e);
01776 edge_table.Remove(econj);
01777 dg->Delete_Array_Edge(econj);
01778 }
01779 edge_table.Remove(e);
01780 dg->Delete_Array_Edge(e);
01781 DOLOOP_STACK bstack(&LNO_local_pool);
01782 Build_Doloop_Stack(bwn, &bstack);
01783 BOOL ok = dg->Add_Edge(awn, &astack, bwn, &bstack, alex < blex);
01784 if (!ok) {
01785 LNO_Erase_Dg_From_Here_In(awn, dg);
01786 LNO_Erase_Dg_From_Here_In(bwn, dg);
01787 }
01788 }
01789 }
01790 if (warn_lexneg && utransformation_invalid)
01791 DevWarn("General permutation transformation invalid?!");
01792 }
01793
01794 WN* inner_loop = stack.Bottom_nth(first_in_stack+nloops-1);
01795 DO_LOOP_INFO* inner_dli = Get_Do_Loop_Info(inner_loop);
01796 inner_dli->Est_Register_Usage = est_register_usage;
01797
01798
01799
01800 if (t) {
01801
01802
01803
01804
01805
01806
01807 FmtAssert(t->Rectangular(), ("non-rectangular tiling unimplemented"));
01808 INT khtcols = t->KHT().Cols();
01809 INT lrows = t->L().Rows();
01810 INT lcols = t->L().Cols();
01811 INT bcols = bi->Bounds().Num_Vars();
01812 INT khtrows = t->KHT().Rows();
01813 INT blerows = bi->Bounds().Num_Le_Constraints();
01814 INT beqrows = bi->Bounds().Num_Eq_Constraints();
01815 INT i;
01816 INT j;
01817
01818 i = 0;
01819 INT* old_ids = CXX_NEW_ARRAY(INT, lrows, &LNO_local_pool);
01820 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01821 for (WN* wn = wn_outer; wn != NULL; wn = Next_SNL_Loop(wn))
01822 old_ids[i++] = WN_MAP32_Get(Prompf_Id_Map, wn);
01823 }
01824
01825
01826
01827 WN* tlb[SNL_MAX_LOOPS];
01828 WN* tub[SNL_MAX_LOOPS];
01829 WN* tstep[SNL_MAX_LOOPS];
01830
01831 for (i = 0; i < SNL_MAX_LOOPS; i++)
01832 tlb[i] = tub[i] = tstep[i] = NULL;
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870 mINT32 o[SNL_MAX_LOOPS];
01871
01872 for (INT dd = 0; dd < nloops; dd++)
01873 o[dd] = 1;
01874
01875 for (INT r = 0; r < blerows; r++) {
01876 INT l = Which_Loop_Bound(&bi->Bounds().Ale()(r,0), 0, nloops);
01877
01878 if (l < 0 || bi->Bounds().Ale()(r,l) > 0)
01879 continue;
01880
01881
01882
01883 o[l] = -bi->Bounds().Ble()[r];
01884 for (INT ll = 0; ll < nloops; ll++) {
01885 if (l != ll)
01886 o[l] += bi->Bounds().Ale()(r,ll);
01887 }
01888 }
01889
01890
01891
01892 SYSTEM_OF_EQUATIONS s1(blerows + 2*khtrows, beqrows, lcols + bcols,
01893 &LNO_local_pool);
01894 s1.Add_Le(blerows + 2*khtrows);
01895 s1.Add_Eq(beqrows);
01896
01897
01898
01899 for (i = 0; i < beqrows; i++) {
01900 mINT32* nreq = &s1.Aeq()(i,0);
01901 const mINT32* req = &bi->Bounds().Aeq()(i,0);
01902
01903 for (j = 0; j < lcols; j++)
01904 *nreq++ = 0;
01905 for (j = 0 ; j < bcols; j++)
01906 *nreq++ = *req++;
01907 s1.Beq()[i] = bi->Bounds().Beq()[i];
01908 }
01909
01910 for (i = 0; i < blerows; i++) {
01911 mINT32* nrle = &s1.Ale()(i,0);
01912 const mINT32* rle = &bi->Bounds().Ale()(i,0);
01913
01914 for (j = 0; j < lcols; j++)
01915 *nrle++ = 0;
01916 for (j = 0 ; j < bcols; j++)
01917 *nrle++ = *rle++;
01918 s1.Ble()[i] = bi->Bounds().Ble()[i];
01919 }
01920
01921 if (!t->Rectangular()) {
01922
01923
01924
01925 IMAT slmat(t->KHT() * t->L(), &LNO_local_pool);
01926
01927 for (i = 0; i < khtrows; i++) {
01928 mINT32* nr = &s1.Ale()(i+blerows,0);
01929 mINT32* nrneg = &s1.Ale()(i+blerows+khtrows,0);
01930 const mINT32* slx = &slmat(i,0);
01931 const mINT32* khtx = &t->KHT()(i,0);
01932
01933 for (j = 0; j < lcols; j++) {
01934 *nr++ = -*slx;
01935 *nrneg++ = *slx++;
01936 }
01937 for (j = 0; j < khtcols; j++) {
01938 *nr++ = *khtx;
01939 *nrneg++ = -*khtx++;
01940 }
01941 for (j = 0; j < bcols-khtcols; j++) {
01942 *nr++ = 0;
01943 *nrneg++ = 0;
01944 }
01945 INT32 dp = 0;
01946 if (LNO_Fancy_Tile) {
01947
01948
01949
01950 dp = Dot_Product(&t->KHT()(i,0), o, nloops);
01951 }
01952 s1.Ble()[i+blerows] = t->K() - 1 + dp;
01953 s1.Ble()[i+blerows+khtrows] = 0 - dp;
01954 }
01955 }
01956 else {
01957
01958
01959
01960
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970 FmtAssert(khtrows == t->Strips(), ("Bug in general tiling"));
01971
01972 for (INT s = 0; s < khtrows; s++) {
01973 INT row1 = blerows + s;
01974 INT row2 = blerows + s + khtrows;
01975
01976 INT offset = 0;
01977 if (LNO_Fancy_Tile) {
01978
01979
01980
01981
01982
01983
01984
01985
01986 offset = o[t->Iloop(s)];
01987 for (INT ss = khtrows-1; ss > s; ss--) {
01988 if (t->Iloop(s) == t->Iloop(ss)) {
01989 FmtAssert(t->Stripsz(s) % t->Stripsz(ss) == 0,
01990 ("multi-level blocking stripszs must be multiples"));
01991 FmtAssert(t->Stripsz(s) != t->Stripsz(ss),
01992 ("multi-level blocking stripszs can't be equal"));
01993 offset *= t->Stripsz(s) / t->Stripsz(ss);
01994 break;
01995 }
01996 }
01997 }
01998 s1.Zero_Row_Le(row1);
01999 s1.Ale()(row1, s) = -t->Stripsz(s);
02000 s1.Ale()(row1, t->Strips() + t->Iloop(s)) = 1;
02001 s1.Ble()[row1] = t->Stripsz(s) - 1 + offset;
02002
02003 s1.Zero_Row_Le(row2);
02004 s1.Ale()(row2, s) = t->Stripsz(s);
02005 s1.Ale()(row2, t->Strips() + t->Iloop(s)) = -1;
02006 s1.Ble()[row2] = -offset;
02007 }
02008 }
02009
02010 s1.Take_Gcds();
02011
02012
02013
02014
02015
02016 SYSTEM_OF_EQUATIONS s1x(0, 0, s1.Num_Vars(), &LNO_local_pool);
02017
02018 BOOL inconsistent;
02019 BOOL ok;
02020
02021 ok = s1.Copy_To_Work();
02022 FmtAssert(ok, ("Copy_To_Work() failed"));
02023 ok = s1.Sub_In_Equal(&inconsistent);
02024 FmtAssert(ok, ("Sub_In_Equal() failed"));
02025 FmtAssert(!inconsistent, ("inconsistent result from Sub_In_Equal()"));
02026
02027 for (i = lcols; i < lcols + khtcols; i++) {
02028 ok = SYSTEM_OF_EQUATIONS::Project(i, &inconsistent);
02029 FmtAssert(ok, ("Projection failed!"));
02030 FmtAssert(!inconsistent, ("Projection can't be inconsistent!"));
02031 }
02032
02033 s1x.Add_Work_Le();
02034 Row_Echelon(&s1x, 0, lcols);
02035 Rewrite_Bounds(td, &s1x, tlb, tub, tstep, &stack, first_in_stack, 2);
02036
02037
02038
02039
02040 WN* innerloop = stack.Bottom_nth(first_in_stack);
02041 WN* prev = WN_prev(innerloop);
02042 WN* block = LWN_Get_Parent(innerloop);
02043 LWN_Extract_From_Block(block, innerloop);
02044
02045 WN* d[SNL_MAX_LOOPS];
02046 WN* outerdo = NULL;
02047
02048 FmtAssert(!t->Rectangular() || t->Strips() == lcols, ("Huh?"));
02049
02050 for (i = 0; i < lcols; i++) {
02051 WN* index = WN_CreateIdname(td->tdata[i].symbol.WN_Offset(),
02052 td->tdata[i].symbol.St());
02053 d[i] = LWN_CreateDO(index, tlb[i], tub[i], tstep[i], WN_CreateBlock());
02054 if (i == 0)
02055 outerdo = d[i];
02056 LWN_Insert_Block_After(block, prev, d[i]);
02057 block = WN_do_body(d[i]);
02058 prev = NULL;
02059 }
02060
02061
02062
02063
02064
02065
02066 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02067 FmtAssert(t->Rectangular(),
02068 ("SNL_GEN_U_Ctiling: Should not be handling non-rectangular case"));
02069 for (INT i = 0; i < lcols; i++) {
02070 INT new_id = New_Construct_Id();
02071 WN_MAP32_Set(Prompf_Id_Map, d[i], new_id);
02072 Prompf_Info->Cache_Tile(old_ids[t->Iloop(i)], new_id);
02073 }
02074 }
02075
02076 for (i = 0; i < lcols; i++) {
02077 for (INT j = 0; j <= i; j++) {
02078 SNL_Change_Du_To_Index_Ldid(d[j], tlb[i], du, TRUE);
02079 SNL_Change_Du_To_Index_Ldid(d[j], tub[i], du, TRUE);
02080 SNL_Change_Du_To_Index_Ldid(d[j], tstep[i], du, TRUE);
02081 }
02082 Fix_Do_Du_Info(d[i], td, FALSE, NULL, 1);
02083 td->tdata[i].alias_wn = Find_Use_In_Exp(WN_step(d[i]),
02084 SYMBOL(WN_index(d[i])));
02085 }
02086 for (i = 0; i < lcols; i++) {
02087 if (td->tdata[i].alias_wn == NULL) {
02088 if (LNO_Verbose) {
02089 fprintf(TFile, "Disaster: step = ");
02090 Dump_WN(WN_step(d[i]), TFile, 2);
02091 }
02092 FmtAssert(td->tdata[i].alias_wn,
02093 ("Symbol %s missing in step!",
02094 SYMBOL(WN_index(d[i])).Name()));
02095 }
02096
02097 DO_LOOP_INFO* dli = CXX_NEW(DO_LOOP_INFO(&LNO_default_pool,
02098 NULL, NULL, NULL, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
02099 &LNO_default_pool);
02100 Set_Do_Loop_Info(d[i], dli);
02101 Is_True(Get_Do_Loop_Info(d[i]) == dli, ("Big bug"));
02102
02103 DO_LOOP_INFO* olddli;
02104 if (t->Rectangular()) {
02105 INT num = t->Stripsz(i);
02106 #if 0
02107 if (Cur_PU_Feedback) {
02108 INT32 orig_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_start(innerloop));
02109 if (orig_count>0) {
02110 INT32 orig_test = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_end(innerloop));
02111 INT32 outer_count = orig_count;
02112 INT32 outer_test = MAX(orig_test/num,1);
02113 LWN_Set_Frequency(d[i], outer_count);
02114 LWN_Set_Frequency(WN_start(d[i]), outer_count);
02115 LWN_Set_Frequency(WN_step(d[i]), outer_test-1);
02116
02117 LWN_Set_Frequency(innerloop, outer_test-1);
02118 LWN_Set_Frequency(WN_start(innerloop), outer_test-1);
02119 }
02120 }
02121 #endif
02122 olddli= Get_Do_Loop_Info(stack.Bottom_nth(first_in_stack+t->Iloop(i)));
02123
02124 dli->Est_Num_Iterations = (olddli->Est_Num_Iterations + num - 1) / num;
02125 olddli->Est_Num_Iterations = num;
02126
02127 if (olddli->Est_Max_Iterations_Index != -1) {
02128 dli->Est_Max_Iterations_Index =
02129 (olddli->Est_Max_Iterations_Index + num - 1) / num;
02130 }
02131 if (olddli->Est_Max_Iterations_Index == -1 ||
02132 olddli->Est_Max_Iterations_Index > num) {
02133 olddli->Est_Max_Iterations_Index = num;
02134 }
02135 }
02136 else {
02137
02138 #if 0
02139 if (Cur_PU_Feedback) {
02140 INT32 orig_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_start(innerloop));
02141 if (orig_count>0) {
02142 INT32 orig_test = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_end(innerloop));
02143 INT32 outer_count = orig_count;
02144 INT32 outer_test = MAX(orig_test/20,1);
02145 LWN_Set_Frequency(d[i], outer_count);
02146 LWN_Set_Frequency(WN_start(d[i]), outer_count);
02147 LWN_Set_Frequency(WN_step(d[i]), outer_test-1);
02148
02149 LWN_Set_Frequency(innerloop, outer_test-1);
02150 LWN_Set_Frequency(WN_start(innerloop), outer_test-1);
02151 LWN_Set_Frequency(WN_index(innerloop), outer_test-1);
02152 }
02153 }
02154 #endif
02155 olddli = Get_Do_Loop_Info(innerloop);
02156 dli->Est_Num_Iterations = (olddli->Est_Num_Iterations + 19) / 20;
02157 olddli->Est_Num_Iterations = 20;
02158 }
02159 dli->Is_Ivdep = olddli->Is_Ivdep;
02160 dli->Is_Concurrent_Call = olddli->Is_Concurrent_Call;
02161 dli->Concurrent_Directive = olddli->Concurrent_Directive;
02162 dli->Num_Iterations_Symbolic = olddli->Num_Iterations_Symbolic;
02163 if (dli->Est_Num_Iterations < 1)
02164 dli->Est_Num_Iterations = 1;
02165
02166 LWN_Copy_Linenumber(innerloop, d[i]);
02167 }
02168
02169 if (outerdo) {
02170 region.First = outerdo;
02171 region.Last = outerdo;
02172
02173 SNL_Change_Reduction_Loop_Stmts(plist, stack.Bottom_nth(first_in_stack),
02174 outerdo);
02175 }
02176
02177
02178
02179
02180
02181 s1.Add_Soe(&s1x);
02182 Row_Echelon(&s1, lcols, khtcols);
02183 Rewrite_Bounds(td, &s1, NULL, NULL, NULL, &stack, first_in_stack, 1);
02184
02185 LWN_Insert_Block_After(block, prev, innerloop);
02186
02187
02188
02189
02190
02191
02192 BOOL ttransformation_invalid = FALSE;
02193
02194 DOLOOP_STACK shortstack(&LNO_local_pool);
02195 Build_Doloop_Stack(LWN_Get_Parent(outerdo), &shortstack);
02196 Renumber_Loops(outerdo, outerdo, dg);
02197 LNO_Build_Access(outerdo, &shortstack, &LNO_default_pool);
02198
02199
02200 if (outerdo) {
02201 SNL_Expand_Reduction_Deps(outerdo);
02202
02203 Fix_Do_Du_Info(outerdo, td, TRUE, NULL, 0);
02204 }
02205
02206
02207
02208
02209
02210
02211
02212
02213
02214
02215
02216
02217 SNL_Rebuild_Access_Arrays(stack.Bottom_nth(first_in_stack));
02218 LS_IN_LOOP loop_ls(stack.Bottom_nth(first_in_stack), dg, &LNO_local_pool);
02219 INT curdepth = loop_ls.Good_Depth - lcols;
02220 INT maxdepth = loop_ls.Good_Depth + nloops - lcols;
02221
02222
02223
02224
02225 HASH_TABLE<EINDEX16,INT>
02226 edge_table(MIN(dg->Get_Edge_Count(),512),&LNO_local_pool);
02227
02228
02229
02230 WN* awn;
02231 LS_IN_LOOP_ITER ali1(&loop_ls);
02232 while (awn = ali1.Step()) {
02233 Is_True(loop_ls.In(awn) > 0, ("Bug in ls_in_loop_iter"));
02234 DOLOOP_STACK astack(&LNO_local_pool);
02235 Build_Doloop_Stack(awn, &astack);
02236 VINDEX16 v = dg->Get_Vertex(awn);
02237 if (v == 0)
02238 continue;
02239
02240 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = dg->Get_Next_Out_Edge(e)) {
02241 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
02242 if (bwn && loop_ls.In(bwn))
02243 edge_table.Enter(e, 1);
02244 }
02245 }
02246
02247
02248
02249 LS_IN_LOOP_ITER ali(&loop_ls);
02250 while (awn = ali.Step()) {
02251 INT alex = loop_ls.In(awn);
02252 DOLOOP_STACK astack(&LNO_local_pool);
02253 Build_Doloop_Stack(awn, &astack);
02254 Is_True(alex > 0, ("Bug in ls_in_loop_iter"));
02255 VINDEX16 v = dg->Get_Vertex(awn);
02256 if (v == 0)
02257 continue;
02258
02259 EINDEX16 enext = 0;
02260 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
02261 enext = dg->Get_Next_Out_Edge(e);
02262 if (edge_table.Find(e) != 1)
02263 continue;
02264
02265 INT components = dg->Depv_Array(e)->Num_Dim();
02266 FmtAssert(components >= curdepth,
02267 ("edge=%d components=%d curdepth=%d",
02268 e, components, curdepth));
02269
02270 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
02271 INT blex = loop_ls.In(bwn);
02272
02273 if (t->Rectangular() && components >= maxdepth) {
02274
02275
02276
02277 for (INT s = 0; s < t->Strips(); s++) {
02278
02279
02280
02281
02282
02283
02284
02285
02286 if (SNL_Update_Strip_Dependence(loop_ls.Depth - lcols, s,
02287 t->Iloop(s),
02288 e, awn, bwn, alex, blex)) {
02289 ttransformation_invalid = TRUE;
02290 SNL_DEBUG1(1, "general tiling e=%d seemingly illegal?!\n", e);
02291 }
02292 }
02293 edge_table.Remove(e);
02294 }
02295 }
02296 }
02297
02298
02299
02300
02301 LS_IN_LOOP_ITER ali2(&loop_ls);
02302 while (awn = ali2.Step()) {
02303 INT alex = loop_ls.In(awn);
02304 DOLOOP_STACK astack(&LNO_local_pool);
02305 Build_Doloop_Stack(awn, &astack);
02306 Is_True(alex > 0, ("Bug in ls_in_loop_iter"));
02307 VINDEX16 v = dg->Get_Vertex(awn);
02308 if (v == 0)
02309 continue;
02310
02311 EINDEX16 enext = 0;
02312 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
02313 enext = dg->Get_Next_Out_Edge(e);
02314 if (edge_table.Find(e) != 1)
02315 continue;
02316
02317
02318 EINDEX16 econj = dg->Get_Edge(dg->Get_Sink(e), dg->Get_Source(e));
02319
02320
02321 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
02322 INT blex = loop_ls.In(bwn);
02323 Is_True(blex > 0, ("Bug in ls_in_loop_iter"));
02324 if (econj && e != econj && edge_table.Find(econj)) {
02325 if (enext == econj)
02326 enext = dg->Get_Next_Out_Edge(e);
02327 edge_table.Remove(econj);
02328 dg->Delete_Array_Edge(econj);
02329 }
02330 edge_table.Remove(e);
02331 dg->Delete_Array_Edge(e);
02332
02333 DOLOOP_STACK bstack(&LNO_local_pool);
02334 Build_Doloop_Stack(bwn, &bstack);
02335 BOOL ok = dg->Add_Edge( awn, &astack, bwn, &bstack, alex < blex);
02336 if (!ok) {
02337 LNO_Erase_Dg_From_Here_In(awn, dg);
02338 LNO_Erase_Dg_From_Here_In(bwn, dg);
02339 }
02340 }
02341 }
02342 if (warn_lexneg && ttransformation_invalid)
02343 DevWarn("General tiling transformation invalid?!");
02344 }
02345
02346 if (Cur_PU_Feedback) {
02347 for (INT i = 0; i <= first_in_stack; ++i) {
02348 WN *cur_loop = stack.Bottom_nth(first_in_stack);
02349 WN *tile_loop = Outer_Tile(cur_loop, Du_Mgr);
02350 if (tile_loop) {
02351 if (WN_MAP32_Get(WN_MAP_FEEDBACK, tile_loop) == 0) {
02352 INT32 count = WN_MAP32_Get(WN_MAP_FEEDBACK, cur_loop);
02353 INT32 step = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_step(cur_loop));
02354 step = MAX(step/20,1);
02355 INT num_iter = MAX(1,step/count);
02356 LWN_Set_Frequency(WN_start(cur_loop), MAX(count/20,1));
02357 LWN_Set_Frequency(cur_loop, MAX(count/20,1));
02358
02359 for (INT j = 0; j<stack.Elements(); j++) {
02360 WN* inter_loop = stack.Bottom_nth(j);
02361 if (inter_loop == tile_loop) {
02362 LWN_Set_Frequency(tile_loop, count);
02363 LWN_Set_Frequency(WN_start(tile_loop), count);
02364 LWN_Set_Frequency(WN_step(tile_loop), count*num_iter);
02365 break;
02366 } else {
02367 INT32 count1 = WN_MAP32_Get(WN_MAP_FEEDBACK, inter_loop);
02368 LWN_Set_Frequency(WN_step(inter_loop), count*num_iter);
02369 LWN_Set_Frequency(WN_start(inter_loop), count1*num_iter);
02370 LWN_Set_Frequency(inter_loop, count1*num_iter);
02371 count = count1;
02372 }
02373 }
02374 }
02375 }
02376 }
02377 }
02378
02379 if (uinv)
02380 CXX_DELETE(uinv, &LNO_local_pool);
02381 IMAT::Set_Default_Pool(old_dflt_pool);
02382 CXX_DELETE(td, &LNO_local_pool);
02383 MEM_POOL_Pop(&LNO_local_pool);
02384
02385 if (t)
02386 Renumber_Loops(region.First, region.Last, dg);
02387 if (!Valid_SNL_Region(region))
02388 DevWarn("SNL_GEN_U_Ctiling: Invalid SNL_REGION [0x%p,0x%p]",
02389 region.First, region.Last);
02390 return region;
02391 }
02392
02393
02394
02395
02396
02397
02398
02399
02400
02401
02402 extern WN* SNL_GEN_Permute_Loops(WN* wn_outer,
02403 INT permutation[],
02404 INT nloops,
02405 BOOL warn_lexneg)
02406 {
02407 if (nloops == 0 || Identity_Permutation(permutation, nloops))
02408 return wn_outer;
02409
02410 if (LNO_Verbose) {
02411 Print_Interchange(stdout, wn_outer, permutation, nloops);
02412 Print_Interchange(TFile, wn_outer, permutation, nloops);
02413 }
02414 IMAT* unimodular =
02415 CXX_NEW(IMAT(nloops, nloops, &LNO_local_pool), &LNO_local_pool);
02416 for (INT i = 0; i < nloops; i++)
02417 for (INT j = 0; j < nloops; j++)
02418 (*unimodular)(i,j) = j == permutation[i] ? 1 : 0;
02419 SNL_BOUNDS_INFO* bi =
02420 CXX_NEW(SNL_BOUNDS_INFO(&LNO_local_pool), &LNO_local_pool);
02421 DOLOOP_STACK stack(&LNO_local_pool);
02422 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
02423 Build_Doloop_Stack(wn_inner, &stack);
02424 bi->Outermost_Depth() = Do_Loop_Depth(wn_outer);
02425 for (INT d = Do_Loop_Depth(wn_outer); d <= Do_Loop_Depth(wn_inner); d++)
02426 bi->Collect_Do_Info(stack.Bottom_nth(d));
02427 bi->Conditionals().Add_Vars(bi->Bounds().Num_Vars() -
02428 bi->Conditionals().Num_Vars());
02429 bi->Canonicize(nloops, &stack, Do_Loop_Depth(wn_outer));
02430 SNL_GEN_U_Ctiling(wn_outer, nloops, unimodular, NULL, bi, NULL,
02431 EST_REGISTER_USAGE(), warn_lexneg, TRUE);
02432 return wn_outer;
02433 }
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449 static WN* Twod_Setbound(SNL_MONO m,
02450 SYMBOL symbol,
02451 WN* loop0,
02452 WN* loop1,
02453 INT u,
02454 BOOL lb,
02455 BOOL imperfect)
02456 {
02457 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02458 DU_MANAGER* du = Du_Mgr;
02459
02460 TYPE_ID wtype = Do_Wtype(loop1);
02461 WN* ploop1 = LWN_Get_Parent(loop1);
02462 OPCODE ldop = OPCODE_make_op(OPR_LDID, wtype, wtype);
02463 OPCODE stop = OPCODE_make_op(OPR_STID, MTYPE_V, wtype);
02464
02465 WN* alias_wn = NULL;
02466 WN* bexpr;
02467
02468
02469
02470
02471 if (lb) {
02472 bexpr = WN_kid0(WN_start(loop1));
02473 alias_wn = WN_CreateLdid(ldop, symbol.WN_Offset(), symbol.St(),
02474 Be_Type_Tbl(wtype));
02475 WN_kid0(WN_start(loop1)) = alias_wn;
02476 LWN_Set_Parent(WN_kid0(WN_start(loop1)), WN_start(loop1));
02477 }
02478 else {
02479 Upper_Bound_Standardize(WN_end(loop1));
02480 bexpr = SNL_UBexp(WN_end(loop1));
02481 alias_wn = WN_CreateLdid(ldop, symbol.WN_Offset(), symbol.St(),
02482 Be_Type_Tbl(wtype));
02483 SNL_UBexp(WN_end(loop1)) = alias_wn;
02484 LWN_Set_Parent(SNL_UBexp(WN_end(loop1)), WN_end(loop1));
02485 }
02486 Create_alias(Alias_Mgr, alias_wn);
02487
02488
02489
02490 WN* stmt = LWN_CreateStid(stop, alias_wn, bexpr);
02491 LWN_Copy_Linenumber(loop1, stmt);
02492 LWN_Copy_Frequency_Tree(stmt,loop1);
02493 LWN_Insert_Block_Before(ploop1, loop1, stmt);
02494 du->Add_Def_Use(stmt, alias_wn);
02495 du->Ud_Get_Def(alias_wn)->Set_loop_stmt(loop0);
02496
02497
02498
02499
02500
02501 SYMBOL sym0 = SYMBOL(WN_index(loop0));
02502 sym0.Type = Do_Wtype(loop0);
02503
02504 if (m == SNL_MONO_OTHER) {
02505 OPCODE opc = OPCODE_make_op(lb ? OPR_MAX : OPR_MIN, wtype, MTYPE_V);
02506 for (INT i = 1; i < u; i++) {
02507 WN* texpr = SNL_Copy_Exp(bexpr);
02508 Add_To_Symbol(texpr, i, sym0);
02509 bexpr = LWN_CreateExp2(opc, bexpr, texpr);
02510 }
02511 WN_kid0(stmt) = bexpr;
02512 LWN_Set_Parent(bexpr, stmt);
02513 }
02514 else if (m == SNL_MONO_INVARIANT ||
02515 (m == SNL_MONO_DEC && lb) ||
02516 (m == SNL_MONO_INC && !lb)) {
02517
02518 }
02519 else {
02520
02521 Add_To_Symbol(bexpr, u-1, sym0);
02522 }
02523
02524 if (imperfect)
02525 Increase_By(stmt, (lb ? 1 : -1), NULL, -1);
02526
02527 return stmt;
02528 }
02529
02530
02531
02532
02533
02534
02535
02536 static void Set_Loop_Statements(WN* newbody,
02537 WN* wn_old,
02538 WN* wn_new)
02539 {
02540 LWN_ITER* itr = LWN_WALK_TreeIter(newbody);
02541 for (; itr != NULL; itr = LWN_WALK_TreeNext(itr)) {
02542 WN* wn_ldid = itr->wn;
02543 if (!WN_operator(wn_ldid))
02544 continue;
02545 DEF_LIST* def_list = Du_Mgr->Ud_Get_Def(wn_ldid);
02546 if (def_list != NULL && def_list->Loop_stmt() == wn_old)
02547 def_list->Set_loop_stmt(wn_new);
02548 }
02549 }
02550
02551
02552
02553
02554
02555
02556
02557
02558
02559 extern SNL_REGION SNL_GEN_2D_Regtile(SNL_NEST_INFO* ni,
02560 INT tilesz)
02561 {
02562 const INT bufsz = 64;
02563 char buf[bufsz];
02564 INT bufcnt;
02565
02566 DOLOOP_STACK* stack = &ni->Dostack();
02567 INT first_in_stack = ni->Depth_Inner() - 1;
02568 WN* loop0 = stack->Bottom_nth(first_in_stack);
02569 WN* loop1 = stack->Bottom_nth(first_in_stack+1);
02570
02571 SNL_REGION region(loop0,loop0);
02572
02573 MEM_POOL_Push_Freeze(&SNL_local_pool);
02574
02575 {
02576 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02577 DU_MANAGER* du = Du_Mgr;
02578
02579 EST_REGISTER_USAGE est_register_usage =
02580 Get_Do_Loop_Info(loop1)->Est_Register_Usage;
02581
02582 Is_True(Step_Size(loop0) == 1, ("Unit step required: loop 0"));
02583 Is_True(Step_Size(loop1) == 1, ("Unit step required: loop 1"));
02584
02585 ST* st0 = WN_st(WN_index(loop0));
02586 ST* st1 = WN_st(WN_index(loop1));
02587 WN_OFFSET offset0 = WN_offset(WN_index(loop0));
02588 WN_OFFSET offset1 = WN_offset(WN_index(loop1));
02589 TYPE_ID wtype0 = Do_Wtype(loop0);
02590 TYPE_ID wtype1 = Do_Wtype(loop1);
02591 TY_IDX ty0 = Be_Type_Tbl(wtype0);
02592 TY_IDX ty1 = Be_Type_Tbl(wtype1);
02593 SYMBOL symbol0(WN_index(loop0));
02594 WN* alias_wn0 = WN_step(loop0);
02595 WN* alias_wn1 = WN_step(loop1);
02596
02597 symbol0.Type = wtype0;
02598
02599 OPCODE op_le0 = OPCODE_make_op(OPR_LE, Boolean_type, wtype0);
02600 OPCODE op_le1 = OPCODE_make_op(OPR_LE, Boolean_type, wtype1);
02601 OPCODE op_ldid0 = OPCODE_make_op(OPR_LDID, wtype0, wtype0);
02602 OPCODE op_ldid1 = OPCODE_make_op(OPR_LDID, wtype1, wtype1);
02603 OPCODE op_stid0 = OPCODE_make_op(OPR_STID, MTYPE_V, wtype0);
02604 OPCODE op_stid1 = OPCODE_make_op(OPR_STID, MTYPE_V, wtype1);
02605 OPCODE op_add0 = OPCODE_make_op(OPR_ADD, wtype0, MTYPE_V);
02606 OPCODE op_add1 = OPCODE_make_op(OPR_ADD, wtype1, MTYPE_V);
02607
02608
02609
02610
02611 WN* nest_copy = LWN_Copy_Tree(loop0, TRUE, LNO_Info_Map);
02612 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02613 STACK<WN*> st_old(&PROMPF_pool);
02614 STACK<WN*> st_new(&PROMPF_pool);
02615 Prompf_Assign_Ids(loop0, nest_copy, &st_old, &st_new, FALSE);
02616 INT nloops = st_old.Elements();
02617 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02618 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02619 for (INT i = 0; i < nloops; i++) {
02620 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
02621 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
02622 }
02623 Prompf_Info->Register_SStrip(old_ids, new_ids, nloops);
02624 }
02625
02626 WN* wn_holder[2];
02627 wn_holder[0] = loop0;
02628 wn_holder[1] = nest_copy;
02629 LWN_Scale_Frequency(nest_copy, 1.0/tilesz);
02630 LWN_Scale_Frequency(WN_start(nest_copy), 1.0/tilesz);
02631 LWN_Scale_Frequency(WN_index(nest_copy), 1.0/tilesz);
02632 if (red_manager) red_manager->Unroll_Update(wn_holder, 2);
02633 Unrolled_DU_Update(wn_holder, 2, Do_Loop_Depth(loop0)-1, TRUE, FALSE);
02634 if (!dg->Add_Deps_To_Copy_Block(loop0, nest_copy, FALSE)) {
02635 SNL_DEBUG0(0, "Add_Deps_To_Copy_Block() failed -- continuing");
02636 LWN_Update_Dg_Delete_Tree(nest_copy, dg);
02637 LNO_Erase_Dg_From_Here_In(nest_copy, dg);
02638 LWN_Delete_Tree(nest_copy);
02639 MEM_POOL_Pop_Unfreeze(&SNL_local_pool);
02640 if (!Valid_SNL_Region(region))
02641 DevWarn("SNL_General_2D_Regtile: Invalid SNL_REGION [0x%p,0x%p]",
02642 region.First, region.Last);
02643 return region;
02644 }
02645
02646
02647
02648
02649 BOOL imperf_above = WN_prev(loop1) ? TRUE : FALSE;
02650 BOOL imperf_below = WN_next(loop1) ? TRUE : FALSE;
02651
02652
02653
02654 region = SNL_Regtile_Loop(loop0, tilesz, 2, TRUE,
02655 est_register_usage,
02656 &ni->Privatizability_Info(),
02657 ni->Depth_Inner() - 1, TRUE);
02658
02659
02660
02661
02662
02663 if (imperf_above || imperf_below) {
02664 WN* wnprnt = LWN_Get_Parent(loop1);
02665 WN* wnnext = NULL;
02666 for (WN* wn = WN_first(wnprnt); wn; wn = wnnext) {
02667 wnnext = WN_next(wn);
02668 if (wn != loop1) {
02669 LWN_Extract_From_Block(wnprnt, wn);
02670 LWN_Delete_Tree(wn);
02671 }
02672 }
02673 }
02674
02675
02676
02677
02678
02679 bufcnt = sprintf(buf, "$r2d_");
02680 (SYMBOL(WN_index(loop0))).Name(buf+bufcnt, bufsz-bufcnt);
02681 Replace_Symbol(nest_copy, SYMBOL(st0,offset0,wtype0),
02682 Create_Preg_Symbol(buf, wtype0), NULL, nest_copy);
02683
02684 LWN_Delete_Tree(WN_kid0(WN_start(nest_copy)));
02685 WN* startld = LWN_CreateLdid(op_ldid0, alias_wn0);
02686 WN_kid0(WN_start(nest_copy)) = startld;
02687 LWN_Set_Parent(WN_kid0(WN_start(nest_copy)), WN_start(nest_copy));
02688
02689 LWN_Delete_Tree(SNL_UBexp(WN_end(nest_copy)));
02690 WN* ubld = LWN_CreateLdid(op_ldid0, alias_wn0);
02691 SNL_UBexp(WN_end(nest_copy), NULL) = LWN_CreateExp2(op_add0, ubld,
02692 LWN_Make_Icon(wtype0, tilesz-1));
02693 LWN_Set_Parent(SNL_UBexp(WN_end(nest_copy), NULL), WN_end(nest_copy));
02694
02695
02696
02697
02698
02699 Fix_Do_Du_Info(WN_start(nest_copy), NULL, FALSE, loop0, 0);
02700 Fix_Do_Du_Info(WN_end(nest_copy), NULL, FALSE, loop0, 0);
02701 Fix_Do_Du_Info(nest_copy, NULL, TRUE, NULL, 0);
02702
02703
02704
02705
02706
02707
02708
02709
02710
02711
02712
02713
02714
02715
02716
02717
02718
02719
02720 SNL_MONO lmono = Mono(WN_kid0(WN_start(loop1)), symbol0);
02721 SNL_MONO umono = Mono(SNL_UBexp(WN_end(loop1)), symbol0);
02722
02723
02724
02725 BOOL lextra = imperf_above || lmono != SNL_MONO_INVARIANT;
02726 BOOL uextra = imperf_below || umono != SNL_MONO_INVARIANT;
02727
02728
02729
02730 SYMBOL lstar = Create_Preg_Symbol("$lstar", wtype0);
02731 SYMBOL ustar = Create_Preg_Symbol("$ustar", wtype0);
02732
02733 WN* lstid = Twod_Setbound(lmono, lstar, loop0, loop1, tilesz,
02734 TRUE, imperf_above);
02735 WN* ustid = Twod_Setbound(umono, ustar, loop0, loop1, tilesz,
02736 FALSE, imperf_below);
02737
02738
02739
02740
02741
02742
02743
02744
02745
02746 WN* luse = LWN_CreateLdid(op_ldid1, lstid);
02747 WN* uuse = LWN_CreateLdid(op_ldid1, ustid);
02748 du->Add_Def_Use(lstid, luse);
02749 du->Add_Def_Use(ustid, uuse);
02750 du->Ud_Get_Def(luse)->Set_loop_stmt(loop0);
02751 du->Ud_Get_Def(uuse)->Set_loop_stmt(loop0);
02752
02753 WN* if1 = LWN_CreateIf(LWN_CreateExp2(op_le1, luse, uuse),
02754 WN_CreateBlock(), WN_CreateBlock());
02755 WN* p1block = LWN_Get_Parent(loop1);
02756 LWN_Insert_Block_Before(p1block, loop1, if1);
02757 LWN_Copy_Linenumber(loop1, if1);
02758 LWN_Copy_Frequency_Tree(if1, loop1);
02759 LWN_Extract_From_Block(loop1);
02760
02761
02762 BOOL has_regions=Find_SCF_Inside(if1,OPC_REGION)!=NULL;
02763 IF_INFO *ii = CXX_NEW(IF_INFO(&LNO_default_pool,TRUE,has_regions),
02764 &LNO_default_pool);
02765 WN_MAP_Set(LNO_Info_Map,if1,(void *)ii);
02766 DOLOOP_STACK* stackx = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02767 &LNO_local_pool);
02768 Build_Doloop_Stack(if1, stackx);
02769 LNO_Build_If_Access(if1, stackx);
02770 CXX_DELETE(stackx, &LNO_local_pool);
02771
02772
02773
02774
02775
02776
02777 LWN_Insert_Block_After(WN_else(if1), NULL, nest_copy);
02778
02779
02780
02781
02782 LWN_Delete_Tree(WN_kid0(WN_start(loop1)));
02783 WN_kid0(WN_start(loop1)) = LWN_CreateLdid(op_ldid1, lstid);
02784 LWN_Set_Parent(WN_kid0(WN_start(loop1)), WN_start(loop1));
02785
02786 LWN_Delete_Tree(SNL_UBexp(WN_end(loop1)));
02787 SNL_UBexp(WN_end(loop1)) = LWN_CreateLdid(op_ldid1, ustid);
02788 LWN_Set_Parent(SNL_UBexp(WN_end(loop1)), WN_end(loop1));
02789
02790 du->Add_Def_Use(lstid, WN_kid0(WN_start(loop1)));
02791 du->Add_Def_Use(ustid, SNL_UBexp(WN_end(loop1), NULL));
02792
02793
02794
02795 LWN_Insert_Block_After(WN_then(if1), NULL, loop1);
02796 Fix_Do_Du_Info(loop0, NULL, TRUE, NULL, 0);
02797
02798
02799
02800 if (lextra) {
02801
02802
02803
02804
02805
02806
02807
02808
02809 WN* lpcopy = LWN_Copy_Tree(nest_copy, TRUE, LNO_Info_Map);
02810 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02811 STACK<WN*> st_old(&PROMPF_pool);
02812 STACK<WN*> st_new(&PROMPF_pool);
02813 Prompf_Assign_Ids(nest_copy, lpcopy, &st_old, &st_new, FALSE);
02814 INT nloops = st_old.Elements();
02815 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02816 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02817 for (INT i = 0; i < nloops; i++) {
02818 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
02819 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
02820 }
02821 Prompf_Info->Register_Startup(old_ids, new_ids, nloops);
02822 }
02823 wn_holder[0] = nest_copy;
02824 wn_holder[1] = lpcopy;
02825 if (red_manager) red_manager->Unroll_Update(wn_holder, 2);
02826 Unrolled_DU_Update(wn_holder, 2, Do_Loop_Depth(nest_copy)-1, TRUE, FALSE);
02827 if (!dg->Add_Deps_To_Copy_Block(nest_copy, lpcopy, FALSE)) {
02828 SNL_DEBUG0(0, "Add_Deps_To_Copy_Block() failed -- continuing");
02829 LNO_Erase_Dg_From_Here_In(lpcopy, dg);
02830 LWN_Update_Dg_Delete_Tree(lpcopy, dg);
02831 Unmapped_Vertices_Here_Out(LWN_Get_Parent(nest_copy));
02832 }
02833
02834
02835
02836 WN* wn_prev = NULL;
02837 WN *wn;
02838 for (wn = WN_last(WN_do_body(lpcopy)); wn; wn = wn_prev) {
02839 wn_prev = WN_prev(wn);
02840 if (WN_opcode(wn) == OPC_DO_LOOP)
02841 break;
02842 LWN_Extract_From_Block(WN_do_body(lpcopy), wn);
02843 LWN_Delete_Tree(wn);
02844 }
02845 Is_True(wn, ("where'd the DO go?"));
02846
02847
02848
02849
02850
02851 if (lmono == SNL_MONO_INVARIANT) {
02852 WN* wn_encloser = Enclosing_Loop(LWN_Get_Parent(wn));
02853 FmtAssert(wn_encloser != NULL, ("Not a 2D regtile??"));
02854 WN* newbody = WN_do_body(wn);
02855 WN_do_body(wn) = WN_CreateBlock();
02856 Set_Loop_Statements(newbody, wn, wn_encloser);
02857 LWN_Set_Parent(WN_do_body(wn), wn);
02858 Replace_Ldid_With_Exp_Copy(SYMBOL(st1,offset1,MTYPE_V),
02859 newbody, WN_kid0(WN_start(wn)), du);
02860 LWN_Insert_Block_Before(WN_do_body(lpcopy), wn, newbody);
02861 LWN_Delete_Tree(LWN_Extract_From_Block(WN_do_body(lpcopy), wn));
02862 }
02863 else {
02864 WN* luse = LWN_CreateLdid(op_ldid1, lstid);
02865 du->Add_Def_Use(lstid, luse);
02866 LWN_Delete_Tree(SNL_UBexp(WN_end(wn), NULL));
02867 SNL_UBexp(WN_end(wn)) = LWN_CreateExp2(op_add1, luse,
02868 LWN_Make_Icon(wtype1, -1));
02869 LWN_Set_Parent(SNL_UBexp(WN_end(wn), NULL), WN_end(wn));
02870 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02871 dli->Est_Num_Iterations = 2;
02872 dli->Is_Ivdep = FALSE;
02873 dli->Is_Concurrent_Call = FALSE;
02874 dli->Concurrent_Directive = FALSE;
02875 dli->Num_Iterations_Symbolic = FALSE;
02876 dli->Num_Iterations_Profile = FALSE;
02877 }
02878
02879 LWN_Insert_Block_After(WN_then(if1), NULL, lpcopy);
02880
02881
02882 Fix_Do_Du_Info(WN_start(lpcopy), NULL, TRUE, loop0, 0);
02883 Fix_Do_Du_Info(WN_end(lpcopy), NULL, TRUE, loop0, 0);
02884 Fix_Do_Du_Info(lpcopy, NULL, TRUE, NULL, 0);
02885 }
02886
02887 if (uextra) {
02888
02889
02890
02891
02892
02893
02894
02895
02896 WN* lpcopy = LWN_Copy_Tree(nest_copy, TRUE, LNO_Info_Map);
02897 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02898 STACK<WN*> st_old(&PROMPF_pool);
02899 STACK<WN*> st_new(&PROMPF_pool);
02900 Prompf_Assign_Ids(nest_copy, lpcopy, &st_old, &st_new, FALSE);
02901 INT nloops = st_old.Elements();
02902 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02903 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
02904 for (INT i = 0; i < nloops; i++) {
02905 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
02906 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
02907 }
02908 Prompf_Info->Register_Shutdown(old_ids, new_ids, nloops);
02909 }
02910 wn_holder[0] = nest_copy;
02911 wn_holder[1] = lpcopy;
02912 if (red_manager) red_manager->Unroll_Update(wn_holder, 2);
02913 Unrolled_DU_Update(wn_holder, 2, Do_Loop_Depth(nest_copy)-1, TRUE, FALSE);
02914 if (!dg->Add_Deps_To_Copy_Block(nest_copy, lpcopy, FALSE)) {
02915 SNL_DEBUG0(0, "Add_Deps_To_Copy_Block() failed -- continuing");
02916 LNO_Erase_Dg_From_Here_In(lpcopy, dg);
02917 Unmapped_Vertices_Here_Out(LWN_Get_Parent(nest_copy));
02918 LWN_Update_Dg_Delete_Tree(lpcopy, dg);
02919 }
02920
02921
02922
02923 WN* wn_next = NULL;
02924 WN *wn;
02925 for (wn = WN_first(WN_do_body(lpcopy)); wn; wn = wn_next) {
02926 wn_next = WN_next(wn);
02927 if (WN_opcode(wn) == OPC_DO_LOOP)
02928 break;
02929 LWN_Extract_From_Block(WN_do_body(lpcopy), wn);
02930 LWN_Delete_Tree(wn);
02931 }
02932 Is_True(wn, ("where'd the DO go?"));
02933
02934
02935
02936
02937
02938 if (umono == SNL_MONO_INVARIANT) {
02939 WN* wn_encloser = Enclosing_Loop(LWN_Get_Parent(wn));
02940 FmtAssert(wn_encloser != NULL, ("Not a 2D regtile??"));
02941 WN* newbody = WN_do_body(wn);
02942 WN_do_body(wn) = NULL;
02943 Set_Loop_Statements(newbody, wn, wn_encloser);
02944 Replace_Ldid_With_Exp_Copy(SYMBOL(st1,offset1,MTYPE_V),
02945 newbody, SNL_UBexp(WN_end(wn)), du);
02946 LWN_Insert_Block_Before(WN_do_body(lpcopy), wn, newbody);
02947 LWN_Delete_Tree(LWN_Extract_From_Block(WN_do_body(lpcopy), wn));
02948 }
02949 else {
02950 WN* uuse = LWN_CreateLdid(op_ldid1, ustid);
02951 du->Add_Def_Use(ustid, uuse);
02952 LWN_Delete_Tree(WN_kid0(WN_start(wn)));
02953 WN_kid0(WN_start(wn)) = LWN_CreateExp2(op_add1, uuse,
02954 LWN_Make_Icon(wtype1, 1));
02955 LWN_Set_Parent(WN_kid0(WN_start(wn)), WN_start(wn));
02956 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
02957 dli->Est_Num_Iterations = 2;
02958 dli->Is_Ivdep = FALSE;
02959 dli->Is_Concurrent_Call = FALSE;
02960 dli->Concurrent_Directive = FALSE;
02961 dli->Num_Iterations_Symbolic = FALSE;
02962 dli->Num_Iterations_Profile = FALSE;
02963 }
02964
02965 LWN_Insert_Block_Before(WN_then(if1), NULL, lpcopy);
02966
02967
02968 Fix_Do_Du_Info(WN_start(lpcopy), NULL, TRUE, loop0, 0);
02969 Fix_Do_Du_Info(WN_end(lpcopy), NULL, TRUE, loop0, 0);
02970 Fix_Do_Du_Info(lpcopy, NULL, TRUE, NULL, 2);
02971 }
02972
02973
02974
02975
02976
02977
02978
02979
02980
02981
02982
02983
02984
02985
02986
02987 SNL_Rebuild_Access_Arrays(loop0);
02988 LS_IN_LOOP loop_ls0(loop0, dg, &SNL_local_pool);
02989 LS_IN_LOOP loop_ls1(loop1, dg, &SNL_local_pool);
02990
02991 LS_IN_LOOP_ITER ali(&loop_ls0);
02992 WN* awn;
02993 while (awn = ali.Step()) {
02994 VINDEX16 v = dg->Get_Vertex(awn);
02995 if (v == 0)
02996 continue;
02997 EINDEX16 nexte = 0;
02998 BOOL ain1 = loop_ls1.In(awn) > 0;
02999 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = nexte) {
03000 nexte = dg->Get_Next_Out_Edge(e);
03001 Is_True(v == dg->Get_Source(e), ("Huh?"));
03002 WN* wsink = dg->Get_Wn(dg->Get_Sink(e));
03003 if (loop_ls0.In(wsink) && !(ain1 && loop_ls1.In(wsink)))
03004 dg->Delete_Array_Edge(e);
03005 }
03006 }
03007
03008 INT count = 0;
03009
03010 BOOL failed = FALSE;
03011 Renumber_Loops(region.First, region.Last, dg);
03012
03013 ali = &loop_ls0;
03014 while (awn = ali.Step()) {
03015 INT alex = loop_ls0.In(awn);
03016 BOOL ain1 = loop_ls1.In(awn) > 0;
03017
03018 if (OPCODE_is_load(WN_opcode(awn)))
03019 continue;
03020
03021 DOLOOP_STACK astack(&SNL_local_pool);
03022 Build_Doloop_Stack(awn, &astack);
03023 Is_True(alex > 0, ("Bug in ls_in_loop_iter"));
03024
03025 VINDEX16 v = dg->Get_Vertex(awn);
03026 if (v == 0)
03027 continue;
03028
03029 LS_IN_LOOP_ITER ali2(&loop_ls0);
03030 WN* bwn;
03031 while (bwn = ali2.Step()) {
03032 VINDEX16 v2 = dg->Get_Vertex(bwn);
03033 if (v2 == 0)
03034 continue;
03035
03036 BOOL is_load = OPCODE_is_load(WN_opcode(bwn));
03037 INT blex = loop_ls0.In(bwn);
03038 if (blex == 0 || (!is_load && blex < alex))
03039 continue;
03040
03041 if (ain1 && loop_ls1.In(bwn))
03042 continue;
03043
03044
03045 DOLOOP_STACK bstack(&SNL_local_pool);
03046 Build_Doloop_Stack(bwn, &bstack);
03047 count++;
03048
03049 #ifdef KEY
03050
03051
03052
03053
03054
03055
03056
03057
03058
03059
03060 if (astack.Elements() >= 15 ||
03061 bstack.Elements() >= 15) {
03062 LNO_Erase_Dg_From_Here_In(awn, dg);
03063 LNO_Erase_Dg_From_Here_In(bwn, dg);
03064 failed = TRUE;
03065 goto out;
03066 }
03067 #endif
03068
03069
03070
03071
03072
03073
03074 BOOL ok = dg->Add_Edge( awn, &astack, bwn, &bstack, alex < blex,
03075 FALSE);
03076 if (!ok) {
03077 LNO_Erase_Dg_From_Here_In(awn, dg);
03078 LNO_Erase_Dg_From_Here_In(bwn, dg);
03079 failed = TRUE;
03080 goto out;
03081 }
03082 }
03083 }
03084
03085 out:
03086 if (failed) {
03087
03088 LS_IN_LOOP_ITER ali(&loop_ls0);
03089 WN* awn;
03090 while (awn = ali.Step()) {
03091 VINDEX16 v = dg->Get_Vertex(awn);
03092 if (v) {
03093 EINDEX16 e;
03094 EINDEX16 enext = 0;
03095 for (e = dg->Get_In_Edge(v); e; e = enext) {
03096 enext = dg->Get_Next_In_Edge(e);
03097 dg->Delete_Array_Edge(e);
03098 }
03099 for (e = dg->Get_Out_Edge(v); e; e = enext) {
03100 enext = dg->Get_Next_Out_Edge(e);
03101 dg->Delete_Array_Edge(e);
03102 }
03103 dg->Delete_Vertex(v);
03104 }
03105 }
03106 }
03107
03108 SNL_DEBUG1(2, "Updated %d pairs for 2D regblock\n", count);
03109
03110 Renumber_Loops(region.First, region.Last, dg);
03111
03112
03113
03114 for (WN* wn_reg = region.First; wn_reg; wn_reg = WN_next(wn_reg)) {
03115 DOLOOP_STACK shortstack(&SNL_local_pool);
03116 Build_Doloop_Stack(LWN_Get_Parent(wn_reg), &shortstack);
03117 LNO_Build_Access(wn_reg, &shortstack, &LNO_default_pool);
03118 Optimize_Coupled_Loops(wn_reg, Du_Mgr);
03119 if (wn_reg == region.Last)
03120 break;
03121 }
03122 }
03123
03124 MEM_POOL_Pop_Unfreeze(&SNL_local_pool);
03125
03126 return region;
03127 }
03128