00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00049 #define __STDC_LIMIT_MACROS
00050 #include <stdint.h>
00051 #ifdef USE_PCH
00052 #include "lno_pch.h"
00053 #endif // USE_PCH
00054 #pragma hdrstop
00055
00056 #define snl_CXX "snl.cxx"
00057 const static char *rcs_id = snl_CXX "$Revision: 1.8 $";
00058
00059 #include <sys/types.h>
00060 #include <alloca.h>
00061 #include "snl.h"
00062 #include "snl_xbounds.h"
00063 #include "config_targ.h"
00064 #include "lwn_util.h"
00065 #include "lnoutils.h"
00066 #include "cxx_graph.h"
00067 #include "opt_du.h"
00068 #include "opt_alias_interface.h"
00069 #include "wintrinsic.h"
00070 #include "scalar_expand.h"
00071 #include "strtab.h"
00072 #include "dvector.h"
00073 #include "lnopt_main.h"
00074 #include "fb_whirl.h"
00075 #include "move.h"
00076 #include "small_trips.h"
00077 #include "sxlimit.h"
00078 #include "ir_reader.h"
00079 #include "sxlist.h"
00080 #include "debug.h"
00081 #include "permute.h"
00082 #include "tile.h"
00083 #include "prompf.h"
00084 #include "anl_driver.h"
00085 #include "cond.h"
00086 #include "wind_down.h"
00087 #include "ff_utils.h"
00088 #include "fb_info.h"
00089 #include "fb_whirl.h"
00090
00091 #pragma weak New_Construct_Id
00092
00093
00094
00095
00096
// Two file-scope pointers with external linkage, both initialised to NULL.
// Nothing in the visible part of this file reads or writes them.
// NOTE(review): presumably scratch slots meant to be poked from a debugger --
// confirm before removing.  Identifiers containing a double underscore are
// reserved to the implementation in C++, so a rename is advisable if these
// are still needed.
00097 char* ___SNL_Pcall = NULL;
00098 void* ___SNL_Pvall = NULL;
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112 extern void SNL_Change_Reduction_Loop_Stmts(SX_PLIST* plist,
00113 WN* from,
00114 WN* to)
00115 {
00116 DU_MANAGER* du = Du_Mgr;
00117 SNL_DEBUG2(3, "SNL_Change_Reduction_Loop_Stmts(0x%p, 0x%p)", from, to);
00118 FmtAssert(WN_opcode(to) == OPC_DO_LOOP && WN_opcode(from) == OPC_DO_LOOP,
00119 ("Bad to opcode %d or %d", WN_opcode(to), WN_opcode(from)));
00120
00121 if (plist != NULL) {
00122 SX_PITER ii(plist);
00123 INT cnt = 0;
00124 SX_PNODE *n;
00125 for (n = ii.First(); n; n = ii.Next()) {
00126 if (n->Has_Reduction())
00127 cnt++;
00128 }
00129 if (cnt == 0)
00130 return;
00131
00132 const SYMBOL** syms = CXX_NEW_ARRAY(const SYMBOL*, cnt,
00133 &MEM_local_pool);
00134 SX_PITER iii(plist);
00135 INT c = 0;
00136
00137 for (n = iii.First(); n; n = iii.Next()) {
00138 if (n->Has_Reduction())
00139 syms[c++] = &n->Symbol();
00140 }
00141
00142 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(from));
00143 for ( ; itr; itr = LWN_WALK_TreeNext(itr)) {
00144 WN* wn = itr->wn;
00145 OPERATOR opr = WN_operator(wn);
00146 if (opr == OPR_LDID) {
00147 SYMBOL symbol(wn);
00148 for (INT i = 0; i < cnt; i++) {
00149 if (symbol == *syms[i]) {
00150 WN* loop_stmt = du->Ud_Get_Def(wn)->Loop_stmt();
00151 if (loop_stmt == from) {
00152 du->Ud_Get_Def(wn)->Set_loop_stmt(to);
00153 SNL_DEBUG2(3, "SNL_Change_Reduction_Loop_Stmts: "
00154 "loop_stmt(0x%p)->0x%p", wn, to);
00155 }
00156 break;
00157 }
00158 }
00159 }
00160 }
00161 } else {
00162 if (red_manager == NULL)
00163 return;
00164 LWN_ITER* itr = LWN_WALK_TreeIter(WN_do_body(from));
00165 for ( ; itr; itr = LWN_WALK_TreeNext(itr)) {
00166 WN* wn = itr->wn;
00167 OPERATOR opr = WN_operator(wn);
00168 if (opr == OPR_LDID && red_manager->Which_Reduction(wn) != RED_NONE) {
00169 WN* loop_stmt = du->Ud_Get_Def(wn)->Loop_stmt();
00170 if (loop_stmt == from) {
00171 du->Ud_Get_Def(wn)->Set_loop_stmt(to);
00172 SNL_DEBUG2(3, "SNL_Change_Reduction_Loop_Stmts: "
00173 "loop_stmt(0x%p)->0x%p", wn, to);
00174 }
00175 }
00176 }
00177 }
00178 }
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188 extern void SNL_Expand_Reduction_Deps(WN* loop)
00189 {
00190 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00191 REDUCTION_MANAGER* rm = red_manager;
00192 if (rm == NULL)
00193 return;
00194
00195 for (EINDEX16 e = dg->Get_Edge(); e; e = dg->Get_Next_Edge(e)) {
00196 VINDEX16 v1 = dg->Get_Source(e);
00197 VINDEX16 v2 = dg->Get_Sink(e);
00198
00199 if (v1 == 0 || v2 == 0)
00200 continue;
00201
00202 WN* wn1 = dg->Get_Wn(v1);
00203 WN* wn2 = dg->Get_Wn(v2);
00204 FmtAssert(wn1 && wn2, ("Missing v->wn mapping"));
00205
00206
00207 WN* p1 = wn1;
00208 while (p1 && p1 != loop)
00209 p1 = LWN_Get_Parent(p1);
00210 if (p1 == NULL)
00211 continue;
00212
00213 WN* p2 = wn2;
00214 while (p2 && p2 != loop)
00215 p2 = LWN_Get_Parent(p2);
00216 if (p2 == NULL)
00217 continue;
00218
00219 INT wr1 = -1;
00220 INT wr2 = -1;
00221 if ((wr1=rm->Which_Reduction(wn1)) == 0 ||
00222 (wr2=rm->Which_Reduction(wn2)) == 0 ||
00223 (wr1 != wr2))
00224 continue;
00225
00226
00227 if (v1 == v2) {
00228 DEPV_ARRAY* array_e = dg->Depv_Array(e);
00229 FmtAssert(array_e, ("Edge %d (e) has no depv array!", e));
00230
00231 DEPV_LIST dl1(array_e, &LNO_local_pool);
00232 DEPV_LIST dl2(array_e, &LNO_local_pool);
00233
00234 DEPV_LIST* dl_new1 = Lex_Pos_Compose(&LNO_local_pool, &dl1, &dl2);
00235 Delete_DEPV_ARRAY(array_e, dg->Pool());
00236
00237 array_e = Create_DEPV_ARRAY(dl_new1, dg->Pool());
00238 FmtAssert(array_e, ("Create_DEPV_ARRAY() returned NULL"));
00239 dg->Set_Depv_Array(e, array_e);
00240 CXX_DELETE(dl_new1, &LNO_local_pool);
00241
00242 if (snl_debug >= 3) {
00243 fprintf(TFile, "changed reduction edge %d to: ", e);
00244 dg->Depv_Array(e)->Print(TFile);
00245 }
00246 }
00247 else if (v1 < v2) {
00248 EINDEX16 econj = dg->Get_Edge(v2, v1);
00249
00250 if (econj) {
00251 DEPV_ARRAY* array_e = dg->Depv_Array(e);
00252 DEPV_ARRAY* array_econj = dg->Depv_Array(econj);
00253 FmtAssert(array_e, ("Edge %d (e) has no depv array!", e));
00254 FmtAssert(array_econj, ("Edge %d (econj) has no depv array!", econj));
00255 DEPV_LIST dl1a(array_e, &LNO_local_pool);
00256 DEPV_LIST dl1b(array_e, &LNO_local_pool);
00257 DEPV_LIST dl2a(array_econj, &LNO_local_pool);
00258 DEPV_LIST dl2b(array_econj, &LNO_local_pool);
00259
00260 DEPV_LIST* dl_new1 = Lex_Pos_Compose(&LNO_local_pool, &dl1a, &dl2a);
00261 DEPV_LIST* dl_new2 = Lex_Pos_Compose(&LNO_local_pool, &dl2b, &dl1b);
00262 Delete_DEPV_ARRAY(array_e, dg->Pool());
00263 Delete_DEPV_ARRAY(array_econj, dg->Pool());
00264
00265 array_e = Create_DEPV_ARRAY(dl_new1, dg->Pool());
00266 array_econj = Create_DEPV_ARRAY(dl_new2, dg->Pool());
00267 FmtAssert(array_e, ("Create_DEPV_ARRAY() returned NULL"));
00268 FmtAssert(array_econj, ("Create_DEPV_ARRAY() returned NULL"));
00269 dg->Set_Depv_Array(e, array_e);
00270 dg->Set_Depv_Array(econj, array_econj);
00271 CXX_DELETE(dl_new1, &LNO_local_pool);
00272 CXX_DELETE(dl_new2, &LNO_local_pool);
00273
00274 if (snl_debug >= 3) {
00275 fprintf(TFile, "changed reduction edge %d and %d to ", e, econj);
00276 dg->Depv_Array(e)->Print(TFile);
00277 fprintf(TFile, " and ");
00278 dg->Depv_Array(econj)->Print(TFile);
00279 }
00280 }
00281 }
00282 }
00283 }
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294 extern BOOL SNL_Test_Reduction_Lexneg(EINDEX16 e,
00295 WN* awn,
00296 WN* bwn,
00297 INT alex,
00298 INT blex)
00299 {
00300 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00301 REDUCTION_MANAGER* rm = red_manager;
00302 if (e == 0) {
00303 Is_True(0, ("Bad edge into SNL_Test_Reduction_Lexneg()"));
00304 return FALSE;
00305 }
00306 if (Is_Lexpos(dg->Depv_Array(e)))
00307 return FALSE;
00308
00309 Is_True(awn == dg->Get_Wn(dg->Get_Source(e)),
00310 ("Bad awn 0x%p 0x%p", awn, dg->Get_Wn(dg->Get_Source(e))));
00311 Is_True(bwn == dg->Get_Wn(dg->Get_Sink(e)),
00312 ("Bad bwn 0x%p 0x%p", bwn, dg->Get_Wn(dg->Get_Sink(e))));
00313
00314 if (rm != NULL &&
00315 rm->Which_Reduction(awn) &&
00316 rm->Which_Reduction(bwn) &&
00317 (rm->Which_Reduction(awn) ==
00318 rm->Which_Reduction(bwn))) {
00319
00320 DEPV_ARRAY* array_e = dg->Depv_Array(e);
00321 FmtAssert(array_e, ("Edge %d has no depv array!", e));
00322
00323 DEPV_LIST dl(array_e, &LNO_local_pool);
00324 DEPV_LIST pos(dl.Num_Dim(), dl.Num_Unused_Dim(), &LNO_local_pool);
00325 DEPV_LIST neg(dl.Num_Dim(), dl.Num_Unused_Dim(), &LNO_local_pool);
00326 dl.Lex_Pos_Decompose(&LNO_local_pool, &pos, &neg, alex < blex, alex > blex);
00327 DEPV_ARRAY* array = Create_DEPV_ARRAY(&pos, dg->Pool());
00328 if (array) {
00329 Delete_DEPV_ARRAY(array_e, dg->Pool());
00330 dg->Set_Depv_Array(e, array);
00331 }
00332 else
00333 dg->Delete_Array_Edge(e);
00334
00335 if (snl_debug >= 3) {
00336 fprintf(TFile,
00337 "SNL_Test_Reduction_Lexneg: made reduction edge %d into: ", e);
00338 if (array)
00339 array->Print(TFile);
00340 else
00341 fprintf(TFile, "<NULL>\n");
00342 fflush(TFile);
00343 }
00344
00345 return FALSE;
00346 }
00347 else {
00348 if (snl_debug) {
00349 fprintf(TFile, "SNL_Test_Reduction_Lexneg: edge=%d lexneg:", e);
00350 dg->Depv_Array(e)->Print(TFile);
00351 }
00352 return TRUE;
00353 }
00354 }
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367 extern SNL_REGION SNL_Remove_Unity_Trip_Loop(WN* wdloop,
00368 BOOL update_access)
00369 {
00370 WN* wn_first = NULL;
00371 WN* wn_last = NULL;
00372 Remove_Unity_Trip_Loop(wdloop, update_access, &wn_first, &wn_last,
00373 Array_Dependence_Graph, Du_Mgr);
00374 SNL_REGION region;
00375 region.First = wn_first;
00376 region.Last = wn_last;
00377 if (!Valid_SNL_Region(region))
00378 DevWarn("SNL_Remove_Unity_Trip_Loop: Invalid SNL_REGION [0x%p,0x%p]",
00379 region.First, region.Last);
00380 return region;
00381 }
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391 static void RUL_Region_Update(SNL_REGION* region,
00392 SNL_REGION* local_region,
00393 WN* wn,
00394 WN* wn_prev,
00395 WN* wn_next)
00396 {
00397 if (!Valid_SNL_Region(*local_region))
00398 DevWarn("RUL_Region_Update: Invalid Local SNL_REGION [0x%p,0x%p]",
00399 local_region->First, local_region->Last);
00400 if (local_region->First == NULL) {
00401 FmtAssert(local_region->Last == NULL,
00402 ("RUL_Region_Update: First NULL but not last"));
00403 if (region->First == wn)
00404 region->First = wn_next;
00405 if (region->Last == wn)
00406 region->Last = wn_prev;
00407 if (!Valid_SNL_Region(*region))
00408 DevWarn("RUL_Region_Update: Invalid SNL_REGION [0x%p,0x%p]",
00409 region->First, region->Last);
00410 return;
00411 }
00412 if (region->First == wn && local_region->First != wn)
00413 region->First = local_region->First;
00414 if (region->Last == wn && local_region->Last != wn)
00415 region->Last = local_region->Last;
00416 if (!Valid_SNL_Region(*region))
00417 DevWarn("RUL_Region_Update: Invalid Input SNL_REGION [0x%p,0x%p]",
00418 region->First, region->Last);
00419 }
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429 extern SNL_REGION SNL_Remove_Useless_Loops(WN* wn_tree,
00430 BOOL update_access)
00431 {
00432 SNL_REGION region(wn_tree, wn_tree);
00433 SNL_REGION local_region(wn_tree, wn_tree);
00434 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
00435 INT iteration_count = Iterations(wn_tree, &LNO_local_pool);
00436 if (iteration_count == 1) {
00437 local_region = SNL_Remove_Unity_Trip_Loop(wn_tree, update_access);
00438 if (region.First == wn_tree)
00439 region.First = local_region.First;
00440 if (region.Last == wn_tree)
00441 region.Last = local_region.Last;
00442 WN* wnn = NULL;
00443 for (WN* wn = region.First; wn != NULL; wn = wnn) {
00444 wnn = WN_next(wn);
00445 WN* old_region_last = region.Last;
00446 WN* wn_prev = WN_prev(wn);
00447 WN* wn_next = WN_next(wn);
00448 local_region = SNL_Remove_Useless_Loops(wn, update_access);
00449 RUL_Region_Update(®ion, &local_region, wn, wn_prev, wn_next);
00450 if (wn == old_region_last)
00451 break;
00452 }
00453 if (!Valid_SNL_Region(region))
00454 DevWarn("SNL_Remove_Useless_Loops: Invalid SNL_REGION [0x%p,0x%p]",
00455 region.First, region.Last);
00456 return region;
00457 }
00458 if (iteration_count == 0) {
00459 WN* wn_before = WN_prev(wn_tree);
00460 WN* wn_after = WN_next(wn_tree);
00461 Remove_Zero_Trip_Loop(wn_tree);
00462 if (region.First == wn_tree && region.Last == wn_tree) {
00463 region.First = NULL;
00464 region.Last = NULL;
00465 } else if (region.First == wn_tree) {
00466 region.First = wn_after;
00467 } else if (region.Last == wn_tree) {
00468 region.Last = wn_before;
00469 }
00470 if (!Valid_SNL_Region(region))
00471 DevWarn("SNL_Remove_Useless_Loops: Returning SNL_REGION [0x%p,0x%p]",
00472 region.First, region.Last);
00473 return region;
00474 }
00475 }
00476 if (WN_opcode(wn_tree) == OPC_BLOCK) {
00477 WN* wnn = NULL;
00478 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = wnn) {
00479 wnn = WN_next(wn);
00480 WN* wn_prev = WN_prev(wn);
00481 WN* wn_next = WN_next(wn);
00482 local_region = SNL_Remove_Useless_Loops(wn, update_access);
00483 RUL_Region_Update(®ion, &local_region, wn, wn_prev, wn_next);
00484 }
00485 } else {
00486 for (INT i = 0; i < WN_kid_count(wn_tree); i++) {
00487 WN* wn = WN_kid(wn_tree, i);
00488 local_region = SNL_Remove_Useless_Loops(wn, update_access);
00489 RUL_Region_Update(®ion, &local_region, wn, NULL, NULL);
00490 }
00491 }
00492 if (!Valid_SNL_Region(region))
00493 DevWarn("SNL_Remove_Useless_Loops: Invalid SNL_REGION [0x%p,0x%p]",
00494 region.First, region.Last);
00495 return region;
00496 }
00497
00498
00499
00500
00501
00502
00503 extern void Remove_Useless_Loops(SNL_REGION* region)
00504 {
00505 WN* wnn = NULL;
00506 for (WN* wn = region->First; wn != NULL; wn = wnn) {
00507 wnn = WN_next(wn);
00508 WN* old_region_last = region->Last;
00509 WN* wn_prev = WN_prev(wn);
00510 WN* wn_next = WN_next(wn);
00511 SNL_REGION local_region = SNL_Remove_Useless_Loops(wn, TRUE);
00512 RUL_Region_Update(region, &local_region, wn, wn_prev, wn_next);
00513 if (wn == old_region_last)
00514 break;
00515 }
00516 }
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530 extern void Print_Interchange(FILE* file,
00531 WN* outer_loop,
00532 INT permutation[],
00533 INT nloops)
00534 {
00535 fprintf(file, "Interchange: (");
00536 INT i;
00537 for (i = 0; i < nloops; i++) {
00538 const char *name=WB_Whirl_Symbol(SNL_Get_Inner_Snl_Loop(outer_loop,i + 1));
00539 fprintf(file, "%s", name);
00540 if (i < nloops - 1)
00541 fprintf(file, ",");
00542 }
00543 fprintf(file, ") -> (");
00544 for (i = 0; i < nloops; i++) {
00545 const char *name = WB_Whirl_Symbol(SNL_Get_Inner_Snl_Loop(outer_loop,
00546 permutation[i] + 1));
00547 fprintf(file, "%s", name);
00548 if (i < nloops - 1)
00549 fprintf(file, ",");
00550 }
00551 fprintf(file, ") at (");
00552 for (i = 0; i < nloops; i++) {
00553 fprintf(file, "%d", (INT) WN_linenum(SNL_Get_Inner_Snl_Loop(outer_loop,
00554 i + 1)));
00555 if (i < nloops - 1)
00556 fprintf(file, ",");
00557 }
00558 fprintf(file, ")\n");
00559 }
00560
00561
00562
00563
00564
00565
00566
00567 extern void Prompf_Interchange(WN* wn_outer,
00568 INT permutation[],
00569 INT nloops)
00570 {
00571 if (nloops == 0 || Identity_Permutation(permutation, nloops))
00572 return;
00573 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00574 INT outer_depth = Do_Loop_Depth(wn_outer);
00575 DOLOOP_STACK stack(&LNO_local_pool);
00576 Build_Doloop_Stack(wn_inner, &stack);
00577 INT* old_id = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00578 INT* new_id = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00579 INT i;
00580 for (i = 0; i < nloops; i++) {
00581 WN* wn_loop = stack.Bottom_nth(outer_depth + i);
00582 old_id[i] = WN_MAP32_Get(Prompf_Id_Map, wn_loop);
00583 }
00584 for (i = 0; i < nloops; i++)
00585 new_id[i] = old_id[permutation[i]];
00586 Prompf_Info->Interchange(old_id, new_id, nloops);
00587 }
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598 extern void Prompf_Interchanges(WN* wn_outer,
00599 INT permutation[],
00600 INT nloops)
00601 {
00602 if (permutation == NULL)
00603 return;
00604 INT outer_depth = Do_Loop_Depth(wn_outer);
00605 DOLOOP_STACK stack(&PROMPF_pool);
00606 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00607 Build_Doloop_Stack(wn_inner, &stack);
00608 INT last = -1;
00609 INT* spermutation = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
00610 for (INT first = 0; first < nloops; first = last + 1) {
00611 last = Permutation_Last(first, permutation, nloops);
00612 for (INT i = first; i <= last; i++)
00613 spermutation[i - first] = permutation[i] - first;
00614 INT snloops = last - first + 1;
00615 WN* wn_souter = stack.Bottom_nth(outer_depth + first);
00616 Prompf_Interchange(wn_souter, spermutation, snloops);
00617 }
00618 }
00619
00620
00621
00622
00623
00624
00625 static void
00626 Scale_FB_Info_Loop(FB_Info_Loop *filp, float scale)
00627 {
00628 filp->freq_zero *= scale;
00629 filp->freq_positive *= scale;
00630 filp->freq_out *= scale;
00631 filp->freq_back *= scale;
00632 filp->freq_exit *= scale;
00633 filp->freq_iterate *= scale;
00634 }
00635
00636
00637
00638
00639
00640
00641
00642
00643 extern void
00644 LNO_FB_Inv_Interchange(WN* wn_outer, INT permutation[], INT nloops)
00645 {
00646 Is_True(Cur_PU_Feedback, ("NULL Cur_PU_Feedback"));
00647 if (!permutation || nloops == 0 ||
00648 Identity_Permutation(permutation, nloops))
00649 return;
00650
00651 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00652 INT outer_depth = Do_Loop_Depth(wn_outer);
00653 DOLOOP_STACK stack(&LNO_local_pool);
00654 Build_Doloop_Stack(wn_inner, &stack);
00655
00656 #ifdef KEY
00657
00658 for( int i = 0; i < nloops; i++ ){
00659 const WN* wn_loop = stack.Bottom_nth(outer_depth + i);
00660 const FB_Info_Loop fb_info = Cur_PU_Feedback->Query_loop(wn_loop);
00661 if( fb_info.freq_positive.Uninitialized() )
00662 return;
00663 }
00664 #endif
00665
00666 INT i;
00667
00668 FB_Info_Loop *old_fils = CXX_NEW_ARRAY(FB_Info_Loop, nloops,
00669 &LNO_local_pool);
00670 FB_Info_Loop *new_fils = CXX_NEW_ARRAY(FB_Info_Loop, nloops,
00671 &LNO_local_pool);
00672 for (i = 0; i < nloops; i++) {
00673 WN* wn_loop = stack.Bottom_nth(outer_depth + i);
00674 new_fils[i] = old_fils[i] = Cur_PU_Feedback->Query_loop(wn_loop);
00675 }
00676
00677
00678
00679
00680
00681
00682
00683
00684 for (i = 0; i < nloops; i++) {
00685 const INT idx = permutation[i];
00686 const FB_FREQ old_invokes = old_fils[idx].freq_zero +
00687 old_fils[idx].freq_positive;
00688 FB_FREQ new_invokes;
00689 if (i > 0) {
00690
00691 new_invokes = new_fils[permutation[i - 1]].freq_iterate;
00692 } else {
00693
00694 new_invokes = old_fils[0].freq_zero + old_fils[0].freq_positive;
00695 }
00696
00697 #ifdef KEY
00698
00699
00700
00701 if( old_invokes.Zero() )
00702 Scale_FB_Info_Loop(&new_fils[idx], old_invokes.Value());
00703 else
00704 #endif
00705 Scale_FB_Info_Loop(&new_fils[idx], (new_invokes / old_invokes).Value());
00706 }
00707
00708 for (i = 0; i < nloops; i++) {
00709 WN* wn_loop = stack.Bottom_nth(outer_depth + i);
00710 Cur_PU_Feedback->Annot_loop(wn_loop, new_fils[i]);
00711 }
00712
00713 CXX_DELETE_ARRAY(old_fils, &LNO_local_pool);
00714 CXX_DELETE_ARRAY(new_fils, &LNO_local_pool);
00715 }
00716
00717
00718
00719
00720
00721
00722
00723
00724 extern void
00725 LNO_FB_MP_Tile(WN* wn_tile_loop, INT tile_loop_tripcount, WN *wn_orig_loop)
00726 {
00727 Is_True(Cur_PU_Feedback, ("NULL Cur_PU_Feedback"));
00728 Is_True(tile_loop_tripcount > 0, ("tile_loop_tripcount <= 0"));
00729
00730 FB_Info_Loop orig_fil = Cur_PU_Feedback->Query_loop(wn_orig_loop),
00731 new_orig_fil = orig_fil, tile_fil = orig_fil;
00732
00733
00734 tile_fil.freq_out = tile_fil.freq_positive;
00735 tile_fil.freq_iterate = tile_fil.freq_positive * tile_loop_tripcount;
00736 tile_fil.freq_back = tile_fil.freq_iterate - tile_fil.freq_positive;
00737 tile_fil.freq_exit = tile_fil.freq_out + tile_fil.freq_zero;
00738
00739
00740
00741
00742 new_orig_fil.freq_exit = tile_fil.freq_iterate;
00743 if (new_orig_fil.freq_iterate.Value() >=
00744 9.0 * new_orig_fil.freq_exit.Value()) {
00745
00746
00747
00748
00749
00750 new_orig_fil.freq_zero = 0.1 * new_orig_fil.freq_exit;
00751
00752 new_orig_fil.freq_positive = new_orig_fil.freq_exit -
00753 new_orig_fil.freq_zero;
00754
00755 new_orig_fil.freq_back = new_orig_fil.freq_iterate -
00756 new_orig_fil.freq_positive;
00757
00758 } else {
00759
00760
00761 new_orig_fil.freq_back = new_orig_fil.freq_iterate * 0.9;
00762 new_orig_fil.freq_positive = new_orig_fil.freq_iterate -
00763 new_orig_fil.freq_back;
00764 new_orig_fil.freq_zero = new_orig_fil.freq_exit -
00765 new_orig_fil.freq_positive;
00766 }
00767
00768 new_orig_fil.freq_out = new_orig_fil.freq_positive;
00769
00770 Cur_PU_Feedback->Annot_loop(wn_orig_loop, new_orig_fil);
00771 Cur_PU_Feedback->Annot_loop(wn_tile_loop, tile_fil);
00772 }
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784 extern WN* SNL_Permute_Loops(WN* wn_outer,
00785 WN* wn_inner,
00786 INT permutation[],
00787 INT nloops,
00788 BOOL invariant,
00789 BOOL warn_lexneg)
00790 {
00791 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00792 INT outer_depth = Do_Loop_Depth(wn_outer);
00793 INT inner_depth = Do_Loop_Depth(wn_inner);
00794 FmtAssert(inner_depth - outer_depth + 1 == nloops,
00795 ("Inconsistent parameters to SNL_Permute_Loops"));
00796 DOLOOP_STACK stack(&LNO_local_pool);
00797 Build_Doloop_Stack(wn_inner, &stack);
00798 INT* spermutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00799 WN* wn_new_outer = NULL;
00800 INT last = -1;
00801 for (INT first = 0; first < nloops; first = last + 1) {
00802 last = Permutation_Last(first, permutation, nloops);
00803 for (INT i = first; i <= last; i++)
00804 spermutation[i - first] = permutation[i] - first;
00805 INT snloops = last - first + 1;
00806 WN* wn_souter = stack.Bottom_nth(outer_depth + first);
00807 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
00808 Prompf_Interchange(wn_souter, spermutation, snloops);
00809 if (invariant)
00810 wn_new_outer = SNL_INV_Permute_Loops(wn_souter, spermutation, snloops,
00811 warn_lexneg);
00812 else
00813 wn_new_outer = SNL_GEN_Permute_Loops(wn_souter, spermutation, snloops,
00814 warn_lexneg);
00815 if (!warn_lexneg)
00816 Repair_Bad_Dependences(wn_souter);
00817 if (first == 0)
00818 wn_new_outer = wn_new_outer;
00819 }
00820 return wn_new_outer;
00821 }
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837 extern BOOL SNL_Update_Strip_Dependence(INT current_depth,
00838 INT s,
00839 INT i_for_s,
00840 EINDEX16 e,
00841 WN* awn,
00842 WN* bwn,
00843 INT alex,
00844 INT blex)
00845 {
00846 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883 DEPV_ARRAY* orig_dv = dg->Depv_Array(e);
00884 INT ddepth = current_depth - orig_dv->Num_Unused_Dim();
00885 if (ddepth < 0) {
00886 DEPV_ARRAY* new_dv = Create_DEPV_ARRAY(orig_dv->Num_Vec(),
00887 orig_dv->Num_Dim(),
00888 orig_dv->Num_Unused_Dim() + 1,
00889 dg->Pool());
00890 for (INT ii = 0; ii < orig_dv->Num_Vec(); ii++) {
00891 DEPV* orig_depv = orig_dv->Depv(ii);
00892 DEPV* new_depv = new_dv->Depv(ii);
00893 for (INT jj = 0; jj < orig_dv->Num_Dim(); jj++)
00894 DEPV_Dep(new_depv, jj) = DEPV_Dep(orig_depv, jj);
00895 }
00896 dg->Set_Depv_Array(e, new_dv);
00897 Delete_DEPV_ARRAY(orig_dv, dg->Pool());
00898 if (SNL_Test_Reduction_Lexneg(e, awn, bwn, alex, blex))
00899 return TRUE;
00900 return FALSE;
00901 }
00902
00903 INT nvec = 0;
00904 INT v;
00905 for (v = 0; v < orig_dv->Num_Vec(); v++) {
00906 DEPV* d = orig_dv->Depv(v);
00907 switch (DEP_Direction(DEPV_Dep(d, ddepth + i_for_s))) {
00908 case DIR_EQ:
00909 case DIR_STAR:
00910 case DIR_POSNEG:
00911 nvec++;
00912 break;
00913 default:
00914 nvec += 2;
00915 break;
00916 }
00917 }
00918
00919
00920
00921
00922
00923
00924
00925
00926 BOOL conservative = nvec > UINT8_MAX;
00927 if (conservative)
00928 nvec = orig_dv->Num_Vec();
00929 DEPV_ARRAY* new_dv = Create_DEPV_ARRAY(nvec,
00930 orig_dv->Num_Dim()+1,
00931 orig_dv->Num_Unused_Dim(),
00932 dg->Pool());
00933
00934 INT vcount = 0;
00935
00936
00937
00938
00939
00940
00941 for (v = 0; v < orig_dv->Num_Vec(); v++) {
00942 INT i;
00943 DEPV* d = orig_dv->Depv(v);
00944 DIRECTION dir = DEP_Direction(DEPV_Dep(d, ddepth + i_for_s));
00945 DEPV* dd1 = dir == DIR_STAR || dir == DIR_POSNEG ?
00946 NULL : new_dv->Depv(vcount++);
00947 DEPV* dd2 = conservative && dir != DIR_STAR && dir != DIR_POSNEG
00948 || dir == DIR_EQ ? NULL : new_dv->Depv(vcount++);
00949
00950 FmtAssert(dd1 != NULL || dd2 != NULL,
00951 ("SNL_Update_Strip_Dependence: Must produce at least one dep"));
00952
00953
00954
00955 if (dd1) {
00956 for (i = 0; i < ddepth + s; i++)
00957 DEPV_Dep(dd1, i) = DEPV_Dep(d, i);
00958 if (conservative) {
00959 DIRECTION ndir = (dir == DIR_POS || dir == DIR_POSEQ) ? DIR_POSEQ
00960 : (dir == DIR_NEG || dir == DIR_NEGEQ) ? DIR_NEGEQ
00961 : dir;
00962 DEPV_Dep(dd1, i) = DEP_SetDirection(ndir);
00963 } else {
00964 DEPV_Dep(dd1, i) = DEP_SetDistance(0);
00965 }
00966 for (; i < orig_dv->Num_Dim(); i++)
00967 DEPV_Dep(dd1, i+1) = DEPV_Dep(d, i);
00968 }
00969
00970
00971
00972
00973
00974
00975 if (dd2) {
00976 DIRECTION ndir = (dir == DIR_POS || dir == DIR_POSEQ) ? DIR_POS :
00977 (dir == DIR_NEG || dir == DIR_NEGEQ) ? DIR_NEG :
00978 DIR_STAR;
00979 DEP ndep = DEP_SetDirection(ndir);
00980 for (i = 0; i < ddepth + s; i++)
00981 DEPV_Dep(dd2, i) = DEPV_Dep(d, i);
00982 DEPV_Dep(dd2, i) = ndep;
00983 for (; i < orig_dv->Num_Dim(); i++)
00984 DEPV_Dep(dd2, i+1) = DEPV_Dep(d, i);
00985 DEPV_Dep(dd2, ddepth + s + 1 + i_for_s) = ndep;
00986 }
00987 }
00988 Is_True(vcount == nvec, ("Bug in tile dependence stuff"));
00989 dg->Set_Depv_Array(e, new_dv);
00990
00991 Delete_DEPV_ARRAY(orig_dv, dg->Pool());
00992 if (SNL_Test_Reduction_Lexneg(e, awn, bwn, alex, blex))
00993 return TRUE;
00994 return FALSE;
00995 }
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011 extern WN* Tile_Loop(WN* wn_loop,
01012 INT tile_size,
01013 INT tile_level,
01014 SNL_INV_CACHE_BLOCK_REASON reason,
01015 SYMBOL* outersym,
01016 MEM_POOL *pool)
01017 {
01018 ARRAY_DIRECTED_GRAPH16 *dg = Array_Dependence_Graph;
01019 DU_MANAGER* du = Du_Mgr;
01020 REDUCTION_MANAGER* rm = red_manager;
01021 INT iloop[1];
01022 INT stripsz[1];
01023 INT striplevel[1];
01024 WN* permloop[1];
01025 SNL_INV_CACHE_BLOCK_REASON reason_array[1];
01026 Upper_Bound_Standardize(WN_end(wn_loop), FALSE);
01027 iloop[0] = 0;
01028 stripsz[0] = tile_size;
01029 striplevel[0] = tile_level;
01030 permloop[0] = wn_loop;
01031 reason_array[0] = reason;
01032 SNL_TILE_INFO ti(1, 1, iloop, stripsz, striplevel, reason_array, pool);
01033 LS_IN_LOOP loop_ls(wn_loop, dg, pool);
01034 SNL_REGION region;
01035 region.First = wn_loop;
01036 region.Last = wn_loop;
01037 DOLOOP_STACK stack(&LNO_local_pool);
01038 Build_Doloop_Stack(wn_loop, &stack);
01039 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01040 if (Bound_Is_Too_Messy(dli->LB))
01041 Hoist_Lower_Bound(wn_loop, &stack, &LNO_default_pool);
01042 if (Bound_Is_Too_Messy(dli->UB))
01043 Hoist_Upper_Bound(wn_loop, &stack, &LNO_default_pool);
01044 WN* outer_tile = SNL_INV_Cache_Block(NULL, &ti, permloop, loop_ls,
01045 ®ion, reason, outersym, pool, FALSE);
01046 if (Cur_PU_Feedback) {
01047 INT32 orig_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_start(wn_loop));
01048 if (orig_count > 0) {
01049 INT32 orig_test = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_end(wn_loop));
01050 INT32 outer_count = orig_count;
01051 INT32 outer_test = MAX(orig_test/stripsz[0],1);
01052 LWN_Set_Frequency(outer_tile, outer_count);
01053 LWN_Set_Frequency(WN_start(outer_tile), outer_count);
01054 LWN_Set_Frequency(WN_step(outer_tile), outer_test-1);
01055
01056 LWN_Set_Frequency(wn_loop, outer_test-1);
01057 LWN_Set_Frequency(WN_start(wn_loop), outer_test-1);
01058 }
01059 }
01060
01061 DOLOOP_STACK dostack(pool);
01062 Build_Doloop_Stack(LWN_Get_Parent(outer_tile), &dostack);
01063 LNO_Build_Access(outer_tile, &dostack, &LNO_default_pool);
01064 return outer_tile;
01065 }
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079 extern SNL_REGION SNL_Regtile_Loop(WN* outerloop,
01080 INT u,
01081 INT nloops,
01082 BOOL unroll_just_inner,
01083 EST_REGISTER_USAGE est_register_usage,
01084 SX_INFO* pinfo,
01085 INT pinfo_depth,
01086 BOOL no_further_unroll,
01087 HASH_TABLE<WN*,WN*>** loop_map_ptr,
01088 SX_INFO** wdpinfo_ptr)
01089 {
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01126 DU_MANAGER* du = Du_Mgr;
01127
01128 ST* st = WN_st(WN_index(outerloop));
01129 WN_OFFSET offset = WN_offset(WN_index(outerloop));
01130 TYPE_ID wtype = Do_Wtype(outerloop);
01131 SNL_REGION region(outerloop, outerloop);
01132 INT outerdepth = Do_Loop_Depth(outerloop);
01133
01134 SYMBOL indexsym(WN_index(outerloop));
01135 indexsym.Type = wtype;
01136
01137 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01138 INT loop_id = WN_MAP32_Get(Prompf_Id_Map, outerloop);
01139 Prompf_Info->Register_Tile(loop_id);
01140 }
01141
01142 WN* wdloop = NULL;
01143 SX_INFO* wdpinfo = NULL;
01144 HASH_TABLE<WN*,WN*>* loop_map = NULL;
01145
01146 FmtAssert(u > 1, ("Register unrolling too little: %d\n", u));
01147
01148 INT64 iters = Iterations(outerloop, &SNL_local_pool);
01149 BOOL do_winddown = (iters < 0 || iters%u);
01150
01151 if (do_winddown) {
01152
01153 EST_REGISTER_USAGE ru;
01154 ru.Set_Fits(est_register_usage.Fits());
01155 wdloop = Wind_Down(outerloop, iters<0 ? (u+1)/2 : iters%u, FALSE, ru);
01156 region.Last = wdloop;
01157
01158 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wdloop);
01159 dli->Est_Max_Iterations_Index = u;
01160
01161 loop_map = Make_Loop_Mapping(outerloop, wdloop, &SNL_local_pool);
01162
01163
01164
01165 WN* lower_bound = WN_kid0(WN_start(outerloop));
01166 WN* upper_bound = SNL_UBexp(WN_end(outerloop));
01167 if (WN_operator(lower_bound) == OPR_INTCONST
01168 && WN_operator(upper_bound) == OPR_INTCONST) {
01169
01170
01171 INT64 lb = WN_const_val(lower_bound);
01172 INT64 ub = WN_const_val(upper_bound);
01173 INT64 wdlb = lb + iters/u * u;
01174
01175 LWN_Delete_Tree(WN_kid0(WN_start(wdloop)));
01176 WN* wd_lower_bound = LWN_Copy_Tree(lower_bound, TRUE, LNO_Info_Map);
01177 WN_kid0(WN_start(wdloop)) = wd_lower_bound;
01178 LWN_Copy_Frequency(wd_lower_bound, WN_start(wdloop));
01179 LWN_Set_Parent(wd_lower_bound, WN_start(wdloop));
01180 WN_const_val(wd_lower_bound) = wdlb;
01181 }
01182 if ((wdloop && !no_further_unroll &&
01183 u >= LNO_Outer_Unroll_Min_For_Further_Unroll) ||
01184 wdpinfo_ptr) {
01185 wdpinfo = CXX_NEW(SX_INFO(*pinfo, outerloop,
01186 loop_map, &SNL_local_pool), &SNL_local_pool);
01187 }
01188
01189
01190
01191 Increase_By(SNL_UBexp(WN_end(outerloop)), -(u-1), WN_end(outerloop));
01192 }
01193
01194
01195
01196 INT64 ostep = Step_Size(outerloop, u);
01197 FmtAssert(ostep == 1, ("Non-unit step %lld for loop %s",
01198 ostep, SYMBOL(WN_index(outerloop)).Name()));
01199
01200
01201
01202 WN** unroll_body = CXX_NEW_ARRAY(WN*, u, &SNL_local_pool);
01203 unroll_body[0] = outerloop;
01204 LWN_Scale_Frequency(WN_end(outerloop), 1.0/u);
01205 LWN_Scale_Frequency(WN_step(outerloop), 1.0/u);
01206 INT i;
01207 for (i = 1; i < u; i++) {
01208 unroll_body[i] = LWN_Copy_Tree(outerloop, TRUE, LNO_Info_Map);
01209 LWN_Scale_Frequency_Tree(unroll_body[i], 1.0/u);
01210 }
01211
01212 if (!dg->Unrolled_Dependences_Update(unroll_body, u, Do_Depth(outerloop))) {
01213 for (i = 0; i < u; i++)
01214 LNO_Erase_Dg_From_Here_In(unroll_body[i], dg);
01215 Unmapped_Vertices_Here_Out(LWN_Get_Parent(outerloop));
01216 }
01217 if (red_manager)
01218 red_manager->Unroll_Update(unroll_body, u);
01219
01220 Unrolled_DU_Update(unroll_body, u, outerdepth-1+nloops, FALSE, TRUE);
01221
01222
01223
01224
01225
01226 INT privcnt = 0;
01227 SX_PITER ii(&pinfo->Plist);
01228 INT outer = Do_Depth(outerloop);
01229 for (SX_PNODE* n = ii.First(); !ii.Is_Empty(); n = ii.Next()) {
01230 switch (n->Transformable(outer)) {
01231 case SX_PNODE::SE_NOT_REQD:
01232 break;
01233 case SX_PNODE::SE_REQD:
01234 if (n->Expansion_Depth() >= pinfo_depth)
01235 privcnt++;
01236 break;
01237 case SX_PNODE::ILLEGAL:
01238 FmtAssert(0, ("Bug: can't expand scalar %s", n->Symbol().Name()));
01239 break;
01240 default:
01241 FmtAssert(0, ("Illegal value for SX_PNODE::STATUS"));
01242 break;
01243 }
01244 }
01245
01246 if (privcnt > 0) {
01247 SYMBOL* oldsyms = CXX_NEW_ARRAY(SYMBOL, privcnt, &SNL_local_pool);
01248 WN** rloop = CXX_NEW_ARRAY(WN*, privcnt, &SNL_local_pool);
01249 INT* srqd = CXX_NEW_ARRAY(INT, privcnt, &SNL_local_pool);
01250 INT* nsrqd = CXX_NEW_ARRAY(INT, privcnt, &SNL_local_pool);
01251 INT* ed = CXX_NEW_ARRAY(INT, privcnt, &SNL_local_pool);
01252
01253 INT privcnt2 = 0;
01254 SX_PITER ii(&pinfo->Plist);
01255 SX_PNODE* nnext = NULL;
01256 for (SX_PNODE* n = ii.First(); n; n = nnext) {
01257 nnext = ii.Next();
01258 if (n->Transformable(outer) != SX_PNODE::SE_REQD)
01259 continue;
01260
01261 if (n->Expansion_Depth() >= pinfo_depth) {
01262 oldsyms[privcnt2] = n->Symbol();
01263 rloop[privcnt2] = n->Reduction_Carried_By();
01264 srqd[privcnt2] = n->Outer_Se_Reqd();
01265 nsrqd[privcnt2] = n->Outer_Se_Not_Reqd();
01266 ed[privcnt2++] = n->Expansion_Depth();
01267
01268 }
01269 }
01270 FmtAssert(privcnt == privcnt2, ("Just checking .. easy to mess up"));
01271
01272 SYMBOL* newsyms = CXX_NEW_ARRAY(SYMBOL, (u-1)*privcnt, &SNL_local_pool);
01273 WN** ancestors = CXX_NEW_ARRAY(WN*, privcnt, &SNL_local_pool);
01274
01275 INT newsymscnt = 0;
01276 for (INT i = 1; i < u; i++) {
01277 for (INT j = 0; j < privcnt; j++) {
01278 const INT bufsz = 64;
01279 char buf[bufsz];
01280 INT bufcnt;
01281
01282 ancestors[j] = unroll_body[i];
01283 bufcnt = sprintf(buf, "$rse_");
01284 oldsyms[j].Name(buf+bufcnt, bufsz-bufcnt);
01285 SYMBOL newsym = Create_Preg_Symbol(buf, oldsyms[j].Type);
01286 newsyms[newsymscnt++] = newsym;
01287 pinfo->Enter(NULL, newsym, rloop[j], srqd[j], nsrqd[j], ed[j],
01288 FALSE, FALSE);
01289 }
01290 Replace_Symbols(unroll_body[i], oldsyms,
01291 &newsyms[privcnt*(i-1)], privcnt, NULL, ancestors);
01292 }
01293
01294 CXX_DELETE_ARRAY(oldsyms, &SNL_local_pool);
01295 CXX_DELETE_ARRAY(srqd, &SNL_local_pool);
01296 CXX_DELETE_ARRAY(nsrqd, &SNL_local_pool);
01297 CXX_DELETE_ARRAY(rloop, &SNL_local_pool);
01298 CXX_DELETE_ARRAY(ed, &SNL_local_pool);
01299
01300 CXX_DELETE_ARRAY(newsyms, &SNL_local_pool);
01301 CXX_DELETE_ARRAY(ancestors, &SNL_local_pool);
01302 }
01303
01304 for (i = 1; i < u; i++)
01305 Add_To_Symbol(unroll_body[i], i, indexsym, TRUE);
01306
01307
01308
01309
01310 WN** loop = CXX_NEW_ARRAY(WN*, u, &SNL_local_pool);
01311 WN** nloop = CXX_NEW_ARRAY(WN*, u, &SNL_local_pool);
01312
01313 for (i = 0; i < u; i++) {
01314 loop[i] = unroll_body[i];
01315 nloop[i] = Find_Next_Innermost_Do(loop[i]);
01316 }
01317
01318 for (INT d = 0; d < nloops-1; d++) {
01319
01320
01321 WN* wn_last = NULL;
01322 for (WN* wn = WN_first(WN_do_body(loop[0])); wn != NULL; wn = WN_next(wn))
01323 wn_last = wn;
01324
01325 Is_True(nloop[0], ("no way"));
01326 if (!unroll_just_inner) {
01327 for (i = 1; i < u; i++) {
01328 if (WN_prev(nloop[i])) {
01329 WN* above = LWN_Create_Block_From_Stmts_Above(nloop[i]);
01330 LWN_Insert_Block_Before(LWN_Get_Parent(nloop[0]), nloop[0], above);
01331 }
01332 }
01333 for (i = u-1; i >= 1; i--) {
01334 if (WN_next(nloop[i])) {
01335 WN* below = LWN_Create_Block_From_Stmts_Below(nloop[i]);
01336 LWN_Insert_Block_After(LWN_Get_Parent(wn_last), wn_last, below);
01337 }
01338 }
01339 }
01340
01341 for (i = 0; i < u; i++) {
01342
01343
01344 if (i != 0)
01345 SNL_Add_Du_To_Index_Ldid(loop[0], WN_do_body(loop[i]), du, TRUE);
01346
01347 loop[i] = nloop[i];
01348 nloop[i] = Find_Next_Innermost_Do(loop[i]);
01349 }
01350 }
01351
01352 for (i = 1; i < u; i++) {
01353 WN* bdy = WN_do_body(loop[i]);
01354 WN_do_body(loop[i]) = WN_CreateBlock();
01355 LWN_Insert_Block_Before(WN_do_body(loop[0]), NULL, bdy);
01356 LWN_Delete_Tree(unroll_body[i]);
01357 }
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387 if (wdloop && !no_further_unroll &&
01388 u >= LNO_Outer_Unroll_Min_For_Further_Unroll) {
01389 FmtAssert(wdpinfo, ("Bug"));
01390 SNL_REGION region2 = SNL_Regtile_Loop(wdloop, 2, nloops, unroll_just_inner,
01391 est_register_usage, wdpinfo,
01392 pinfo_depth, no_further_unroll);
01393 region.Last = region2.Last;
01394 }
01395
01396 if (loop_map_ptr)
01397 *loop_map_ptr = loop_map;
01398 else if (loop_map)
01399 CXX_DELETE(loop_map, &SNL_local_pool);
01400
01401 if (wdpinfo_ptr)
01402 *wdpinfo_ptr = wdpinfo;
01403 else if (wdpinfo)
01404 CXX_DELETE(wdpinfo, &SNL_local_pool);
01405
01406 if (!Valid_SNL_Region(region))
01407 DevWarn("SNL_Regtile_Loop: Invalid SNL_REGION [0x%p,0x%p]",
01408 region.First, region.Last);
01409 return region;
01410 }
01411
01412