00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00049 #define __STDC_LIMIT_MACROS
00050 #include <stdint.h>
00051 #ifdef USE_PCH
00052 #include "lno_pch.h"
00053 #endif // USE_PCH
00054 #pragma hdrstop
00055
00056 #define snl_CXX "snl.cxx"
00057 const static char *rcs_id = snl_CXX "$Revision: 1.5 $";
00058
00059 #include <sys/types.h>
00060 #include <alloca.h>
00061 #include "snl.h"
00062 #include "snl_xbounds.h"
00063 #include "config_targ.h"
00064 #include "lwn_util.h"
00065 #include "lnoutils.h"
00066 #include "cxx_graph.h"
00067 #include "opt_du.h"
00068 #include "opt_alias_interface.h"
00069 #include "wintrinsic.h"
00070 #include "scalar_expand.h"
00071 #include "strtab.h"
00072 #include "dvector.h"
00073 #include "lnopt_main.h"
00074 #include "fb_whirl.h"
00075 #include "move.h"
00076 #include "small_trips.h"
00077 #include "sxlimit.h"
00078 #include "ir_reader.h"
00079 #include "sxlist.h"
00080 #include "debug.h"
00081 #include "permute.h"
00082 #include "tile.h"
00083 #include "prompf.h"
00084 #include "anl_driver.h"
00085 #include "cond.h"
00086 #include "wind_down.h"
00087 #include "ff_utils.h"
00088 #include "fb_whirl.h"
00089
00090 #pragma weak New_Construct_Id
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 static void Rebuild_Access_Arrays(SNL_REGION region,
00104 MEM_POOL* pool)
00105 {
00106 for (WN* wn_reg = region.First; ; wn_reg = WN_next(wn_reg)) {
00107 DOLOOP_STACK dostack(pool);
00108 Build_Doloop_Stack(LWN_Get_Parent(wn_reg), &dostack);
00109 LNO_Build_Access(wn_reg, &dostack, &LNO_default_pool);
00110 if (wn_reg == region.Last)
00111 break;
00112 }
00113 }
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126 static void SNL_INV_Distribution(WN* wn_outer,
00127 INT nloops,
00128 BOOL find_split_depth,
00129 SNL_REGION* region,
00130 WN** the_newest_outer_loop)
00131 {
00132 WN* newup = NULL;
00133 WN* newdown = NULL;
00134 INT outer_depth = Do_Loop_Depth(wn_outer);
00135 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00136 DOLOOP_STACK stack(&LNO_local_pool);
00137 Build_Doloop_Stack(wn_inner, &stack);
00138 INT split_depth = find_split_depth ? Split_Depth(wn_outer, nloops) : -1;
00139 SNL_INV_Distribute(wn_outer, split_depth, nloops, &newup, &newdown);
00140 if (newup) {
00141 if (region->First == wn_outer)
00142 region->First = newup;
00143 *the_newest_outer_loop = newup;
00144 }
00145 if (newdown)
00146 if (region->Last == wn_outer)
00147 region->Last = newdown;
00148 }
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 static INT Perfect_Depth(WN* wn_loop,
00160 const INT nloops)
00161 {
00162 FmtAssert(WN_opcode(wn_loop) == OPC_DO_LOOP,
00163 ("Tried to find depth, but not a DO_LOOP"));
00164 WN* wn = wn_loop;
00165 WN* wn_last = wn_loop;
00166 INT perfect_depth;
00167 for (perfect_depth = 1; perfect_depth < nloops; perfect_depth++) {
00168 for (wn = WN_first(WN_do_body(wn)); wn != NULL
00169 && (WN_opcode(wn) == OPC_PRAGMA || WN_opcode(wn) == OPC_XPRAGMA);
00170 wn = WN_next(wn));
00171 if (wn == NULL || WN_opcode(wn) != OPC_DO_LOOP)
00172 return Do_Loop_Is_Inner(wn_last) ? perfect_depth : 0;
00173 wn_last = wn;
00174 WN *wn_next;
00175 for (wn_next = WN_next(wn); wn_next != NULL
00176 && (WN_opcode(wn_next) == OPC_PRAGMA
00177 || WN_opcode(wn_next) == OPC_XPRAGMA); wn_next = WN_next(wn_next));
00178 if (wn_next != NULL)
00179 return Do_Loop_Is_Inner(wn_last) ? perfect_depth : 0;
00180 }
00181 return Do_Loop_Is_Inner(wn_last) ? perfect_depth : 0;
00182 }
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192 static void Reduced_Permutation(const INT permutation[],
00193 const INT nloops,
00194 INT reduced_permutation[],
00195 const INT perfect_depth)
00196 {
00197 INT j = 0;
00198 FmtAssert(perfect_depth <= nloops,
00199 ("Reduced permutation is not really reducedi size."));
00200 for (INT i = 0; i < nloops; i++)
00201 if (permutation[i] < perfect_depth)
00202 reduced_permutation[j++] = permutation[i];
00203 FmtAssert(j == perfect_depth, ("Permutation is wrong length."));
00204 Is_True(Is_Permutation_Vector(reduced_permutation, perfect_depth),
00205 ("Permutation is not really a permutation."));
00206 }
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216 static WN* Distribute_Traverse(WN* wn_dst,
00217 const INT permutation[],
00218 const INT nloops,
00219 ARRAY_DIRECTED_GRAPH16* dg,
00220 DU_MANAGER* du,
00221 REDUCTION_MANAGER* rm)
00222 {
00223 INT reduced_permutation[SNL_MAX_LOOPS];
00224 INT perfect_depth = Perfect_Depth(wn_dst, nloops);
00225 WN* wn_outer = wn_dst;
00226 if (perfect_depth > 1 && perfect_depth < nloops) {
00227 Reduced_Permutation(permutation, nloops, reduced_permutation,
00228 perfect_depth);
00229 if (!Identity_Permutation(reduced_permutation, perfect_depth)) {
00230 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
00231 Prompf_Interchanges(wn_dst, reduced_permutation, perfect_depth);
00232 wn_outer = SNL_INV_Permute_Loops(wn_dst, reduced_permutation,
00233 perfect_depth, TRUE);
00234 }
00235 } else if (perfect_depth == 0) {
00236 WN* wn_next = NULL;
00237 WN* wn_first = WN_first(WN_do_body(wn_dst));
00238 for (WN* wn = wn_first; wn != NULL; wn = wn_next) {
00239 wn_next = WN_next(wn);
00240 if (WN_opcode(wn) == OPC_DO_LOOP)
00241 Distribute_Traverse(wn, permutation, nloops, dg, du, rm);
00242 }
00243 }
00244 return wn_outer;
00245 }
00246
00247
00248
00249
00250
00251
00252
00253
00254 static void SNL_INV_Permute_Distributends(SNL_REGION* region,
00255 const INT permutation[],
00256 const INT nloops,
00257 ARRAY_DIRECTED_GRAPH16* dg,
00258 DU_MANAGER* du,
00259 REDUCTION_MANAGER* rm)
00260 {
00261 WN* wn_dst_next = NULL;
00262 INT reduced_permutation[SNL_MAX_LOOPS];
00263 for (WN* wn_dst = region->First; wn_dst != NULL; wn_dst = wn_dst_next) {
00264 WN* wn_outer = Distribute_Traverse(wn_dst, permutation,
00265 nloops, dg, du, rm);
00266 if (region->First == wn_dst)
00267 region->First = wn_outer;
00268 if (region->Last == wn_dst)
00269 region->Last = wn_outer;
00270 if (wn_dst == region->Last)
00271 return;
00272 }
00273 }
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 static BOOL SNL_INV_Limited_Scalar_Expand_And_Distribute(WN* wn_outer,
00290 SNL_TILE_INFO** t_ptr,
00291 INT permutation[],
00292 INT nloops,
00293 SX_PLIST* plist,
00294 SNL_REGION* region,
00295 WN** wn_new_outer)
00296 {
00297 SNL_TILE_INFO* ti_se = NULL;
00298 SNL_TILE_INFO* ti_ct = NULL;
00299 SNL_TILE_INFO* t = *t_ptr;
00300 WN* wn_inner = SNL_Innermost_Do(wn_outer);
00301 WN* the_newest_outer_loop = wn_outer;
00302 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
00303 SE_CT_New_Tile_Infos(wn_outer, plist, t, permutation, nloops,
00304 &LNO_local_pool, &ti_se, &ti_ct, TRUE);
00305 if (ti_se == NULL || ti_se->Strips() == 0)
00306 return FALSE;
00307 WN* wn_new = SNL_INV_Limited_SE_And_Dist(wn_outer, ti_se, permutation,
00308 nloops, plist, TRUE);
00309 if (wn_new != NULL) {
00310 *wn_new_outer = wn_new;
00311 if (wn_outer != *wn_new_outer) {
00312 if (region->First == wn_outer)
00313 region->First = *wn_new_outer;
00314 if (region->Last == wn_outer)
00315 region->Last = *wn_new_outer;
00316 }
00317 }
00318 return TRUE;
00319 }
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339 static WN* SNL_INV_Local_Permute_Loops(SX_PLIST* plist,
00340 DOLOOP_STACK* stack,
00341 INT first_in_stack,
00342 const INT nloops,
00343 const INT permutation[],
00344 BOOL do_est_register_usage,
00345 EST_REGISTER_USAGE est_register_usage,
00346 WN* permloop[],
00347 BOOL print_lexneg,
00348 SNL_REGION* region,
00349 MEM_POOL* pool)
00350 {
00351 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00352 WN* loop[SNL_MAX_LOOPS];
00353 INT i;
00354 for (i = 0; i < nloops; i++)
00355 loop[i] = stack->Bottom_nth(first_in_stack+i);
00356
00357 if (!permutation) {
00358 for (i = 0; i < nloops; i++)
00359 permloop[i] = loop[i];
00360 if (do_est_register_usage) {
00361 DO_LOOP_INFO* inner_dli = Get_Do_Loop_Info(permloop[nloops-1]);
00362 inner_dli->Est_Register_Usage = est_register_usage;
00363 }
00364 return permloop[0];
00365 }
00366
00367 WN* parents[SNL_MAX_LOOPS];
00368 WN* prev[SNL_MAX_LOOPS];
00369 WN* body[SNL_MAX_LOOPS];
00370 DO_LOOP_INFO* dli[SNL_MAX_LOOPS];
00371 BOOL is_inner[SNL_MAX_LOOPS];
00372 INT depth[SNL_MAX_LOOPS];
00373 BOOL is_ivdep[SNL_MAX_LOOPS];
00374
00375 FmtAssert(Is_Permutation_Vector(permutation, nloops),
00376 ("Bad permutation vector for SNL_INV_Local_Permute_Loops"));
00377
00378 for (i = 0; i < nloops; i++) {
00379 if (permutation[i] != i)
00380 break;
00381 }
00382
00383 INT outerxloop = i;
00384
00385 for (i = nloops-1; i >= outerxloop; i--) {
00386 if (permutation[i] != i)
00387 break;
00388 }
00389
00390 INT innerxloop = i;
00391
00392 if (outerxloop >= innerxloop) {
00393 for (i = 0; i < nloops; i++)
00394 permloop[i] = loop[i];
00395 if (do_est_register_usage) {
00396 DO_LOOP_INFO* inner_dli = Get_Do_Loop_Info(permloop[nloops-1]);
00397 inner_dli->Est_Register_Usage = est_register_usage;
00398 }
00399 return permloop[0];
00400 }
00401
00402 BOOL permutation_transformation_invalid = FALSE;
00403
00404
00405 LS_IN_LOOP loop_ls(loop[outerxloop], dg, pool);
00406
00407 INT32 num_iter[SNL_MAX_LOOPS];
00408 for (i = 0; i < nloops; i++) {
00409 parents[i] = LWN_Get_Parent(loop[i]);
00410 prev[i] = WN_prev(loop[i]);
00411 body[i] = WN_do_body(loop[i]);
00412 WN_do_body(loop[i]) = NULL;
00413 LWN_Extract_From_Block(parents[i], loop[i]);
00414 dli[i] = Get_Do_Loop_Info(loop[i]);
00415 is_inner[i] = dli[i]->Is_Inner;
00416 depth[i] = dli[i]->Depth;
00417 is_ivdep[i] = dli[i]->Is_Ivdep;
00418 if (Cur_PU_Feedback) {
00419 num_iter[i] = (INT32) MAX(1.0,dli[i]->Est_Num_Iterations);
00420 }
00421 }
00422
00423
00424 float ratio[SNL_MAX_LOOPS];
00425 if (Cur_PU_Feedback) {
00426 ratio[0] = 1.0;
00427 for (i = 1; i < nloops; i++) {
00428 ratio[i] = ratio[i-1]*num_iter[i-1];
00429 }
00430
00431
00432 for (i = 0; i < nloops; i++) {
00433 float new_mul = 1.0;
00434 for (INT j = 0; j < nloops; j++) {
00435 if (permutation[j]<permutation[i])
00436 new_mul = new_mul*num_iter[j];
00437 }
00438 ratio[i] = new_mul/ratio[i];
00439 }
00440 }
00441
00442 for (i = 0; i < nloops; i++) {
00443 permloop[i] = loop[permutation[i]];
00444 LWN_Insert_Block_After(parents[i], prev[i], permloop[i]);
00445 WN_do_body(permloop[i]) = body[i];
00446 LWN_Set_Parent(body[i], permloop[i]);
00447
00448
00449 dli[permutation[i]]->Is_Inner = is_inner[i];
00450 dli[permutation[i]]->Depth = depth[i];
00451 stack->Bottom_nth(first_in_stack+i) = permloop[i];
00452
00453
00454
00455
00456 if (dli[permutation[i]]->Is_Ivdep) {
00457 for (INT ii = i+1; ii < nloops; ii++) {
00458 if (permutation[i] < permutation[ii] && !is_ivdep[ii]) {
00459 dli[permutation[i]]->Is_Ivdep = FALSE;
00460 break;
00461 }
00462 }
00463 }
00464 }
00465
00466 if (Cur_PU_Feedback) {
00467
00468 for (i = 0; i < nloops; i++) {
00469 LWN_Scale_Frequency(loop[i], ratio[i]);
00470 LWN_Scale_Frequency(WN_start(loop[i]), ratio[i]);
00471 LWN_Scale_Frequency(WN_step(loop[i]), ratio[i]);
00472 }
00473 }
00474
00475 SNL_Change_Reduction_Loop_Stmts(plist, loop[outerxloop],
00476 permloop[outerxloop]);
00477
00478 if (region->First == loop[0])
00479 region->First = permloop[0];
00480 if (region->Last == loop[0])
00481 region->Last = permloop[0];
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492 for (i = 0; i < stack->Elements(); i++) {
00493 WN* wn_loop = stack->Bottom_nth(i);
00494 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00495 if (!dli->Has_Bad_Mem)
00496 break;
00497 }
00498 if (i == stack->Elements())
00499 return permloop[0];
00500
00501 SNL_Expand_Reduction_Deps(loop[outerxloop]);
00502
00503 LS_IN_LOOP_ITER ali(&loop_ls);
00504 WN* awn;
00505 INT curgdepth = loop_ls.Good_Depth - outerxloop;
00506 while (awn = ali.Step()) {
00507 VINDEX16 v = dg->Get_Vertex(awn);
00508 if (v == 0)
00509 continue;
00510
00511 INT alex = loop_ls.In(awn);
00512 EINDEX16 enext = 0;
00513 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
00514 enext = dg->Get_Next_Out_Edge(e);
00515 INT components = dg->Depv_Array(e)->Num_Dim();
00516 if (components <= curgdepth + outerxloop)
00517 continue;
00518
00519 FmtAssert(curgdepth + innerxloop + 1 <= components,
00520 ("dependence vector too short ... imperfect interchange?"));
00521
00522
00523
00524 DEP olddep[LNO_MAX_DO_LOOP_DEPTH];
00525 for (INT i = 0; i < dg->Depv_Array(e)->Num_Vec(); i++) {
00526 DEPV* d = dg->Depv_Array(e)->Depv(i);
00527 INT j;
00528 for (j = outerxloop; j <= innerxloop; j++)
00529 olddep[j] = DEPV_Dep(d, j+curgdepth);
00530 for (j = outerxloop; j <= innerxloop; j++)
00531 DEPV_Dep(d, j+curgdepth) = olddep[permutation[j]];
00532 }
00533
00534 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
00535 INT blex = loop_ls.In(bwn);
00536
00537 if (SNL_Test_Reduction_Lexneg(e, awn, bwn, alex, blex)) {
00538 permutation_transformation_invalid = TRUE;
00539 SNL_DEBUG1(1, "permutation e=%d seemingly illegal\n", e);
00540 }
00541 }
00542 }
00543
00544 if (do_est_register_usage) {
00545 DO_LOOP_INFO* inner_dli = Get_Do_Loop_Info(permloop[nloops-1]);
00546 inner_dli->Est_Register_Usage = est_register_usage;
00547 }
00548
00549 if (print_lexneg && permutation_transformation_invalid)
00550 DevWarn("Permutation transformation invalid?!");
00551
00552 Is_True(LWN_Check_Parentize(permloop[0]), ("Parentize fail 1"));
00553 return permloop[0];
00554 }
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565 extern WN* SNL_INV_Permute_Loops(WN* outer_loop,
00566 INT permutation[],
00567 INT nloops,
00568 BOOL warn_lexneg)
00569 {
00570 if (nloops == 0 || Identity_Permutation(permutation, nloops))
00571 return outer_loop;
00572
00573 if (LNO_Verbose) {
00574 Print_Interchange(stdout, outer_loop, permutation, nloops);
00575 Print_Interchange(TFile, outer_loop, permutation, nloops);
00576 }
00577 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00578 DU_MANAGER* du = Du_Mgr;
00579 REDUCTION_MANAGER* rm = red_manager;
00580 SNL_REGION region(outer_loop, outer_loop);
00581 WN** permloop = CXX_NEW_ARRAY(WN*, nloops, &LNO_local_pool);
00582 WN* wn = outer_loop;
00583 INT i;
00584 for (i = 1; i < nloops; i++) {
00585 WN* wn_temp = Find_Next_Innermost_Do(wn);
00586 if (wn_temp == NULL)
00587 break;
00588 wn = wn_temp;
00589 }
00590 WN* inner_loop = wn;
00591 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00592 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(inner_loop);
00593 FmtAssert(dli_inner->Depth - dli_outer->Depth + 1 == nloops,
00594 ("SNL_INV_Permute_Loops not passed an SNL."));
00595 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00596 &LNO_local_pool);
00597 Build_Doloop_Stack(inner_loop, stack);
00598 INT first_in_stack = dli_inner->Depth - nloops + 1;
00599 i = 0;
00600 for (i = nloops - 1; i >= 0; i--)
00601 if (permutation[i] != i)
00602 break;
00603 WN* wn_sink = stack->Bottom_nth(first_in_stack + i);
00604 for (i = 0; i < nloops; i++)
00605 if (permutation[i] != i)
00606 break;
00607 WN* wn_outer = stack->Bottom_nth(first_in_stack + i);
00608 WN* wn_sunk = Sink_Sandwiched_Code_In(wn_outer, wn_sink);
00609 wn_outer = SNL_INV_Local_Permute_Loops(NULL, stack, first_in_stack, nloops,
00610 permutation, TRUE, EST_REGISTER_USAGE(), permloop, warn_lexneg,
00611 ®ion, &LNO_local_pool);
00612 Hoist_Necessary_Code_Up(wn_sunk, du);
00613 Hoist_Statements(wn_outer, du);
00614 Rebuild_Access_Arrays(region, &LNO_local_pool);
00615 CXX_DELETE_ARRAY(permloop, &LNO_local_pool);
00616 return wn_outer;
00617 }
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635 extern WN* SNL_INV_Cache_Block(SNL_NEST_INFO* ni,
00636 const SNL_TILE_INFO* t,
00637 WN* permloop[],
00638 LS_IN_LOOP& loop_ls,
00639 SNL_REGION* region,
00640 SNL_INV_CACHE_BLOCK_REASON reason,
00641 SYMBOL* outersym,
00642 MEM_POOL* pool,
00643 BOOL report_prompf)
00644 {
00645 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00646 DU_MANAGER* du = Du_Mgr;
00647 REDUCTION_MANAGER* rm = red_manager;
00648 BOOL cache_transformation_invalid = FALSE;
00649 INT strips = t ? t->Strips() : 0;
00650 WN* the_newest_outer_loop = NULL;
00651
00652 for (INT s = 0; s < strips; s++) {
00653 WN* olddo = permloop[t->Iloop(s)];
00654 SYMBOL oldsym(WN_index(olddo));
00655 oldsym.Type = Do_Wtype(olddo);
00656
00657 INT newstripsz = t->Stripsz(s);
00658 INT lvl = t->Striplevel(s);
00659
00660
00661
00662
00663
00664
00665
00666
00667 INT64 iters = Iterations(olddo, pool);
00668 if (iters == 0)
00669 DevWarn("No iterations in loop: quelle intrigue ...");
00670 BOOL need_wind_down_or_min = (iters < 0 || iters % newstripsz);
00671 const INT bufsz = 128;
00672 char buf[bufsz];
00673 INT bufcnt = 0;
00674
00675
00676 switch (reason) {
00677 case SNL_INV_TILE_ONLY:
00678 bufcnt = sprintf(buf, "$tile%d", lvl);
00679 break;
00680 case SNL_INV_SE_ONLY:
00681 bufcnt = sprintf(buf, "$seonly%d", lvl);
00682 break;
00683 case SNL_INV_TILE_SE:
00684 bufcnt = sprintf(buf, "$setile%d", lvl);
00685 break;
00686 case SNL_INV_LEGO_TILE:
00687 bufcnt = sprintf(buf, "$dsmtile%d", lvl);
00688 break;
00689 case SNL_INV_MP_TILE:
00690 bufcnt = sprintf(buf, "$datile%d", lvl);
00691 break;
00692 case SNL_INV_DOACROSS_TILE:
00693 bufcnt = sprintf(buf, "$doacross%d", lvl);
00694 break;
00695 default:
00696 Is_True(0, ("Impossible"));
00697 }
00698 oldsym.Name(buf+bufcnt, bufsz-bufcnt);
00699 TYPE_ID wtype = oldsym.Type;
00700 SYMBOL newsym = outersym == NULL ? Create_Preg_Symbol(buf, wtype)
00701 : *outersym;
00702
00703
00704
00705
00706 WN* stripindex = WN_CreateIdname(newsym.WN_Offset(), newsym.St());
00707
00708 WN* stripbegin = LWN_Copy_Tree(WN_start(olddo), TRUE, LNO_Info_Map);
00709 LWN_Copy_Def_Use(WN_kid0(WN_start(olddo)), WN_kid0(stripbegin), du);
00710 Replace_Symbol(stripbegin, oldsym, newsym, NULL, NULL);
00711 FmtAssert(WN_operator(stripbegin) == OPR_STID,
00712 ("stripbegin is not an stid"));
00713 FmtAssert(SYMBOL(stripbegin) == newsym,
00714 ("stripbegin's symbol is %s", SYMBOL(stripbegin).Name()));
00715 WN* newsym_alias_wn = stripbegin;
00716
00717 WN* stripend = LWN_Copy_Tree(WN_end(olddo), TRUE, LNO_Info_Map);
00718 LWN_Copy_Def_Use(SNL_UBexp(WN_end(olddo)),
00719 SNL_UBexp(stripend), du);
00720 Replace_Symbol(stripend, oldsym, newsym, NULL, NULL);
00721
00722 WN* stripstep = LWN_Copy_Tree(WN_step(olddo), TRUE, LNO_Info_Map);
00723 LWN_Copy_Def_Use(WN_kid0(WN_step(olddo)), WN_kid0(stripstep), du);
00724 Replace_Symbol(stripstep, oldsym, newsym, NULL, NULL);
00725 Step_Size(stripstep, newstripsz);
00726
00727 WN* loop0parent = LWN_Get_Parent(permloop[0]);
00728 WN* loop0prev = WN_prev(permloop[0]);
00729 LWN_Extract_From_Block(loop0parent, permloop[0]);
00730 WN* stripbody = WN_CreateBlock();
00731 LWN_Copy_Linenumber(WN_do_body(olddo), stripbody);
00732 LWN_Insert_Block_Before(stripbody, NULL, permloop[0]);
00733
00734 WN* stripdo = LWN_CreateDO(stripindex, stripbegin, stripend,
00735 stripstep, stripbody);
00736 LWN_Copy_Linenumber(olddo, stripdo);
00737 if (report_prompf && Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00738 INT old_id = WN_MAP32_Get(Prompf_Id_Map, olddo);
00739 INT new_id = New_Construct_Id();
00740 WN_MAP32_Set(Prompf_Id_Map, stripdo, new_id);
00741 Prompf_Info->Cache_Tile(old_id, new_id);
00742 }
00743 if (Cur_PU_Feedback) {
00744 LWN_Copy_Frequency(stripstep, WN_step(olddo));
00745 LWN_Scale_Frequency(stripstep, 1.0/newstripsz);
00746 INT32 outer = WN_MAP32_Get(WN_MAP_FEEDBACK, permloop[0]);
00747 INT32 outer_trip = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_step(permloop[0]));
00748 outer_trip = MAX(1,outer_trip/MAX(1,outer));
00749 LWN_Scale_Frequency(stripstep, 1.0/outer_trip);
00750 LWN_Copy_Frequency(stripdo, permloop[0]);
00751 LWN_Copy_Frequency_Tree(stripbegin, permloop[0]);
00752
00753 INT32 count = WN_MAP32_Get(WN_MAP_FEEDBACK, stripstep);
00754 INT32 tripc = count/MAX(1,WN_MAP32_Get(WN_MAP_FEEDBACK, stripdo));
00755 LWN_Set_Frequency(permloop[0],count);
00756 LWN_Set_Frequency(WN_start(permloop[0]),count);
00757 LWN_Scale_Frequency(WN_step(permloop[0]), tripc);
00758 LWN_Scale_Frequency(olddo, tripc);
00759 }
00760
00761 if (s == 0)
00762 the_newest_outer_loop = stripdo;
00763
00764 LWN_Insert_Block_After(loop0parent, loop0prev, stripdo);
00765
00766
00767
00768 du->Remove_Def_From_System(WN_start(stripdo));
00769 du->Remove_Def_From_System(WN_step(stripdo));
00770 SNL_Change_Du_To_Index_Ldid(stripdo, WN_start(stripdo), du, TRUE);
00771 SNL_Change_Du_To_Index_Ldid(stripdo, WN_end(stripdo), du, TRUE);
00772 SNL_Change_Du_To_Index_Ldid(stripdo, WN_step(stripdo), du, TRUE);
00773
00774
00775
00776
00777
00778 TY_IDX ty = Be_Type_Tbl(wtype);
00779
00780 LWN_Delete_Tree(WN_kid0(WN_start(olddo)));
00781 WN_kid0(WN_start(olddo)) =
00782 LWN_CreateLdid(OPCODE_make_op(OPR_LDID, Promote_Type(wtype), wtype),
00783 newsym_alias_wn);
00784 LWN_Set_Parent(WN_kid0(WN_start(olddo)), WN_start(olddo));
00785 SNL_Add_Du_To_Index_Ldid(stripdo, WN_kid0(WN_start(olddo)), du, TRUE);
00786 LWN_Copy_Frequency(WN_start(olddo), olddo);
00787
00788 Upper_Bound_Standardize(WN_end(olddo));
00789 WN* oldubval = SNL_UBexp(WN_end(olddo));
00790 OPCODE opld = OPCODE_make_op(OPR_LDID, Promote_Type(wtype), wtype);
00791 OPCODE opadd = OPCODE_make_op(OPR_ADD, Promote_Type(wtype), MTYPE_V);
00792 WN* ldid = LWN_CreateLdid(opld, newsym_alias_wn);
00793 WN* newub = LWN_CreateExp2(opadd, ldid,
00794 LWN_Make_Icon(Promote_Type(wtype), newstripsz - 1));
00795
00796 if (need_wind_down_or_min) {
00797 OPCODE opmin = OPCODE_make_op(OPR_MIN, Promote_Type(wtype), MTYPE_V);
00798 newub = LWN_CreateExp2(opmin, newub, oldubval);
00799 }
00800 else
00801 LWN_Delete_Tree(oldubval);
00802
00803 LWN_Copy_Frequency(newub, WN_step(olddo));
00804 SNL_UBexp(WN_end(olddo)) = newub;
00805 LWN_Set_Parent(newub, WN_end(olddo));
00806 SNL_Add_Du_To_Index_Ldid(stripdo, ldid, du, TRUE);
00807
00808
00809
00810 DO_LOOP_INFO* dli = CXX_NEW(DO_LOOP_INFO(&LNO_default_pool,
00811 NULL, NULL, NULL, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
00812 &LNO_default_pool);
00813 DO_LOOP_INFO* olddli = Get_Do_Loop_Info(olddo);
00814 Set_Do_Loop_Info(stripdo, dli);
00815 dli->Est_Num_Iterations = (olddli->Est_Num_Iterations +
00816 newstripsz - 1)/ newstripsz;
00817 dli->Is_Ivdep = olddli->Is_Ivdep;
00818 dli->Is_Concurrent_Call = olddli->Is_Concurrent_Call;
00819 dli->Concurrent_Directive = olddli->Concurrent_Directive;
00820 dli->Num_Iterations_Symbolic = TRUE;
00821 olddli->Est_Num_Iterations = MIN(newstripsz, olddli->Est_Num_Iterations);
00822 olddli->Is_Inner_Tile = TRUE;
00823 olddli->Tile_Size = newstripsz;
00824 dli->Is_Outer_Tile = TRUE;
00825 dli->Has_Calls = olddli->Has_Calls;
00826 dli->Has_Unsummarized_Calls = olddli->Has_Unsummarized_Calls;
00827 dli->Has_Gotos = olddli->Has_Gotos;
00828 dli->Has_Bad_Mem = olddli->Has_Bad_Mem;
00829
00830
00831
00832
00833
00834
00835
00836
00837 SNL_Expand_Reduction_Deps(the_newest_outer_loop);
00838
00839 LS_IN_LOOP_ITER ali(&loop_ls);
00840 WN* awn;
00841 while (awn = ali.Step()) {
00842
00843 VINDEX16 v = dg->Get_Vertex(awn);
00844 if (v == 0)
00845 continue;
00846
00847 INT alex = loop_ls.In(awn);
00848 EINDEX16 enext = 0;
00849 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
00850 enext = dg->Get_Next_Out_Edge(e);
00851 VINDEX16 vsink = dg->Get_Sink(e);
00852 FmtAssert(vsink, ("vsink cannot be zero"));
00853 if (!loop_ls.In(dg->Get_Wn(vsink)))
00854 continue;
00855
00856 WN* bwn = dg->Get_Wn(dg->Get_Sink(e));
00857 INT blex = loop_ls.In(bwn);
00858 if (SNL_Update_Strip_Dependence(loop_ls.Depth, s, t->Iloop(s),
00859 e, awn, bwn, alex, blex)) {
00860 cache_transformation_invalid = TRUE;
00861 SNL_DEBUG1(1, "invariant tiling e=%d seemingly illegal\n", e);
00862 }
00863 }
00864 }
00865 }
00866
00867 if (cache_transformation_invalid)
00868 DevWarn("Cache blocking transformation invalid?!");
00869
00870 if (the_newest_outer_loop) {
00871 Is_True(LWN_Check_Parentize(the_newest_outer_loop), ("Parentize fail 1"));
00872 if (region->First == permloop[0])
00873 region->First = the_newest_outer_loop;
00874 if (region->Last == permloop[0])
00875 region->Last = the_newest_outer_loop;
00876
00877 if (ni != NULL)
00878 SNL_Change_Reduction_Loop_Stmts(&(ni->Privatizability_Info().Plist),
00879 permloop[0], the_newest_outer_loop);
00880 }
00881 else
00882 Is_True(LWN_Check_Parentize(permloop[0]), ("Parentize fail 1"));
00883
00884 if (t)
00885 Renumber_Loops(region->First, region->Last, dg);
00886 return the_newest_outer_loop;
00887 }
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904 static SNL_REGION SNL_INV_Register_Tile(INT nloops,
00905 const INT* regstripsz,
00906 WN* permloop[],
00907 EST_REGISTER_USAGE est_register_usage,
00908 INT depth_inner,
00909 SX_INFO* pinfo)
00910 {
00911 SNL_REGION region(permloop[0], permloop[0]);
00912 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00913
00914 if (regstripsz == NULL) {
00915 if (!Valid_SNL_Region(region))
00916 DevWarn("SNL_INV_Register_Tile: Invalid SNL_REGION [0x%p,0x%p]",
00917 region.First, region.Last);
00918 return region;
00919 }
00920
00921 INT i;
00922 for (i = 0; i < nloops-1; i++) {
00923 if (regstripsz[i] <= 1) {
00924 Is_True(regstripsz[i] == 1, ("Unroll quantity %d", regstripsz[i]));
00925 continue;
00926 }
00927
00928 HASH_TABLE<WN*,WN*>* ht;
00929 INT depth = depth_inner - nloops + 1 + i;
00930 SX_INFO* pinfo2;
00931 SNL_REGION region2 = SNL_Regtile_Loop(permloop[i], regstripsz[i],
00932 nloops-i, FALSE,
00933 est_register_usage,
00934 pinfo, depth, FALSE,
00935 &ht, &pinfo2);
00936 if (i == 0)
00937 region = region2;
00938
00939
00940
00941
00942 Renumber_Loops(region2.First, region2.Last, dg);
00943
00944
00945
00946
00947 if (LNO_Outer_Unroll_Deep && ht && pinfo2 && i < nloops-2) {
00948 WN* permloop2[SNL_MAX_LOOPS];
00949 for (INT ii = i+1; ii < nloops; ii++) {
00950 permloop2[ii] = ht->Find(permloop[ii]);
00951 FmtAssert(permloop2[ii], ("Bad mapping"));
00952 }
00953
00954 SNL_REGION region2 = SNL_INV_Register_Tile(nloops-i-1,
00955 regstripsz+i+1, permloop2+i+1, est_register_usage,
00956 depth_inner, pinfo2);
00957 if (permloop2[i+1] == region.Last)
00958 region.Last = region2.Last;
00959 }
00960
00961 CXX_DELETE(pinfo2, &SNL_local_pool);
00962 CXX_DELETE(ht, &SNL_local_pool);
00963 }
00964
00965 #ifdef Is_True_On
00966 {
00967 for (WN *w = region.First; w; w = (w == region.Last) ? NULL : WN_next(w))
00968 Is_True(LWN_Check_Parentize(w), ("Parentize fail"));
00969 }
00970 #endif
00971
00972 Renumber_Loops(region.First, region.Last, dg);
00973
00974
00975
00976
00977
00978
00979
00980 BOOL found_unity = FALSE;
00981 for (i = 0; i < nloops-1; i++) {
00982 if (regstripsz[i] > 1 && Iterations(permloop[i], &SNL_local_pool) == 1) {
00983 found_unity = TRUE;
00984 WN* removing_loop = permloop[i];
00985 SNL_REGION region2 = SNL_Remove_Unity_Trip_Loop(removing_loop, FALSE);
00986 if (region.First == removing_loop)
00987 region.First = region2.First;
00988 if (region.Last == removing_loop)
00989 region.Last = region2.Last;
00990 }
00991 }
00992
00993 if (found_unity) {
00994 #ifdef Is_True_On
00995 {
00996 for (WN *w = region.First; w; w = (w == region.Last) ? NULL : WN_next(w))
00997 Is_True(LWN_Check_Parentize(w), ("Parentize fail"));
00998 }
00999 #endif
01000 Renumber_Loops(region.First, region.Last, dg);
01001 }
01002
01003 if (!Valid_SNL_Region(region)) {
01004 DevWarn("SNL_INV_Register_Tile: Invalid SNL_REGION [0x%p,0x%p]",
01005 region.First, region.Last);
01006 }
01007 return region;
01008 }
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033 extern SNL_REGION SNL_INV_Transforms(WN* wn_outer,
01034 INT permutation[],
01035 SNL_NEST_INFO* ni,
01036 INT nloops,
01037 SNL_TILE_INFO* t,
01038 INT regstripsz[],
01039 EST_REGISTER_USAGE est_register_usage,
01040 BOOL want_se_and_dist,
01041 SNL_REGION* old_region,
01042 BOOL hoist_outer_invar,
01043 SNL_REGION* rg_kernel)
01044 {
01045 INT outer_depth = Do_Loop_Depth(wn_outer);
01046 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01047 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01048 &LNO_local_pool);
01049 Build_Doloop_Stack(wn_inner, stack);
01050
01051 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
01052 WN* loop0 = stack->Bottom_nth(first_in_stack);
01053
01054 SNL_REGION region(loop0, loop0);
01055 if (old_region != NULL) {
01056 region.First = old_region->First;
01057 region.Last = old_region->Last;
01058 }
01059 MEM_POOL_Push_Freeze(&SNL_local_pool);
01060 WN* the_newest_outer_loop = loop0;
01061
01062 {
01063
01064
01065 FmtAssert(t == NULL || t->Rectangular(), ("Requires rectangular tiling"));
01066
01067
01068 FmtAssert(t == NULL || nloops == t->Nloops(), ("Bad nloops"));
01069
01070
01071
01072
01073
01074 BOOL se_introduces_lcd = FALSE;
01075 if (want_se_and_dist) {
01076 BOOL distribution_applied = FALSE;
01077 BOOL has_lcd = ni->Privatizability_Info().Lcd_Depth() != -1;
01078 if (!has_lcd && !Get_Trace(TP_LNOPT, TT_LNO_BIG_SCALAR_TILES)) {
01079 distribution_applied = SNL_INV_Limited_Scalar_Expand_And_Distribute(
01080 loop0, &t, permutation, nloops, &ni->Privatizability_Info().Plist,
01081 ®ion, &the_newest_outer_loop);
01082 }
01083 if (distribution_applied == FALSE) {
01084 SNL_INV_Scalar_Expand(loop0, permutation, nloops,
01085 &ni->Privatizability_Info().Plist, -1, NULL, FALSE, TRUE);
01086
01087 SNL_INV_Distribution(loop0, nloops, has_lcd, ®ion,
01088 &the_newest_outer_loop);
01089 }
01090 }
01091
01092
01093
01094
01095
01096
01097
01098 WN* permloop[SNL_MAX_LOOPS];
01099 SX_PLIST* plist = &(ni->Privatizability_Info().Plist);
01100 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
01101 Prompf_Interchanges(stack->Bottom_nth(first_in_stack),
01102 permutation, nloops);
01103 if (Cur_PU_Feedback)
01104 LNO_FB_Inv_Interchange(stack->Bottom_nth(first_in_stack),
01105 permutation, nloops);
01106 WN* outer_perm_loop = SNL_INV_Local_Permute_Loops(plist, stack,
01107 first_in_stack, nloops, permutation, TRUE, est_register_usage, permloop,
01108 TRUE, ®ion, &SNL_local_pool);
01109 if (outer_perm_loop != NULL && the_newest_outer_loop != NULL
01110 && Do_Loop_Depth(outer_perm_loop) < Do_Loop_Depth(the_newest_outer_loop))
01111 the_newest_outer_loop = outer_perm_loop;
01112 if (rg_kernel->First == loop0 && rg_kernel->First != outer_perm_loop)
01113 rg_kernel->First = outer_perm_loop;
01114 if (rg_kernel->Last == loop0 && rg_kernel->Last != outer_perm_loop)
01115 rg_kernel->Last = outer_perm_loop;
01116
01117
01118 if (!se_introduces_lcd && permutation != NULL
01119 && !Identity_Permutation(permutation, nloops))
01120 SNL_INV_Permute_Distributends(®ion, permutation, nloops,
01121 Array_Dependence_Graph, Du_Mgr, red_manager);
01122
01123
01124 LS_IN_LOOP loop_ls(permloop[0], Array_Dependence_Graph, &SNL_local_pool);
01125
01126
01127
01128
01129 WN* outer_cache_loop = SNL_INV_Cache_Block(ni, t, permloop,
01130 loop_ls, ®ion, SNL_INV_TILE_ONLY, NULL, &SNL_local_pool, TRUE);
01131 if (outer_cache_loop != NULL && the_newest_outer_loop != NULL
01132 && Do_Loop_Depth(outer_cache_loop)
01133 < Do_Loop_Depth(the_newest_outer_loop)) {
01134 the_newest_outer_loop = outer_cache_loop;
01135 }
01136
01137
01138
01139 BOOL did_reg_tile_and_hoist=FALSE;
01140 WN *hoist_inner;
01141 if (hoist_outer_invar) {
01142 Rebuild_Access_Arrays(region,&SNL_local_pool);
01143 hoist_inner = stack->Bottom_nth(first_in_stack);
01144 INT depth = Do_Loop_Depth(hoist_inner);
01145 INT i;
01146 for (i=first_in_stack+1; i<stack->Elements(); i++) {
01147 WN *wn = stack->Bottom_nth(i);
01148 if (Do_Loop_Depth(wn) > depth) {
01149 hoist_inner = wn;
01150 depth = Do_Loop_Depth(wn);
01151 }
01152 }
01153 INT outer_reg_tile=nloops;
01154 BOOL done=FALSE;
01155 if (regstripsz) {
01156 for (i=0; i<nloops-1 && !done; i++) {
01157 if (regstripsz[i] > 1) {
01158 outer_reg_tile = i;
01159 done = TRUE;
01160 }
01161 }
01162 }
01163 did_reg_tile_and_hoist = done;
01164 WN *outer = hoist_inner;
01165 for (i=0; i<nloops-1; i++) {
01166 outer = LWN_Get_Parent(LWN_Get_Parent(outer));
01167 }
01168 WN *one_before_outer = WN_prev(outer);
01169 WN *one_after_outer = WN_next(outer);
01170 WN *parent = LWN_Get_Parent(outer);
01171 void Hoist_Outer_Invar(WN *hoist_inner, INT nloops, INT outer_reg_tile,
01172 BOOL can_tile);
01173 Hoist_Outer_Invar(hoist_inner,nloops,outer_reg_tile,TRUE);
01174 if (region.First == outer) {
01175 if (one_before_outer) region.First = WN_next(one_before_outer);
01176 else region.First = WN_first(parent);
01177 }
01178 if (region.Last == outer) {
01179 if (one_after_outer) region.Last = WN_prev(one_after_outer);
01180 else region.Last = WN_last(parent);
01181 }
01182 }
01183
01184
01185
01186
01187
01188
01189
01190
01191 ni->Privatizability_Info().Update_Reduction_Loop_Stmts(wn_inner);
01192 SNL_REGION region2 = SNL_INV_Register_Tile(nloops, regstripsz, permloop,
01193 est_register_usage, ni->Depth_Inner(), &ni->Privatizability_Info());
01194 if (rg_kernel->First == permloop[0] && region2.First != rg_kernel->First)
01195 rg_kernel->First = region2.First;
01196 if (rg_kernel->Last == permloop[0] && region2.Last != rg_kernel->Last)
01197 rg_kernel->Last = region2.Last;
01198 if (region.First == permloop[0])
01199 region.First = region2.First;
01200 if (region.Last == permloop[0])
01201 region.Last = region2.Last;
01202
01203 if (did_reg_tile_and_hoist) {
01204 void Hoist_Outer_Invar(WN *wn_inner, INT nloops, INT outer_reg_tile,
01205 BOOL can_tile);
01206 Hoist_Outer_Invar(hoist_inner,1,1,TRUE);
01207 }
01208
01209
01210
01211
01212
01213 Rebuild_Access_Arrays(region, &SNL_local_pool);
01214 #ifdef Is_True_On
01215 for (WN* w = region.First; w; w = w==region.Last ? NULL : WN_next(w))
01216 Is_True(LWN_Check_Parentize(w), ("Check Parentize fails"));
01217 #endif
01218 }
01219
01220 MEM_POOL_Pop_Unfreeze(&SNL_local_pool);
01221 if (!Valid_SNL_Region(region))
01222 DevWarn("SNL_Invariant_Transforms: Invalid SNL_REGION [0x%p,0x%p]",
01223 region.First, region.Last);
01224 return region;
01225 }
01226
01227