00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00048 #define __STDC_LIMIT_MACROS
00049 #include <stdint.h>
00050 #ifdef USE_PCH
00051 #include "lno_pch.h"
00052 #endif // USE_PCH
00053 #pragma hdrstop
00054
00055 #define snl_test_CXX "snl_test.cxx"
00056 const static char *rcs_id = snl_test_CXX "$Revision: 1.9 $";
00057
00058 #include <sys/types.h>
00059 #include <alloca.h>
00060 #include <ctype.h>
00061 #include <limits.h>
00062
00063 #include "pu_info.h"
00064 #include "model.h"
00065 #include "snl.h"
00066 #include "lwn_util.h"
00067 #include "wn_simp.h"
00068 #include "opt_du.h"
00069 #include "config_lno.h"
00070 #include "config_targ.h"
00071 #include "config_targ_opt.h"
00072 #include "errors.h"
00073 #include "erbe.h"
00074 #include "erglob.h"
00075 #include "tlog.h"
00076 #include "lego.h"
00077 #include "fiz_fuse.h"
00078 #include "array_bounds.h"
00079 #include "small_trips.h"
00080 #include "config.h"
00081 #include "split_tiles.h"
00082
00083
00084
00085 #ifdef KEY
00086 #define MAX_NESTS_IN_FU 450
00087 #endif
00088 static void SNL_Optimize_Bounds_With_Access_Vectors(WN* wn_loop,
00089 DU_MANAGER* du);
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 static BOOL Inner_Loop_Is_Trapezoidal(WN* wn, SNL_NEST_INFO *ni)
00101 {
00102 DOLOOP_STACK* stack = &(ni->Dostack());
00103 INT i;
00104 for (i = 0; i < stack->Elements(); i++)
00105 if (wn == stack->Bottom_nth(i))
00106 break;
00107 FmtAssert(i < stack->Elements(), ("Could not find loop in stack."));
00108 for (i++; i < stack->Elements(); i++)
00109 if (Loop_Is_Trapezoidal(stack->Bottom_nth(i), Array_Dependence_Graph,
00110 Du_Mgr))
00111 return TRUE;
00112 return FALSE;
00113 }
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132 #define EST_CONSTANT 2
00133
00134 static BOOL Est_Num_Iters_Suspect(DOLOOP_STACK* stack)
00135 {
00136 INT depth = Do_Depth(stack->Bottom_nth(stack->Elements() - 1));
00137
00138
00139 INT i;
00140 for (i = depth - 1; i <= depth; i++) {
00141 WN* wn_loop = stack->Bottom_nth(i);
00142 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00143 if (dli_loop->Est_Num_Iterations != LNO_Num_Iters)
00144 break;
00145 }
00146 if (i == depth + 1) {
00147 if (snl_debug >= 3)
00148 fprintf(TFile, "!SUSPECT: Values already symbolic\n");
00149 return FALSE;
00150 }
00151
00152
00153 BOOL trapezoidal = FALSE;
00154 WN* wn_loop = stack->Bottom_nth(depth);
00155 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00156 ACCESS_ARRAY* array_LB = dli_loop->LB;
00157 for (i = 0; !trapezoidal && i < array_LB->Num_Vec(); i++)
00158 for (INT j = 0; !trapezoidal && j < depth; j++)
00159 if (array_LB->Dim(i)->Loop_Coeff(j) != 0)
00160 trapezoidal = TRUE;
00161 if (!trapezoidal) {
00162 ACCESS_ARRAY* array_UB = dli_loop->UB;
00163 for (i = 0; !trapezoidal && i < array_UB->Num_Vec(); i++)
00164 for (INT j = 0; !trapezoidal && j < depth; j++)
00165 if (array_UB->Dim(i)->Loop_Coeff(j) != 0)
00166 trapezoidal = TRUE;
00167 if (!trapezoidal) {
00168 if (snl_debug >= 3)
00169 fprintf(TFile, "!SUSPECT: Nest not trapezoidal\n");
00170 return FALSE;
00171 }
00172 }
00173
00174
00175 WN* wn_outer_loop = stack->Bottom_nth(depth-1);
00176 DO_LOOP_INFO* dli_outer_loop = Get_Do_Loop_Info(wn_outer_loop);
00177 ACCESS_ARRAY* array_outer_LB = dli_outer_loop->LB;
00178 if (array_outer_LB->Num_Vec() != 1) {
00179 if (snl_debug >= 3)
00180 fprintf(TFile, "SUSPECT: Outer loop LB not unique\n");
00181 return TRUE;
00182 }
00183 if (array_outer_LB->Dim(0)->Contains_Non_Lin_Symb()
00184 || array_outer_LB->Dim(0)->Contains_Lin_Symb()) {
00185 if (snl_debug >= 3)
00186 fprintf(TFile, "SUSPECT: Outer loop LB contains symbol\n");
00187 return TRUE;
00188 }
00189 INT prod = 0;
00190 INT j;
00191 for (j = 0; j < depth - 1; j++) {
00192 WN* wn_loop = stack->Bottom_nth(j);
00193 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00194 prod += dli_loop->Est_Num_Iterations
00195 * array_outer_LB->Dim(0)->Loop_Coeff(j);
00196 }
00197 INT outer_LB = prod - array_outer_LB->Dim(0)->Const_Offset;
00198 ACCESS_ARRAY* array_outer_UB = dli_outer_loop->UB;
00199 if (array_outer_UB->Num_Vec() != 1) {
00200 if (snl_debug >= 3)
00201 fprintf(TFile, "SUSPECT: Outer loop UB not unique\n");
00202 return TRUE;
00203 }
00204 if (array_outer_UB->Dim(0)->Contains_Non_Lin_Symb()
00205 || array_outer_UB->Dim(0)->Contains_Lin_Symb()) {
00206 if (snl_debug >= 3)
00207 fprintf(TFile, "SUSPECT: Outer loop UB contains symbol\n");
00208 return TRUE;
00209 }
00210 prod = 0;
00211 for (j = 0; j < depth - 1; j++) {
00212 WN* wn_loop = stack->Bottom_nth(j);
00213 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00214 prod += dli_loop->Est_Num_Iterations
00215 * array_outer_UB->Dim(0)->Loop_Coeff(j);
00216 }
00217 INT outer_UB = array_outer_UB->Dim(0)->Const_Offset - prod;
00218 if (outer_UB < outer_LB) {
00219 if (snl_debug >= 3)
00220 fprintf(TFile, "SUSPECT: Null range in outer loop\n");
00221 return TRUE;
00222 }
00223
00224
00225
00226
00227
00228 WN* wn_inner_loop = stack->Bottom_nth(depth);
00229 DO_LOOP_INFO* dli_inner_loop = Get_Do_Loop_Info(wn_inner_loop);
00230 ACCESS_ARRAY* array_inner_LB = dli_inner_loop->LB;
00231 ACCESS_ARRAY* array_inner_UB = dli_inner_loop->UB;
00232 for (i = 0; i < array_inner_LB->Num_Vec(); i++)
00233 if (array_inner_LB->Dim(i)->Contains_Non_Lin_Symb()
00234 || array_inner_LB->Dim(i)->Contains_Lin_Symb())
00235 break;
00236 if (i < array_inner_LB->Num_Vec()) {
00237 if (snl_debug >= 3)
00238 fprintf(TFile, "SUSPECT: Inner loop LB contains symbol\n");
00239 return TRUE;
00240 }
00241 for (i = 0; i < array_inner_UB->Num_Vec(); i++)
00242 if (array_inner_UB->Dim(i)->Contains_Non_Lin_Symb()
00243 || array_inner_UB->Dim(i)->Contains_Lin_Symb())
00244 break;
00245 if (i < array_inner_UB->Num_Vec()) {
00246 if (snl_debug >= 3)
00247 fprintf(TFile, "SUSPECT: Inner loop UB contains symbol\n");
00248 return TRUE;
00249 }
00250
00251
00252 BOOL found_min = FALSE;
00253 BOOL found_max = FALSE;
00254 INT min_value = 0;
00255 INT max_value = 0;
00256 for (i = 0; i < array_inner_LB->Num_Vec(); i++) {
00257 prod = 0;
00258 for (INT j = 0; j < depth - 1; j++) {
00259 WN* wn_loop = stack->Bottom_nth(j);
00260 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00261 prod += dli_loop->Est_Num_Iterations
00262 * array_inner_LB->Dim(i)->Loop_Coeff(j);
00263 }
00264 INT c0 = array_inner_LB->Dim(i)->Loop_Coeff(depth-1);
00265 INT c1 = array_inner_LB->Dim(i)->Loop_Coeff(depth);
00266 INT k0 = array_inner_LB->Dim(i)->Const_Offset;
00267 INT value = (k0 - c0 * outer_LB - prod) / c1;
00268 if (!found_min) {
00269 min_value = value;
00270 found_min = TRUE;
00271 } else if (value < min_value)
00272 min_value = value;
00273 value = (k0 - c0 * outer_UB - prod) / c1;
00274 if (value < min_value)
00275 min_value = value;
00276 }
00277 for (i = 0; i < array_inner_UB->Num_Vec(); i++) {
00278 INT prod = 0;
00279 for (INT j = 0; j < depth - 1; j++) {
00280 WN* wn_loop = stack->Bottom_nth(j);
00281 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00282 prod += dli_loop->Est_Num_Iterations
00283 * array_inner_UB->Dim(i)->Loop_Coeff(j);
00284 }
00285 INT c0 = array_inner_UB->Dim(i)->Loop_Coeff(depth-1);
00286 INT c1 = array_inner_UB->Dim(i)->Loop_Coeff(depth);
00287 INT k0 = array_inner_UB->Dim(i)->Const_Offset;
00288 INT value = (k0 - c0 * outer_UB - prod) / c1;
00289 if (!found_max) {
00290 max_value = value;
00291 found_max = TRUE;
00292 } else if (value > max_value)
00293 max_value = value;
00294 value = (k0 - c0 * outer_LB - prod) / c1;
00295 if (value > max_value)
00296 max_value = value;
00297 }
00298
00299 if (snl_debug >= 3) {
00300 fprintf(TFile, "outer LB: %d\n", outer_LB);
00301 fprintf(TFile, "outer UB: %d\n", outer_UB);
00302 fprintf(TFile, "min value inner loop: %d\n", min_value);
00303 fprintf(TFile, "max value inner loop: %d\n", max_value);
00304 }
00305
00306 INT est_iterations_inner = dli_inner_loop->Est_Num_Iterations;
00307 if (max_value - min_value > EST_CONSTANT * est_iterations_inner) {
00308 if (snl_debug >= 3)
00309 fprintf(TFile,
00310 "SUSPECT: Trapezoidal interchange does not preserve loop lengths\n");
00311 return TRUE;
00312 }
00313 if (snl_debug >= 3)
00314 fprintf(TFile,
00315 "!SUSPECT: Trapezoidal interchange preserves loop lengths\n");
00316 return FALSE;
00317 }
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 static void Find_Kernel_Stack_Nest_Traverse(WN* wn_tree,
00328 INT loop_count,
00329 DOLOOP_STACK* kernel_stack)
00330 {
00331 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
00332 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
00333 if (dli->Is_Inner && dli->Depth + 1 >= loop_count) {
00334 INT local_count = 0;
00335 WN *wn;
00336 for (wn = wn_tree; wn != NULL; wn = LWN_Get_Parent(wn)) {
00337 if (WN_opcode(wn) == OPC_DO_LOOP)
00338 local_count++;
00339 if (local_count == loop_count)
00340 break;
00341 }
00342 FmtAssert(wn != NULL,
00343 ("Find_Kernel_Stack_Nest_Traverse: Could not find loop"));
00344 if (SNL_Loop_Count(wn) == loop_count) {
00345 INT i;
00346 for (i = 0; i < kernel_stack->Elements(); i++)
00347 if (kernel_stack->Bottom_nth(i) == wn)
00348 break;
00349 if (i == kernel_stack->Elements())
00350 kernel_stack->Push(wn);
00351 }
00352 return;
00353 }
00354 }
00355
00356 if (WN_opcode(wn_tree) == OPC_BLOCK) {
00357 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
00358 Find_Kernel_Stack_Nest_Traverse(wn, loop_count, kernel_stack);
00359 } else {
00360 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
00361 Find_Kernel_Stack_Nest_Traverse(WN_kid(wn_tree, i), loop_count,
00362 kernel_stack);
00363 }
00364 }
00365
00366
00367
00368
00369
00370
00371
00372 static void Find_Kernel_Stack(SNL_REGION* rg_kernel,
00373 INT loop_count,
00374 DOLOOP_STACK* kernel_stack)
00375 {
00376
00377 for (WN* wn = rg_kernel->First; wn != NULL; wn = WN_next(wn)) {
00378 Find_Kernel_Stack_Nest_Traverse(wn, loop_count, kernel_stack);
00379 if (wn == rg_kernel->Last)
00380 break;
00381 }
00382 }
00383
00384
00385
00386
00387
00388
00389 INT Num_Cache_Strips;
00390
00391
00392 static SNL_REGION Do_Automatic_Transformation(WN* wn,
00393 INT nloops,
00394 SNL_NEST_INFO* ni,
00395 BOOL* changed)
00396 {
00397 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00398
00399 *changed = FALSE;
00400
00401
00402
00403
00404 SNL_REGION region(wn,wn);
00405 BOOL trying_invariant = (ni->Nloops_Invariant() >= ni->Nloops_General());
00406
00407 SNL_ANAL_INFO sinfo(ni, !trying_invariant, dg, &LNO_default_pool);
00408 if (snl_debug >= 3)
00409 ni->Print(TFile);
00410 if (snl_debug >= 2)
00411 sinfo.Print(TFile);
00412
00413 SNL_DEP_MATRIX smat(sinfo.Body_Deps(), &LNO_default_pool);
00414 SNL_DEP_MATRIX smati(sinfo.Imperfect_Deps(), &LNO_default_pool);
00415 if (snl_debug >= 3) {
00416 sinfo.Print(TFile);
00417
00418 fprintf(TFile, "summarized dependences: body\n");
00419 if (sinfo.Body_Deps().All_Stars())
00420 fprintf(TFile, "<all stars>\n");
00421 else
00422 smat.Print(TFile);
00423
00424 fprintf(TFile, "summarized dependences: imperfect\n");
00425 if (sinfo.Imperfect_Deps().All_Stars())
00426 fprintf(TFile, "<all stars>\n");
00427 else
00428 smati.Print(TFile);
00429 }
00430
00431 BOOL imperfect_stuff_is_blockable = FALSE;
00432 BOOL imperfect_stuff_is_distributable =
00433 (sinfo.Above_Is_Distributable() && sinfo.Below_Is_Distributable());
00434
00435
00436 INT i;
00437 for (i = 0; i < smat.Nloops(); i++) {
00438 if (i < smat.Nloops()-1 &&
00439 smat.Is_Fully_Permutable(i, smat.Nloops()-1) &&
00440 !sinfo.Body_Deps().All_Stars()) {
00441
00442
00443 imperfect_stuff_is_blockable =
00444 (smati.Is_Fully_Permutable(i, smat.Nloops()-1) &&
00445 !sinfo.Imperfect_Deps().All_Stars());
00446 if (imperfect_stuff_is_blockable || imperfect_stuff_is_distributable)
00447 break;
00448 }
00449
00450 INT depth = ni->Depth_Inner() - smat.Nloops() + 1 + i;
00451 DO_LOOP_INFO* dli = Get_Do_Loop_Info(ni->Dostack().Bottom_nth(depth));
00452
00453
00454 if (dli->Blockable_Specification == smat.Nloops() - i) {
00455
00456
00457
00458
00459
00460
00461
00462 if ( LNO_Apply_Illegal_Transformation_Directives ) {
00463 ErrMsgSrcpos(EC_LNO_Bad_Pragma_String_Advisory,
00464 WN_Get_Linenum(ni->Dostack().Bottom_nth(depth)),
00465 "BLOCK",
00466 "Seemingly illegal blocking being performed.");
00467 if (!imperfect_stuff_is_distributable)
00468 imperfect_stuff_is_blockable = TRUE;
00469 break;
00470 }
00471 ErrMsgSrcpos(EC_LNO_Bad_Pragma_String,
00472 WN_Get_Linenum(ni->Dostack().Bottom_nth(depth)),
00473 "BLOCK",
00474 "Seemingly illegal blocking pragma ignored");
00475 }
00476
00477
00478
00479
00480
00481
00482
00483 if (dli->Permutation_Spec_Count > 0
00484 && dli->Permutation_Spec_Count == smat.Nloops() - i) {
00485 if ( LNO_Apply_Illegal_Transformation_Directives ) {
00486 ErrMsgSrcpos(EC_LNO_Bad_Pragma_String_Advisory,
00487 WN_Get_Linenum(ni->Dostack().Bottom_nth(depth)),
00488 "INTERCHANGE",
00489 "Seemingly illegal permutation being performed.");
00490 if (!imperfect_stuff_is_distributable)
00491 imperfect_stuff_is_blockable = TRUE;
00492 break;
00493 }
00494 ErrMsgSrcpos(EC_LNO_Bad_Pragma_String,
00495 WN_Get_Linenum(ni->Dostack().Bottom_nth(depth)),
00496 "INTERCHANGE",
00497 "Seemingly illegal permutation pragma ignored");
00498 }
00499
00500 if (!LNO_Analysis)
00501 continue;
00502
00503
00504
00505
00506
00507
00508
00509 if (i == 0)
00510 fprintf(LNO_Analysis, " (DEPENDENCE_PROBLEMS");
00511 for (INT j = sinfo.Body_Deps().Bad_Deps().Lastidx(); j >= 0; j--) {
00512 if (sinfo.Body_Deps().Bad_Deps()[j].loop == i) {
00513 WN* src;
00514 WN* snk;
00515 EINDEX16 e = sinfo.Body_Deps().Bad_Deps()[j].e;
00516 if (e == 0) {
00517 src = NULL;
00518 snk = NULL;
00519
00520
00521
00522 DevWarn("not actually a dependence problem.");
00523 }
00524 else {
00525 src = dg->Get_Wn(dg->Get_Source(e));
00526 snk = dg->Get_Wn(dg->Get_Sink(e));
00527 if (src == NULL || snk == NULL)
00528 DevWarn("Missing wn mapping for vertex");
00529 }
00530
00531 fprintf(LNO_Analysis, " ((");
00532 for (INT ii = 0; ii <= i; ii++) {
00533 INT loop_depth = ni->Depth_Inner() - nloops + 1 + ii;
00534 WN* lp = ni->Dostack().Bottom_nth(loop_depth);
00535 fprintf(LNO_Analysis, "%s%d",
00536 ii==0 ? "" : " ", Srcpos_To_Line(WN_Get_Linenum(lp)));
00537 }
00538 INT srcline = src ? Srcpos_To_Line(LWN_Get_Linenum(src)) : -1;
00539 INT snkline = src ? Srcpos_To_Line(LWN_Get_Linenum(snk)) : -1;
00540 fprintf(LNO_Analysis, ") %d %d)", srcline, snkline);
00541 }
00542 }
00543
00544 if (!imperfect_stuff_is_distributable) {
00545 for (INT j = sinfo.Imperfect_Deps().Bad_Deps().Lastidx(); j >= 0; j--) {
00546 if (sinfo.Imperfect_Deps().Bad_Deps()[j].loop == i) {
00547 WN* src;
00548 WN* snk;
00549 EINDEX16 e = sinfo.Imperfect_Deps().Bad_Deps()[j].e;
00550 if (e == 0) {
00551 src = NULL;
00552 snk = NULL;
00553
00554
00555
00556 DevWarn("not actually a dependence problem.");
00557 }
00558 else {
00559 src = dg->Get_Wn(dg->Get_Source(e));
00560 snk = dg->Get_Wn(dg->Get_Sink(e));
00561 if (src == NULL || snk == NULL)
00562 DevWarn("Missing wn mapping for vertex");
00563 }
00564
00565 fprintf(LNO_Analysis, " ((");
00566 for (INT ii = 0; ii <= i; ii++) {
00567 INT loop_depth = ni->Depth_Inner() - nloops + 1 + ii;
00568 WN* lp = ni->Dostack().Bottom_nth(loop_depth);
00569 fprintf(LNO_Analysis, "%s%d",
00570 ii==0 ? "" : " ", Srcpos_To_Line(WN_Get_Linenum(lp)));
00571 }
00572 INT srcline = src ? Srcpos_To_Line(LWN_Get_Linenum(src)) : -1;
00573 INT snkline = src ? Srcpos_To_Line(LWN_Get_Linenum(snk)) : -1;
00574 fprintf(LNO_Analysis, ") %d %d)", srcline, snkline);
00575 }
00576 }
00577 }
00578 }
00579
00580 if (LNO_Analysis && i > 0)
00581 fprintf(LNO_Analysis, ")\n");
00582
00583 if (smat.Nloops() - i < 2 || sinfo.Body_Deps().All_Stars() ||
00584 !(imperfect_stuff_is_distributable ||
00585 imperfect_stuff_is_blockable)) {
00586 if (LNO_Run_Oinvar) {
00587 WN* innerloop = ni->Dostack().Bottom_nth(ni->Depth_Inner());
00588 extern void Hoist_Outer_Invar(WN *wn_inner, INT num_loops,
00589 INT outer_reg_tile,BOOL can_tile);
00590 Hoist_Outer_Invar(innerloop, ni->Nloops_Invariant(),
00591 ni->Nloops_Invariant(),FALSE);
00592 }
00593 SNL_DEBUG0(1, "Dependences are uncooperative---nest untransformable");
00594 if (LNO_Tlog) {
00595 WN* wn = ni->Dostack().Bottom_nth(ni->Depth_Inner()-(ni->Nloops()-1));
00596 char buffer[20];
00597 sprintf(buffer,"level=%d",ni->Nloops());
00598 SRCPOS srcpos=WN_Get_Linenum(wn);
00599 Generate_Tlog("LNO","snl", Srcpos_To_Line(srcpos),
00600 ST_name(WN_st(WN_index(wn))),
00601 buffer, "", "SNL_FAILURES generic dependence problem");
00602 }
00603
00604 if (LNO_Analysis) {
00605 for (INT i = ni->Nloops() - 1; i >= 0; i--) {
00606 WN* wn = ni->Dostack().Bottom_nth(ni->Depth_Inner() - i);
00607 if (i == ni->Nloops() - 1)
00608 fprintf(LNO_Analysis, " (SNL_FAILURES ");
00609 else
00610 fprintf(LNO_Analysis, " ");
00611 fprintf(LNO_Analysis, "(%d \"generic dependence problem\")",
00612 Srcpos_To_Line(WN_Get_Linenum(wn)));
00613 fprintf(LNO_Analysis, "%s\n", (i == 0) ? ")" : "");
00614 }
00615 }
00616 if (LNO_Verbose) {
00617 printf("Line %d: SNL nest not transformable\n",
00618 Srcpos_To_Line(WN_Get_Linenum(ni->Dostack().
00619 Bottom_nth(ni->Depth_Inner() - nloops + 1))));
00620 }
00621 if (!Valid_SNL_Region(region))
00622 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
00623 region.First, region.Last);
00624 return region;
00625 }
00626
00627 if (i) {
00628 FmtAssert(wn == ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1),
00629 ("Failed sanity check"));
00630 ni->Exclude_Outer_Loops(i);
00631 nloops = trying_invariant ? ni->Nloops_Invariant() : ni->Nloops_General();
00632 wn = ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1);
00633 FmtAssert(wn == ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1),
00634 ("Failed sanity check"));
00635
00636
00637 if (!trying_invariant && nloops <= ni->Nloops_Invariant())
00638 trying_invariant = TRUE;
00639 }
00640
00641 BOOL* can_be_inner = CXX_NEW_ARRAY(BOOL, ni->Depth_Inner() + 1,
00642 &LNO_default_pool);
00643 BOOL* can_be_unrolled = CXX_NEW_ARRAY(BOOL, ni->Depth_Inner() + 1,
00644 &LNO_default_pool);
00645
00646 WN* innerloop = ni->Dostack().Bottom_nth(ni->Depth_Inner());
00647
00648 INT outermost_can_be_tiled = ni->Depth_Inner() - nloops + 1;
00649 INT outermost_can_be_inner;
00650 INT outermost_can_be_unrolled;
00651
00652 BOOL can_interchange = LNO_Interchange;
00653 INT loop_count = 0;
00654 for (WN* lp = innerloop; lp != NULL; lp = LWN_Get_Parent(lp)) {
00655 if (WN_opcode(lp) == OPC_DO_LOOP) {
00656 loop_count++;
00657 DO_LOOP_INFO* dli = Get_Do_Loop_Info(lp);
00658 if (dli->Cannot_Interchange)
00659 can_interchange = FALSE;
00660 if (loop_count >= nloops)
00661 break;
00662 }
00663 }
00664 if (can_interchange)
00665 can_interchange = !Est_Num_Iters_Suspect(&(ni->Dostack()));
00666
00667
00668
00669
00670
00671
00672
00673
00674 if (can_interchange) {
00675 if (trying_invariant) {
00676 outermost_can_be_inner = outermost_can_be_tiled;
00677 outermost_can_be_unrolled = outermost_can_be_tiled;
00678 }
00679 else {
00680
00681
00682
00683 if ((ni->Above_Is_Distributable() ||
00684 WN_prev_executable(innerloop) == NULL) &&
00685 (ni->Below_Is_Distributable() ||
00686 WN_next_executable(innerloop) == NULL))
00687 outermost_can_be_inner = ni->Depth_Inner() - 1;
00688 else
00689 outermost_can_be_inner = ni->Depth_Inner();
00690 outermost_can_be_unrolled = ni->Depth_Inner() - 1;
00691 }
00692 }
00693 else {
00694 outermost_can_be_inner = ni->Depth_Inner();
00695 if (trying_invariant)
00696 outermost_can_be_unrolled = outermost_can_be_tiled;
00697 else
00698 outermost_can_be_unrolled = ni->Depth_Inner() - 1;
00699 }
00700
00701 for (i = 0; i <= ni->Depth_Inner(); i++) {
00702 can_be_inner[i] = (i >= outermost_can_be_inner);
00703 can_be_unrolled[i] = (i >= outermost_can_be_unrolled);
00704 }
00705
00706 LOOP_MODEL lm;
00707 {
00708 HASH_TABLE<WN *,BIT_VECTOR *> htable(500,&LNO_local_pool);
00709 if (LNO_Run_Oinvar && trying_invariant) {
00710 extern void Mark_Invar(WN *region, INT num_loops, DOLOOP_STACK *do_stack,
00711 HASH_TABLE<WN *,BIT_VECTOR *> *htable, MEM_POOL *pool, BOOL outer_only);
00712
00713 DOLOOP_STACK *do_stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00714 &LNO_local_pool);
00715 Build_Doloop_Stack(innerloop,do_stack);
00716 Mark_Invar(WN_do_body(innerloop),nloops,do_stack,&htable,&LNO_local_pool,FALSE);
00717 extern BOOL Sort_Invar_Expressions(WN *wn,
00718 HASH_TABLE<WN *,BIT_VECTOR *> *htable);
00719 if (Roundoff_Level >= ROUNDOFF_ASSOC &&
00720 Sort_Invar_Expressions(innerloop,&htable)) {
00721 HASH_TABLE<WN *,BIT_VECTOR *> htable2(500,&LNO_local_pool);
00722 Mark_Invar(WN_do_body(innerloop),nloops,do_stack,
00723 &htable2,&LNO_local_pool,FALSE);
00724 lm.Model(innerloop, can_be_inner, can_be_unrolled,
00725 outermost_can_be_tiled,
00726 dg, &ni->Privatizability_Info(), nloops,&htable2);
00727 } else {
00728 lm.Model(innerloop, can_be_inner, can_be_unrolled,
00729 outermost_can_be_tiled,
00730 dg, &ni->Privatizability_Info(), nloops,&htable);
00731 }
00732 } else {
00733 lm.Model(innerloop, can_be_inner, can_be_unrolled,
00734 outermost_can_be_tiled,
00735 dg, &ni->Privatizability_Info(), nloops,NULL);
00736 }
00737 }
00738
00739 CXX_DELETE_ARRAY(can_be_inner, &LNO_default_pool);
00740 CXX_DELETE_ARRAY(can_be_unrolled, &LNO_default_pool);
00741
00742 INT outermost;
00743 for (outermost = 0; outermost < ni->Depth_Inner(); outermost++) {
00744 if (lm.New_Order(outermost) != outermost ||
00745 (lm.Block_Number(outermost) != 1 && !LNO_Outer_Unroll_Model_Only) ||
00746 (lm.Nstrips() != 0 && lm.Stripdepth() == outermost))
00747 break;
00748 }
00749
00750 EST_REGISTER_USAGE reg_usage;
00751 reg_usage.Set_Est_Regs(lm.Num_Fp_Regs(), Target_FPRs,
00752 lm.Num_Int_Regs(), Target_INTRs,
00753 lm.Num_TLB(), Mhd.L[0].TLB_Entries);
00754
00755 if (outermost >= ni->Depth_Inner()) {
00756
00757 DO_LOOP_INFO* dli = Get_Do_Loop_Info(innerloop);
00758 dli->Est_Register_Usage = reg_usage;
00759 if (LNO_Verbose) {
00760 printf("Line %d: SNL not transforming nest\n",
00761 Srcpos_To_Line(WN_Get_Linenum(ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1))));
00762 }
00763
00764
00765 if (LNO_Run_Oinvar) {
00766 extern void Hoist_Outer_Invar(WN *wn_inner, INT num_loops,
00767 INT outer_reg_tile, BOOL can_tile);
00768 Hoist_Outer_Invar(innerloop, ni->Nloops_Invariant(),
00769 ni->Nloops_Invariant(),TRUE);
00770 }
00771 if (!Valid_SNL_Region(region))
00772 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
00773 region.First, region.Last);
00774 if (!Valid_SNL_Region(region))
00775 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
00776 region.First, region.Last);
00777 return region;
00778 }
00779
00780 FmtAssert(outermost >= ni->Depth_Inner() - nloops + 1,
00781 ("Transforming outside nloops???"));
00782
00783 if (outermost > ni->Depth_Inner() - nloops + 1) {
00784 INT i = outermost - ni->Depth_Inner() + nloops - 1;
00785 FmtAssert(wn == ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1),
00786 ("Failed sanity check"));
00787 ni->Exclude_Outer_Loops(i);
00788 FmtAssert(nloops - i == (trying_invariant ? ni->Nloops_Invariant() : ni->Nloops_General()), ("Attempted transformation too deep?"));
00789 nloops = trying_invariant ? ni->Nloops_Invariant() : ni->Nloops_General();
00790 wn = ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1);
00791
00792
00793 if (!trying_invariant && nloops <= ni->Nloops_Invariant())
00794 trying_invariant = TRUE;
00795 }
00796
00797 INT order[SNL_MAX_LOOPS];
00798 INT regstripsz[SNL_MAX_LOOPS];
00799 BOOL regstripping = FALSE;
00800 BOOL cachestripping = (lm.Nstrips() != 0);
00801 BOOL reordering = FALSE;
00802
00803 for (i = outermost; i <= ni->Depth_Inner(); i++) {
00804 order[i-outermost] = lm.New_Order(i)-outermost;
00805 if (lm.New_Order(i) != i)
00806 reordering = TRUE;
00807 WN* my_loop = ni->Dostack().Bottom_nth(i);
00808 if (!LNO_Outer_Unroll_Model_Only
00809 && (LNO_Trapezoidal_Outer_Unroll
00810 || !Inner_Loop_Is_Trapezoidal(my_loop, ni))) {
00811 regstripsz[i-outermost] = lm.Block_Number(order[i-outermost]+outermost);
00812 if (i-outermost < nloops-1 && regstripsz[i-outermost] != 1)
00813 regstripping = TRUE;
00814 }
00815 else
00816 regstripsz[i-outermost] = 1;
00817 }
00818 FmtAssert(ni->Depth_Inner() + 1 <= outermost + nloops,
00819 ("Transformed loops outside of innermost nloops"));
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845 if (!trying_invariant) {
00846 UINT32 seen = 0;
00847
00848 for (INT s_cur = lm.Nstrips() - 1; s_cur > 0; s_cur--) {
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865 if (seen & (1<<lm.Iloop()[s_cur]))
00866 continue;
00867
00868
00869 INT s_biggest = s_cur;
00870 INT s;
00871 for (s = s_cur - 1; s >= 0; s--) {
00872 if (lm.Iloop()[s] > lm.Iloop()[s_biggest] &&
00873 (seen & (1 << lm.Iloop()[s])) == 0)
00874 s_biggest = s;
00875 }
00876
00877 Is_True((seen & (1 << lm.Iloop()[s_biggest])) == 0, ("Bug"));
00878 seen |= (1 << lm.Iloop()[s_biggest]);
00879
00880 if (s_biggest == s_cur)
00881 continue;
00882
00883
00884 DevWarn("Unusual double blocking: ok, but verify with LNO group");
00885
00886
00887 INT tmp_iloop = lm.Iloop()[s_cur];
00888 INT tmp_stripsz = lm.Stripsz()[s_cur];
00889 INT tmp_striplevel = lm.Striplevel()[s_cur];
00890 for (s = s_cur; s > s_biggest; s--) {
00891 lm.Iloop()[s] = lm.Iloop()[s-1];
00892 lm.Stripsz()[s] = lm.Stripsz()[s-1];
00893 lm.Striplevel()[s] = lm.Striplevel()[s-1];
00894 }
00895 lm.Iloop()[s_biggest] = tmp_iloop;
00896 lm.Stripsz()[s_biggest] = tmp_stripsz;
00897 lm.Striplevel()[s_biggest] = tmp_striplevel;
00898 }
00899 }
00900
00901
00902 if (nloops != ni->Depth_Inner() + 1 - outermost) {
00903 FmtAssert(wn == ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1),
00904 ("Failed sanity check"));
00905 ni->Exclude_Outer_Loops(nloops - (ni->Depth_Inner() + 1 - outermost));
00906 nloops = ni->Depth_Inner() + 1 - outermost;
00907 wn = ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1);
00908 FmtAssert(wn == ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1),
00909 ("Failed sanity check"));
00910
00911 if (!trying_invariant && nloops <= ni->Nloops_Invariant())
00912 trying_invariant = TRUE;
00913 }
00914
00915 if (LNO_Verbose) {
00916 INT outerdepth = ni->Depth_Inner() + 1 - nloops;
00917
00918 printf("Line %d:", Srcpos_To_Line(WN_Get_Linenum(ni->Dostack().Bottom_nth(outerdepth))));
00919 printf(" [");
00920 for (i = 0; i < nloops; i++) {
00921 INT depth = i + outerdepth;
00922 if (i > 0)
00923 printf(",");
00924 printf("%s", SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth))).Name());
00925 }
00926 printf(" --> ");
00927 for (i = 0; i < nloops; i++) {
00928 INT depth1 = order[i] + outerdepth;
00929 if (i > 0)
00930 printf(",");
00931 printf("%s",
00932 SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth1))).Name());
00933 if (regstripsz[i] > 1)
00934 printf("(u=%d)", regstripsz[i]);
00935 }
00936 if (cachestripping) {
00937 printf(" block%s ", lm.Nstrips() > 1 ? "s" : "");
00938 for (i = 0; i < lm.Nstrips(); i++) {
00939 INT depth = order[lm.Iloop(i) - outerdepth] + outerdepth;
00940 if (i > 0)
00941 printf(",");
00942 printf("%s(%d)[L%d]",
00943 SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth))).Name(),
00944 lm.Stripsz(i), lm.Striplevel(i));
00945 }
00946 INT d = order[lm.Stripdepth() - outerdepth] + outerdepth;
00947 printf(" outside %s",
00948 SYMBOL(WN_index(ni->Dostack().Bottom_nth(d))).Name());
00949 }
00950 printf("]\n");
00951 }
00952 if (snl_debug >= 1) {
00953 INT outerdepth = ni->Depth_Inner() + 1 - nloops;
00954
00955 fprintf(TFile, "Line %d:", Srcpos_To_Line(WN_Get_Linenum(ni->Dostack().Bottom_nth(outerdepth))));
00956 fprintf(TFile, " [");
00957 for (i = 0; i < nloops; i++) {
00958 INT depth = i + outerdepth;
00959 if (i > 0)
00960 fprintf(TFile, ",");
00961 fprintf(TFile,
00962 "%s", SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth))).Name());
00963 }
00964 fprintf(TFile, " --> ");
00965 for (i = 0; i < nloops; i++) {
00966 INT depth1 = order[i] + outerdepth;
00967 if (i > 0)
00968 fprintf(TFile, ",");
00969 fprintf(TFile, "%s",
00970 SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth1))).Name());
00971 if (regstripsz[i] > 1)
00972 fprintf(TFile, "(u=%d)", regstripsz[i]);
00973 }
00974 if (cachestripping) {
00975 fprintf(TFile, " block%s ", lm.Nstrips() > 1 ? "s" : "");
00976 for (i = 0; i < lm.Nstrips(); i++) {
00977 INT depth = order[lm.Iloop(i) - outerdepth] + outerdepth;
00978 if (i > 0)
00979 fprintf(TFile, ",");
00980 fprintf(TFile, "%s(%d)[L%d]",
00981 SYMBOL(WN_index(ni->Dostack().Bottom_nth(depth))).Name(),
00982 lm.Stripsz(i), lm.Striplevel(i));
00983 }
00984 INT d = order[lm.Stripdepth() - outerdepth] + outerdepth;
00985 fprintf(TFile, " outside %s",
00986 SYMBOL(WN_index(ni->Dostack().Bottom_nth(d))).Name());
00987 }
00988 fprintf(TFile, "]\n");
00989 }
00990
00991 if (!reordering && !cachestripping && !regstripping) {
00992 DO_LOOP_INFO* dli = Get_Do_Loop_Info(innerloop);
00993 dli->Est_Register_Usage = reg_usage;
00994 if (!Valid_SNL_Region(region))
00995 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
00996 region.First, region.Last);
00997 return region;
00998 }
00999
01000
01001
01002 INT* orderp = reordering ? order : NULL;
01003 INT* regstripszp = regstripping ? regstripsz : NULL;
01004
01005
01006
01007 Num_Cache_Strips = 0;
01008 SNL_TILE_INFO* ti = NULL;
01009 if (cachestripping) {
01010 SNL_INV_CACHE_BLOCK_REASON* reason
01011 = CXX_NEW_ARRAY(SNL_INV_CACHE_BLOCK_REASON, lm.Nstrips(),
01012 &LNO_default_pool);
01013 for (INT i = 0; i < lm.Nstrips(); i++) {
01014 lm.Iloop()[i] -= lm.Stripdepth();
01015 reason[i] = SNL_INV_TILE_ONLY;
01016 }
01017 ti = CXX_NEW(SNL_TILE_INFO(ni->Depth_Inner() + 1 - lm.Stripdepth(),
01018 lm.Nstrips(), lm.Iloop(), lm.Stripsz(),
01019 lm.Striplevel(), reason,
01020 &LNO_default_pool), &LNO_default_pool);
01021 Num_Cache_Strips = lm.Nstrips();
01022 }
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074 if (trying_invariant) {
01075 #ifdef Is_True_On
01076 if (snl_debug >= 3) {
01077 fprintf(TFile, "Beginning invariant transformation\n");
01078 }
01079 #endif
01080 BOOL want_se_and_dist = orderp != NULL || ti != NULL && ti->Strips() > 0
01081 || regstripsz && (ni->Privatizability_Info().Lcd_Depth() != -1
01082 || (ni->Privatizability_Info().Must_Finalize()));
01083 DOLOOP_STACK* stack = &ni->Dostack();
01084 INT first_in_stack = ni->Depth_Inner() - nloops + 1;
01085 WN* wn_outer = stack->Bottom_nth(first_in_stack);
01086 SNL_REGION rg_kernel(wn_outer, wn_outer);
01087 if (ti == NULL || ti->Nloops() == nloops) {
01088 region = SNL_INV_Transforms(wn_outer, orderp, ni, nloops, ti,
01089 regstripszp, reg_usage, want_se_and_dist, NULL, LNO_Run_Oinvar,
01090 &rg_kernel);
01091 }
01092 else if (!regstripszp) {
01093 region = SNL_INV_Transforms(wn_outer, orderp, ni, nloops, NULL,
01094 NULL, reg_usage, want_se_and_dist, NULL,FALSE, &rg_kernel);
01095 DOLOOP_STACK kernel_stack(&LNO_local_pool);
01096 Find_Kernel_Stack(&rg_kernel, ti->Nloops(), &kernel_stack);
01097 for (i = 0; i < kernel_stack.Elements(); i++) {
01098 WN* wn_new_outer = kernel_stack.Bottom_nth(i);
01099 WN* wn_new_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, ti->Nloops());
01100 ni->Privatizability_Info().Update_Reduction_Loop_Stmts(wn_new_inner);
01101 SNL_REGION rg_local = rg_kernel;
01102 SNL_REGION rg_local_kernel;
01103 BOOL might_reset_first = wn_new_outer == region.First;
01104 BOOL might_reset_last = wn_new_outer == region.Last;
01105 rg_local = SNL_INV_Transforms(wn_new_outer, NULL, ni, ti->Nloops(),
01106 ti, NULL, EST_REGISTER_USAGE(), FALSE, NULL, LNO_Run_Oinvar,
01107 &rg_local_kernel);
01108 if (might_reset_first && rg_local.First != region.First)
01109 region.First = rg_local.First;
01110 if (might_reset_last && rg_local.Last != region.Last)
01111 region.Last = rg_local.Last;
01112 }
01113 } else {
01114 INT rg1[SNL_MAX_LOOPS];
01115 INT rg2[SNL_MAX_LOOPS];
01116 INT i;
01117 for (i = 0; i < nloops-ti->Nloops(); i++)
01118 rg1[i] = regstripsz[i];
01119 for ( ; i < nloops; i++)
01120 rg2[i-(nloops-ti->Nloops())] = regstripsz[i], rg1[i] = 1;
01121 region = SNL_INV_Transforms(wn_outer, orderp, ni, nloops, NULL,
01122 rg1, reg_usage, want_se_and_dist, NULL,FALSE, &rg_kernel);
01123 DOLOOP_STACK kernel_stack(&LNO_local_pool);
01124 Find_Kernel_Stack(&rg_kernel, ti->Nloops(), &kernel_stack);
01125 for (i = 0; i < kernel_stack.Elements(); i++) {
01126 WN* wn_new_outer = kernel_stack.Bottom_nth(i);
01127 WN* wn_new_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, ti->Nloops());
01128 ni->Privatizability_Info().Update_Reduction_Loop_Stmts(wn_new_inner);
01129 SNL_REGION rg_local = rg_kernel;
01130 SNL_REGION rg_local_kernel;
01131 BOOL might_reset_first = wn_new_outer == region.First;
01132 BOOL might_reset_last = wn_new_outer == region.Last;
01133 rg_local = SNL_INV_Transforms(wn_new_outer, NULL, ni, ti->Nloops(),
01134 ti, rg2, EST_REGISTER_USAGE(), FALSE, NULL, LNO_Run_Oinvar,
01135 &rg_local_kernel);
01136 if (might_reset_first && rg_local.First != region.First)
01137 region.First = rg_local.First;
01138 if (might_reset_last && rg_local.Last != region.Last)
01139 region.Last = rg_local.Last;
01140 }
01141 }
01142 SNL_SPL_Split_Inner_Tile_Loops(region.First, region.Last,
01143 LNO_Split_Tiles, "$spl_", TRUE);
01144 Remove_Useless_Loops(®ion);
01145
01146 #ifdef Is_True_On
01147 if (snl_debug) {
01148 fprintf(TFile, "WN DUMP AFTER AUTOMATIC INVARIANT TRANSFORMATION\n");
01149 Dump_WN(region, TFile, snl_debug);
01150 fprintf(TFile, "END WN DUMP AFTER AUTOMATIC INVARIANT TRANSFORMATION\n");
01151 }
01152 SNL_Sanity_Check_Region(region);
01153 #endif
01154
01155 *changed = TRUE;
01156 if (!Valid_SNL_Region(region))
01157 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
01158 region.First, region.Last);
01159 return region;
01160 }
01161
01162 {
01163
01164
01165 #ifdef Is_True_On
01166 if (snl_debug >= 3) {
01167 fprintf(TFile, "Beginning general transformation\n");
01168 }
01169 #endif
01170 IMAT uu(nloops, nloops, &LNO_default_pool);
01171 for (INT i = 0; i < nloops; i++)
01172 for (INT j = 0; j < nloops; j++)
01173 uu(i,j) = (i == order[j]);
01174
01175 BOOL failed = FALSE;
01176 region = SNL_GEN_Protect_Nest_With_Conditionals(ni, &failed);
01177 if (failed) {
01178 if (!Valid_SNL_Region(region))
01179 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
01180 region.First, region.Last);
01181 return region;
01182 }
01183
01184 BOOL has_lcd = ni->Privatizability_Info().Lcd_Depth() != -1;
01185 SNL_REGION dregion = SNL_GEN_Distribution(
01186 ni->Dostack().Bottom_nth(ni->Depth_Inner() - nloops + 1), &uu, ti,
01187 ni->Nloops_General(), has_lcd, &ni->Privatizability_Info().Plist,
01188 ni->Above_Is_Distributable(), ni->Below_Is_Distributable());
01189
01190 DOLOOP_STACK* stack = &ni->Dostack();
01191
01192 INT firstin = ni->Depth_Inner() - nloops + 1;
01193 if (region.First == stack->Bottom_nth(firstin))
01194 region.First = dregion.First;
01195 if (region.Last == stack->Bottom_nth(firstin))
01196 region.Last = dregion.Last;
01197
01198 if (reordering || ti) {
01199 IMAT* uup = reordering ? &uu : NULL;
01200 SNL_REGION ctregion;
01201 if (uup == NULL || ti == NULL || nloops == ti->Nloops())
01202 ctregion = SNL_GEN_U_Ctiling(ni->Dostack().Bottom_nth(firstin), nloops,
01203 uup, ti, ni->Bi(), &(ni->Privatizability_Info().Plist), reg_usage,
01204 TRUE, FALSE);
01205 else {
01206
01207
01208 ctregion = SNL_GEN_U_Ctiling(ni->Dostack().Bottom_nth(firstin), nloops,
01209 uup, NULL, ni->Bi(), &(ni->Privatizability_Info().Plist), reg_usage,
01210 TRUE, FALSE);
01211 (void) SNL_GEN_U_Ctiling(ni->Dostack().Bottom_nth(firstin), nloops,
01212 NULL, ti, ni->Bi(), &(ni->Privatizability_Info().Plist),
01213 EST_REGISTER_USAGE(), TRUE, FALSE);
01214 }
01215 if (region.First == stack->Bottom_nth(firstin))
01216 region.First = ctregion.First;
01217 if (region.Last == stack->Bottom_nth(firstin))
01218 region.Last = ctregion.Last;
01219 }
01220
01221 if (regstripsz[nloops-2] > 1) {
01222 SNL_REGION regregion = SNL_GEN_2D_Regtile(ni, regstripsz[nloops-2]);
01223 if (region.First == stack->Bottom_nth(firstin+nloops-2))
01224 region.First = regregion.First;
01225 if (region.Last == stack->Bottom_nth(firstin+nloops-2))
01226 region.Last = regregion.Last;
01227 }
01228 SNL_SPL_Split_Inner_Tile_Loops(region.First, region.Last,
01229 LNO_Split_Tiles, "$spl_", TRUE);
01230 Remove_Useless_Loops(®ion);
01231
01232 #ifdef Is_True_On
01233 if (snl_debug) {
01234 fprintf(TFile, "WN DUMP AFTER AUTOMATIC GENERAL TRANSFORMATION\n");
01235 Dump_WN(region, TFile, snl_debug);
01236 fprintf(TFile, "END WN DUMP AFTER AUTOMATIC GENERAL TRANSFORMATION\n");
01237 }
01238 SNL_Sanity_Check_Region(region);
01239 #endif
01240
01241 *changed = TRUE;
01242 if (!Valid_SNL_Region(region))
01243 DevWarn("Do_Automatic_Transformation: Invalid SNL_REGION [0x%p,0x%p]",
01244 region.First, region.Last);
01245 return region;
01246 }
01247 }
01248
01249
01250
01251
01252
01253
01254
01255
01256 static BOOL Make_Edge_Blockable(EINDEX16 edge,
01257 DEPV_ARRAY* dva,
01258 INT start_depth,
01259 INT stop_depth)
01260 {
01261 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01262 DEPV_LIST dl(dva, &LNO_local_pool);
01263 DEPV_LIST dl_block(dl.Num_Dim(), dl.Num_Unused_Dim(), &LNO_local_pool);
01264 dl.Blockable_Part(&LNO_local_pool, &dl_block, dl.Num_Unused_Dim(),
01265 dl.Num_Dim(), start_depth, stop_depth);
01266 DEPV_ARRAY* dva_new = Create_DEPV_ARRAY(&dl_block, dg->Pool());
01267 if (dva_new != NULL) {
01268 Delete_DEPV_ARRAY(dva, dg->Pool());
01269 dg->Set_Depv_Array(edge, dva_new);
01270 }
01271 return dva_new != NULL;
01272 }
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282 static BOOL SNL_Fix_Blockable_Dependendences_Traverse(WN* wn_outer,
01283 INT nloops,
01284 WN* wn_tree)
01285 {
01286 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01287 INT start_depth = Do_Loop_Depth(wn_outer);
01288 INT stop_depth = start_depth + nloops - 1;
01289
01290 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
01291 if (Do_Loop_Depth(wn_tree) >= Do_Loop_Depth(wn_outer) + nloops)
01292 return TRUE;
01293 } else if (dg->Get_Vertex(wn_tree)) {
01294 VINDEX16 v = dg->Get_Vertex(wn_tree);
01295 EINDEX16 enext = 0;
01296 for (EINDEX16 e = dg->Get_Out_Edge(v); e; e = enext) {
01297 enext = dg->Get_Next_Out_Edge(e);
01298 DEPV_ARRAY* dva = dg->Depv_Array(e);
01299 if (dva->Is_Blockable(start_depth, stop_depth))
01300 continue;
01301 if (!Make_Edge_Blockable(e, dva, start_depth, stop_depth))
01302 return FALSE;
01303 }
01304 }
01305
01306 if (WN_opcode(wn_tree) == OPC_BLOCK) {
01307 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
01308 SNL_Fix_Blockable_Dependendences_Traverse(wn_outer, nloops, wn);
01309 } else {
01310 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
01311 SNL_Fix_Blockable_Dependendences_Traverse(wn_outer, nloops,
01312 WN_kid(wn_tree, i));
01313 }
01314 return TRUE;
01315 }
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325 static BOOL SNL_Fix_Blockable_Dependendences(WN* wn_outer,
01326 INT nloops)
01327 {
01328
01329 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01330 return SNL_Fix_Blockable_Dependendences_Traverse(wn_outer, nloops,
01331 wn_outer);
01332 }
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342 static BOOL Fix_Blockable_Dependences(WN* wn_outer,
01343 INT nloops)
01344 {
01345 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
01346 DOLOOP_STACK stack(&LNO_local_pool);
01347 Build_Doloop_Stack(wn_inner, &stack);
01348 for (INT i = 0; i < stack.Elements(); i++) {
01349 WN* wn_loop = stack.Bottom_nth(i);
01350 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
01351 if (dli_loop->Blockable_Specification > 0) {
01352 INT block_nloops = dli_loop->Blockable_Specification;
01353 if (!SNL_Fix_Blockable_Dependendences(wn_loop, block_nloops))
01354 return FALSE;
01355 }
01356 }
01357 return TRUE;
01358 }
01359
01360 static void SNL_Transform(WN* wn,
01361 INT nloops)
01362 {
01363 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
01364 WN* wn_original = wn;
01365 INT nloops_original = nloops;
01366 if (wn == NULL)
01367 return;
01368
01369
01370 if (!Fix_Blockable_Dependences(wn, nloops))
01371 return;
01372
01373 if (!SNL_Fix_Array_Deps_On_Index_Variable(wn, nloops))
01374 return;
01375 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn, nloops);
01376 WN* wn_ok = NULL;
01377 WN *wnn;
01378 for (wnn = wn_inner; wnn != NULL; wnn = LWN_Get_Parent(wnn)) {
01379 if (WN_opcode(wnn) == OPC_DO_LOOP) {
01380 if (Need_Fix_Array_Deps_On_Index_Variable(wnn))
01381 break;
01382 wn_ok = wnn;
01383 if (wn_ok == wn)
01384 break;
01385 }
01386 }
01387 if (wn_ok == NULL)
01388 return;
01389 nloops -= Do_Loop_Depth(wn_ok) - Do_Loop_Depth(wn);
01390 wn = wn_ok;
01391
01392
01393
01394
01395 while (nloops > SNL_MAX_LOOPS) {
01396 nloops--;
01397 wn = Good_Do_Next_Innermost(wn);
01398 FmtAssert(wn, ("Can't find loop in deep loop nest"));
01399 }
01400
01401 while (nloops >= 2 && wn != NULL
01402 && (!Do_Loop_Is_Good(wn) || Do_Loop_Has_Calls(wn) ||
01403 Do_Loop_Has_Gotos(wn))) {
01404 nloops--;
01405 wn = Good_Do_Next_Innermost(wn);
01406
01407 }
01408
01409 WN* wn_innermost_legotile = NULL;
01410 for (WN* wn_lego = wn; wn_lego != NULL;
01411 wn_lego = Good_Do_Next_Innermost(wn_lego)) {
01412 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_lego);
01413 if (dli->Is_Outer_Lego_Tile)
01414 wn_innermost_legotile = wn_lego;
01415 }
01416 if (wn_innermost_legotile != NULL) {
01417 WN* wn_new = Good_Do_Next_Innermost(wn_innermost_legotile);
01418 if (wn_new == NULL)
01419 return;
01420 nloops = nloops - (Do_Depth(wn_new) - Do_Depth(wn));
01421 wn = wn_new;
01422 }
01423 if (wn == NULL)
01424 return;
01425
01426 if (nloops <= 1) {
01427 if (LNO_Analysis) {
01428 fprintf(LNO_Analysis," (NESTING_DEPTH %d)\n", nloops);
01429 fprintf(LNO_Analysis," (LINE_POS %d)\n",
01430 Srcpos_To_Line(WN_Get_Linenum(wn)));
01431 }
01432 if (nloops == 1 && LNO_Run_Oinvar) {
01433 if (Roundoff_Level >= ROUNDOFF_ASSOC) {
01434 MEM_POOL_Push(&LNO_local_pool);
01435 {
01436 HASH_TABLE<WN *,BIT_VECTOR *> htable(500,&LNO_local_pool);
01437 extern void Mark_Invar(WN *region, INT num_loops,
01438 DOLOOP_STACK *do_stack, HASH_TABLE<WN *,BIT_VECTOR *> *htable,
01439 MEM_POOL *pool, BOOL outer_only);
01440 extern BOOL Sort_Invar_Expressions(WN *wn,
01441 HASH_TABLE<WN *,BIT_VECTOR *> *htable);
01442
01443 DOLOOP_STACK *do_stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01444 &LNO_local_pool);
01445 Build_Doloop_Stack(wn,do_stack);
01446 Mark_Invar(WN_do_body(wn),nloops,do_stack,&htable,&LNO_local_pool,
01447 FALSE);
01448 Sort_Invar_Expressions(wn,&htable);
01449 }
01450 MEM_POOL_Pop(&LNO_local_pool);
01451 }
01452 extern void Hoist_Outer_Invar(WN *wn_inner, INT num_loops,
01453 INT outer_reg_tile, BOOL can_tile);
01454 Hoist_Outer_Invar(wn, 1,1,TRUE);
01455 }
01456
01457
01458 return;
01459 }
01460
01461 for (wnn = wn; wnn != NULL; wnn = Good_Do_Next_Innermost(wnn))
01462 SNL_Optimize_Bounds_With_Access_Vectors(wnn, Du_Mgr);
01463
01464 SNL_NEST_INFO ni(wn, nloops, &LNO_default_pool, TRUE);
01465
01466 if (LNO_Analysis) {
01467 fprintf(LNO_Analysis," (NESTING_DEPTH %d)\n",nloops);
01468 fprintf(LNO_Analysis," (LINE_POS");
01469 for (INT i = 0; i < nloops; i++) {
01470 INT stack_pos = ni.Depth_Inner()-nloops+1+i;
01471 fprintf(LNO_Analysis," %d",
01472 Srcpos_To_Line(WN_Get_Linenum(ni.Dostack().Bottom_nth(stack_pos))));
01473 }
01474 fprintf(LNO_Analysis,")\n");
01475 }
01476
01477 if (!ni.Innermost()) {
01478 SNL_DEBUG2(1, "Nest at line %d (nloops = %d): inner loop not innermost",
01479 Srcpos_To_Line(WN_Get_Linenum(wn)), nloops);
01480 if (LNO_Analysis) {
01481 fprintf(LNO_Analysis,
01482 " (SNL_FAILURES (%d \"inner loop not innermost\"))\n",
01483 Srcpos_To_Line(WN_Get_Linenum(wn)));
01484 }
01485 SNL_Optimize_Bounds(SNL_REGION(wn, wn));
01486 return;
01487 }
01488
01489 INT nloops_g = ni.Nloops_General();
01490 INT nloops_i = ni.Nloops_Invariant();
01491
01492 while (nloops >= 2 && nloops > nloops_g && nloops > nloops_i) {
01493 nloops--;
01494 ni.Exclude_Outer_Loops(1);
01495 nloops_g = ni.Nloops_General();
01496 nloops_i = ni.Nloops_Invariant();
01497 if (LNO_Analysis || LNO_Tlog) {
01498 const char* message;
01499 char* scalar_message = NULL;
01500 char buf[32];
01501 switch (ni.Problem(ni.Depth_Inner() - nloops).Problem) {
01502 case SNL_LOOP_PROBLEM_NONE:
01503 SNL_DEBUG0(1, "Unknown reason why loop is not transformable");
01504 message = "unknown reason";
01505 break;
01506 case SNL_LOOP_PROBLEM_DISTRIBUTION:
01507 message = "imperfect nest undistributable";
01508 break;
01509 case SNL_LOOP_PROBLEM_SCALAR: {
01510 SYMBOL var = ni.Problem(ni.Depth_Inner() - nloops).Var;
01511 const char* name = var.Name();
01512 scalar_message = (char*) alloca(strlen(name) + 25);
01513 sprintf(scalar_message, "%s unexpandable scalar", name);
01514 }
01515 break;
01516 case SNL_LOOP_PROBLEM_LOOP: {
01517 WN* wn = ni.Problem(ni.Depth_Inner() - nloops).Wn;
01518 Is_True(wn, ("missing wn for loop problem"));
01519 sprintf(buf, "%d loop of bad form",
01520 Srcpos_To_Line(WN_Get_Linenum(wn)));
01521 message = buf;
01522 }
01523 break;
01524 case SNL_LOOP_PROBLEM_INNER_MIGHT_NOT_GO:
01525 message = "can't be certain that all loops further in execute";
01526 break;
01527 case SNL_LOOP_PROBLEM_INNER_DOES_NOT_GO:
01528 message = "some loop further in does not execute";
01529 break;
01530 default:
01531 message = NULL;
01532 Is_True(0, ("Missing excuse"));
01533 }
01534 if (scalar_message) message = scalar_message;
01535 if (message) {
01536 if (LNO_Analysis)
01537 fprintf(LNO_Analysis,
01538 " (SNL_FAILURES (%d \"%s\"))\n",
01539 Srcpos_To_Line(WN_Get_Linenum(wn)), message);
01540 if (LNO_Tlog) {
01541 char tlog_buf[256];
01542 if (strlen(message)>=256)
01543 sprintf(tlog_buf,"SNL_FAILURES message too long");
01544 else
01545 sprintf(tlog_buf,"SNL_FAILURES %s", message);
01546 Generate_Tlog("LNO","snl", Srcpos_To_Line(WN_Get_Linenum(wn)),
01547 ST_name(WN_st(WN_index(wn))),
01548 "", "", tlog_buf);
01549 }
01550 }
01551 }
01552 wn = Good_Do_Next_Innermost(wn);
01553 FmtAssert(wn, ("Can't find loop in loop nest"));
01554 }
01555
01556 if (nloops <= 1) {
01557 if (LNO_Verbose) {
01558 printf("SNL not transforming nest,");
01559 printf(" transformable depth is less than two.\n");
01560 }
01561 SNL_Optimize_Bounds(SNL_REGION(wn, wn));
01562 SNL_Transform(Find_Next_Innermost_Do(wn_original), nloops_original - 1);
01563 return;
01564 }
01565
01566 SNL_DEBUG2(1, "SNL FOUND A NEST AT LINE %d (nloops = %d):\n",
01567 Srcpos_To_Line(WN_Get_Linenum(wn)), ni.Nloops());
01568
01569 if (snl_debug >= 2) {
01570 Dump_WN(wn, TFile, snl_debug, 2, 2,
01571 Array_Dependence_Graph, NULL, NULL);
01572 fprintf(TFile, "END OF FOUND NEST\n");
01573 if (snl_debug >= 3) {
01574 WN* f = NULL;
01575 for (WN* p = wn; p; p = LWN_Get_Parent(p))
01576 f = p;
01577 Print_Def_Use(f, TFile);
01578 }
01579 }
01580
01581
01582 SNL_DEBUG2(1, "max loops in transformation: invariant %d, general %d\n",
01583 ni.Nloops_Invariant(), ni.Nloops_General());
01584
01585 FmtAssert(nloops >= 2 &&
01586 (nloops == ni.Nloops_Invariant() || nloops == ni.Nloops_General()),
01587 ("This is what loop above was for"));
01588
01589 if (snl_debug >= 3) {
01590 LNO_Print_Access(TFile, wn);
01591 Array_Dependence_Graph->Print(TFile);
01592 }
01593
01594 SNL_REGION region;
01595 BOOL changed;
01596
01597 #ifdef KEY
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609 for (INT i = 0; i < ni.Dostack().Elements(); i ++) {
01610 WN* loop = ni.Dostack().Bottom_nth(i);
01611 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
01612 DOLOOP_STACK stack(&LNO_local_pool);
01613 Build_Doloop_Stack(loop, &stack);
01614 dli->Set_Est_Num_Iterations(&stack);
01615 }
01616 #endif
01617 region = Do_Automatic_Transformation(wn, nloops, &ni, &changed);
01618
01619 if (snl_debug) {
01620 if (changed && region.First) {
01621 fprintf(TFile, "\nDump After Transformation:\n");
01622 WN *rwn;
01623 for (rwn = region.First; rwn; rwn = WN_next(rwn)) {
01624 Dump_WN(rwn, TFile, snl_debug);
01625 if (rwn == region.Last)
01626 break;
01627 }
01628 FmtAssert(rwn == region.Last, ("Bad region"));
01629 fprintf(TFile, "\n");
01630
01631 if (snl_debug) {
01632 WN* f = NULL;
01633 for (WN* p = region.First; p; p = LWN_Get_Parent(p))
01634 f = p;
01635 Print_Def_Use(f, TFile);
01636 }
01637 }
01638 else {
01639 SNL_DEBUG0(1, "\nNo transformation applied.\n\n");
01640 }
01641 }
01642
01643 SNL_Optimize_Bounds(region);
01644 return;
01645 }
01646
01647 extern void SNL_Phase(WN* func_nd)
01648 {
01649 if (Get_Trace(TP_LNOPT, TT_LNO_SKIP_PH2))
01650 return;
01651 if (Get_Trace(TP_LNOPT2, TT_SHACKLE_ONLY))
01652 return;
01653 if (Get_Trace(TP_LNOPT2, TT_TILE_ONLY)) {
01654 LNO_Run_Oinvar = FALSE;
01655 LNO_Outer_Unroll = 1;
01656 }
01657
01658 FIZ_FUSE_INFO* ffi=
01659 CXX_NEW(FIZ_FUSE_INFO(&LNO_local_pool), &LNO_local_pool);
01660 ffi->Build(func_nd);
01661
01662
01663
01664
01665
01666 #ifdef KEY
01667 if(ffi->Num_Snl() > MAX_NESTS_IN_FU)
01668 LNO_Outer_Unroll = 1;
01669 #endif
01670
01671 if (LNO_Test_Dump)
01672 for (INT i = 0; i < ffi->Num_Snl(); i++)
01673 ffi->Print(i, TFile);
01674
01675 for (INT i = 0; i < ffi->Num_Snl(); i++) {
01676 WN* wn = ffi->Get_Wn(i);
01677 INT nloops = ffi->Get_Depth(i);
01678 if (nloops < 1 || ffi->Get_Type(i) != Inner)
01679 continue;
01680 if (LNO_Analysis)
01681 fprintf(LNO_Analysis, "(LNO_SNL\n");
01682 SNL_Upper_Bound_Standardize(wn, nloops);
01683 Hoist_Bounds_One_Level(wn);
01684 SNL_Transform(wn, nloops);
01685 if (LNO_Analysis)
01686 fprintf(LNO_Analysis,")\n");
01687 }
01688 }
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698 enum RED_CLASS {RED_ACCESS, RED_BOUND, RED_NEITHER};
01699
01700
01701
01702
01703
01704
01705
01706
01707 static RED_CLASS SNL_Bound_Lin_Symb_Worth_Optimizing(WN* wn_exp,
01708 ACCESS_VECTOR* av)
01709 {
01710 if (av->Lin_Symb == NULL)
01711 return RED_NEITHER;
01712 INTSYMB_CONST_ITER iter(av->Lin_Symb);
01713 const INTSYMB_NODE* first = iter.First();
01714 for (const INTSYMB_NODE* node = first; !iter.Is_Empty(); node = iter.Next()) {
01715 if (node->Coeff == 0) {
01716 DevWarn("Access vector has zero coefficient linear symbol");
01717 return RED_ACCESS;
01718 }
01719 INT count = Symbol_Count(wn_exp, node->Symbol);
01720 if (count == 0) {
01721 DevWarn("Access vector has redundant linear symbol");
01722 return RED_ACCESS;
01723 }
01724 if (count > 1)
01725 return RED_BOUND;
01726 }
01727 return RED_NEITHER;
01728 }
01729
01730
01731
01732
01733
01734
01735
01736
01737 static RED_CLASS SNL_Bound_Non_Lin_Symb_Worth_Optimizing(WN* wn_exp,
01738 ACCESS_VECTOR* av)
01739 {
01740 if (av->Non_Lin_Symb == NULL)
01741 return RED_NEITHER;
01742 SUMPROD_CONST_ITER iter(av->Non_Lin_Symb);
01743 const SUMPROD_NODE* first = iter.First();
01744 for (const SUMPROD_NODE* node = first; !iter.Is_Empty(); node = iter.Next()) {
01745 if (node->Coeff == 0) {
01746 DevWarn("Access vector has zero coefficient nonlinear symbol");
01747 return RED_ACCESS;
01748 }
01749 SYMBOL_CONST_ITER sitr(node->Prod_List);
01750 const SYMBOL_NODE* f = sitr.First();
01751 for (const SYMBOL_NODE* node = f; !sitr.Is_Empty(); node = sitr.Next()) {
01752 INT count = Symbol_Count(wn_exp, node->Symbol);
01753 if (count == 0) {
01754 DevWarn("Access vector has redundant nonlinear symbol");
01755 return RED_ACCESS;
01756 }
01757 if (count > 1)
01758 return RED_BOUND;
01759 }
01760 }
01761 return RED_NEITHER;
01762 }
01763
01764
01765
01766
01767
01768
01769
01770
01771 static WN* Enclosing_Loop_At_Depth(WN* wn_exp,
01772 INT depth)
01773 {
01774 for (WN* wn = wn_exp; wn != NULL; wn = LWN_Get_Parent(wn))
01775 if (WN_opcode(wn) == OPC_DO_LOOP && Do_Loop_Depth(wn) == depth)
01776 return wn;
01777 return NULL;
01778 }
01779
01780
01781
01782
01783
01784
01785
01786
01787 static RED_CLASS SNL_Bound_Loop_Coeff_Worth_Optimizing(WN* wn_exp,
01788 ACCESS_VECTOR* av,
01789 INT loop_depth)
01790 {
01791 for (INT j = 0; j < loop_depth; j++) {
01792 WN* wn_jloop = Enclosing_Loop_At_Depth(wn_exp, j);
01793 INT count = Symbol_Count(wn_exp, SYMBOL(WN_index(wn_jloop)));
01794 if (count == 0 && av->Loop_Coeff(j) > 0) {
01795 DevWarn("Access vector has redundant index");
01796 return RED_ACCESS;
01797 }
01798 if (count > 1)
01799 return RED_BOUND;
01800 }
01801 return RED_NEITHER;
01802 }
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814 enum ACOPR_CLASS {ACOPR_MAX, ACOPR_MIN, ACOPR_ONE, ACOPR_INVALID};
01815
01816
01817
01818
01819
01820
01821
01822 static ACOPR_CLASS SNL_Access_Operator(WN* wn_exp)
01823 {
01824 INT max_count = Num_Maxs(wn_exp);
01825 INT min_count = Num_Mins(wn_exp);
01826 if (max_count > 0 && min_count == 0)
01827 return ACOPR_MAX;
01828 if (min_count > 0 && min_count == 0)
01829 return ACOPR_MIN;
01830 if (min_count == 0 && max_count == 0)
01831 return ACOPR_ONE;
01832 return ACOPR_INVALID;
01833 }
01834
01835
01836
01837
01838
01839
01840
01841 static BOOL SNL_Bound_Worth_Optimizing(WN* wn_exp,
01842 ACCESS_ARRAY* aa,
01843 INT loop_depth)
01844 {
01845 if (Bound_Is_Too_Messy(aa))
01846 return FALSE;
01847 ACOPR_CLASS acopr = SNL_Access_Operator(wn_exp);
01848 if (acopr == ACOPR_INVALID)
01849 return FALSE;
01850 if (acopr == ACOPR_ONE && aa->Num_Vec() > 1)
01851 return FALSE;
01852 for (INT i = 0; i < aa->Num_Vec(); i++) {
01853 ACCESS_VECTOR* av = aa->Dim(i);
01854 RED_CLASS red = SNL_Bound_Loop_Coeff_Worth_Optimizing(wn_exp, av,
01855 loop_depth);
01856 if (red == RED_ACCESS)
01857 return FALSE;
01858 if (red == RED_BOUND)
01859 return TRUE;
01860 red = SNL_Bound_Lin_Symb_Worth_Optimizing(wn_exp, av);
01861 if (red == RED_ACCESS)
01862 return FALSE;
01863 if (red == RED_BOUND)
01864 return TRUE;
01865 red = SNL_Bound_Non_Lin_Symb_Worth_Optimizing(wn_exp, av);
01866 if (red == RED_ACCESS)
01867 return FALSE;
01868 if (red == RED_BOUND)
01869 return TRUE;
01870 }
01871 return FALSE;
01872 }
01873
01874
01875
01876
01877
01878
01879
01880
01881 static BOOL SNL_LB_Worth_Optimizing(WN* wn_loop)
01882 {
01883 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01884 ACCESS_ARRAY* aa = dli->LB;
01885 for (INT i = 0; i < aa->Num_Vec(); i++) {
01886 ACCESS_VECTOR* av = aa->Dim(i);
01887 if (av->Loop_Coeff(i) > 0)
01888 return FALSE;
01889 }
01890 return SNL_Bound_Worth_Optimizing(WN_start(wn_loop), aa,
01891 Do_Loop_Depth(wn_loop));
01892 }
01893
01894
01895
01896
01897
01898
01899
01900
01901 static BOOL SNL_UB_Worth_Optimizing(WN* wn_loop)
01902 {
01903 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01904 ACCESS_ARRAY* aa = dli->UB;
01905 for (INT i = 0; i < aa->Num_Vec(); i++) {
01906 ACCESS_VECTOR* av = aa->Dim(i);
01907 if (av->Loop_Coeff(i) < 0)
01908 return FALSE;
01909 }
01910 if (!Upper_Bound_Standardize(WN_end(wn_loop), TRUE))
01911 return FALSE;
01912 if (UBexp(WN_end(wn_loop)) == NULL)
01913 return FALSE;
01914 return SNL_Bound_Worth_Optimizing(UBexp(WN_end(wn_loop)), aa,
01915 Do_Loop_Depth(wn_loop));
01916 }
01917
01918
01919
01920
01921
01922
01923
01924 static WN* SNL_Opr(OPERATOR opr,
01925 WN* wn_exp1,
01926 WN* wn_exp2)
01927 {
01928 if (wn_exp1 == NULL)
01929 return wn_exp2;
01930 if (wn_exp2 == NULL)
01931 return wn_exp1;
01932 TYPE_ID type = Max_Wtype(WN_rtype(wn_exp1), WN_rtype(wn_exp2));
01933 OPCODE op = OPCODE_make_op(opr, Promote_Type(type), MTYPE_V);
01934 return LWN_CreateExp2(op, wn_exp1, wn_exp2);
01935 }
01936
01937
01938
01939
01940
01941
01942
01943
01944 static WN* SNL_Access_Index_Section(WN* wn_exp,
01945 ACCESS_VECTOR* av,
01946 INT direction,
01947 INT loop_depth,
01948 DU_MANAGER* du)
01949 {
01950 FmtAssert(direction == 1 || direction == -1, ("Invalid direction value"));
01951 WN* wn_result = NULL;
01952 for (INT i = 0; i < loop_depth; i++) {
01953 if (av->Loop_Coeff(i) != 0) {
01954 SYMBOL sym_index = SYMBOL(WN_index(Enclosing_Loop_At_Depth(wn_exp, i)));
01955 WN* wn_index = Find_Node(sym_index, wn_exp);
01956 WN* wn_index_copy = LWN_Copy_Tree(wn_index);
01957 LWN_Copy_Def_Use(wn_index, wn_index_copy, du);
01958 WN* wn_const = LWN_Make_Icon(WN_rtype(wn_index_copy),
01959 direction * av->Loop_Coeff(i));
01960 WN* wn_prod = SNL_Opr(OPR_MPY, wn_const, wn_index_copy);
01961 wn_result = SNL_Opr(OPR_ADD, wn_result, wn_prod);
01962 }
01963 }
01964 return wn_result;
01965 }
01966
01967
01968
01969
01970
01971
01972
01973
01974 static WN* SNL_Access_Linear_Section(WN* wn_exp,
01975 ACCESS_VECTOR* av,
01976 INT direction,
01977 DU_MANAGER* du)
01978 {
01979 FmtAssert(direction == 1 || direction == -1, ("Invalid direction value"));
01980 if (av->Lin_Symb == NULL)
01981 return NULL;
01982 WN* wn_result = NULL;
01983 INTSYMB_CONST_ITER iter(av->Lin_Symb);
01984 const INTSYMB_NODE* first = iter.First();
01985 for (const INTSYMB_NODE* node = first; !iter.Is_Empty(); node = iter.Next()) {
01986 WN* wn_index = Find_Node(node->Symbol, wn_exp);
01987 WN* wn_index_copy = LWN_Copy_Tree(wn_index);
01988 LWN_Copy_Def_Use(wn_index, wn_index_copy, du);
01989 WN* wn_const = LWN_Make_Icon(WN_rtype(wn_index_copy),
01990 direction * node->Coeff);
01991 WN* wn_prod = SNL_Opr(OPR_MPY, wn_const, wn_index_copy);
01992 wn_result = SNL_Opr(OPR_ADD, wn_result, wn_prod);
01993 }
01994 return wn_result;
01995 }
01996
01997
01998
01999
02000
02001
02002
02003
02004 static WN* SNL_Access_Nonlinear_Section(WN* wn_exp,
02005 ACCESS_VECTOR* av,
02006 INT direction,
02007 DU_MANAGER* du)
02008 {
02009 FmtAssert(direction == 1 || direction == -1, ("Invalid direction value"));
02010 if (av->Non_Lin_Symb == NULL)
02011 return NULL;
02012 WN* wn_result = NULL;
02013 SUMPROD_CONST_ITER iter(av->Non_Lin_Symb);
02014 const SUMPROD_NODE* first = iter.First();
02015 for (const SUMPROD_NODE* node = first; !iter.Is_Empty(); node = iter.Next()) {
02016 WN* wn_prod = NULL;
02017 SYMBOL_CONST_ITER sitr(node->Prod_List);
02018 const SYMBOL_NODE* f = sitr.First();
02019 for (const SYMBOL_NODE* snode = f; !sitr.Is_Empty(); snode = sitr.Next()) {
02020 WN* wn_index = Find_Node(snode->Symbol, wn_exp);
02021 WN* wn_index_copy = LWN_Copy_Tree(wn_index);
02022 LWN_Copy_Def_Use(wn_index, wn_index_copy, du);
02023 wn_prod = SNL_Opr(OPR_MPY, wn_prod, wn_index);
02024 }
02025 WN* wn_const = LWN_Make_Icon(WN_rtype(wn_prod), direction * node->Coeff);
02026 wn_prod = SNL_Opr(OPR_MPY, wn_prod, wn_const);
02027 wn_result = SNL_Opr(OPR_ADD, wn_result, wn_prod);
02028 }
02029 return wn_result;
02030 }
02031
02032
02033
02034
02035
02036
02037
02038 static void SNL_Optimize_LB_With_Access_Vectors(WN* wn_loop,
02039 DU_MANAGER* du)
02040 {
02041 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
02042 if (!dli->Step->Is_Const())
02043 return;
02044 ACCESS_ARRAY* aa = dli->Step->Const_Offset > 0 ? dli->LB : dli->UB;
02045 if (Bound_Is_Too_Messy(aa))
02046 return;
02047 WN* wn_exp = WN_kid0(WN_start(wn_loop));
02048 WN* wn_result = NULL;
02049 ACOPR_CLASS acopr = SNL_Access_Operator(wn_exp);
02050 OPERATOR opr = acopr == ACOPR_MAX ? OPR_MAX : OPR_MIN;
02051 for (INT i = 0; i < aa->Num_Vec(); i++) {
02052 ACCESS_VECTOR* av = aa->Dim(i);
02053 WN* wn_local_result = NULL;
02054 WN* wn_index = SNL_Access_Index_Section(wn_exp, av, 1,
02055 Do_Loop_Depth(wn_loop), du);
02056 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_index);
02057 WN* wn_linear = SNL_Access_Linear_Section(wn_exp, av, 1, du);
02058 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_linear);
02059 WN* wn_nonlinear = SNL_Access_Nonlinear_Section(wn_exp, av, 1, du);
02060 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_nonlinear);
02061 TYPE_ID index_type = wn_local_result == NULL
02062 ? WN_rtype(wn_exp) : WN_rtype(wn_local_result);
02063 WN* wn_const = LWN_Make_Icon(index_type, -av->Const_Offset);
02064 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_const);
02065 INT divisor = av->Loop_Coeff(Do_Loop_Depth(wn_loop));
02066 FmtAssert(divisor <= -1, ("Should have screened out other values"));
02067 if (divisor < -1)
02068 wn_local_result = LWN_CreateDivfloor(WN_rtype(wn_local_result),
02069 wn_local_result, LWN_Make_Icon(WN_rtype(wn_local_result), -divisor));
02070 wn_result = SNL_Opr(opr, wn_result, wn_local_result);
02071 }
02072 Replace_Wnexp_With_Exp_Copy(wn_exp, wn_result, du);
02073 LWN_Delete_Tree(wn_result);
02074 }
02075
02076
02077
02078
02079
02080
02081 static void SNL_Optimize_UB_With_Access_Vectors(WN* wn_loop,
02082 DU_MANAGER* du)
02083 {
02084 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
02085 if (!dli->Step->Is_Const())
02086 return;
02087 ACCESS_ARRAY* aa = dli->Step->Const_Offset > 0 ? dli->UB : dli->LB;
02088 if (Bound_Is_Too_Messy(aa))
02089 return;
02090 if (!Upper_Bound_Standardize(WN_end(wn_loop), TRUE))
02091 return;
02092 if (UBvar(WN_end(wn_loop)) == NULL || UBexp(WN_end(wn_loop)) == NULL)
02093 return;
02094 WN* wn_exp = UBexp(WN_end(wn_loop));
02095 WN* wn_result = NULL;
02096 ACOPR_CLASS acopr = SNL_Access_Operator(wn_exp);
02097 OPERATOR opr = acopr == ACOPR_MAX ? OPR_MAX : OPR_MIN;
02098 for (INT i = 0; i < aa->Num_Vec(); i++) {
02099 ACCESS_VECTOR* av = aa->Dim(i);
02100 WN* wn_local_result = NULL;
02101 WN* wn_index = SNL_Access_Index_Section(wn_exp, av, -1,
02102 Do_Loop_Depth(wn_loop), du);
02103 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_index);
02104 WN* wn_linear = SNL_Access_Linear_Section(wn_exp, av, -1, du);
02105 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_linear);
02106 WN* wn_nonlinear = SNL_Access_Nonlinear_Section(wn_exp, av, -1, du);
02107 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_nonlinear);
02108 TYPE_ID index_type = wn_local_result == NULL
02109 ? WN_rtype(wn_exp) : WN_rtype(wn_local_result);
02110 WN* wn_const = LWN_Make_Icon(index_type, av->Const_Offset);
02111 wn_local_result = SNL_Opr(OPR_ADD, wn_local_result, wn_const);
02112 INT divisor = av->Loop_Coeff(Do_Loop_Depth(wn_loop));
02113 FmtAssert(divisor >= 1,
02114 ("Should have screened out other values"));
02115 if (divisor > 1)
02116 wn_local_result = LWN_CreateDivfloor(WN_rtype(wn_local_result),
02117 wn_local_result, LWN_Make_Icon(WN_rtype(wn_local_result), divisor));
02118 wn_result = SNL_Opr(opr, wn_result, wn_local_result);
02119 }
02120 WN* wn_ldid = LWN_Copy_Tree(UBvar(WN_end(wn_loop)));
02121 LWN_Copy_Def_Use(UBvar(WN_end(wn_loop)), wn_ldid, du);
02122 OPCODE op_orig = WN_opcode(WN_end(wn_loop));
02123 OPCODE op_le = OPCODE_make_op(OPR_LE, OPCODE_rtype(op_orig),
02124 OPCODE_desc(op_orig));
02125 wn_result = LWN_CreateExp2(op_le, wn_ldid, wn_result);
02126 Replace_Wnexp_With_Exp_Copy(WN_end(wn_loop), wn_result, du);
02127 LWN_Delete_Tree(wn_result);
02128 }
02129
02130
02131
02132
02133
02134
02135
02136
02137 static void SNL_Optimize_Bounds_With_Access_Vectors(WN* wn_loop,
02138 DU_MANAGER* du)
02139 {
02140 if (SNL_LB_Worth_Optimizing(wn_loop))
02141 SNL_Optimize_LB_With_Access_Vectors(wn_loop, du);
02142 if (SNL_UB_Worth_Optimizing(wn_loop))
02143 SNL_Optimize_UB_With_Access_Vectors(wn_loop, du);
02144 }
02145