00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #define __STDC_LIMIT_MACROS
00041 #include <stdint.h>
00042 #ifdef USE_PCH
00043 #include "lno_pch.h"
00044 #endif // USE_PCH
00045 #pragma hdrstop
00046
00047 #include <sys/types.h>
00048 #include <alloca.h>
00049 #include "snl.h"
00050 #include "snl_xbounds.h"
00051 #include "config_targ.h"
00052 #include "lwn_util.h"
00053 #include "lnoutils.h"
00054 #include "cxx_graph.h"
00055 #include "opt_du.h"
00056 #include "opt_alias_interface.h"
00057 #include "wintrinsic.h"
00058 #include "scalar_expand.h"
00059 #include "strtab.h"
00060 #include "dvector.h"
00061 #include "lnopt_main.h"
00062 #include "move.h"
00063 #include "small_trips.h"
00064 #include "sxlimit.h"
00065 #include "ir_reader.h"
00066 #include "prompf.h"
00067 #include "anl_driver.h"
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079 static BOOL Scalar_Expansion_Tile(WN* wn_loop,
00080 INT tile_size)
00081 {
00082 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00083 return !(!dli->Num_Iterations_Symbolic
00084 && dli->Est_Num_Iterations <= tile_size
00085 || dli->Est_Max_Iterations_Index != -1
00086 && dli->Est_Max_Iterations_Index <= tile_size
00087 || dli->Is_Inner_Tile && dli->Tile_Size > 0
00088 && dli->Tile_Size <= tile_size);
00089 }
00090
00091
00092
00093
00094
00095
00096
00097
00098 extern INT SNL_INV_Compute_Tile_Size(INT depth)
00099 {
00100 FmtAssert(depth > 0,
00101 ("Scalar expanded array has null or negative number of dimensions."));
00102 if (LNO_SE_Tile_Size != 0)
00103 return LNO_SE_Tile_Size;
00104 if (depth == 1)
00105 return 1000;
00106 if (depth == 2)
00107 return 300;
00108 if (depth >= 3)
00109 return 80;
00110 return 1000/depth;
00111 }
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121 extern WN* SE_Tile_Inner_Loop(WN* loop,
00122 MEM_POOL *pool)
00123 {
00124 if (!Scalar_Expansion_Tile(loop, SNL_INV_Compute_Tile_Size(1)))
00125 return NULL;
00126 if (!Upper_Bound_Standardize(WN_end(loop), TRUE))
00127 return NULL;
00128 return Tile_Loop(loop, SNL_INV_Compute_Tile_Size(1), 0, SNL_INV_SE_ONLY,
00129 NULL, pool);
00130 }
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 static INT SNL_INV_Add_Tile(INT loop_index,
00142 INT strips,
00143 INT tile_size,
00144 INT level,
00145 SNL_INV_CACHE_BLOCK_REASON add_reason,
00146 INT iloop[],
00147 INT stripsz[],
00148 INT striplevel[],
00149 SNL_INV_CACHE_BLOCK_REASON reason[])
00150 {
00151 INT i;
00152 for (i = 0; i < strips; i++)
00153 if (iloop[i] > loop_index)
00154 break;
00155 for (INT j = strips - 1; j >= i; j--) {
00156 iloop[j+1] = iloop[j];
00157 stripsz[j+1] = stripsz[j];
00158 striplevel[j+1] = striplevel[j];
00159 }
00160 iloop[i] = loop_index;
00161 stripsz[i] = tile_size;
00162 striplevel[i] = level;
00163 reason[i] = add_reason;
00164 return ++strips;
00165 }
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175 static INT SNL_INV_Remove_Tile(INT loop_index,
00176 INT strips,
00177 INT iloop[],
00178 INT stripsz[],
00179 INT striplevel[])
00180 {
00181 INT i;
00182 for (i = 0; i < strips; i++)
00183 if (iloop[i] == loop_index)
00184 break;
00185 FmtAssert(i < strips, ("Tried to remove non-existent tile."));
00186 for (INT j = i + 1; j < strips; j++) {
00187 iloop[j-1] = iloop[j];
00188 stripsz[j-1] = stripsz[j];
00189 striplevel[j-1] = striplevel[j];
00190 }
00191 return --strips;
00192 }
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202 static void SNL_INV_Update_Tile(INT loop_index,
00203 INT strips,
00204 INT tile_size,
00205 INT level,
00206 SNL_INV_CACHE_BLOCK_REASON update_reason,
00207 INT iloop[],
00208 INT stripsz[],
00209 INT striplevel[],
00210 SNL_INV_CACHE_BLOCK_REASON reason[])
00211 {
00212 INT i;
00213 for (i = 0; i < strips; i++)
00214 if (iloop[i] == loop_index)
00215 break;
00216 FmtAssert(i < strips, ("Tried to update non-existent tile."));
00217 stripsz[i] = tile_size;
00218 striplevel[i] = level;
00219 reason[i] = update_reason;
00220 }
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230 static INT SNL_INV_Get_Tile_Size(INT loop_index,
00231 INT strips,
00232 INT iloop[],
00233 INT stripsz[],
00234 INT striplevel[],
00235 INT* level)
00236 {
00237 INT i;
00238 for (i = 0; i < strips; i++)
00239 if (iloop[i] == loop_index)
00240 break;
00241 return (i == strips ? 0 : (*level = striplevel[i], stripsz[i]));
00242 }
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254 extern void SE_CT_New_Tile_Infos(WN* wn_outer,
00255 SX_PLIST* plist,
00256 SNL_TILE_INFO *ti,
00257 INT permutation[],
00258 INT nloops,
00259 MEM_POOL* pool,
00260 SNL_TILE_INFO **ti_se,
00261 SNL_TILE_INFO **ti_ct,
00262 BOOL full_dist)
00263 {
00264
00265
00266
00267 extern INT Num_Cache_Strips;
00268 INT max_strips_se = 15 - nloops - Num_Cache_Strips;
00269
00270 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00271 DOLOOP_STACK stack(&LNO_local_pool);
00272 Build_Doloop_Stack(wn_inner, &stack);
00273
00274 FmtAssert(ti == NULL || ti->Rectangular(),
00275 ("Trying invariant code on non-invariant case."));
00276 FmtAssert(*ti_se == NULL,
00277 ("Scalar expansion tiles have not yet been computed."));
00278 FmtAssert(*ti_ct == NULL,
00279 ("Revised cache tiles have not yet been computed."));
00280
00281 INT strips_se = 0;
00282 INT strips_ct = ti != NULL ? ti->Strips() : 0;
00283 INT iloop_se[SNL_MAX_LOOPS];
00284 INT* iloop_ct = (INT*)alloca(sizeof(INT)*strips_ct);
00285 INT stripsz_se[SNL_MAX_LOOPS];
00286 INT* stripsz_ct = (INT*)alloca(sizeof(INT)*strips_ct);
00287 INT striplevel_se[SNL_MAX_LOOPS];
00288 INT* striplevel_ct = (INT*)alloca(sizeof(INT)*strips_ct);
00289 INT strip_invariant[SNL_MAX_LOOPS];
00290
00291 SNL_INV_CACHE_BLOCK_REASON reason_se[SNL_MAX_LOOPS];
00292 SNL_INV_CACHE_BLOCK_REASON reason_ct[SNL_MAX_LOOPS];
00293
00294 INT i;
00295 for (i = 0; i < SNL_MAX_LOOPS; i++)
00296 reason_se[i] = reason_ct[i] = SNL_INV_UNDEFINED;
00297
00298 for (i = 0; i < strips_ct; i++) {
00299 iloop_ct[i] = ti->Iloop(i);
00300 stripsz_ct[i] = ti->Stripsz(i);
00301 striplevel_ct[i] = ti->Striplevel(i);
00302 }
00303 for (i = 0; i < nloops; i++)
00304 strip_invariant[i] = FALSE;
00305
00306
00307
00308 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
00309 INT outer = first_in_stack;
00310 SX_PITER ii(plist);
00311 INT* tpermutation = !full_dist ? permutation : NULL;
00312 for (SX_PNODE* np = ii.First(); !ii.Is_Empty(); np = ii.Next()) {
00313 SX_PNODE::STATUS status = np->Transformable(outer, tpermutation, nloops);
00314 if (status == SX_PNODE::SE_NOT_REQD)
00315 continue;
00316 INT depth = np->Expansion_Depth() - first_in_stack + 1;
00317 for (i = 0; i < depth && strips_se < max_strips_se; i++) {
00318 INT lvl;
00319 if (!Scalar_Expansion_Tile(stack.Bottom_nth(first_in_stack+i),
00320 SNL_INV_Compute_Tile_Size(depth)))
00321 continue;
00322 if (strip_invariant[i])
00323 continue;
00324 INT cache_size = SNL_INV_Get_Tile_Size(i, strips_ct, iloop_ct,
00325 stripsz_ct, striplevel_ct, &lvl);
00326 if (cache_size > 0) {
00327 strips_se = SNL_INV_Add_Tile(i, strips_se, cache_size, lvl,
00328 SNL_INV_TILE_SE, iloop_se, stripsz_se, striplevel_se, reason_se);
00329 strips_ct = SNL_INV_Remove_Tile(i, strips_ct, iloop_ct, stripsz_ct,
00330 striplevel_ct);
00331 strip_invariant[i] = TRUE;
00332 continue;
00333 }
00334 cache_size = SNL_INV_Get_Tile_Size(i, strips_se, iloop_se, stripsz_se,
00335 striplevel_se, &lvl);
00336 INT new_cache_size = SNL_INV_Compute_Tile_Size(depth);
00337 if (cache_size == 0) {
00338 strips_se = SNL_INV_Add_Tile(i, strips_se, new_cache_size, 0,
00339 SNL_INV_SE_ONLY, iloop_se, stripsz_se, striplevel_se, reason_se);
00340 } else if (new_cache_size < cache_size) {
00341 SNL_INV_Update_Tile(i, strips_se, cache_size, lvl, SNL_INV_TILE_SE,
00342 iloop_se, stripsz_se, striplevel_se, reason_se);
00343 }
00344 }
00345 }
00346
00347
00348
00349
00350 BOOL found_big_loop = FALSE;
00351 INT max_block = 1;
00352 INT scalar_block = 1;
00353 for (i = 0; i < strips_se; i++) {
00354 WN* loop = stack.Bottom_nth(first_in_stack + iloop_se[i]);
00355 DO_LOOP_INFO* dli = Get_Do_Loop_Info(loop);
00356 if (dli->Num_Iterations_Symbolic && dli->Est_Max_Iterations_Index == -1) {
00357 found_big_loop = TRUE;
00358 break;
00359 }
00360 INT est_iterations = dli->Num_Iterations_Symbolic
00361 ? -1 : dli->Est_Num_Iterations;
00362 INT index_iterations = dli->Est_Max_Iterations_Index;
00363 FmtAssert(index_iterations != -1 || est_iterations != -1,
00364 ("Don't really have a loop with known loop iteration limit."));
00365 INT iterations = est_iterations == -1 ? index_iterations
00366 : index_iterations == -1 ? est_iterations
00367 : index_iterations < est_iterations ? index_iterations
00368 : est_iterations;
00369 max_block *= iterations;
00370 scalar_block *= SNL_INV_Compute_Tile_Size(strips_se);
00371 }
00372 if (!found_big_loop && max_block <= scalar_block)
00373 return;
00374
00375
00376 if (strips_se > 0)
00377 *ti_se = CXX_NEW(SNL_TILE_INFO(nloops, strips_se, iloop_se, stripsz_se,
00378 striplevel_se, reason_se, pool), pool);
00379 if (strips_ct > 0)
00380 *ti_ct = CXX_NEW(SNL_TILE_INFO(nloops, strips_ct, iloop_ct, stripsz_ct,
00381 striplevel_ct, reason_ct, pool), pool);
00382 }
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392 extern void SE_New_Tile_Infos(WN* wn_outer,
00393 SX_PLIST* plist,
00394 INT permutation[],
00395 INT nloops,
00396 MEM_POOL* pool,
00397 SNL_TILE_INFO **ti_se,
00398 BOOL full_dist)
00399 {
00400 SNL_TILE_INFO* ti_ct = NULL;
00401 SE_CT_New_Tile_Infos(wn_outer, plist, NULL, permutation, nloops, pool,
00402 ti_se, &ti_ct, full_dist);
00403 FmtAssert(ti_ct == NULL, ("Cache tiling not being done here."));
00404 }
00405
00406
00407
00408
00409
00410
00411
00412 static WN* SNL_INV_Get_Next_Outermost_Loop(WN* loop)
00413 {
00414 WN *wn;
00415 for (wn = LWN_Get_Parent(loop);
00416 wn != NULL && WN_opcode(wn) != OPC_DO_LOOP;
00417 wn = LWN_Get_Parent(wn));
00418 return wn;
00419 }
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430 static void SNL_INV_Scalar_Expand_Tile(WN* wn_outer,
00431 SNL_TILE_INFO *ti,
00432 INT nloops,
00433 MEM_POOL *pool,
00434 WN* tile_loops[],
00435 WN** the_newest_outer_loop)
00436 {
00437 if (ti == NULL)
00438 return;
00439 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00440 DOLOOP_STACK stack(&LNO_local_pool);
00441 Build_Doloop_Stack(wn_inner, &stack);
00442 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
00443 WN** compressed_tile_loops = (WN**)alloca(sizeof(WN**)*ti->Strips());
00444 INT i;
00445 for (i = 0; i < ti->Strips(); i++) {
00446 WN* loop_one = stack.Bottom_nth(first_in_stack + ti->Iloop(i));
00447 WN* new_outer_loop = Tile_Loop(loop_one, ti->Stripsz(i), ti->Striplevel(i),
00448 ti->Reason(i), NULL, &LNO_local_pool);
00449 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00450 INT new_id = New_Construct_Id();
00451 INT old_id = WN_MAP32_Get(Prompf_Id_Map, loop_one);
00452 WN_MAP32_Set(Prompf_Id_Map, new_outer_loop, new_id);
00453 if (ti->Reason(i) == SNL_INV_SE_ONLY)
00454 Prompf_Info->Se_Tile(old_id, new_id);
00455 else if (ti->Reason(i) == SNL_INV_TILE_SE)
00456 Prompf_Info->Se_Cache_Tile(old_id, new_id);
00457 }
00458 compressed_tile_loops[i] = SNL_INV_Get_Next_Outermost_Loop(loop_one);
00459 }
00460 for (i = 0; i < ti->Strips(); i++)
00461 tile_loops[i] = compressed_tile_loops[i];
00462 if (ti->Strips() > 0) {
00463 DOLOOP_STACK dostack(pool);
00464 Build_Doloop_Stack(LWN_Get_Parent(compressed_tile_loops[0]), &dostack);
00465 LNO_Build_Access(compressed_tile_loops[0], &dostack, &LNO_default_pool);
00466 }
00467 if (Do_Loop_Depth(compressed_tile_loops[0])
00468 < Do_Loop_Depth(*the_newest_outer_loop))
00469 *the_newest_outer_loop = compressed_tile_loops[0];
00470 }
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484 static void SNL_INV_Get_Dist_Limits(INT section_number,
00485 INT nstrips,
00486 DOLOOP_STACK* all_loop_stack,
00487 WN* tile_loops[],
00488 INT first_stack_index,
00489 INT nloops_total,
00490 INT& first_dist_index,
00491 INT& dist_count)
00492 {
00493 FmtAssert(section_number >= 0 && section_number <= nstrips,
00494 ("Section number out of range."));
00495 INT last_dist_index = 0;
00496 INT upper_limit = first_stack_index + nloops_total;
00497 if (section_number == 0) {
00498 first_dist_index = first_stack_index;
00499 } else {
00500 INT i;
00501 for (i = first_stack_index; i < upper_limit; i++)
00502 if (all_loop_stack->Bottom_nth(i) == tile_loops[section_number - 1])
00503 break;
00504 FmtAssert(i < upper_limit, ("Stack index out of range."));
00505 first_dist_index = i + 1;
00506 }
00507 if (section_number == nstrips) {
00508 last_dist_index = nloops_total - 1;
00509 } else {
00510 INT i;
00511 for (i = first_dist_index; i < upper_limit; i++)
00512 if (all_loop_stack->Bottom_nth(i) == tile_loops[section_number])
00513 break;
00514 FmtAssert(i < upper_limit, ("Stack index out of range."));
00515 last_dist_index = i - 1;
00516 }
00517 dist_count = last_dist_index - first_dist_index + 1;
00518 if (dist_count < 0)
00519 dist_count = 0;
00520 }
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531 static void SNL_INV_Distribute_Block_Of_Loops(DOLOOP_STACK* all_loop_stack,
00532 INT first_in_stack,
00533 INT first_loop_index,
00534 INT loop_count,
00535 WN** the_newest_outer_loop)
00536 {
00537 WN* newup = NULL;
00538 WN* newdown = NULL;
00539 for (INT lp = first_loop_index + 1; lp < first_loop_index + loop_count + 1;
00540 lp++) {
00541 if (lp >= all_loop_stack->Elements())
00542 continue;
00543 WN* wn = all_loop_stack->Bottom_nth(lp);
00544 if (WN_prev_executable(wn)) {
00545 if (newup == NULL)
00546 newup = SNL_Distribute(all_loop_stack, lp, first_loop_index, TRUE);
00547 else
00548 SNL_Distribute(all_loop_stack, lp, first_loop_index, TRUE);
00549 }
00550 if (WN_next_executable(wn)) {
00551 if (newdown == NULL)
00552 newdown = SNL_Distribute(all_loop_stack, lp, first_loop_index, FALSE);
00553 else
00554 SNL_Distribute(all_loop_stack, lp, first_loop_index, FALSE);
00555 }
00556 }
00557 if (first_loop_index == first_in_stack) {
00558 if (newup)
00559 *the_newest_outer_loop = newup;
00560 }
00561 }
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573 static void SNL_INV_Shift_Loops(DOLOOP_STACK* all_loop_stack,
00574 INT first_in_stack,
00575 INT first_loop_index,
00576 INT second_loop_index,
00577 INT nloops_total,
00578 WN** the_newest_outer_loop)
00579 {
00580 if (first_loop_index == second_loop_index)
00581 return;
00582 if (first_loop_index < first_in_stack || second_loop_index >= nloops_total)
00583 return;
00584 INT permutation[SNL_MAX_LOOPS];
00585 WN* permloop[SNL_MAX_LOOPS];
00586 INT i;
00587 for (i = 0; i < nloops_total - first_in_stack; i++)
00588 permutation[i] = i;
00589 for (i = second_loop_index; i >= first_loop_index + 1; i--)
00590 permutation[i-first_in_stack] = permutation[i-1-first_in_stack];
00591 permutation[first_loop_index-first_in_stack] =
00592 second_loop_index-first_in_stack;
00593 for (i = 0; i < nloops_total - first_in_stack; i++)
00594 permloop[i] = all_loop_stack->Bottom_nth(first_in_stack + permutation[i]);
00595 INT short_permutation[SNL_MAX_LOOPS];
00596 INT j;
00597 for (j = 0; j < nloops_total - first_in_stack; j++)
00598 if (permutation[j] != j)
00599 break;
00600 for (INT k = j; k < nloops_total - first_in_stack; k++)
00601 short_permutation[k - j] = permutation[k] - j;
00602 WN* wn_outer_loop = all_loop_stack->Bottom_nth(first_in_stack + j);
00603 WN* outer_perm_loop = SNL_INV_Permute_Loops(wn_outer_loop,
00604 short_permutation, nloops_total - first_in_stack - j, TRUE);
00605 for (i = 0; i < nloops_total - first_in_stack; i++)
00606 all_loop_stack->Bottom_nth(first_in_stack + i) = permloop[i];
00607 if (first_loop_index == first_in_stack)
00608 *the_newest_outer_loop = outer_perm_loop;
00609 }
00610
00611
00612
00613
00614
00615
00616
00617
00618 static void SNL_INV_SE_Distribute(WN* wn_new_outer,
00619 INT first_in_stack,
00620 INT nloops,
00621 WN* tile_loops[],
00622 INT nstrips,
00623 MEM_POOL* pool,
00624 WN** the_newest_outer_loop)
00625 {
00626 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00627
00628 DOLOOP_STACK all_loop_stack(pool);
00629 INT nloops_total = first_in_stack + nloops + nstrips;
00630 INT first_dist_index = 0;
00631 INT dist_count = 0;
00632 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, nloops + nstrips);
00633 Build_Doloop_Stack(wn_inner, &all_loop_stack);
00634 for (INT i = 0; i <= nstrips; i++) {
00635 SNL_INV_Get_Dist_Limits(i, nstrips, &all_loop_stack, tile_loops,
00636 first_in_stack, nloops_total, first_dist_index, dist_count);
00637 SNL_INV_Distribute_Block_Of_Loops(&all_loop_stack, first_in_stack,
00638 first_dist_index, dist_count, the_newest_outer_loop);
00639 INT second_dist_index = first_dist_index + dist_count;
00640 SNL_INV_Shift_Loops(&all_loop_stack, first_in_stack, first_dist_index,
00641 second_dist_index, nloops_total, the_newest_outer_loop);
00642 }
00643 DOLOOP_STACK do_stack(pool);
00644 Build_Doloop_Stack(LWN_Get_Parent(*the_newest_outer_loop), &do_stack);
00645 LNO_Build_Access(*the_newest_outer_loop, &do_stack, &LNO_default_pool);
00646 }
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659 static void SNL_INV_Compact_Scalar_Expand(DOLOOP_STACK* original_stack,
00660 INT first_in_stack,
00661 SX_PLIST* plist,
00662 SNL_TILE_INFO* ti_se,
00663 WN* tile_loops[],
00664 INT nstrips,
00665 INT permutation[],
00666 INT nloops,
00667 WN* guard_tests[],
00668 BOOL full_dist)
00669 {
00670 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00671 DU_MANAGER* du = Du_Mgr;
00672 SX_PITER ii(plist);
00673 SX_PNODE* nnext = NULL;
00674
00675 INT* tpermutation = !full_dist ? permutation : NULL;
00676 for (SX_PNODE* n = ii.First(); !ii.Is_Empty(); n = nnext) {
00677 nnext = ii.Next();
00678
00679 SNL_DEBUG1(3, "SNL_INV_Compact_Scalar_Expand() consider expanding %s\n",
00680 n->Symbol().Name());
00681
00682 SX_PNODE::STATUS status = n->Transformable(first_in_stack, tpermutation,
00683 nloops);
00684 if (status == SX_PNODE::SE_NOT_REQD)
00685 continue;
00686 FmtAssert(status == SX_PNODE::SE_REQD,
00687 ("Bug: can't expand scalar %s", n->Symbol().Name()));
00688
00689
00690
00691
00692
00693 WN* loops[SNL_MAX_LOOPS];
00694 INT order[SNL_MAX_LOOPS];
00695 INT dimcnt = n->Expansion_Depth() - first_in_stack + 1;
00696 INT lp;
00697 for (lp = 0; lp < dimcnt; lp++) {
00698 loops[lp] = original_stack->Bottom_nth(first_in_stack+lp);
00699 order[lp] = lp;
00700 }
00701
00702 INT strip_sizes[SNL_MAX_LOOPS];
00703 INT i;
00704 for (i = 0; i < dimcnt; i++)
00705 strip_sizes[i] = 0;
00706 for (i = 0; i < ti_se->Strips(); i++)
00707 strip_sizes[ti_se->Iloop(i)] = ti_se->Stripsz(i);
00708
00709 if (permutation) {
00710 for (i = 0; i < lp; i++) {
00711 INT jsmall = -1;
00712 for (INT j = 0; j < lp; j++) {
00713 BOOL ok = TRUE;
00714 for (INT ii = 0; ii < i; ii++)
00715 if (order[ii] == j)
00716 ok = FALSE;
00717 if (ok && (jsmall == -1 || permutation[jsmall] > permutation[j]))
00718 jsmall = j;
00719 }
00720 order[i] = jsmall;
00721 }
00722 }
00723
00724 INT this_symbols_nstrips = 0;
00725 for (i = 0; i < dimcnt; i++)
00726 if (strip_sizes[i] > 0)
00727 this_symbols_nstrips++;
00728
00729 WN* outermost_loop = loops[0];
00730 if (nstrips > 0 && Get_Do_Loop_Info(tile_loops[0])->Depth
00731 < Get_Do_Loop_Info(loops[0])->Depth)
00732 outermost_loop = tile_loops[0];
00733
00734 Scalar_Expand(outermost_loop,
00735 original_stack->Bottom_nth(n->Expansion_Depth()),
00736 n->Wn_Symbol(), n->Symbol(), loops, order, dimcnt,
00737 TRUE, n->Finalize(), FALSE, guard_tests, NULL,
00738 tile_loops, strip_sizes, this_symbols_nstrips);
00739
00740 plist->Remove(n);
00741 }
00742 }
00743
00744
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756 extern WN* SNL_INV_Limited_SE_And_Dist(WN* wn_outer,
00757 SNL_TILE_INFO* ti_se,
00758 INT permutation[],
00759 INT nloops,
00760 SX_PLIST* plist,
00761 BOOL full_dist)
00762 {
00763 if (ti_se == NULL)
00764 return NULL;
00765
00766 INT outer_depth = Do_Loop_Depth(wn_outer);
00767 INT guard_depth = SE_Guard_Depth(wn_outer, permutation, nloops, plist,
00768 -1, NULL, FALSE, full_dist);
00769 INT guard_loops = guard_depth - outer_depth + 1;
00770 WN** guard_tests = guard_depth == -1
00771 ? NULL : CXX_NEW_ARRAY(WN*, guard_loops, &LNO_local_pool);
00772 SE_Guard_Tests(wn_outer, nloops, guard_tests, guard_depth);
00773
00774 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00775 DOLOOP_STACK original_stack(&LNO_local_pool);
00776 Build_Doloop_Stack(wn_inner, &original_stack);
00777 INT first_in_stack = Do_Loop_Depth(wn_inner) - nloops + 1;
00778 WN* wn_new_outer = wn_outer;
00779 WN** tile_loops = (WN**) alloca(sizeof(WN**) * ti_se->Strips());
00780 SNL_INV_Scalar_Expand_Tile(wn_outer, ti_se, nloops, &LNO_local_pool,
00781 tile_loops, &wn_new_outer);
00782 SNL_INV_Compact_Scalar_Expand(&original_stack, first_in_stack, plist,
00783 ti_se, tile_loops, ti_se->Strips(), permutation, nloops, guard_tests,
00784 full_dist);
00785 SNL_INV_SE_Distribute(wn_new_outer, first_in_stack, nloops, tile_loops,
00786 ti_se->Strips(), &LNO_local_pool, &wn_new_outer);
00787 return wn_new_outer;
00788 }
00789