00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifdef USE_PCH
00039 #include "lno_pch.h"
00040 #endif // USE_PCH
00041 #pragma hdrstop
00042
00043 #include <sys/types.h>
00044 #include <alloca.h>
00045 #include "pu_info.h"
00046 #include "lwn_util.h"
00047 #include "lnoutils.h"
00048 #include "prompf.h"
00049 #include "config.h"
00050 #include "debug.h"
00051 #include "glob.h"
00052 #include "lnopt_main.h"
00053 #include "snl_utils.h"
00054 #include "reduc.h"
00055 #include "small_trips.h"
00056 #include "wind_down.h"
00057 #include "forward.h"
00058
00059 #define SPL_NO_SPLIT 0
00060 #define SPL_SPLIT_INNER 1
00061 #define SPL_SPLIT_ALL 2
00062
00063
00064
00065
00066
00067
00068
00069
00070 static BOOL SNL_SPL_Is_Tile_Plus_Constant(WN* wn_exp,
00071 SYMBOL wn_tile_symbol,
00072 INT* wn_tile_size)
00073 {
00074 if (WN_operator(wn_exp) == OPR_LDID
00075 && SYMBOL(wn_exp) == wn_tile_symbol) {
00076 *wn_tile_size = 1;
00077 return TRUE;
00078 }
00079 if (WN_operator(wn_exp) != OPR_ADD)
00080 return FALSE;
00081 WN* wn_constant = NULL;
00082 if (WN_operator(WN_kid0(wn_exp)) == OPR_LDID
00083 && SYMBOL(WN_kid0(wn_exp)) == wn_tile_symbol)
00084 wn_constant = WN_kid1(wn_exp);
00085 else if (WN_operator(WN_kid1(wn_exp)) == OPR_LDID
00086 && SYMBOL(WN_kid1(wn_exp)) == wn_tile_symbol)
00087 wn_constant = WN_kid0(wn_exp);
00088 if (wn_constant == NULL)
00089 return FALSE;
00090 if (WN_operator(wn_constant) != OPR_INTCONST)
00091 return FALSE;
00092 *wn_tile_size = WN_const_val(wn_constant) + 1;
00093 return TRUE;
00094 }
00095
00096
00097
00098
00099
00100
00101
00102 static BOOL Is_Multiple(WN* wn_tree,
00103 INT tile_const)
00104 {
00105 if (tile_const == 1)
00106 return TRUE;
00107 if (WN_operator(wn_tree) == OPR_INTCONST) {
00108 INT tree_const = WN_const_val(wn_tree);
00109 return tree_const % tile_const == 0;
00110 }
00111 if (WN_operator(wn_tree) == OPR_MPY) {
00112 if (Is_Multiple(WN_kid0(wn_tree), tile_const)
00113 || Is_Multiple(WN_kid1(wn_tree), tile_const))
00114 return TRUE;
00115 }
00116 return FALSE;
00117 }
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128 extern WN* SNL_SPL_Loop_Is_Inner_Tile(WN* wn_loop,
00129 INT* tile_size)
00130 {
00131
00132 if (!Do_Loop_Is_Good(wn_loop) || Do_Loop_Has_Gotos(wn_loop))
00133 return NULL;
00134
00135
00136 BOOL need_lb_fs = FALSE;
00137 WN* wn_outer = NULL;
00138 DU_MANAGER* du = Du_Mgr;
00139 WN* wn_tile_load = WN_kid0(WN_start(wn_loop));
00140 if (WN_operator(wn_tile_load) != OPR_LDID)
00141 return NULL;
00142 WN* wn_first = LWN_Get_Parent(wn_loop);
00143 WN* wn = 0;
00144 for (wn = wn_first; wn != NULL; wn = LWN_Get_Parent(wn))
00145 if (WN_opcode(wn) == OPC_DO_LOOP
00146 && SYMBOL(wn_tile_load) == SYMBOL(WN_index(wn)))
00147 break;
00148 if (wn == NULL) {
00149 WN* wn_fs_tile_def = Forward_Substitutable(wn_tile_load, du);
00150 if (wn_fs_tile_def != NULL) {
00151 wn_tile_load = WN_kid0(wn_fs_tile_def);
00152 if (WN_operator(wn_tile_load) != OPR_LDID)
00153 return NULL;
00154 WN* wn_first = LWN_Get_Parent(wn_loop);
00155 WN* wn = 0;
00156 for (wn = wn_first; wn != NULL; wn = LWN_Get_Parent(wn))
00157 if (WN_opcode(wn) == OPC_DO_LOOP
00158 && SYMBOL(wn_tile_load) == SYMBOL(WN_index(wn)))
00159 break;
00160 if (wn == NULL)
00161 return NULL;
00162 wn_outer = wn;
00163 need_lb_fs = TRUE;
00164 }
00165 } else {
00166 wn_outer = wn;
00167 }
00168 if (wn_outer == NULL)
00169 return NULL;
00170 SYMBOL wn_tile_sym(wn_tile_load);
00171
00172
00173 BOOL need_ub_fs = FALSE;
00174 Upper_Bound_Standardize(WN_end(wn_loop));
00175 WN* wn_le = WN_end(wn_loop);
00176 FmtAssert(WN_operator(wn_le) == OPR_LE,
00177 ("Did not standardize inner tile loop test."));
00178 WN* wn_ind = WN_kid0(wn_le);
00179 if (WN_operator(wn_ind) != OPR_LDID)
00180 return NULL;
00181 WN* wn_min = WN_kid1(wn_le);
00182 if (WN_operator(wn_min) == OPR_LDID) {
00183 WN* wn_min_def = Forward_Substitutable(wn_min, du);
00184 if (wn_min_def != NULL) {
00185 wn_min = WN_kid0(wn_min_def);
00186 need_ub_fs = TRUE;
00187 }
00188 }
00189 if (WN_operator(wn_min) != OPR_MIN)
00190 return NULL;
00191 INT tile_const = 0;
00192 WN* wn_tile_ub = NULL;
00193 if (SNL_SPL_Is_Tile_Plus_Constant(WN_kid0(wn_min), wn_tile_sym, &tile_const))
00194 wn_tile_ub = LWN_Copy_Tree(WN_kid1(wn_min), TRUE, LNO_Info_Map);
00195 else if (SNL_SPL_Is_Tile_Plus_Constant(WN_kid1(wn_min), wn_tile_sym,
00196 &tile_const))
00197 wn_tile_ub = LWN_Copy_Tree(WN_kid0(wn_min), TRUE, LNO_Info_Map);
00198 if (wn_tile_ub == NULL)
00199 return NULL;
00200
00201
00202 INT wn_step = Step_Size(wn_loop);
00203 if (wn_step != 1)
00204 return NULL;
00205
00206
00207 if (!Do_Loop_Is_Good(wn_outer) || Do_Loop_Has_Gotos(wn_outer))
00208 return NULL;
00209 BOOL need_outer_step_fs = FALSE;
00210 Upper_Bound_Standardize(WN_end(wn_outer));
00211 WN* wn_outer_step = Loop_Step(wn_outer);
00212 if (WN_operator(wn_outer_step) == OPR_LDID) {
00213 WN* wn_step_def = Forward_Substitutable(wn_outer_step, du);
00214 if (wn_step_def != NULL) {
00215 need_outer_step_fs = TRUE;
00216 wn_outer_step = WN_kid0(wn_step_def);
00217 }
00218 }
00219 if (!Is_Multiple(wn_outer_step, tile_const))
00220 return FALSE;
00221
00222
00223 if (need_lb_fs)
00224 Forward_Substitute_Ldids(WN_kid0(WN_start(wn_loop)), du);
00225 if (need_ub_fs)
00226 Forward_Substitute_Ldids(WN_kid1(WN_end(wn_loop)), du);
00227 if (need_outer_step_fs)
00228 Forward_Substitute_Ldids(Loop_Step(wn_outer), du);
00229
00230 *tile_size = tile_const;
00231 return wn_outer;
00232 }
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 static void SNL_SPL_Build_Loop_List(WN* wn_first,
00247 WN* wn_last,
00248 DOLOOP_STACK* inner_do_stack,
00249 DOLOOP_STACK* outer_do_stack,
00250 STACK<INT>* tile_size_stack,
00251 INT split_flag)
00252 {
00253 if (wn_first == NULL)
00254 return;
00255
00256 for (WN* wn = wn_first; ; wn = WN_next(wn)) {
00257 switch(WN_opcode(wn)) {
00258 case OPC_DO_LOOP:
00259 {
00260 if (split_flag == SPL_SPLIT_ALL || Get_Do_Loop_Info(wn)->Is_Inner) {
00261 SYMBOL wn_tile_symbol(WN_start(wn));
00262 INT tile_size = 0;
00263 WN* wn_outer = SNL_SPL_Loop_Is_Inner_Tile(wn, &tile_size);
00264 if (wn_outer != NULL) {
00265 inner_do_stack->Push(wn);
00266 outer_do_stack->Push(wn_outer);
00267 tile_size_stack->Push(tile_size);
00268 }
00269 if (Get_Do_Loop_Info(wn)->Is_Inner)
00270 return;
00271 }
00272 WN* wn_newfirst = WN_first(WN_do_body(wn));
00273 WN* wn_newlast = WN_last(WN_do_body(wn));
00274 SNL_SPL_Build_Loop_List(wn_newfirst, wn_newlast, inner_do_stack,
00275 outer_do_stack, tile_size_stack, split_flag);
00276 }
00277 break;
00278 case OPC_IF:
00279 SNL_SPL_Build_Loop_List(WN_first(WN_then(wn)), WN_last(WN_then(wn)),
00280 inner_do_stack, outer_do_stack, tile_size_stack, split_flag);
00281 SNL_SPL_Build_Loop_List(WN_first(WN_else(wn)), WN_last(WN_else(wn)),
00282 inner_do_stack, outer_do_stack, tile_size_stack, split_flag);
00283 break;
00284 case OPC_DO_WHILE:
00285 case OPC_WHILE_DO:
00286 SNL_SPL_Build_Loop_List(WN_first(WN_while_body(wn)),
00287 WN_last(WN_while_body(wn)), inner_do_stack, outer_do_stack,
00288 tile_size_stack, split_flag);
00289 break;
00290 }
00291 if (wn == wn_last)
00292 break;
00293 }
00294 }
00295
00296
00297
00298
00299
00300
00301
00302 static void SNL_SPL_Sort_Stack(DOLOOP_STACK* outer_do_stack,
00303 DOLOOP_STACK* inner_do_stack,
00304 STACK<INT>* tile_size_stack)
00305 {
00306 for (INT i = 0; i < outer_do_stack->Elements(); i++) {
00307 INT compare_tag = (INT)(INTPTR)outer_do_stack->Bottom_nth(i);
00308 for (INT j = i + 1; j < outer_do_stack->Elements(); j++) {
00309 INT current_tag = (INT)(INTPTR)outer_do_stack->Bottom_nth(j);
00310 if (current_tag < compare_tag) {
00311 WN* outer_loop = outer_do_stack->Bottom_nth(i);
00312 outer_do_stack->Bottom_nth(i) = outer_do_stack->Bottom_nth(j);
00313 outer_do_stack->Bottom_nth(j) = outer_loop;
00314 WN* inner_loop = inner_do_stack->Bottom_nth(i);
00315 inner_do_stack->Bottom_nth(i) = inner_do_stack->Bottom_nth(j);
00316 inner_do_stack->Bottom_nth(j) = inner_loop;
00317 INT tile_size = tile_size_stack->Bottom_nth(i);
00318 tile_size_stack->Bottom_nth(i) = tile_size_stack->Bottom_nth(j);
00319 tile_size_stack->Bottom_nth(j) = tile_size;
00320 }
00321 }
00322 }
00323 }
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336 static void SNL_SPL_Get_Tile_Loops(DOLOOP_STACK* outer_do_stack,
00337 DOLOOP_STACK* inner_do_stack,
00338 STACK<INT>* tile_size_stack,
00339 WN** outer_tile_loop,
00340 DOLOOP_STACK* inner_tile_stack,
00341 INT* tile_size)
00342 {
00343 *outer_tile_loop = NULL;
00344 inner_tile_stack->Clear();
00345 if (outer_do_stack->Elements() == 0)
00346 return;
00347 *outer_tile_loop = outer_do_stack->Top();
00348 *tile_size = tile_size_stack->Top();
00349 while (outer_do_stack->Elements() > 0
00350 && outer_do_stack->Top() == *outer_tile_loop) {
00351 inner_tile_stack->Push(inner_do_stack->Top());
00352 outer_do_stack->Pop();
00353 inner_do_stack->Pop();
00354 FmtAssert(tile_size_stack->Top() == *tile_size,
00355 ("SNL_SPL_Get_Tile_Loops(): Inner tiles with nonmatching tile sizes"));
00356 tile_size_stack->Pop();
00357 }
00358 }
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369 static void SNL_SPL_Make_Inner_Tile_Stack(WN* wn_root,
00370 DOLOOP_STACK* stack,
00371 WN* cp_wn_root,
00372 DOLOOP_STACK* cp_stack)
00373 {
00374 if (wn_root == NULL)
00375 return;
00376
00377 switch (WN_opcode(wn_root)) {
00378 case OPC_DO_LOOP:
00379 {
00380 INT i;
00381 for (i = 0; i < stack->Elements(); i++)
00382 if (stack->Bottom_nth(i) == wn_root)
00383 break;
00384 if (i < stack->Elements())
00385 cp_stack->Bottom_nth(i) = cp_wn_root;
00386 SNL_SPL_Make_Inner_Tile_Stack(WN_do_body(wn_root), stack,
00387 WN_do_body(cp_wn_root), cp_stack);
00388 }
00389 break;
00390 case OPC_IF:
00391 SNL_SPL_Make_Inner_Tile_Stack(WN_then(wn_root), stack,
00392 WN_then(cp_wn_root), cp_stack);
00393 SNL_SPL_Make_Inner_Tile_Stack(WN_else(wn_root), stack,
00394 WN_else(cp_wn_root), cp_stack);
00395 break;
00396 case OPC_DO_WHILE:
00397 case OPC_WHILE_DO:
00398 SNL_SPL_Make_Inner_Tile_Stack(WN_while_body(wn_root),
00399 stack, WN_while_body(cp_wn_root), cp_stack);
00400 break;
00401 case OPC_BLOCK:
00402 WN* cp_wn = WN_first(cp_wn_root);
00403 for (WN* wn = WN_first(wn_root); wn != NULL; wn = WN_next(wn)) {
00404 SNL_SPL_Make_Inner_Tile_Stack(wn, stack, cp_wn,
00405 cp_stack);
00406 cp_wn = WN_next(cp_wn);
00407 }
00408 break;
00409 }
00410 }
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421 static void SNL_SPL_Copy_Inner_Tile_Stack(WN* outer_tile_loop,
00422 DOLOOP_STACK* inner_tile_stack,
00423 WN* cp_outer_tile_loop,
00424 DOLOOP_STACK* cp_inner_tile_stack)
00425 {
00426 WN* null_addr = NULL;
00427 cp_inner_tile_stack->Clear();
00428 for (INT i = 0; i < inner_tile_stack->Elements(); i++)
00429 cp_inner_tile_stack->Push(null_addr);
00430 SNL_SPL_Make_Inner_Tile_Stack(outer_tile_loop, inner_tile_stack,
00431 cp_outer_tile_loop, cp_inner_tile_stack);
00432 }
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443 static void SNL_SPL_Fix_First_Outer_Loop_Limits(WN* outer_tile_loop,
00444 INT tile_size)
00445 {
00446 SYMBOL sym_index(WN_start(outer_tile_loop));
00447 INT64 outer_step = tile_size - 1;
00448 WN* diff = LWN_Make_Icon(sym_index.Type, outer_step);
00449 OPCODE sub_opc = OPCODE_make_op(OPR_SUB, sym_index.Type, MTYPE_V);
00450 WN* new_ub = LWN_CreateExp2(sub_opc, WN_kid1(WN_end(outer_tile_loop)),
00451 diff);
00452 WN_kid1(WN_end(outer_tile_loop)) = new_ub;
00453 LWN_Set_Parent(WN_kid1(WN_end(outer_tile_loop)), WN_end(outer_tile_loop));
00454 }
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465 static void SNL_SPL_Fix_Second_Outer_Loop_Limits(WN* outer_tile_loop,
00466 WN* cp_outer_tile_loop,
00467 INT tile_size)
00468 {
00469 TYPE_ID wtype = WN_desc(WN_start(cp_outer_tile_loop));
00470 WN* newbegin = WN_start(cp_outer_tile_loop);
00471 LWN_Delete_Tree(WN_kid0(newbegin));
00472 WN_kid0(newbegin) = LWN_CreateLdid(OPCODE_make_op(OPR_LDID, wtype, wtype),
00473 WN_start(outer_tile_loop));
00474 LWN_Copy_Frequency(newbegin,outer_tile_loop);
00475 LWN_Set_Parent(WN_kid0(newbegin), newbegin);
00476 Fix_Do_Du_Info(newbegin, NULL, FALSE, outer_tile_loop, 0);
00477 DO_LOOP_INFO* dli = Get_Do_Loop_Info(cp_outer_tile_loop);
00478 dli->LB->Too_Messy = TRUE;
00479 dli->Est_Num_Iterations = tile_size / 2;
00480 dli->Num_Iterations_Symbolic = FALSE;
00481 }
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495 static void SNL_SPL_Fix_Inner_Loop_Limits(DOLOOP_STACK* inner_tile_stack,
00496 BOOL want_normal)
00497 {
00498 for (INT i = 0; i < inner_tile_stack->Elements(); i++) {
00499 WN* wn_inner = inner_tile_stack->Bottom_nth(i);
00500 WN* wn_min = WN_kid1(WN_end(wn_inner));
00501 FmtAssert(WN_operator(wn_min) == OPR_MIN,
00502 ("Could not find MIN in test of inner tile loop."));
00503 INT wn_const = 0;
00504 SYMBOL wn_tile_sym(WN_kid0(WN_start(wn_inner)));
00505 WN* wn_normal = NULL;
00506 WN* wn_last = NULL;
00507 if (SNL_SPL_Is_Tile_Plus_Constant(WN_kid0(wn_min), wn_tile_sym,
00508 &wn_const)) {
00509 wn_normal = WN_kid0(wn_min);
00510 wn_last = WN_kid1(wn_min);
00511 } else if (SNL_SPL_Is_Tile_Plus_Constant(WN_kid1(wn_min), wn_tile_sym,
00512 &wn_const)) {
00513 wn_normal = WN_kid1(wn_min);
00514 wn_last = WN_kid0(wn_min);
00515 }
00516 WN* wn_want = want_normal ? wn_normal : wn_last;
00517 FmtAssert(wn_normal != NULL,
00518 ("Could not find normal branch in inner tile loop."));
00519 WN* wn_new = LWN_Copy_Tree(wn_want, TRUE, LNO_Info_Map);
00520 LWN_Copy_Def_Use(wn_want, wn_new, Du_Mgr);
00521 LWN_Copy_Frequency_Tree(wn_new,WN_kid1(WN_end(wn_inner)));
00522 LWN_Delete_Tree(WN_kid1(WN_end(wn_inner)));
00523 WN_kid1(WN_end(wn_inner)) = wn_new;
00524 LWN_Set_Parent(wn_new, WN_end(wn_inner));
00525 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_inner);
00526 if (want_normal) {
00527 dli->Est_Num_Iterations = wn_const;
00528 dli->Num_Iterations_Symbolic = FALSE;
00529 }
00530 }
00531 }
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542 static void SNL_SPL_Propagate_Tiled_Lower_Bound(WN* outer_tile_loop,
00543 WN* inner_tile_loop)
00544 {
00545 WN* lower_bound = WN_kid0(WN_start(outer_tile_loop));
00546 WN* upper_bound = SNL_UBexp(WN_end(outer_tile_loop));
00547 WN* step = Loop_Step(outer_tile_loop);
00548 if (WN_operator(lower_bound) == OPR_INTCONST
00549 && WN_operator(upper_bound) == OPR_INTCONST
00550 && WN_operator(step) == OPR_INTCONST) {
00551 INT64 iters = Iterations(outer_tile_loop, &LNO_local_pool);
00552 INT64 lb = WN_const_val(lower_bound);
00553 INT64 ub = WN_const_val(upper_bound);
00554 INT64 u = Step_Size(outer_tile_loop);
00555 INT64 wdlb = lb + iters * u;
00556 WN* wd_lower_bound = LWN_Copy_Tree(lower_bound, TRUE, LNO_Info_Map);
00557 LWN_Copy_Frequency(wd_lower_bound,WN_start(inner_tile_loop));
00558 LWN_Delete_Tree(WN_kid0(WN_start(inner_tile_loop)));
00559 WN_kid0(WN_start(inner_tile_loop)) = wd_lower_bound;
00560 LWN_Set_Parent(wd_lower_bound, WN_start(inner_tile_loop));
00561 WN_const_val(wd_lower_bound) = wdlb;
00562 }
00563 }
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576 static void Prompf_Tile_Split(WN* outer_tile_loop,
00577 WN* cp_outer_tile_loop,
00578 BOOL cache_annotate)
00579 {
00580 STACK<WN*> st_old(&PROMPF_pool);
00581 STACK<WN*> st_new(&PROMPF_pool);
00582 Prompf_Assign_Ids(outer_tile_loop, cp_outer_tile_loop, &st_old, &st_new,
00583 FALSE);
00584 INT nloops = st_old.Elements();
00585 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00586 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00587 for (INT i = 0; i < nloops; i++) {
00588 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
00589 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
00590 }
00591 if (cache_annotate)
00592 Prompf_Info->Cache_Winddown(old_ids, new_ids, nloops);
00593 else
00594 Prompf_Info->Interleaved_Winddown(old_ids, new_ids, nloops);
00595 }
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615 static BOOL SNL_SPL_Split_Tile_Sets(WN* outer_tile_loop,
00616 DOLOOP_STACK* inner_tile_stack,
00617 INT tile_size,
00618 const char prefix[],
00619 BOOL cache_annotate)
00620 {
00621 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00622 DOLOOP_STACK cp_inner_tile_stack(&LNO_local_pool);
00623
00624
00625 WN* cp_outer_tile_loop = LWN_Copy_Tree(outer_tile_loop, TRUE, LNO_Info_Map);
00626 SNL_SPL_Copy_Inner_Tile_Stack(outer_tile_loop, inner_tile_stack,
00627 cp_outer_tile_loop, &cp_inner_tile_stack);
00628
00629
00630 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
00631 Prompf_Tile_Split(outer_tile_loop, cp_outer_tile_loop, cache_annotate);
00632
00633
00634 if (Cur_PU_Feedback) {
00635 INT64 size = tile_size;
00636 INT32 loop_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_start(outer_tile_loop));
00637 INT32 trip_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_step(outer_tile_loop));
00638 if (loop_count>0) {
00639 INT32 trip = MAX(1,trip_count/loop_count);
00640 float ratio = 1.0/(float) trip;
00641 float ratio1 = 1.0-ratio;
00642 INT32 adjust= size*loop_count;
00643 LWN_Scale_Frequency_Tree(WN_do_body(outer_tile_loop),ratio1);
00644 LWN_Scale_Frequency(WN_step(outer_tile_loop),ratio1);
00645
00646 LWN_Scale_Frequency_Tree(WN_do_body(cp_outer_tile_loop),ratio);
00647 LWN_Scale_Frequency(WN_step(cp_outer_tile_loop),ratio);
00648 }
00649 }
00650
00651
00652 LWN_Insert_Block_After(LWN_Get_Parent(outer_tile_loop),
00653 outer_tile_loop, cp_outer_tile_loop);
00654
00655
00656 WN* wn_holder[2];
00657 wn_holder[0] = outer_tile_loop;
00658 wn_holder[1] = cp_outer_tile_loop;
00659 if (!dg->Add_Deps_To_Copy_Block(outer_tile_loop, cp_outer_tile_loop, TRUE)) {
00660 LWN_Update_Dg_Delete_Tree(cp_outer_tile_loop, dg);
00661 LWN_Delete_Tree(cp_outer_tile_loop);
00662 return FALSE;
00663 }
00664 INT depth = Do_Depth(outer_tile_loop);
00665 HASH_TABLE<VINDEX16,VINDEX16>
00666 hash_table(MIN(dg->Get_Vertex_Count(), 512), &LNO_local_pool);
00667 Wind_Down_Dep_V(outer_tile_loop, cp_outer_tile_loop, &hash_table, dg);
00668 if (!Wind_Down_Dep_E(&hash_table, Do_Depth(outer_tile_loop), dg)) {
00669 LWN_Update_Dg_Delete_Tree(cp_outer_tile_loop, dg);
00670 LWN_Delete_Tree(cp_outer_tile_loop);
00671 return FALSE;
00672 }
00673
00674
00675 if (red_manager)
00676 red_manager->Unroll_Update(wn_holder, 2);
00677
00678
00679 Unrolled_DU_Update(wn_holder, 2, Do_Loop_Depth(outer_tile_loop) - 1,
00680 TRUE, FALSE);
00681
00682
00683 Replace_Index_Variable(outer_tile_loop, cp_outer_tile_loop, prefix);
00684
00685
00686 SNL_SPL_Fix_First_Outer_Loop_Limits(outer_tile_loop, tile_size);
00687 SNL_SPL_Fix_Second_Outer_Loop_Limits(outer_tile_loop, cp_outer_tile_loop,
00688 tile_size);
00689 if (cache_annotate)
00690 Set_Winddown_Annotations(cp_outer_tile_loop, 1, EST_REGISTER_USAGE(),
00691 TRUE);
00692
00693
00694 SNL_SPL_Fix_Inner_Loop_Limits(inner_tile_stack, 1);
00695 SNL_SPL_Fix_Inner_Loop_Limits(&cp_inner_tile_stack, 0);
00696
00697
00698 SNL_REGION region(cp_outer_tile_loop, cp_outer_tile_loop);
00699 SNL_SPL_Propagate_Tiled_Lower_Bound(outer_tile_loop, cp_outer_tile_loop);
00700 if (Iterations(cp_outer_tile_loop, &LNO_local_pool) == 1)
00701 Remove_Unity_Trip_Loop(cp_outer_tile_loop, FALSE, &(region.First),
00702 &(region.Last), Array_Dependence_Graph, Du_Mgr);
00703
00704
00705 SNL_Rebuild_Access_Arrays(outer_tile_loop);
00706 for (WN* wn = region.First; ; wn = WN_next(wn)) {
00707 SNL_Rebuild_Access_Arrays(wn);
00708 if (wn == region.Last)
00709 break;
00710 }
00711
00712
00713 if (LNO_Verbose) {
00714 fprintf(stdout, "Splitting cache tiles: (");
00715 INT i;
00716 for (i = 0; i < inner_tile_stack->Elements(); i++) {
00717 fprintf(stdout, "0x%p", inner_tile_stack->Bottom_nth(i));
00718 if (i < inner_tile_stack->Elements() - 1)
00719 fprintf(stdout, ",");
00720 }
00721 fprintf(stdout, ")\n");
00722 fprintf(TFile, "Splitting cache tiles: (");
00723 for (i = 0; i < inner_tile_stack->Elements(); i++) {
00724 fprintf(TFile, "0x%p", inner_tile_stack->Bottom_nth(i));
00725 if (i < inner_tile_stack->Elements() - 1)
00726 fprintf(TFile, ",");
00727 }
00728 fprintf(TFile, ")\n");
00729 }
00730 return TRUE;
00731 }
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743 extern BOOL SNL_SPL_Split_Inner_Tile_Loop(WN* wn_outer,
00744 WN* wn_inner,
00745 INT tile_size,
00746 const char prefix[],
00747 BOOL cache_annotate)
00748 {
00749 DOLOOP_STACK inner_do_stack(&LNO_local_pool);
00750 inner_do_stack.Push(wn_inner);
00751 return SNL_SPL_Split_Tile_Sets(wn_outer, &inner_do_stack, tile_size,
00752 prefix, cache_annotate);
00753 }
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00770
00771 extern void SNL_SPL_Split_Inner_Tile_Loops(WN* wn_first,
00772 WN* wn_last,
00773 INT split_flag,
00774 const char prefix[],
00775 BOOL cache_annotate)
00776 {
00777 WN* outer_tile_loop;
00778 INT tile_size = 0;
00779 DOLOOP_STACK inner_do_stack(&LNO_local_pool);
00780 DOLOOP_STACK outer_do_stack(&LNO_local_pool);
00781 DOLOOP_STACK inner_tile_stack(&LNO_local_pool);
00782 STACK<INT> tile_size_stack(&LNO_local_pool);
00783 if (split_flag == SPL_NO_SPLIT)
00784 return;
00785
00786 inner_do_stack.Clear();
00787 outer_do_stack.Clear();
00788 SNL_SPL_Build_Loop_List(wn_first, wn_last, &inner_do_stack, &outer_do_stack,
00789 &tile_size_stack, split_flag);
00790 SNL_SPL_Sort_Stack(&outer_do_stack, &inner_do_stack, &tile_size_stack);
00791 while (outer_do_stack.Elements() != 0) {
00792 SNL_SPL_Get_Tile_Loops(&outer_do_stack, &inner_do_stack, &tile_size_stack,
00793 &outer_tile_loop, &inner_tile_stack, &tile_size);
00794 if (LNO_Split_Tiles_Size > 0 && tile_size > LNO_Split_Tiles_Size)
00795 continue;
00796 BOOL success = SNL_SPL_Split_Tile_Sets(outer_tile_loop,
00797 &inner_tile_stack, tile_size, prefix, cache_annotate);
00798 if (!success)
00799 return;
00800 }
00801 }
00802