00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #ifdef USE_PCH
00037 #include "lno_pch.h"
00038 #endif // USE_PCH
00039 #pragma hdrstop
00040
00041 #ifdef _KEEP_RCS_ID
00042
00043 static char *rcs_id = "$Source$ $Revision$";
00044 #endif
00045
00046 #include <sys/types.h>
00047 #include <ctype.h>
00048 #include <limits.h>
00049 #include <alloca.h>
00050
00051 #include "pu_info.h"
00052 #include "lnoutils.h"
00053 #include "lnopt_main.h"
00054 #include "stab.h"
00055 #include "targ_const.h"
00056 #include "wn_simp.h"
00057 #include "stdlib.h"
00058 #include "lwn_util.h"
00059 #include "strtab.h"
00060 #include "config.h"
00061 #include "optimizer.h"
00062 #include "opt_du.h"
00063 #include "name.h"
00064 #include "wintrinsic.h"
00065 #include "lno_bv.h"
00066 #include "dep_graph.h"
00067 #include "debug.h"
00068 #include "cxx_memory.h"
00069 #include "snl.h"
00070 #include "snl_trans.h"
00071 #include "snl_utils.h"
00072 #include "fusion.h"
00073 #include "lego_pragma.h"
00074 #include "lego_util.h"
00075 #include "lego_opts.h"
00076 #include "tile.h"
00077 #include "move.h"
00078 #include "tlog.h"
00079
00080 #include "ir_reader.h"
00081
00082 #define MAX_NAME_LENGTH 132
00083 #define TLOG_STRING_LENGTH 1000
00084
00085 static ARRAY_DIRECTED_GRAPH16* dg;
00086 static DU_MANAGER* du;
00087
00088 enum KERNEL_SECTION {KERNEL_NONE, KERNEL_OUTER, KERNEL_MIDDLE, KERNEL_INNER};
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 static KERNEL_SECTION SNL_Kernel_Section(WN* wn,
00112 DOLOOP_STACK* stack,
00113 INT permutation[],
00114 INT nloops)
00115 {
00116 INT i;
00117 for (i = 0; i < stack->Elements(); i++)
00118 if (stack->Bottom_nth(i) == wn)
00119 break;
00120 if (i == stack->Elements())
00121 return KERNEL_NONE;
00122 INT first_in_stack = stack->Elements() - nloops;
00123 INT loop_index = i - first_in_stack;
00124 for (i = 0; i < loop_index && permutation[i] == i; i++);
00125 if (i == loop_index)
00126 return KERNEL_OUTER;
00127 for (i = nloops - 1; i > loop_index && permutation[i] == i; i--);
00128 if (i == loop_index)
00129 return KERNEL_INNER;
00130 return KERNEL_MIDDLE;
00131 }
00132
00133
00134 static BOOL Is_Non_Dependent_Expression(WN* wn, WN* outerloop);
00135
00136
00137
00138
00139
00140
00141
00142
00143 static BOOL Is_Non_Dependent_Load(WN* wn,
00144 WN* outerloop)
00145 {
00146 switch (WN_operator(wn)) {
00147 case OPR_LDID:
00148 case OPR_ILOAD:
00149 case OPR_IO:
00150 case OPR_RETURN:
00151 #ifdef KEY
00152 case OPR_GOTO_OUTER_BLOCK:
00153 #endif
00154 case OPR_CALL:
00155 case OPR_ICALL:
00156 case OPR_INTRINSIC_CALL:
00157 break;
00158 default:
00159 FmtAssert(0, ("Is_Non_Dependent_Load called with improper node type."));
00160 }
00161
00162 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn);
00163 if (def_list == NULL)
00164 return TRUE;
00165 if (def_list != NULL && def_list->Incomplete())
00166 return FALSE;
00167 DEF_LIST_ITER iter(def_list);
00168 const DU_NODE* node = NULL;
00169 if (WN_operator(wn) == OPR_LDID) {
00170 if (SYMBOL(wn) == SYMBOL(WN_index(outerloop)))
00171 return FALSE;
00172 for (WN* wn_tp = wn; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
00173 if (WN_opcode(wn_tp) == OPC_DO_LOOP
00174 && SYMBOL(wn) == SYMBOL(WN_index(wn_tp)))
00175 return TRUE;
00176 if (def_list->Loop_stmt() == 0x0) {
00177 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00178 WN* def = node->Wn();
00179 if (!Is_Non_Dependent_Expression(def, outerloop))
00180 return FALSE;
00181 }
00182 }
00183 } else {
00184 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00185 WN* def = node->Wn();
00186 if (Wn_Is_Inside(def, outerloop))
00187 return FALSE;
00188 }
00189 }
00190 return TRUE;
00191 }
00192
00193
00194
00195
00196
00197
00198
00199
00200 static BOOL Is_Non_Dependent_Expression(WN* wn,
00201 WN* outerloop)
00202 {
00203 switch (WN_operator(wn)) {
00204 case OPR_LDID:
00205 case OPR_ILOAD:
00206 case OPR_IO:
00207 case OPR_RETURN:
00208 #ifdef KEY
00209 case OPR_GOTO_OUTER_BLOCK:
00210 #endif
00211 case OPR_CALL:
00212 case OPR_ICALL:
00213 case OPR_INTRINSIC_CALL:
00214 if (!Is_Non_Dependent_Load(wn, outerloop))
00215 return FALSE;
00216 }
00217
00218 for (INT kid = 0; kid < WN_kid_count(wn); kid++)
00219 if (!Is_Non_Dependent_Expression(WN_kid(wn, kid), outerloop))
00220 return FALSE;
00221 return TRUE;
00222 }
00223
00224
00225
00226
00227
00228
00229
00230 static BOOL SNL_Legal_Perm_Bounds(DOLOOP_STACK* stack,
00231 INT permutation[],
00232 INT nloops)
00233 {
00234 INT first_in_stack = stack->Elements() - nloops;
00235 for (INT i = 0; i < nloops; i++) {
00236 WN* wn_iloop = stack->Bottom_nth(first_in_stack + permutation[i]);
00237 for (INT j = i; j < nloops; j++) {
00238 if (permutation[j] < permutation[i]) {
00239 WN* wn_jloop = stack->Bottom_nth(first_in_stack + permutation[j]);
00240 if (!Is_Non_Dependent_Expression(WN_start(wn_iloop), wn_jloop))
00241 return FALSE;
00242 if (UBvar(WN_end(wn_iloop)) == NULL)
00243 return FALSE;
00244 if (!Is_Non_Dependent_Expression(UBexp(WN_end(wn_iloop)), wn_jloop))
00245 return FALSE;
00246 }
00247 }
00248 }
00249 return TRUE;
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
00259 static BOOL SNL_Legal_Perm_Scalar(DOLOOP_STACK* stack,
00260 WN* wn,
00261 INT permutation[],
00262 INT nloops)
00263 {
00264 if (WN_operator(wn) == OPR_STID) {
00265 if (dg->Get_Vertex(wn))
00266 return FALSE;
00267 WN* wn_loop = stack->Bottom_nth(stack->Elements() - 1);
00268 WN* wn_tp = 0;
00269 for (wn_tp = wn_loop; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
00270 if (WN_opcode(wn_tp) == OPC_DO_LOOP
00271 && SYMBOL(wn) == SYMBOL(WN_index(wn_tp)))
00272 return TRUE;
00273 for (wn_tp = wn; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
00274 if (WN_opcode(wn_tp) == OPC_IF)
00275 break;
00276 if (wn_tp != NULL)
00277 return FALSE;
00278 return TRUE;
00279 } else if (WN_operator(wn) == OPR_LDID) {
00280 if (dg->Get_Vertex(wn))
00281 return FALSE;
00282 if (red_manager != NULL && red_manager->Which_Reduction(wn) != RED_NONE)
00283 return TRUE;
00284 WN* wn_loop = stack->Bottom_nth(stack->Elements() - 1);
00285 for (WN* wn_tp = wn_loop; wn_tp != NULL; wn_tp = LWN_Get_Parent(wn_tp))
00286 if (WN_opcode(wn_tp) == OPC_DO_LOOP
00287 && SYMBOL(wn) == SYMBOL(WN_index(wn_tp)))
00288 return TRUE;
00289 DEF_LIST *def_list = du->Ud_Get_Def(wn);
00290 if (def_list == NULL)
00291 return TRUE;
00292 if (def_list->Incomplete())
00293 return FALSE;
00294 if (def_list->Loop_stmt() == NULL)
00295 return TRUE;
00296 INT kernel_section = SNL_Kernel_Section(def_list->Loop_stmt(), stack,
00297 permutation, nloops);
00298 if (kernel_section == KERNEL_MIDDLE || kernel_section == KERNEL_INNER)
00299 return FALSE;
00300 }
00301 return TRUE;
00302 }
00303
00304
00305
00306
00307
00308
00309
00310
00311 static BOOL SNL_Legal_Perm_Scalars(DOLOOP_STACK* stack,
00312 WN* wn,
00313 INT permutation[],
00314 INT nloops)
00315 {
00316 if (!SNL_Legal_Perm_Scalar(stack, wn, permutation, nloops))
00317 return FALSE;
00318 if (WN_opcode(wn) == OPC_BLOCK) {
00319 for (WN* wn_tp = WN_first(wn); wn_tp != NULL; wn_tp = WN_next(wn_tp))
00320 if (!SNL_Legal_Perm_Scalars(stack, wn_tp, permutation, nloops))
00321 return FALSE;
00322 } else {
00323 for (INT i = 0; i < WN_kid_count(wn); i++)
00324 if (!SNL_Legal_Perm_Scalars(stack, WN_kid(wn, i), permutation, nloops))
00325 return FALSE;
00326 }
00327 return TRUE;
00328 }
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338 static BOOL SNL_Depv_Is_Permutable(DEPV_ARRAY* depv_array,
00339 INT array_index,
00340 DOLOOP_STACK* stack,
00341 INT permutation[],
00342 INT nloops)
00343 {
00344 DEPV* depv = depv_array->Depv(array_index);
00345 INT depv_lb = depv_array->Num_Unused_Dim();
00346 INT depv_ub = depv_array->Num_Unused_Dim() + depv_array->Num_Dim() - 1;
00347 INT loop_lb = Do_Loop_Depth(stack->Bottom_nth(0));
00348 INT loop_ub = loop_lb + nloops - 1;
00349 INT i;
00350 for (i = loop_lb; i < depv_lb; i++)
00351 if (permutation[i - loop_lb] != i - loop_lb)
00352 return FALSE;
00353 for (i = depv_ub + 1; i <= loop_ub; i++)
00354 if (permutation[i - loop_lb] != i - loop_lb)
00355 return FALSE;
00356 for (i = depv_lb; i <= depv_ub; i++) {
00357 INT dep_index = i - depv_lb;
00358 if (i >= loop_lb && i <= loop_ub)
00359 dep_index = permutation[i - loop_lb] + loop_lb - depv_lb;
00360 DIRECTION dir = DEP_Direction(DEPV_Dep(depv, dep_index));
00361 if (dir == DIR_POS)
00362 return TRUE;
00363 if (dir == DIR_NEG || dir == DIR_POSNEG || dir == DIR_NEGEQ
00364 || dir == DIR_STAR)
00365 return FALSE;
00366 }
00367 return TRUE;
00368 }
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379 static BOOL SNL_Legal_Perm_Array(DOLOOP_STACK* stack,
00380 WN* wn,
00381 INT permutation[],
00382 INT nloops,
00383 HASH_TABLE<EINDEX16,INT>* edge_table,
00384 BOOL extended_test)
00385 {
00386 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00387 OPERATOR opr = WN_operator(wn);
00388 if (opr != OPR_ILOAD && opr != OPR_ISTORE && opr != OPR_LDID
00389 && opr != OPR_STID)
00390 return TRUE;
00391 VINDEX16 v = dg->Get_Vertex(wn);
00392 if (v == 0) {
00393 if (opr == OPR_LDID || opr == OPR_STID)
00394 return TRUE;
00395 else
00396 return FALSE;
00397 }
00398 EINDEX16 e = 0;
00399 if (extended_test) {
00400 WN* wn_enclosing = Enclosing_Loop(wn);
00401 KERNEL_SECTION ks_enclosing = SNL_Kernel_Section(wn_enclosing,
00402 stack, permutation, nloops);
00403 WN* wn_enclosing_body = Enclosing_Loop_Body(wn);
00404 if (wn_enclosing_body == NULL)
00405 return (ks_enclosing == KERNEL_NONE || ks_enclosing == KERNEL_OUTER);
00406 KERNEL_SECTION ks_enclosing_body = SNL_Kernel_Section(wn_enclosing_body,
00407 stack, permutation, nloops);
00408 if (ks_enclosing == KERNEL_NONE || ks_enclosing == KERNEL_OUTER) {
00409 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
00410 WN* wn_loop = Enclosing_Loop(dg->Get_Wn(dg->Get_Source(e)));
00411 KERNEL_SECTION ks_enclosing = SNL_Kernel_Section(wn_loop, stack,
00412 permutation, nloops);
00413 if (!(ks_enclosing == KERNEL_NONE || ks_enclosing == KERNEL_OUTER))
00414 return FALSE;
00415 }
00416 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
00417 WN* wn_loop = Enclosing_Loop(dg->Get_Wn(dg->Get_Sink(e)));
00418 KERNEL_SECTION ks_enclosing = SNL_Kernel_Section(wn_loop, stack,
00419 permutation, nloops);
00420 if (!(ks_enclosing == KERNEL_NONE || ks_enclosing == KERNEL_OUTER))
00421 return FALSE;
00422 }
00423 return TRUE;
00424 }
00425 if (ks_enclosing_body != KERNEL_INNER)
00426 return FALSE;
00427 }
00428 for (e = dg->Get_In_Edge(v); e != 0; e = dg->Get_Next_In_Edge(e)) {
00429 if (edge_table->Find(e))
00430 continue;
00431 edge_table->Enter(e, 1);
00432 DEPV_ARRAY* depv_array = dg->Depv_Array(e);
00433 for (INT i = 0; i < depv_array->Num_Vec(); i++)
00434 if (!SNL_Depv_Is_Permutable(depv_array, i, stack, permutation, nloops))
00435 return FALSE;
00436 }
00437 for (e = dg->Get_Out_Edge(v); e != 0; e = dg->Get_Next_Out_Edge(e)) {
00438 if (edge_table->Find(e))
00439 continue;
00440 edge_table->Enter(e, 1);
00441 DEPV_ARRAY* depv_array = dg->Depv_Array(e);
00442 for (INT i = 0; i < depv_array->Num_Vec(); i++)
00443 if (!SNL_Depv_Is_Permutable(depv_array, i, stack, permutation, nloops))
00444 return FALSE;
00445 }
00446 return TRUE;
00447 }
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458 extern BOOL SNL_Legal_Perm_Arrays(DOLOOP_STACK* stack,
00459 WN* wn,
00460 INT permutation[],
00461 INT nloops,
00462 HASH_TABLE<EINDEX16,INT>* edge_table,
00463 BOOL extended_test)
00464 {
00465 if (!SNL_Legal_Perm_Array(stack, wn, permutation, nloops, edge_table,
00466 extended_test))
00467 return FALSE;
00468
00469 if (WN_opcode(wn) == OPC_BLOCK) {
00470 for (WN* wn_tp = WN_first(wn); wn_tp != NULL; wn_tp = WN_next(wn_tp))
00471 if (!SNL_Legal_Perm_Arrays(stack, wn_tp, permutation, nloops, edge_table, extended_test))
00472 return FALSE;
00473 } else {
00474 for (INT i = 0; i < WN_kid_count(wn); i++) {
00475 if (!SNL_Legal_Perm_Arrays(stack, WN_kid(wn, i), permutation, nloops,
00476 edge_table, extended_test))
00477 return FALSE;
00478 }
00479 }
00480 return TRUE;
00481 }
00482
00483
00484
00485
00486
00487
00488
00489
00490 static BOOL SNL_Good_Perm_Loops(DOLOOP_STACK* stack,
00491 INT permutation[],
00492 INT nloops)
00493 {
00494 INT first_in_stack = stack->Elements() - nloops;
00495 INT i;
00496 for (i = 0; i < nloops; i++)
00497 if (permutation[i] != i)
00498 break;
00499 INT low_index = i;
00500 if (low_index == nloops)
00501 return TRUE;
00502 for (i = nloops - 1; i >= 0; i--)
00503 if (permutation[i] != i)
00504 break;
00505 INT high_index = i;
00506 for (i = low_index; i <= high_index; i++) {
00507 WN* wn_loop = stack->Bottom_nth(first_in_stack + i);
00508 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00509 if (dli->Has_Bad_Mem)
00510 return FALSE;
00511 }
00512 WN* wn_start = stack->Bottom_nth(first_in_stack + high_index);
00513 WN* wn_stop = stack->Bottom_nth(first_in_stack + low_index);
00514 for (WN* wn = wn_start; ; wn = LWN_Get_Parent(wn)) {
00515 if (WN_opcode(wn) == OPC_REGION && Is_Mp_Region(wn))
00516 return FALSE;
00517 if (wn == wn_stop)
00518 break;
00519 }
00520 return TRUE;
00521 }
00522
00523
00524
00525
00526
00527
00528
00529 static BOOL SNL_Parallel_Serial_Order_OK(DOLOOP_STACK* stack,
00530 INT permutation[],
00531 INT nloops)
00532 {
00533 BOOL found_serial = FALSE;
00534 INT first_in_stack = stack->Elements() - nloops;
00535 for (INT i = 0; i < nloops; i++) {
00536 WN* wn_loop = stack->Bottom_nth(first_in_stack + permutation[i]);
00537 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00538 if (dli->Mp_Info != NULL) {
00539 if (found_serial)
00540 return FALSE;
00541 } else {
00542 found_serial = TRUE;
00543 }
00544 }
00545 return TRUE;
00546 }
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557 extern BOOL SNL_Legal_Permutation(WN* outer_loop,
00558 WN* inner_loop,
00559 INT permutation[],
00560 INT nloops)
00561 {
00562 dg = Array_Dependence_Graph;
00563 du = Du_Mgr;
00564 DOLOOP_STACK stack(&LNO_local_pool);
00565 Build_Doloop_Stack(inner_loop, &stack);
00566 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(inner_loop);
00567 INT first_in_stack = dli_inner->Depth - nloops + 1;
00568 INT i;
00569 for (i = 0; i < nloops; i++)
00570 if (permutation[i] != i)
00571 break;
00572 if (i == nloops)
00573 return TRUE;
00574 WN* wn_outer_loop = stack.Bottom_nth(first_in_stack + i);
00575 for (i = nloops - 1; i >= 0; i--)
00576 if (permutation[i] != i)
00577 break;
00578 WN* wn_inner_loop = stack.Bottom_nth(first_in_stack + i);
00579 if (!SNL_Parallel_Serial_Order_OK(&stack, permutation, nloops))
00580 return FALSE;
00581 if (!SNL_Good_Perm_Loops(&stack, permutation, nloops))
00582 return FALSE;
00583 if (!SNL_Legal_Perm_Bounds(&stack, permutation, nloops))
00584 return FALSE;
00585 if (!SNL_Legal_Perm_Scalars(&stack, WN_do_body(inner_loop), permutation,
00586 nloops))
00587 return FALSE;
00588 if (!Sandwiched_Code_Sinkable_In(wn_outer_loop, wn_inner_loop, du))
00589 return FALSE;
00590 INT hash_table_size = MIN(dg->Get_Edge_Count(), 512);
00591 HASH_TABLE<EINDEX16,INT> edge_table(hash_table_size, &LNO_local_pool);
00592 if (!SNL_Legal_Perm_Arrays(&stack, outer_loop, permutation, nloops,
00593 &edge_table, TRUE))
00594 return FALSE;
00595 return TRUE;
00596 }
00597
00598
00599
00600
00601
00602
00603
00604 static void Tlog_Lego_Interchange(WN* outer_loop,
00605 INT permutation[],
00606 INT nloops)
00607 {
00608 char tlog_instring[TLOG_STRING_LENGTH];
00609 char tlog_outstring[TLOG_STRING_LENGTH];
00610 INT char_count = 0;
00611 char_count += sprintf(tlog_instring + char_count, "(");
00612 INT i;
00613 for (i = 0; i < nloops; i++) {
00614 const char *name =
00615 WB_Whirl_Symbol(SNL_Get_Inner_Snl_Loop(outer_loop, i + 1));
00616 if (strlen(tlog_instring) + strlen(name) + 1 < TLOG_STRING_LENGTH)
00617 char_count += sprintf(tlog_instring + char_count, "%s", name);
00618 if (i < nloops - 1 && strlen(tlog_instring) + 2 < TLOG_STRING_LENGTH)
00619 char_count += sprintf(tlog_instring + char_count, ",");
00620 }
00621 if (strlen(tlog_instring) + 2 < TLOG_STRING_LENGTH)
00622 char_count += sprintf(tlog_instring + char_count, ")");
00623 char_count = 0;
00624 char_count += sprintf(tlog_outstring + char_count, "(");
00625 for (i = 0; i < nloops; i++) {
00626 const char *name = WB_Whirl_Symbol(SNL_Get_Inner_Snl_Loop(outer_loop,
00627 permutation[i] + 1));
00628 if (strlen(tlog_outstring) + strlen(name) + 1 < TLOG_STRING_LENGTH)
00629 char_count += sprintf(tlog_outstring + char_count, "%s", name);
00630 if (i < nloops - 1 && strlen(tlog_outstring) + 2 < TLOG_STRING_LENGTH)
00631 char_count += sprintf(tlog_outstring + char_count, ",");
00632 }
00633 if (strlen(tlog_outstring) + 2 < TLOG_STRING_LENGTH)
00634 char_count += sprintf(tlog_outstring + char_count, ")");
00635 char *tmp_char = (char *) WB_Whirl_Symbol(outer_loop);
00636 Generate_Tlog("LNO", "lego_interchange",
00637 Srcpos_To_Line(WN_linenum(outer_loop)), tmp_char,
00638 tlog_instring, tlog_outstring, "");
00639 }
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650 static BOOL SNL_Lift_Lego_Tile_Loops_Once(WN* outer_loop,
00651 WN* inner_loop)
00652 {
00653 BOOL successful = FALSE;
00654 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00655 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(inner_loop);
00656 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00657 &LNO_local_pool);
00658 Hoist_Statements(outer_loop, du);
00659 DOLOOP_STACK rebuild_stack(&LNO_local_pool);
00660 Build_Doloop_Stack(LWN_Get_Parent(outer_loop), &rebuild_stack);
00661 LNO_Build_Access(outer_loop, &rebuild_stack, &LNO_default_pool);
00662 Build_Doloop_Stack(inner_loop, stack);
00663 INT nloops = dli_inner->Depth - dli_outer->Depth + 1;
00664 INT first_in_stack = dli_inner->Depth - nloops + 1;
00665 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00666 INT j = 0;
00667 INT i;
00668 for (i = first_in_stack; i < stack->Elements(); i++) {
00669 WN* wn_loop = stack->Bottom_nth(i);
00670 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00671 if (dli_loop->Is_Outer_Lego_Tile)
00672 permutation[j++] = i - first_in_stack;
00673 }
00674 for (i = first_in_stack; i < stack->Elements(); i++) {
00675 WN* wn_loop = stack->Bottom_nth(i);
00676 DO_LOOP_INFO* dli_loop = Get_Do_Loop_Info(wn_loop);
00677 if (!dli_loop->Is_Outer_Lego_Tile)
00678 permutation[j++] = i - first_in_stack;
00679 }
00680 if (SNL_Legal_Permutation(outer_loop, inner_loop, permutation, nloops)) {
00681 if (LNO_Tlog)
00682 Tlog_Lego_Interchange(outer_loop, permutation, nloops);
00683 SNL_INV_Permute_Loops(outer_loop, permutation, nloops, TRUE);
00684 successful = TRUE;
00685 }
00686 CXX_DELETE_ARRAY(permutation, &LNO_local_pool);
00687 return successful;
00688 }
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699 static void SNL_Lift_Lego_Tile_Loops_Shifts(WN* outer_loop,
00700 WN* inner_loop)
00701 {
00702 DO_LOOP_INFO* dli_outer = Get_Do_Loop_Info(outer_loop);
00703 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(inner_loop);
00704 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00705 &LNO_local_pool);
00706 Hoist_Statements(outer_loop, du);
00707 DOLOOP_STACK rebuild_stack(&LNO_local_pool);
00708 Build_Doloop_Stack(LWN_Get_Parent(outer_loop), &rebuild_stack);
00709 LNO_Build_Access(outer_loop, &rebuild_stack, &LNO_default_pool);
00710 Build_Doloop_Stack(inner_loop, stack);
00711 INT nloops = dli_inner->Depth - dli_outer->Depth + 1;
00712 INT first_in_stack = dli_inner->Depth - nloops + 1;
00713 INT* permutation = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00714 for (INT i = 0; i < nloops; i++) {
00715 WN* wn_loop = stack->Bottom_nth(first_in_stack + i);
00716 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
00717 if (dli->Is_Outer_Lego_Tile)
00718 continue;
00719 INT j = i + 1;
00720 while (TRUE) {
00721 for (; j < nloops; j++) {
00722 WN* wn_next = stack->Bottom_nth(first_in_stack + j);
00723 DO_LOOP_INFO* dli_next = Get_Do_Loop_Info(wn_next);
00724 if (dli_next->Is_Outer_Lego_Tile)
00725 break;
00726 }
00727 if (j == nloops)
00728 break;
00729 INT k;
00730 for (k = 0; k < i; k++)
00731 permutation[k] = k;
00732 permutation[i] = j;
00733 for (k = i + 1; k <= j; k++)
00734 permutation[k] = k - 1;
00735 for (k = j + 1; k < nloops; k++)
00736 permutation[k] = k;
00737 if (SNL_Legal_Permutation(outer_loop, inner_loop, permutation, nloops)) {
00738 if (LNO_Tlog)
00739 Tlog_Lego_Interchange(outer_loop, permutation, nloops);
00740 SNL_INV_Permute_Loops(outer_loop, permutation, nloops, TRUE);
00741 WN* wn_temp = stack->Bottom_nth(first_in_stack + i);
00742 for (k = i + 1; k <= j; k++)
00743 stack->Bottom_nth(first_in_stack + k - 1)
00744 = stack->Bottom_nth(first_in_stack + k);
00745 stack->Bottom_nth(first_in_stack + j) = wn_temp;
00746 outer_loop = stack->Bottom_nth(first_in_stack);
00747 inner_loop = stack->Bottom_nth(first_in_stack + nloops - 1);
00748 break;
00749 }
00750 j++;
00751 }
00752 }
00753 CXX_DELETE_ARRAY(permutation, &LNO_local_pool);
00754 CXX_DELETE(stack, &LNO_local_pool);
00755 }
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766 static void SNL_Find_Traverse(WN* wn_tree,
00767 STACK<WN*>* outer_stack,
00768 STACK<WN*>* inner_stack,
00769 STACK<WN*>* local_stack)
00770 {
00771 if (WN_opcode(wn_tree) == OPC_BLOCK) {
00772 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
00773 SNL_Find_Traverse(wn, outer_stack, inner_stack, local_stack);
00774 } else {
00775 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
00776 SNL_Find_Traverse(WN_kid(wn_tree, i), outer_stack, inner_stack,
00777 local_stack);
00778 }
00779 switch (WN_opcode(wn_tree)) {
00780 case OPC_DO_LOOP:
00781 {
00782 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
00783 if (dli->Is_Inner) {
00784 outer_stack->Push(wn_tree);
00785 inner_stack->Push(wn_tree);
00786 local_stack->Push(wn_tree);
00787 } else if (local_stack->Elements() > 0) {
00788 INT inner_count = 0;
00789 for (INT i = 0; i < local_stack->Elements(); i++) {
00790 WN* wn = local_stack->Bottom_nth(i);
00791 switch (WN_opcode(wn)) {
00792 case OPC_DO_LOOP:
00793 {
00794 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(wn);
00795 if (dli_inner->Depth == dli->Depth + 1)
00796 inner_count++;
00797 }
00798 break;
00799 case OPC_DO_WHILE:
00800 case OPC_WHILE_DO:
00801 case OPC_IF:
00802 break;
00803 }
00804 if (inner_count > 1)
00805 break;
00806 }
00807 if (inner_count == 1) {
00808 local_stack->Push(wn_tree);
00809 outer_stack->Top_nth(0) = wn_tree;
00810 } else {
00811 local_stack->Clear();
00812 }
00813 }
00814 }
00815 break;
00816 case OPC_DO_WHILE:
00817 case OPC_WHILE_DO:
00818 local_stack->Clear();
00819 break;
00820 case OPC_IF:
00821 INT i;
00822 for (i = 0; i < local_stack->Elements(); i++)
00823 if (Wn_Is_Inside(local_stack->Bottom_nth(i), wn_tree))
00824 break;
00825 if (i < local_stack->Elements())
00826 local_stack->Clear();
00827 break;
00828 }
00829 }
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839 extern void SNL_Find(WN* wn_root,
00840 STACK<WN*>* outer_stack,
00841 STACK<WN*>* inner_stack)
00842 {
00843 STACK<WN*> local_stack(&LNO_local_pool);
00844 SNL_Find_Traverse(wn_root, outer_stack, inner_stack, &local_stack);
00845 }
00846
00847
00848
00849
00850
00851
00852
00853 extern void Lego_Interchange(WN* wn_root)
00854 {
00855 dg = Array_Dependence_Graph;
00856 du = Du_Mgr;
00857 STACK<WN*> outer_stack(&LNO_local_pool);
00858 STACK<WN*> inner_stack(&LNO_local_pool);
00859 SNL_Find(wn_root, &outer_stack, &inner_stack);
00860 FmtAssert(outer_stack.Elements() == inner_stack.Elements(),
00861 ("Unmatched outer and inner stacks while finding SNLs"));
00862 for (INT i = 0; i < outer_stack.Elements(); i++) {
00863 WN* outer_loop = outer_stack.Bottom_nth(i);
00864 WN* inner_loop = inner_stack.Bottom_nth(i);
00865 if (outer_loop == inner_loop)
00866 continue;
00867
00868 SNL_Lift_Lego_Tile_Loops_Shifts(outer_loop, inner_loop);
00869 }
00870 }
00871
00872
00873 static void Lego_Peel_Traverse(WN* wn_tree);
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884 static void Lego_Block_Peel_Traverse(WN* wn_tree) {
00885 Is_True (wn_tree && LWN_Get_Parent(wn_tree) &&
00886 WN_opcode(LWN_Get_Parent(wn_tree)) == OPC_BLOCK,
00887 ("Incorrect arguments to Lego_Block_Peel_Traverse"));
00888
00889 WN* wn_prev = WN_prev(wn_tree);
00890 Lego_Peel_Traverse(wn_tree);
00891 if (wn_prev != WN_prev(wn_tree)) {
00892
00893
00894
00895
00896
00897 WN* wn_new;
00898 if (wn_prev == NULL) wn_new = WN_first(LWN_Get_Parent(wn_tree));
00899 else wn_new = WN_next(wn_prev);
00900
00901 while (wn_new != wn_tree) {
00902 Lego_Block_Peel_Traverse(wn_new);
00903 wn_new = WN_next(wn_new);
00904 }
00905 }
00906 }
00907
00908
00909 static const INT PEEL_UNROLL_LIMIT = 20;
00910
00911
00912
00913
00914
00915
00916
00917 static void Lego_Peel_Traverse(WN* wn_tree)
00918 {
00919 if (WN_opcode(wn_tree) == OPC_DO_LOOP) {
00920 INT statement_count = 0;
00921 for (WN* wn = WN_first(WN_do_body(wn_tree)); wn != NULL;
00922 wn = WN_next(wn))
00923 statement_count++;
00924 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_tree);
00925 LEGO_INFO* lgi = dli->Lego_Info;
00926 if (lgi != NULL) {
00927 INT front_peel = lgi->Front_Peel();
00928 BOOL create_loop;
00929 if (front_peel > 0) {
00930 create_loop = (front_peel > 1 &&
00931 (statement_count*front_peel > PEEL_UNROLL_LIMIT));
00932 if (LNO_Verbose) {
00933 fprintf(stdout, "Lego Peeling Loop %s, %d iterations\n",
00934 WB_Whirl_Symbol(wn_tree), front_peel);
00935 fprintf(TFile, "Lego Peeling Loop %s, %d iterations\n",
00936 WB_Whirl_Symbol(wn_tree), front_peel);
00937 }
00938
00939 WN* prev_wn = WN_prev(wn_tree);
00940 Pre_loop_peeling(wn_tree, front_peel, !create_loop);
00941 Pre_Peel_RR_Map_Update (wn_tree, prev_wn, create_loop);
00942 if (create_loop) {
00943 WN* peeled_wn = WN_prev(wn_tree);
00944 Is_True (peeled_wn && WN_opcode(peeled_wn) == OPC_DO_LOOP,
00945 ("Where is the peeled portion?"));
00946 DO_LOOP_INFO* dli = Get_Do_Loop_Info (peeled_wn);
00947 LEGO_INFO* li = dli->Lego_Info;
00948 Is_True (li, ("Where is the lego-info"));
00949 CXX_DELETE (li, LEGO_pool);
00950 dli->Lego_Info = NULL;
00951 }
00952 }
00953 INT back_peel = lgi->Back_Peel();
00954 if (back_peel > 0) {
00955 create_loop = (back_peel > 1 &&
00956 (statement_count*back_peel > PEEL_UNROLL_LIMIT));
00957 if (LNO_Verbose) {
00958 fprintf(stdout, "Lego Peeling Loop %s, %d iterations\n",
00959 WB_Whirl_Symbol(wn_tree), back_peel);
00960 fprintf(TFile, "Lego Peeling Loop %s, %d iterations\n",
00961 WB_Whirl_Symbol(wn_tree), back_peel);
00962 }
00963 WN* next_wn = WN_next(wn_tree);
00964 Post_loop_peeling(wn_tree, back_peel, !create_loop);
00965 Post_Peel_RR_Map_Update (wn_tree, next_wn, create_loop);
00966 if (create_loop) {
00967 WN* peeled_wn = WN_next(wn_tree);
00968 Is_True (peeled_wn && WN_opcode(peeled_wn) == OPC_DO_LOOP,
00969 ("Where is the peeled portion?"));
00970 DO_LOOP_INFO* dli = Get_Do_Loop_Info (peeled_wn);
00971 LEGO_INFO* li = dli->Lego_Info;
00972 Is_True (li, ("Where is the lego-info"));
00973 CXX_DELETE (li, LEGO_pool);
00974 dli->Lego_Info = NULL;
00975 }
00976 }
00977 }
00978 }
00979 if (WN_opcode(wn_tree) == OPC_BLOCK) {
00980 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
00981 Lego_Block_Peel_Traverse(wn);
00982 } else {
00983 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
00984 Lego_Peel_Traverse(WN_kid(wn_tree, i));
00985 }
00986 }
00987
00988
00989
00990
00991
00992
00993
00994 extern void Lego_Peel(WN* wn_root)
00995 {
00996 Lego_Peel_Traverse(wn_root);
00997 }
00998
00999
01000
01001
01002
01003
01004
01005 static BOOL Is_Nested_Do_Across(WN* wn_loop)
01006 {
01007 if (WN_opcode(wn_loop) != OPC_DO_LOOP)
01008 return FALSE;
01009 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01010 if (dli->Mp_Info == NULL)
01011 return FALSE;
01012 return dli->Mp_Info->Nest_Index() == 0
01013 && dli->Mp_Info->Nest_Total() > 1;
01014 }
01015
01016
01017
01018
01019
01020
01021
01022 static BOOL Is_Rectangular_Nested_Doacross(WN* wn_loop)
01023 {
01024 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01025 INT nloops = dli->Mp_Info->Nest_Total();
01026 WN* wn_inner_loop = SNL_Get_Inner_Snl_Loop(wn_loop, nloops);
01027 DOLOOP_STACK stack(&LNO_local_pool);
01028 Build_Doloop_Stack(wn_loop, &stack);
01029 for (INT i = 1; i < nloops; i++)
01030 if (!SNL_Is_Invariant(&stack, 0, i))
01031 return FALSE;
01032 return TRUE;
01033 }
01034
01035
01036
01037
01038
01039
01040
01041 static BOOL Redundant_Pragma(WN* wn_pragma,
01042 WN* wn_region)
01043 {
01044 OPCODE op = WN_opcode(wn_pragma);
01045 FmtAssert(op == OPC_PRAGMA || op == OPC_XPRAGMA,
01046 ("Redundant_Pragma: Expected only OPC_[X]PRAGMA nodes"));
01047
01048 switch (WN_pragma(wn_pragma)) {
01049 case WN_PRAGMA_LOCAL:
01050 case WN_PRAGMA_SHARED:
01051 case WN_PRAGMA_LASTLOCAL:
01052 case WN_PRAGMA_REDUCTION:
01053 case WN_PRAGMA_FIRSTPRIVATE:
01054 case WN_PRAGMA_END_MARKER:
01055 {
01056 WN* wn_first = WN_first(WN_region_pragmas(wn_region));
01057 for (WN* wn = wn_first; wn != NULL; wn = WN_next(wn))
01058 if (WN_pragma(wn) == WN_pragma(wn_pragma)
01059 && WN_st(wn) == WN_st(wn_pragma)
01060 && WN_pragma_arg1(wn) == WN_pragma_arg1(wn_pragma))
01061 return TRUE;
01062 return FALSE;
01063 }
01064 default:
01065 FmtAssert(FALSE, ("Bad pragma in inner nested do across."));
01066 return FALSE;
01067 }
01068 }
01069
01070
01071
01072
01073
01074
01075
01076
01077 static void Mp_Extract_Nested_Pragmas(WN* wn_region,
01078 WN* wn_inregion)
01079 {
01080 WN* wnn = NULL;
01081 WN* wn_pragmas = WN_region_pragmas(wn_region);
01082 WN* wn_first = WN_first(WN_region_pragmas(wn_inregion));
01083 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
01084 wnn = WN_next(wn);
01085 LWN_Extract_From_Block(wn);
01086 if ((WN_opcode(wn) == OPC_PRAGMA || WN_opcode(wn) == OPC_XPRAGMA)
01087 && (WN_pragma(wn) == WN_PRAGMA_DOACROSS
01088 || WN_pragma(wn) == WN_PRAGMA_PDO_BEGIN
01089 || WN_pragma(wn) == WN_PRAGMA_PARALLEL_DO)
01090 || Redundant_Pragma(wn, wn_region))
01091 LWN_Delete_Tree(wn);
01092 else
01093 LWN_Insert_Block_After(wn_pragmas, WN_first(wn_pragmas), wn);
01094 }
01095 }
01096
01097
01098
01099
01100
01101
01102
01103
01104 static void Mp_Remove_Nested_Region(WN* wn_region,
01105 STACK<WN*>* stack_regions)
01106 {
01107 WN* wnn = NULL;
01108 WN* wn_after = wn_region;
01109 WN* wn_first = WN_first(WN_region_body(wn_region));
01110 for (WN* wn = wn_first; wn != NULL; wn = wnn) {
01111 wnn = WN_next(wn);
01112 LWN_Extract_From_Block(wn);
01113 LWN_Insert_Block_After(LWN_Get_Parent(wn_region), wn_after, wn);
01114 wn_after = wn;
01115 }
01116 LWN_Extract_From_Block(wn_region);
01117 stack_regions->Push(wn_region);
01118 }
01119
01120
01121
01122
01123
01124
01125
01126 static WN* Mp_Region_Under_Loop(WN* wn_loop)
01127 {
01128 WN* wn_first = WN_first(WN_do_body(wn_loop));
01129 WN* wn = 0;
01130 for (wn = wn_first; wn != NULL; wn = WN_next(wn))
01131 if (WN_opcode(wn) == OPC_REGION)
01132 break;
01133 return wn;
01134 }
01135
01136
01137
01138
01139
01140
01141
01142
01143 extern void Mp_Compress_Nested_Loop(WN* wn_loop)
01144 {
01145 WN* wn_inloop = NULL;
01146 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_loop);
01147 WN* wn_region = LWN_Get_Parent(LWN_Get_Parent(wn_loop));
01148 INT region_count = dli->Mp_Info->Nest_Index();
01149 INT region_total = dli->Mp_Info->Nest_Total();
01150 FmtAssert(region_count == 0,
01151 ("Bad Nest_Index() in outer nested do across"));
01152 FmtAssert(region_total >= 1,
01153 ("Bad Nest_Total() in outer nested do across"));
01154 STACK<WN*> stack_regions(&LNO_local_pool);
01155 for (WN* wn_inregion = Mp_Region_Under_Loop(wn_loop); TRUE;
01156 wn_inregion = Mp_Region_Under_Loop(wn_inloop)) {
01157 if (wn_inregion == NULL || WN_opcode(wn_inregion) != OPC_REGION)
01158 break;
01159 wn_inloop = NULL;
01160 WN* wn_first = WN_first(WN_region_body(wn_inregion));
01161 for (WN* wn = wn_first; wn != NULL; wn = WN_next(wn))
01162 if (WN_opcode(wn) == OPC_DO_LOOP)
01163 wn_inloop = wn;
01164 FmtAssert(wn_inloop != NULL && WN_opcode(wn_inloop) == OPC_DO_LOOP,
01165 ("Didn't find nested doacross loop as expected."));
01166 DO_LOOP_INFO* dli_inner = Get_Do_Loop_Info(wn_inloop);
01167 region_count++;
01168 FmtAssert(dli_inner->Mp_Info->Nest_Index() == region_count,
01169 ("Bad Nest_Index() in inner nested do across"));
01170 FmtAssert(dli_inner->Mp_Info->Nest_Total() == region_total,
01171 ("Bad Nest_Total() in inner nested do across"));
01172 Mp_Extract_Nested_Pragmas(wn_region, wn_inregion);
01173 Mp_Remove_Nested_Region(wn_inregion, &stack_regions);
01174 }
01175 for (INT i = 0; i < stack_regions.Elements(); i++)
01176 LWN_Delete_Tree(stack_regions.Top_nth(i));
01177 }
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190 extern INT Permutation_Last(INT first,
01191 INT permutation[],
01192 INT nloops)
01193 {
01194 INT last = permutation[first];
01195 for (INT i = first; i < nloops; i++) {
01196 if (permutation[i] > last)
01197 last = permutation[i];
01198 if (last == i)
01199 return last;
01200 }
01201 FmtAssert(TRUE, ("last must be in [0..nloops-1]"));
01202 return nloops;
01203 }
01204