00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #ifdef USE_PCH
00043 #include "lno_pch.h"
00044 #endif // USE_PCH
00045 #pragma hdrstop
00046
00047 #include <sys/types.h>
00048 #include <limits.h>
00049 #include "pu_info.h"
00050 #include "fusion.h"
00051 #include "lwn_util.h"
00052 #include "lnopt_main.h"
00053 #include "errors.h"
00054 #include "dep_graph.h"
00055 #include "lnoutils.h"
00056 #include "ff_utils.h"
00057 #include "wn_simp.h"
00058 #include "soe.h"
00059 #include "cond.h"
00060 #include "config_targ.h"
00061 #include "opt_du.h"
00062 #include "reduc.h"
00063 #include "reverse.h"
00064 #include "glob.h"
00065 #include "fission.h"
00066 #include "tlog.h"
00067 #include "prompf.h"
00068 #include "anl_driver.h"
00069 #include "parallel.h"
00070
00071 #pragma weak New_Construct_Id
00072
00073 typedef HASH_TABLE<INT,void *> INT2PTR;
00074
00075
00076
00077 #define MAX_INT32 0x7fffffff
00078 #define MIN_INT32 0x80000000
00079
00080 static BOOL fusion_initialized=FALSE;
00081
00082 static void fusion_verbose_info(
00083 SRCPOS srcpos1,
00084 SRCPOS srcpos2,
00085 UINT32 fusion_level,
00086 const char* message)
00087 {
00088 printf("#### Fusion(%d+%d:%d): %s\n",
00089 Srcpos_To_Line(srcpos1),
00090 Srcpos_To_Line(srcpos2),
00091 fusion_level, message);
00092 }
00093
00094 static void fusion_analysis_info(
00095 BOOL success,
00096 SRCPOS srcpos1,
00097 SRCPOS srcpos2,
00098 UINT32 fusion_level,
00099 const char* message)
00100 {
00101
00102 if (success)
00103 fprintf(LNO_Analysis,"( LNO_Fusion_Success ");
00104 else
00105 fprintf(LNO_Analysis,"( LNO_Fusion_Failure ");
00106
00107 fprintf(LNO_Analysis,"(%s %d) (%s %d) %d \"%s\" )\n",
00108 Cur_PU_Name, Srcpos_To_Line(srcpos1),
00109 Cur_PU_Name, Srcpos_To_Line(srcpos2),
00110 fusion_level, message);
00111 }
00112
00113 static void fusion_tlog_info(
00114 FISSION_FUSION_STATUS status,
00115 WN* loop1,
00116 WN* loop2,
00117 UINT32 fusion_level,
00118 const char* message)
00119 {
00120 char in_string[30];
00121 char out_string[30];
00122 SRCPOS srcpos1=WN_Get_Linenum(loop1);
00123 SRCPOS srcpos2=WN_Get_Linenum(loop2);
00124
00125 sprintf(in_string,"%d %d %d",
00126 Srcpos_To_Line(srcpos1), Srcpos_To_Line(srcpos2), fusion_level);
00127 sprintf(out_string,"%d",status);
00128 Generate_Tlog("LNO","fusion", Srcpos_To_Line(srcpos1),
00129 ST_name(WN_st(WN_index(loop1))),
00130 in_string, out_string, message);
00131 }
00132
00133
00134 static void pre_peeling_verbose_info(
00135 SRCPOS srcpos,
00136 UINT32 iter_count)
00137 {
00138 printf("#### Pre_peeling(%d): for %d iteration(s)\n",
00139 Srcpos_To_Line(srcpos), iter_count);
00140 }
00141
00142 static void pre_peeling_analysis_info(
00143 SRCPOS srcpos,
00144 UINT32 iter_count)
00145 {
00146
00147 fprintf(LNO_Analysis,"( LNO_Pre_Peel ");
00148
00149 fprintf(LNO_Analysis,"(%s %d) %d )\n",
00150 Cur_PU_Name, Srcpos_To_Line(srcpos), iter_count);
00151 }
00152
00153 static void pre_peeling_tlog_info(
00154 WN* loop,
00155 UINT32 iter_count)
00156 {
00157 char in_string[30];
00158 SRCPOS srcpos=WN_Get_Linenum(loop);
00159
00160 sprintf(in_string,"%d %d", Srcpos_To_Line(srcpos), iter_count);
00161 Generate_Tlog("LNO","pre_peeling", Srcpos_To_Line(srcpos),
00162 ST_name(WN_st(WN_index(loop))),
00163 in_string, "", "");
00164 }
00165
00166 static void post_peeling_verbose_info(
00167 SRCPOS srcpos,
00168 UINT32 iter_count)
00169 {
00170 printf("#### Post_peeling(%d): for %d iteration(s)\n",
00171 Srcpos_To_Line(srcpos), iter_count);
00172 }
00173
00174 static void post_peeling_analysis_info(
00175 SRCPOS srcpos,
00176 UINT32 iter_count)
00177 {
00178
00179 fprintf(LNO_Analysis,"( LNO_Post_Peel ");
00180
00181 fprintf(LNO_Analysis,"(%s %d) %d )\n",
00182 Cur_PU_Name, Srcpos_To_Line(srcpos), iter_count);
00183 }
00184
00185 static void post_peeling_tlog_info(
00186 WN* loop,
00187 UINT32 iter_count)
00188 {
00189 char in_string[30];
00190 SRCPOS srcpos=WN_Get_Linenum(loop);
00191
00192 sprintf(in_string,"%d %d", Srcpos_To_Line(srcpos), iter_count);
00193 Generate_Tlog("LNO","post_peeling", Srcpos_To_Line(srcpos),
00194 ST_name(WN_st(WN_index(loop))),
00195 in_string, "", "");
00196 }
00197
00198 MEM_POOL FUSION_default_pool;
00199
00200 static UINT New_Name_Count=0;
00201
00202
00203 static BOOL loop_var_is_live_on_exit(WN* loop) {
00204
00205 WN* loop_start=WN_start(loop);
00206 USE_LIST *use_list=Du_Mgr->Du_Get_Use(loop_start);
00207
00208 if (use_list->Incomplete())
00209 return TRUE;
00210
00211 USE_LIST_ITER u_iter(use_list);
00212 for (DU_NODE *use_node=(DU_NODE *)u_iter.First(); !u_iter.Is_Empty();
00213 use_node=(DU_NODE *)u_iter.Next()) {
00214
00215 WN* use=use_node->Wn();
00216 while (use != loop && WN_opcode(use)!=OPC_FUNC_ENTRY)
00217 use=LWN_Get_Parent(use);
00218 if (use != loop)
00219 return TRUE;
00220 }
00221
00222 return FALSE;
00223 }
00224
00225
00226
00227
00228
00229
00230 extern WN* Get_Only_Loop_Inside(const WN* wn, BOOL regions_ok) {
00231
00232 WN* wn1=WN_first(WN_do_body(wn));
00233 WN* first_loop=NULL;
00234
00235 while (wn1) {
00236 OPCODE opc=WN_opcode(wn1);
00237 if (opc==OPC_DO_LOOP) {
00238 if (!first_loop)
00239 first_loop=wn1;
00240 else
00241 return NULL;
00242 } else if (opc==OPC_IF) {
00243 IF_INFO* ii=Get_If_Info(wn1);
00244 if (ii->Contains_Do_Loops ||
00245 (ii->Contains_Regions && regions_ok==FALSE)) {
00246 return NULL;
00247 }
00248 } else if (opc==OPC_DO_WHILE || opc==OPC_WHILE_DO) {
00249 return NULL;
00250 } else if (opc==OPC_REGION)
00251 if (regions_ok==FALSE)
00252 return NULL;
00253 wn1=WN_next (wn1);
00254 }
00255 return first_loop;
00256
00257 }
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272 extern INT Compare_Bounds(
00273 WN* bound1,
00274 WN* index1,
00275 WN* bound2,
00276 WN* index2)
00277 {
00278
00279 OPCODE opc1=WN_opcode(bound1);
00280 OPCODE opc2=WN_opcode(bound2);
00281 if (opc1!=opc2)
00282 return -1;
00283
00284 if (!WN_Equiv(bound1,bound2))
00285 if (OPCODE_has_sym(opc1)) {
00286 SYMBOL sym1(bound1);
00287 SYMBOL sym2(bound2);
00288 if (sym1!=sym2) {
00289 SYMBOL loop_sym1(index1);
00290 SYMBOL loop_sym2(index2);
00291 if (sym1!=loop_sym1 || sym2!=loop_sym2)
00292 return -1;
00293 } else
00294 return -1;
00295 } else
00296 return -1;
00297
00298 for (INT kidno=0; kidno<WN_kid_count(bound1); kidno++) {
00299 if (Compare_Bounds(
00300 WN_kid(bound1,kidno),index1,
00301 WN_kid(bound2,kidno),index2)!=0)
00302 return -1;
00303 }
00304
00305 return 0;
00306
00307 }
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 static mINT32* Max_Dep_Distance(
00328 REF_LIST_STACK *source_list, REF_LIST_STACK *sink_list,
00329 mUINT8 common_nest, mUINT8 max_dv_dim, INT32 step[],
00330 BOOL use_bounds)
00331 {
00332
00333 INT i;
00334
00335 mINT32 *max_distance=CXX_NEW_ARRAY(mINT32, max_dv_dim, &FUSION_default_pool);
00336
00337 for (i = 0; i< max_dv_dim; i++) max_distance[i] = MIN_INT32;
00338
00339 if (max_dv_dim>LNO_MAX_DO_LOOP_DEPTH) {
00340 Is_True(0, ("Loops nested too deep (>%d) in Max_Dep_Distance()\n",
00341 LNO_MAX_DO_LOOP_DEPTH));
00342 max_distance[max_dv_dim-1] = MAX_INT32;
00343 return max_distance;
00344 }
00345
00346
00347 for (INT ii=0;ii<source_list->Elements(); ii++) {
00348 for (INT jj=0;jj<sink_list->Elements(); jj++) {
00349 ST *base1 = source_list->Bottom_nth(ii)->ST_Base;
00350 ST *base2 = sink_list->Bottom_nth(jj)->ST_Base;
00351
00352 if (base1 && base2 && (base1 != base2))
00353 continue;
00354
00355 REFERENCE_ITER iter1(source_list->Bottom_nth(ii));
00356 for (REFERENCE_NODE *n1=iter1.First(); !iter1.Is_Empty();
00357 n1=iter1.Next()) {
00358
00359 REDUCTION_TYPE red_type;
00360 if (red_manager)
00361 red_type=red_manager->Which_Reduction(LWN_Get_Parent(n1->Wn));
00362 else
00363 red_type=RED_NONE;
00364
00365 REFERENCE_ITER iter2(sink_list->Bottom_nth(jj));
00366 for (REFERENCE_NODE *n2=iter2.First();!iter2.Is_Empty();
00367 n2=iter2.Next()) {
00368
00369 if (red_type!=RED_NONE &&
00370 red_manager->Which_Reduction(LWN_Get_Parent(n2->Wn))==red_type)
00371 continue;
00372
00373 MEM_POOL_Push(&FUSION_default_pool);
00374
00375 mINT8 local_common_nest =
00376 (n1->Stack->Elements()<n2->Stack->Elements())?
00377 n1->Stack->Elements() : n2->Stack->Elements();
00378 mINT8 dv_dim;
00379
00380 if (local_common_nest>=common_nest) {
00381 local_common_nest = common_nest;
00382 dv_dim = max_dv_dim;
00383 } else
00384 dv_dim = local_common_nest - (common_nest-max_dv_dim);
00385
00386
00387 WN* src_wn=n1->Wn;
00388 WN* sink_wn=n2->Wn;
00389 DEPV_LIST *tmp = CXX_NEW(DEPV_LIST(src_wn,sink_wn, local_common_nest,
00390 dv_dim,use_bounds,&FUSION_default_pool,n1->Stack,n2->Stack),
00391 &FUSION_default_pool);
00392 if (!tmp->Is_Empty()) {
00393
00394 if (LNO_Test_Dump) {
00395
00396 Dump_WN(src_wn, stdout, TRUE, 4, 4);
00397 printf("-->");
00398 Dump_WN(sink_wn, stdout, TRUE, 4, 4);
00399 tmp->Print(stdout);
00400 printf("\n");
00401 }
00402
00403 DEPV_ITER dep_iter(tmp);
00404
00405
00406 for (DEPV_NODE *dn=dep_iter.First();
00407 !dep_iter.Is_Empty(); dn=dep_iter.Next()) {
00408
00409
00410 for (i=0; i<dv_dim; i++) {
00411
00412 DEP dep = DEPV_Dep(dn->Depv,i);
00413 if ( !DEP_IsDistance(dep) ) {
00414 if (i==0) {
00415
00416
00417 MEM_POOL_Pop(&FUSION_default_pool);
00418 max_distance[max_dv_dim-1] = MAX_INT32;
00419 return (max_distance);
00420 } else {
00421
00422
00423
00424 INT j;
00425 for (j=i-1; j>=0; j--)
00426 if (max_distance[j]<0)
00427 break;
00428 if (j<0) {
00429 max_distance[i-1]+=step[i-1];
00430 } else {
00431
00432
00433 max_distance[j]=step[j];
00434 }
00435 break;
00436 }
00437 } else if (max_distance[i] > step[i]*DEP_Distance(dep))
00438 break;
00439
00440 else if (max_distance[i] == step[i]*DEP_Distance(dep))
00441 continue;
00442 else
00443 max_distance[i] = step[i]*DEP_Distance(dep);
00444
00445 }
00446
00447 }
00448 }
00449 MEM_POOL_Pop(&FUSION_default_pool);
00450 }
00451 }
00452 }
00453 }
00454 return(max_distance);
00455 }
00456
00457
00458
00459
00460
00461
00462
00463
00464 static mINT32* Max_Dep_Distance(
00465 WN *in_loop1, WN *in_loop2,
00466 mUINT8 dv_dim, INT32 step[], BOOL use_bounds)
00467 {
00468
00469 WN* wn1;
00470 WN* wn2;
00471 mINT8 i;
00472
00473 wn1 = in_loop1;
00474 wn2 = in_loop2;
00475
00476 WN* loop_body1=WN_do_body(wn1);
00477 WN* loop_body2=WN_do_body(wn2);
00478
00479 for (i=0; i<dv_dim-1; i++) {
00480 wn1 = Get_Only_Loop_Inside(wn1,FALSE);
00481 wn2 = Get_Only_Loop_Inside(wn2,FALSE);
00482 }
00483
00484 mINT32 *max_distance = CXX_NEW_ARRAY(mINT32, dv_dim, &FUSION_default_pool);
00485 for (i = 0; i< dv_dim; i++) max_distance[i] = MIN_INT32;
00486
00487 REF_LIST_STACK *writes1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00488 &FUSION_default_pool);
00489 REF_LIST_STACK *reads1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00490 &FUSION_default_pool);
00491 SCALAR_STACK *scalar_writes1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00492 &FUSION_default_pool);
00493 SCALAR_STACK *scalar_reads1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00494 &FUSION_default_pool);
00495 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
00496 &FUSION_default_pool);
00497 Build_Doloop_Stack(in_loop1, stack1);
00498
00499 REF_LIST_STACK *writes2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00500 &FUSION_default_pool);
00501 REF_LIST_STACK *reads2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00502 &FUSION_default_pool);
00503 SCALAR_STACK *scalar_writes2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00504 &FUSION_default_pool);
00505 SCALAR_STACK *scalar_reads2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00506 &FUSION_default_pool);
00507 SCALAR_REF_STACK *params1 =
00508 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
00509 SCALAR_REF_STACK *params2 =
00510 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
00511 DOLOOP_STACK *stack2=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
00512 &FUSION_default_pool);
00513 Build_Doloop_Stack(in_loop2, stack2);
00514
00515
00516 INT32 status = 0;
00517 Init_Ref_Stmt_Counter();
00518 WN* wn = 0;
00519 for (wn=WN_first(loop_body1); wn && status!= -1; wn=WN_next(wn)) {
00520 status=New_Gather_References(wn,writes1,reads1,stack1,
00521 scalar_writes1,scalar_reads1,
00522 params1,&FUSION_default_pool);
00523 }
00524 if (status == -1) {
00525 max_distance[dv_dim-1] = MAX_INT32;
00526 return max_distance;
00527 }
00528
00529
00530 for (wn=WN_first(loop_body2); wn && status!= -1; wn=WN_next(wn)) {
00531 status=New_Gather_References(wn,writes2,reads2,stack2,
00532 scalar_writes2,scalar_reads2,
00533 params2,&FUSION_default_pool);
00534 }
00535 if (status == -1) {
00536 max_distance[dv_dim-1] = MAX_INT32;
00537 return max_distance;
00538 }
00539
00540 mINT32 *tmp;
00541 mUINT8 common_nest =
00542 ((DO_LOOP_INFO*)WN_MAP_Get(LNO_Info_Map,in_loop1))->Depth+dv_dim;
00543
00544 BOOL abort=FALSE;
00545
00546
00547
00548 tmp = Max_Dep_Distance(writes2, writes1, common_nest,
00549 dv_dim, step, use_bounds);
00550 for (i = 0; i< dv_dim; i++) {
00551 if (tmp[i] > max_distance[i]) max_distance[i] = tmp[i];
00552
00553 if (tmp[i] == MAX_INT32)
00554 abort = TRUE;
00555 }
00556 if (abort) {
00557 max_distance[dv_dim-1] = MAX_INT32;
00558 return max_distance;
00559 }
00560
00561
00562
00563 tmp = Max_Dep_Distance(writes2, reads1, common_nest,
00564 dv_dim, step, use_bounds);
00565 for (i = 0; i< dv_dim; i++) {
00566 if (tmp[i] > max_distance[i]) max_distance[i] = tmp[i];
00567 if (tmp[i] == MAX_INT32)
00568 abort = TRUE;
00569 }
00570 if (abort) {
00571 max_distance[dv_dim-1] = MAX_INT32;
00572 return max_distance;
00573 }
00574
00575
00576
00577 tmp = Max_Dep_Distance(reads2, writes1, common_nest,
00578 dv_dim, step, use_bounds);
00579 for (i = 0; i< dv_dim; i++) {
00580 if (tmp[i] > max_distance[i]) max_distance[i] = tmp[i];
00581 if (tmp[i] == MAX_INT32)
00582 abort = TRUE;
00583 }
00584 if (abort) {
00585 max_distance[dv_dim-1] = MAX_INT32;
00586 return max_distance;
00587 }
00588
00589 return max_distance;
00590
00591 }
00592
00593 static WN* Scalar_Dependence_Prevent_Fusion(WN* in_loop1, WN* in_loop2)
00594 {
00595 MEM_POOL_Push(&FUSION_default_pool);
00596
00597 REF_LIST_STACK *writes1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00598 &FUSION_default_pool);
00599 REF_LIST_STACK *reads1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00600 &FUSION_default_pool);
00601 SCALAR_STACK *scalar_writes1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00602 &FUSION_default_pool);
00603 SCALAR_STACK *scalar_reads1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00604 &FUSION_default_pool);
00605 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
00606 &FUSION_default_pool);
00607 Build_Doloop_Stack(in_loop1, stack1);
00608
00609 REF_LIST_STACK *writes2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00610 &FUSION_default_pool);
00611 REF_LIST_STACK *reads2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
00612 &FUSION_default_pool);
00613 SCALAR_STACK *scalar_writes2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00614 &FUSION_default_pool);
00615 SCALAR_STACK *scalar_reads2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
00616 &FUSION_default_pool);
00617 SCALAR_REF_STACK *params1 =
00618 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
00619 SCALAR_REF_STACK *params2 =
00620 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
00621 DOLOOP_STACK *stack2=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
00622 &FUSION_default_pool);
00623 Build_Doloop_Stack(in_loop2, stack2);
00624
00625 Init_Ref_Stmt_Counter();
00626 New_Gather_References(in_loop1,writes1,reads1,stack1,
00627 scalar_writes1,scalar_reads1, params1,&FUSION_default_pool,
00628 Gather_Scalar_Refs | Gather_Params);
00629
00630 New_Gather_References(in_loop2,writes2,reads2,stack2,
00631 scalar_writes2,scalar_reads2, params2,&FUSION_default_pool,
00632 Gather_Scalar_Refs | Gather_Params);
00633
00634 INT si;
00635 for (si=0; si<scalar_writes1->Elements(); si++) {
00636 SCALAR_NODE* sinode=scalar_writes1->Bottom_nth(si);
00637 WN* write1=sinode->Bottom_nth(0)->Wn;
00638 INT sj;
00639 for (sj=0; sj<scalar_writes2->Elements(); sj++) {
00640 SCALAR_NODE* sjnode=scalar_writes2->Bottom_nth(sj);
00641 WN* write2=sjnode->Bottom_nth(0)->Wn;
00642 if (Aliased(Alias_Mgr,write1,write2)!=NOT_ALIASED) {
00643 for (INT sii=0; sii<sinode->Elements(); sii++) {
00644 write1=sinode->Bottom_nth(sii)->Wn;
00645 REDUCTION_TYPE red_type;
00646 if (red_manager)
00647 red_type=red_manager->Which_Reduction(write1);
00648 else
00649 red_type=RED_NONE;
00650 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
00651 write2=sjnode->Bottom_nth(sjj)->Wn;
00652 if (red_type==RED_NONE ||
00653 red_manager->Which_Reduction(write2)!=red_type) {
00654 MEM_POOL_Pop(&FUSION_default_pool);
00655 return write1;
00656 }
00657 }
00658 }
00659 }
00660 }
00661 for (sj=0; sj<scalar_reads2->Elements(); sj++) {
00662 SCALAR_NODE* sjnode=scalar_reads2->Bottom_nth(sj);
00663 WN* read2=sjnode->Bottom_nth(0)->Wn;
00664 if (Aliased(Alias_Mgr,write1,read2)!=NOT_ALIASED) {
00665 for (INT sii=0; sii<sinode->Elements(); sii++) {
00666 write1=sinode->Bottom_nth(sii)->Wn;
00667 REDUCTION_TYPE red_type;
00668 if (red_manager)
00669 red_type=red_manager->Which_Reduction(write1);
00670 else
00671 red_type=RED_NONE;
00672 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
00673 read2=sjnode->Bottom_nth(sjj)->Wn;
00674 if (red_type==RED_NONE ||
00675 red_manager->Which_Reduction(read2)!=red_type) {
00676 MEM_POOL_Pop(&FUSION_default_pool);
00677 return write1;
00678 }
00679 }
00680 }
00681 }
00682 }
00683 for (sj=0; sj<params2->Elements(); sj++) {
00684 SCALAR_REF sjnode=params2->Bottom_nth(sj);
00685 WN* param=sjnode.Wn;
00686
00687
00688
00689
00690
00691 if (Aliased(Alias_Mgr,param,write1)!=NOT_ALIASED) {
00692 MEM_POOL_Pop(&FUSION_default_pool);
00693 return write1;
00694 }
00695 }
00696 }
00697 for (si=0; si<scalar_reads1->Elements(); si++) {
00698 SCALAR_NODE* sinode=scalar_reads1->Bottom_nth(si);
00699 WN* read1=sinode->Bottom_nth(0)->Wn;
00700 for (INT sj=0; sj<params2->Elements(); sj++) {
00701 SCALAR_REF sjnode=params2->Bottom_nth(sj);
00702 WN* param=sjnode.Wn;
00703
00704
00705
00706
00707
00708 if (Aliased(Alias_Mgr,param,read1)!=NOT_ALIASED) {
00709 MEM_POOL_Pop(&FUSION_default_pool);
00710 return read1;
00711 }
00712 }
00713 }
00714
00715 for (si=0; si<scalar_writes2->Elements(); si++) {
00716 SCALAR_NODE* sinode=scalar_writes2->Bottom_nth(si);
00717 WN* write2=sinode->Bottom_nth(0)->Wn;
00718 INT sj;
00719 for (sj=0; sj<scalar_reads1->Elements(); sj++) {
00720 SCALAR_NODE* sjnode=scalar_reads1->Bottom_nth(sj);
00721 WN* read1=sjnode->Bottom_nth(0)->Wn;
00722 if (Aliased(Alias_Mgr,write2,read1)!=NOT_ALIASED) {
00723 for (INT sii=0; sii<sinode->Elements(); sii++) {
00724 write2=sinode->Bottom_nth(sii)->Wn;
00725 REDUCTION_TYPE red_type;
00726 if (red_manager)
00727 red_type=red_manager->Which_Reduction(write2);
00728 else
00729 red_type=RED_NONE;
00730 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
00731 read1=sjnode->Bottom_nth(sjj)->Wn;
00732 if (red_type==RED_NONE ||
00733 red_manager->Which_Reduction(read1)!=red_type) {
00734 MEM_POOL_Pop(&FUSION_default_pool);
00735 return write2;
00736 }
00737 }
00738 }
00739 }
00740 }
00741 for (sj=0; sj<params1->Elements(); sj++) {
00742 SCALAR_REF sjnode=params1->Bottom_nth(sj);
00743 WN* param=sjnode.Wn;
00744
00745
00746
00747
00748
00749 if (Aliased(Alias_Mgr,param,write2)!=NOT_ALIASED) {
00750 MEM_POOL_Pop(&FUSION_default_pool);
00751 return write2;
00752 }
00753 }
00754 }
00755 for (si=0; si<scalar_reads2->Elements(); si++) {
00756 SCALAR_NODE* sinode=scalar_reads2->Bottom_nth(si);
00757 WN* read2=sinode->Bottom_nth(0)->Wn;
00758 for (INT sj=0; sj<params1->Elements(); sj++) {
00759 SCALAR_REF sjnode=params1->Bottom_nth(sj);
00760 WN* param=sjnode.Wn;
00761
00762
00763
00764
00765
00766 if (Aliased(Alias_Mgr,param,read2)!=NOT_ALIASED) {
00767 MEM_POOL_Pop(&FUSION_default_pool);
00768 return read2;
00769 }
00770 }
00771 }
00772
00773 MEM_POOL_Pop(&FUSION_default_pool);
00774
00775 return NULL;
00776
00777 }
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789 static FISSION_FUSION_STATUS
00790 Fuse_Test(WN* in_loop1, WN* in_loop2, mUINT8 fusion_level, UINT32 threshold,
00791 UINT64 *prolog_out, UINT64 *epilog_out, WN** epilog_loop_out,
00792 mINT32 offset[])
00793 {
00794
00795 WN* loop_nest1[LNO_MAX_DO_LOOP_DEPTH];
00796 WN* loop_nest2[LNO_MAX_DO_LOOP_DEPTH];
00797 UINT8 i, j;
00798
00799 char loop1_var_name[80];
00800 char loop2_var_name[80];
00801 if (strlen(ST_name(WN_st(WN_index(in_loop1))))>=80) {
00802 DevWarn("Loop var name %s too long",ST_name(WN_st(WN_index(in_loop1))));
00803 strcpy(loop1_var_name,"name_too_long");
00804 } else
00805 strcpy(loop1_var_name,ST_name(WN_st(WN_index(in_loop1))));
00806 if (strlen(ST_name(WN_st(WN_index(in_loop2))))>=80) {
00807 DevWarn("Loop var name %s too long",ST_name(WN_st(WN_index(in_loop2))));
00808 strcpy(loop2_var_name,"name_too_long");
00809 } else
00810 strcpy(loop2_var_name,ST_name(WN_st(WN_index(in_loop2))));
00811
00812 SRCPOS srcpos1=WN_Get_Linenum(in_loop1);
00813 SRCPOS srcpos2=WN_Get_Linenum(in_loop2);
00814
00815 FmtAssert(WN_opcode(in_loop1)==OPC_DO_LOOP,
00816 ("non-loop input node in Fuse_Test()\n") );
00817 FmtAssert(WN_opcode(in_loop2)==OPC_DO_LOOP,
00818 ("non-loop input node in Fuse_Test()\n") );
00819
00820 UINT8 inner_most_level = fusion_level - 1;
00821
00822 offset[inner_most_level] = MAX_INT32;
00823 for (i=1; i<fusion_level-1; i++) offset[i] = 0;
00824
00825 *prolog_out = *epilog_out = MAX_INT32;
00826
00827 if (WN_next(in_loop1)!=in_loop2) {
00828 DevWarn("non-adjacent input loop nodes in Fuse_Test()");
00829 return Failed;
00830 }
00831
00832
00833
00834
00835
00836 loop_nest1[0]=in_loop1;
00837 OPERATOR opr=
00838 WN_operator(WN_kid0(WN_start(in_loop1)));
00839 if (opr == OPR_MAX || opr==OPR_MIN) {
00840 if (LNO_Verbose)
00841 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00842 "Loop with MIN, MAX lower bound cannot be fused.");
00843 if (LNO_Analysis)
00844 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00845 "Loop with MIN, MAX lower bound cannot be fused.");
00846 if (LNO_Tlog)
00847 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00848 "Loop with MIN, MAX lower bound cannot be fused.");
00849 return Failed;
00850 }
00851 opr=WN_operator(WN_kid1(WN_end(in_loop1)));
00852 if (opr == OPR_MAX || opr==OPR_MIN) {
00853 if (LNO_Verbose)
00854 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00855 "Loop with MIN, MAX upper bound cannot be fused.");
00856 if (LNO_Analysis)
00857 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00858 "Loop with MIN, MAX upper bound cannot be fused.");
00859 if (LNO_Tlog)
00860 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00861 "Loop with MIN, MAX upper bound cannot be fused.");
00862 return Failed;
00863 }
00864
00865 loop_nest2[0]=in_loop2;
00866 opr=WN_operator(WN_kid0(WN_start(in_loop2)));
00867 if (opr == OPR_MAX || opr==OPR_MIN) {
00868 if (LNO_Verbose)
00869 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00870 "Loop with MIN, MAX lower bound cannot be fused.");
00871 if (LNO_Analysis)
00872 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00873 "Loop with MIN, MAX lower bound cannot be fused.");
00874 if (LNO_Tlog)
00875 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00876 "Loop with MIN, MAX lower bound cannot be fused.");
00877 return Failed;
00878 }
00879 opr=WN_operator(WN_kid1(WN_end(in_loop2)));
00880 if (opr == OPR_MAX || opr==OPR_MIN) {
00881 if (LNO_Verbose)
00882 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00883 "Loop with MIN, MAX upper bound cannot be fused.");
00884 if (LNO_Analysis)
00885 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00886 "Loop with MIN, MAX upper bound cannot be fused.");
00887 if (LNO_Tlog)
00888 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00889 "Loop with MIN, MAX upper bound cannot be fused.");
00890 return Failed;
00891 }
00892
00893 if (!Do_Loop_Is_Good(in_loop1) || !Do_Loop_Is_Good(in_loop2) ||
00894 Do_Loop_Has_Calls(in_loop1) || Do_Loop_Has_Calls(in_loop2) ||
00895 Do_Loop_Has_Gotos(in_loop1) || Do_Loop_Has_Gotos(in_loop2)) {
00896 if (LNO_Verbose)
00897 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00898 "Loops with calls, exits, or gotos cannot be fused.");
00899 if (LNO_Analysis)
00900 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00901 "Loops with calls, exits, or gotos cannot be fused.");
00902 if (LNO_Tlog)
00903 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00904 "Loops with calls, exits, or gotos cannot be fused.");
00905 return Failed;
00906 }
00907
00908 #ifdef KEY
00909 {
00910 DO_LOOP_INFO* dli1 = Get_Do_Loop_Info(in_loop1);
00911 DO_LOOP_INFO* dli2 = Get_Do_Loop_Info(in_loop2);
00912 if (LNO_Run_Simd > 0 && LNO_Simd_Avoid_Fusion &&
00913 (dli1->Vectorizable ^ dli2->Vectorizable)) {
00914 if (LNO_Verbose)
00915 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00916 "Vectorizable loop can not be fused with a serial loop.");
00917 if (LNO_Analysis)
00918 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00919 "Vectorizable loop can not be fused with a serial loop.");
00920 if (LNO_Tlog)
00921 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00922 "Vectorizable loop can not be fused with a serial loop.");
00923 return Failed;
00924 }
00925 }
00926 #endif
00927 for (i=1; i<fusion_level; i++) {
00928 WN* lwn= Get_Only_Loop_Inside(loop_nest1[i-1],FALSE);
00929 if (!lwn) {
00930 if (LNO_Verbose)
00931 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00932 "Non-simply nested loops cannot be fused.");
00933 if (LNO_Analysis)
00934 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00935 "Non-simply nested loops cannot be fused.");
00936 if (LNO_Tlog)
00937 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00938 "Non-simply nested loops cannot be fused.");
00939 return Failed;
00940 }
00941
00942 opr=WN_operator(WN_kid0(WN_start(lwn)));
00943 if (opr == OPR_MAX || opr==OPR_MIN) {
00944 if (LNO_Verbose)
00945 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00946 "Loop with MIN, MAX lower bound cannot be fused.");
00947 if (LNO_Analysis)
00948 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00949 "Loop with MIN, MAX lower bound cannot be fused.");
00950 if (LNO_Tlog)
00951 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00952 "Loop with MIN, MAX lower bound cannot be fused.");
00953 return Failed;
00954 }
00955 opr=WN_operator(WN_kid1(WN_end(lwn)));
00956 if (opr == OPR_MAX || opr==OPR_MIN) {
00957 if (LNO_Verbose)
00958 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00959 "Loop with MIN, MAX upper bound cannot be fused.");
00960 if (LNO_Analysis)
00961 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00962 "Loop with MIN, MAX upper bound cannot be fused.");
00963 if (LNO_Tlog)
00964 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00965 "Loop with MIN, MAX upper bound cannot be fused.");
00966 return Failed;
00967 }
00968
00969 loop_nest1[i] = lwn;
00970
00971 lwn= Get_Only_Loop_Inside(loop_nest2[i-1],FALSE);
00972 if (!lwn) {
00973 if (LNO_Verbose)
00974 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00975 "Non-simply nested loops cannot be fused.");
00976 if (LNO_Analysis)
00977 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00978 "Non-simply nested loops cannot be fused.");
00979 if (LNO_Tlog)
00980 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00981 "Non-simply nested loops cannot be fused.");
00982 return Failed;
00983 }
00984
00985 opr=WN_operator(WN_kid0(WN_start(lwn)));
00986 if (opr == OPR_MAX || opr==OPR_MIN) {
00987 if (LNO_Verbose)
00988 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
00989 "Loop with MIN, MAX lower bound cannot be fused.");
00990 if (LNO_Analysis)
00991 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
00992 "Loop with MIN, MAX lower bound cannot be fused.");
00993 if (LNO_Tlog)
00994 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
00995 "Loop with MIN, MAX lower bound cannot be fused.");
00996 return Failed;
00997 }
00998 opr=WN_operator(WN_kid1(WN_end(lwn)));
00999 if (opr == OPR_MAX || opr==OPR_MIN) {
01000 if (LNO_Verbose)
01001 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01002 "Loop with MIN, MAX upper bound cannot be fused.");
01003 if (LNO_Analysis)
01004 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01005 "Loop with MIN, MAX upper bound cannot be fused.");
01006 if (LNO_Tlog)
01007 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01008 "Loop with MIN, MAX upper bound cannot be fused.");
01009 return Failed;
01010 }
01011
01012 loop_nest2[i] = lwn;
01013
01014 }
01015
01016
01017 DO_LOOP_INFO *loop_info1[LNO_MAX_DO_LOOP_DEPTH];
01018 DO_LOOP_INFO *loop_info2[LNO_MAX_DO_LOOP_DEPTH];
01019 INT32 lb_diff[LNO_MAX_DO_LOOP_DEPTH];
01020 INT32 ub_diff[LNO_MAX_DO_LOOP_DEPTH];
01021 INT32 step[LNO_MAX_DO_LOOP_DEPTH];
01022 BOOL is_positive[LNO_MAX_DO_LOOP_DEPTH];
01023 BOOL steps_are_constant[LNO_MAX_DO_LOOP_DEPTH];
01024 BOOL bounds_are_equal=TRUE;
01025
01026 for (i=0; i<fusion_level; i++) {
01027
01028 ACCESS_VECTOR *diff;
01029
01030 loop_info1[i]=(DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map, loop_nest1[i]);
01031 loop_info2[i]=(DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map, loop_nest2[i]);
01032 DO_LOOP_INFO* dli1=loop_info1[i];
01033 DO_LOOP_INFO* dli2=loop_info2[i];
01034
01035 if (!loop_info1[i]->Step->Is_Const() || !loop_info2[i]->Step->Is_Const()) {
01036
01037
01038
01039 steps_are_constant[i]=FALSE;
01040 } else
01041 steps_are_constant[i]=TRUE;
01042
01043 diff = Subtract(loop_info1[i]->Step, loop_info2[i]->Step,
01044 &FUSION_default_pool);
01045
01046 if (!diff->Is_Const() || diff->Const_Offset != 0) {
01047
01048 if (LNO_Verbose)
01049 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01050 "Steps have to be equal in both loops.");
01051 if (LNO_Analysis)
01052 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01053 "Steps have to be equal in both loops.");
01054 if (LNO_Tlog)
01055 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01056 "Steps have to be equal in both loops.");
01057 return Failed;
01058 }
01059
01060
01061 if (steps_are_constant[i]) {
01062 step[i] = loop_info1[i]->Step->Const_Offset;
01063 } else {
01064 step[i]=1;
01065
01066
01067 }
01068 is_positive[i] = step[i] > 0;
01069 if (!is_positive[i]) step[i] = - step[i];
01070
01071
01072
01073
01074
01075
01076
01077 BOOL identical_expression=FALSE;
01078 if (is_positive[i]) {
01079 if (Compare_Bounds(
01080 WN_start(loop_nest1[i]),WN_index(loop_nest1[i]),
01081 WN_start(loop_nest2[i]),WN_index(loop_nest2[i]))==0) {
01082
01083 identical_expression=TRUE;
01084
01085 } else if (Bound_Is_Too_Messy(dli1->LB) || dli1->LB->Num_Vec()!=1 ||
01086 Bound_Is_Too_Messy(dli2->LB) || dli2->LB->Num_Vec()!=1){
01087 if (LNO_Verbose)
01088 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01089 "Loops with messy lower bounds cannot be fused.");
01090 if (LNO_Analysis)
01091 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01092 "Loops with messy lower bounds cannot be fused.");
01093 if (LNO_Tlog)
01094 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01095 "Loops with messy lower bounds cannot be fused.");
01096 return Failed;
01097 } else {
01098
01099 diff = Subtract(loop_info1[i]->LB->Dim(0), loop_info2[i]->LB->Dim(0),
01100 &FUSION_default_pool);
01101 }
01102
01103 } else {
01104 if (Compare_Bounds(
01105 WN_end(loop_nest1[i]),WN_index(loop_nest1[i]),
01106 WN_end(loop_nest2[i]),WN_index(loop_nest2[i]))==0) {
01107
01108 identical_expression=TRUE;
01109
01110 } else if (Bound_Is_Too_Messy(dli1->UB) || dli1->UB->Num_Vec()!=1 ||
01111 Bound_Is_Too_Messy(dli2->UB) || dli2->UB->Num_Vec()!=1){
01112 if (LNO_Verbose)
01113 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01114 "Loops with messy upper bounds cannot be fused.");
01115 if (LNO_Analysis)
01116 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01117 "Loops with messy upper bounds cannot be fused.");
01118 if (LNO_Tlog)
01119 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01120 "Loops with messy upper bounds cannot be fused.");
01121 return Failed;
01122 } else {
01123
01124 diff = Subtract(loop_info1[i]->UB->Dim(0), loop_info2[i]->UB->Dim(0),
01125 &FUSION_default_pool);
01126 }
01127 }
01128
01129 if (identical_expression)
01130 lb_diff[i] = 0;
01131 else if (!diff->Is_Const()) {
01132 if (LNO_Verbose)
01133 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01134 "Difference in lower bounds must be constant.");
01135 if (LNO_Analysis)
01136 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01137 "Difference in lower bounds must be constant.");
01138 if (LNO_Tlog)
01139 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01140 "Difference in lower bounds must be constant.");
01141 return Failed;
01142 } else {
01143 INT64 loop_coeff;
01144 if (is_positive[i])
01145 loop_coeff=loop_info1[i]->LB->Dim(0)->Loop_Coeff(
01146 loop_info1[i]->LB->Dim(0)->Nest_Depth()-1);
01147
01148 else
01149 loop_coeff=loop_info1[i]->UB->Dim(0)->Loop_Coeff(
01150 loop_info1[i]->UB->Dim(0)->Nest_Depth()-1);
01151 if ( (diff->Const_Offset % loop_coeff) != 0) {
01152 if (LNO_Verbose)
01153 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01154 "Difference in lower bounds must be constant.");
01155 if (LNO_Analysis)
01156 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01157 "Difference in lower bounds must be constant.");
01158 if (LNO_Tlog)
01159 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01160 "Difference in lower bounds must be constant.");
01161 return Failed;
01162 } else {
01163 lb_diff[i] = diff->Const_Offset / loop_coeff;
01164 bounds_are_equal = bounds_are_equal && (lb_diff[i]==0);
01165 }
01166
01167 }
01168
01169 identical_expression=FALSE;
01170 if (is_positive[i]) {
01171 if (Compare_Bounds(
01172 WN_end(loop_nest1[i]),WN_index(loop_nest1[i]),
01173 WN_end(loop_nest2[i]),WN_index(loop_nest2[i]))==0) {
01174
01175 identical_expression=TRUE;
01176
01177 } else if (Bound_Is_Too_Messy(dli1->UB) || dli1->UB->Num_Vec()!=1 ||
01178 Bound_Is_Too_Messy(dli2->UB) || dli2->UB->Num_Vec()!=1){
01179 if (LNO_Verbose)
01180 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01181 "Loops with messy upper bounds cannot be fused.");
01182 if (LNO_Analysis)
01183 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01184 "Loops with messy upper bounds cannot be fused.");
01185 if (LNO_Tlog)
01186 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01187 "Loops with messy upper bounds cannot be fused.");
01188 return Failed;
01189 } else {
01190
01191 diff = Subtract(loop_info1[i]->UB->Dim(0), loop_info2[i]->UB->Dim(0),
01192 &FUSION_default_pool);
01193 }
01194 } else {
01195 if (Compare_Bounds(
01196 WN_start(loop_nest1[i]),WN_index(loop_nest1[i]),
01197 WN_start(loop_nest2[i]),WN_index(loop_nest2[i]))==0) {
01198
01199 identical_expression=TRUE;
01200
01201 } else if (Bound_Is_Too_Messy(dli1->LB) || dli1->LB->Num_Vec()!=1 ||
01202 Bound_Is_Too_Messy(dli2->LB) || dli2->LB->Num_Vec()!=1){
01203 if (LNO_Verbose)
01204 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01205 "Loops with messy lower bounds cannot be fused.");
01206 if (LNO_Analysis)
01207 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01208 "Loops with messy lower bounds cannot be fused.");
01209 if (LNO_Tlog)
01210 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01211 "Loops with messy lower bounds cannot be fused.");
01212 return Failed;
01213 } else {
01214
01215 diff = Subtract(loop_info1[i]->LB->Dim(0), loop_info2[i]->LB->Dim(0),
01216 &FUSION_default_pool);
01217 }
01218 }
01219
01220 if (identical_expression)
01221 ub_diff[i]=0;
01222 else if (!diff->Is_Const()) {
01223 if (LNO_Verbose)
01224 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01225 "Difference in upper bounds must be constant.");
01226 if (LNO_Analysis)
01227 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01228 "Difference in upper bounds must be constant.");
01229 if (LNO_Tlog)
01230 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01231 "Difference in upper bounds must be constant.");
01232 return Failed;
01233 } else {
01234 INT64 loop_coeff;
01235 if (is_positive[i])
01236 loop_coeff=loop_info1[i]->UB->Dim(0)->Loop_Coeff(
01237 loop_info1[i]->UB->Dim(0)->Nest_Depth()-1);
01238 else
01239 loop_coeff=loop_info1[i]->LB->Dim(0)->Loop_Coeff(
01240 loop_info1[i]->LB->Dim(0)->Nest_Depth()-1);
01241 if ( (diff->Const_Offset % loop_coeff) != 0) {
01242 if (LNO_Verbose)
01243 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01244 "Difference in upper bounds must be constant.");
01245 if (LNO_Analysis)
01246 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01247 "Difference in upper bounds must be constant.");
01248 if (LNO_Tlog)
01249 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01250 "Difference in upper bounds must be constant.");
01251 return Failed;
01252 } else {
01253 ub_diff[i] = diff->Const_Offset / loop_coeff;
01254 bounds_are_equal = bounds_are_equal && (ub_diff[i]==0);
01255 }
01256
01257 }
01258
01259
01260
01261
01262
01263
01264 }
01265
01266 WN* scalar_wn;
01267 if (scalar_wn=Scalar_Dependence_Prevent_Fusion(in_loop1,in_loop2)) {
01268 if (LNO_Verbose)
01269 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01270 "Failed because of scalar dependences.");
01271 if (LNO_Analysis)
01272 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01273 "Failed because of scalar dependences.");
01274 if (LNO_Tlog)
01275 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01276 "Failed because of scalar dependences.");
01277 if (LNO_Test_Dump) {
01278 SYMBOL tmp_symbol(scalar_wn);
01279 printf("Scalar Dependence caused by %s.\n", tmp_symbol.Name());
01280 }
01281 return Failed;
01282 }
01283
01284
01285 mINT32* dist =
01286 Max_Dep_Distance(in_loop1,in_loop2,fusion_level,step,bounds_are_equal);
01287 if (dist[fusion_level-1]==MAX_INT32) {
01288 if (LNO_Verbose)
01289 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01290 "Failed because of backward dependences.");
01291 if (LNO_Analysis)
01292 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01293 "Failed because of backward dependences.");
01294 if (LNO_Tlog)
01295 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01296 "Failed because of backward dependences.");
01297 return Failed;
01298 }
01299
01300
01301
01302
01303
01304 BOOL completely_aligned = TRUE;
01305 BOOL outer_peeling = FALSE;
01306
01307
01308
01309 for (i=0; i<fusion_level; i++) {
01310
01311 if (!steps_are_constant && offset[i]!=0) {
01312 if (LNO_Verbose)
01313 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01314 "Failed due to non-zero offset required at a level with non-constant step");
01315 if (LNO_Analysis)
01316 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01317 "Failed due to non-zero offset required at a level with non-constant step");
01318 if (LNO_Tlog)
01319 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01320 "Failed due to non-zero offset required at a level with non-constant step");
01321 return Failed;
01322 }
01323
01324 if (is_positive[i]) {
01325 if ((offset[i]=lb_diff[i]) < dist[i])
01326 offset[i] = dist[i];
01327
01328
01329 INT32 r = ( -lb_diff[i] + offset[i] )%step[i];
01330 if (r!=0) offset[i] += (step[i] - r);
01331
01332 if (offset[i]>dist[i])
01333 for (INT j=i+1; j<fusion_level; j++)
01334 dist[j]=MIN_INT32;
01335
01336
01337
01338
01339
01340
01341 if (offset[i]!=lb_diff[i] ||
01342 lb_diff[i] != ub_diff[i]) {
01343 if (abs(offset[i]-lb_diff[i])>threshold ||
01344 abs(offset[i]-ub_diff[i])>threshold) {
01345 if (LNO_Verbose)
01346 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01347 "Failed because of too many pre-peeled iterations required after fusion.");
01348 if (LNO_Analysis)
01349 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01350 "Failed because of too many pre-peeled iterations required after fusion.");
01351 if (LNO_Tlog)
01352 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01353 "Failed because of too many pre-peeled iterations required after fusion.");
01354 offset[inner_most_level] = MAX_INT32;
01355 return Failed;
01356 } else if (i==0 && i!=inner_most_level)
01357 outer_peeling=TRUE;
01358 else if (i!=inner_most_level) {
01359 if (LNO_Verbose)
01360 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01361 "Peeling needed for middle loops.");
01362 if (LNO_Analysis)
01363 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01364 "Peeling needed for middle loops.");
01365 if (LNO_Tlog)
01366 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01367 "Peeling needed for middle loops.");
01368 offset[inner_most_level] = MAX_INT32;
01369 return Failed;
01370 }
01371 completely_aligned = FALSE;
01372 }
01373
01374
01375 for (j=i+1; j<fusion_level; j++) {
01376 lb_diff[j] +=
01377 (offset[i] *
01378 loop_info1[j]->LB->Dim(0)->Loop_Coeff(loop_info1[i]->Depth));
01379 ub_diff[j] +=
01380 (offset[i] *
01381 loop_info1[j]->UB->Dim(0)->Loop_Coeff(loop_info1[i]->Depth));
01382 }
01383
01384
01385 } else {
01386
01387 if ((offset[i]= - lb_diff[i]) < dist[i])
01388 offset[i] = dist[i];
01389
01390
01391 INT32 r = ( lb_diff[i] + offset[i] )%step[i];
01392 if (r!=0) offset[i] += (step[i] - r);
01393
01394 if (offset[i]>dist[i])
01395 for (INT j=i+1; j<fusion_level; j++)
01396 dist[j]=MIN_INT32;
01397
01398
01399
01400
01401
01402
01403 if (offset[i] != - lb_diff[i] ||
01404 lb_diff[i] != ub_diff[i]) {
01405 if (abs(offset[i]+lb_diff[i])>threshold ||
01406 abs(offset[i]+ub_diff[i])>threshold) {
01407 if (LNO_Verbose)
01408 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01409 "Failed because of too many pre-peeled iterations required after fusion.");
01410 if (LNO_Analysis)
01411 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01412 "Failed because of too many pre-peeled iterations required after fusion.");
01413 if (LNO_Tlog)
01414 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01415 "Failed because of too many pre-peeled iterations required after fusion.");
01416 offset[inner_most_level] = MAX_INT32;
01417 return Failed;
01418 } else if (i==0 && i!=inner_most_level)
01419 outer_peeling=TRUE;
01420 else if (i!=inner_most_level) {
01421 if (LNO_Verbose)
01422 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01423 "Peeling needed for middle loops.");
01424 if (LNO_Analysis)
01425 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01426 "Peeling needed for middle loops.");
01427 if (LNO_Tlog)
01428 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01429 "Peeling needed for middle loops.");
01430 offset[inner_most_level] = MAX_INT32;
01431 return Failed;
01432 }
01433 completely_aligned = FALSE;
01434 }
01435
01436
01437 for (j=i+1; j<fusion_level; j++) {
01438 lb_diff[j] -=
01439 (offset[i] *
01440 loop_info1[j]->UB->Dim(0)->Loop_Coeff(loop_info1[i]->Depth));
01441 ub_diff[j] -=
01442 (offset[i] *
01443 loop_info1[j]->LB->Dim(0)->Loop_Coeff(loop_info1[i]->Depth));
01444 }
01445
01446 }
01447
01448 }
01449
01450
01451 if (completely_aligned) {
01452
01453 *prolog_out = *epilog_out = 0;
01454 *epilog_loop_out = NULL;
01455 return Succeeded;
01456
01457 } else if (outer_peeling) {
01458
01459 offset[inner_most_level] = MAX_INT32;
01460 return Try_Level_By_Level;
01461
01462 }
01463
01464
01465
01466 INT64 loop1_lb_ub;
01467 INT64 loop2_lb_ub;
01468
01469
01470
01471 ACCESS_VECTOR *loop1_iter = Add(loop_info1[inner_most_level]->UB->Dim(0),
01472 loop_info1[inner_most_level]->LB->Dim(0),
01473 &FUSION_default_pool);
01474
01475 BOOL iteration_count_unknown = FALSE;
01476
01477
01478
01479
01480
01481
01482
01483
01484 if (! loop1_iter->Is_Const()) {
01485 if ( step[inner_most_level] != 1) {
01486 offset[inner_most_level] = MAX_INT32;
01487 if (LNO_Verbose)
01488 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
01489 "Iteration count has to be constant for non-stride-1 loop.");
01490 if (LNO_Analysis)
01491 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
01492 "Iteration count has to be constant for non-stride-1 loop.");
01493 if (LNO_Tlog)
01494 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
01495 "Iteration count has to be constant for non-stride-1 loop.");
01496 return Failed;
01497 } else
01498 iteration_count_unknown = TRUE;
01499 } else {
01500
01501 ACCESS_VECTOR *loop2_iter =
01502 Add(loop_info2[inner_most_level]->UB->Dim(0),
01503 loop_info2[inner_most_level]->LB->Dim(0),
01504 &FUSION_default_pool);
01505
01506 loop1_lb_ub = loop1_iter->Const_Offset;
01507 loop2_lb_ub = loop2_iter->Const_Offset;
01508
01509 if (!is_positive[inner_most_level]) {
01510 loop1_lb_ub = - loop1_lb_ub;
01511 loop2_lb_ub = - loop2_lb_ub;
01512 }
01513
01514 loop1_lb_ub -= loop1_lb_ub%step[inner_most_level];
01515 loop2_lb_ub -= loop2_lb_ub%step[inner_most_level];
01516
01517 }
01518
01519 if (step[inner_most_level]>0) {
01520
01521 INT64 prolog;
01522 INT64 epilog;
01523
01524
01525 if (iteration_count_unknown) {
01526 prolog = - lb_diff[inner_most_level] +
01527 offset[inner_most_level] - step[inner_most_level] ;
01528 } else {
01529 prolog = - lb_diff[inner_most_level] +
01530 offset[inner_most_level] - step[inner_most_level] ;
01531 if (loop1_lb_ub < prolog)
01532 prolog = loop1_lb_ub;
01533 }
01534 if (prolog<0) prolog = 0;
01535 else {
01536 FmtAssert(prolog%step[inner_most_level] ==0, ("misaligned prolog\n"));
01537 prolog = prolog / step[inner_most_level];
01538 prolog ++;
01539 }
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550 if (ub_diff[inner_most_level] > offset[inner_most_level]) {
01551
01552 if (iteration_count_unknown) {
01553 epilog = ub_diff[inner_most_level] -
01554 offset[inner_most_level] - step[inner_most_level];
01555 } else {
01556 epilog = lb_diff[inner_most_level] +
01557 loop1_lb_ub - loop2_lb_ub -
01558 offset[inner_most_level] - step[inner_most_level];
01559 if (loop1_lb_ub < epilog)
01560 epilog = loop1_lb_ub;
01561 }
01562 *epilog_loop_out = loop_nest1[inner_most_level];
01563
01564 } else {
01565
01566
01567
01568
01569 if (iteration_count_unknown) {
01570 epilog = - ub_diff[inner_most_level] +
01571 offset[inner_most_level] - step[inner_most_level];
01572 } else {
01573 epilog = - lb_diff[inner_most_level] -
01574 loop1_lb_ub + loop2_lb_ub +
01575 offset[inner_most_level] - step[inner_most_level];
01576 if (loop2_lb_ub < epilog)
01577 epilog = loop2_lb_ub;
01578 }
01579 *epilog_loop_out = loop_nest2[inner_most_level];
01580 }
01581 if (epilog<0) {
01582 epilog = 0;
01583 *epilog_loop_out = NULL;
01584 } else {
01585 FmtAssert(epilog%step[inner_most_level] ==0, ("misaligned epilog\n"));
01586 epilog = epilog / step[inner_most_level];
01587 epilog ++;
01588 }
01589
01590 *prolog_out = prolog;
01591 *epilog_out = epilog;
01592
01593 } else {
01594
01595 INT64 prolog;
01596 INT64 epilog;
01597
01598
01599 if (iteration_count_unknown) {
01600 prolog = lb_diff[inner_most_level] +
01601 offset[inner_most_level] - step[inner_most_level] ;
01602 } else {
01603 prolog = lb_diff[inner_most_level] +
01604 offset[inner_most_level] - step[inner_most_level] ;
01605 if (loop1_lb_ub < prolog)
01606 prolog = loop1_lb_ub;
01607 }
01608 if (prolog<0) prolog = 0;
01609 else {
01610 FmtAssert(prolog%step[inner_most_level] ==0, ("misaligned prolog\n"));
01611 prolog = prolog / step[inner_most_level];
01612 prolog ++;
01613 }
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624 if (ub_diff[inner_most_level] < -offset[inner_most_level]) {
01625
01626 if (iteration_count_unknown) {
01627 epilog = - ub_diff[inner_most_level] -
01628 offset[inner_most_level] - step[inner_most_level];
01629 } else {
01630 epilog = - lb_diff[inner_most_level] +
01631 loop1_lb_ub - loop2_lb_ub -
01632 offset[inner_most_level] - step[inner_most_level];
01633 if (loop1_lb_ub < epilog)
01634 epilog = loop1_lb_ub;
01635 }
01636 *epilog_loop_out = loop_nest1[inner_most_level];
01637
01638 } else {
01639
01640
01641
01642
01643 if (iteration_count_unknown) {
01644 epilog = ub_diff[inner_most_level] +
01645 offset[inner_most_level] - step[inner_most_level];
01646 } else {
01647 epilog = lb_diff[inner_most_level] -
01648 loop1_lb_ub + loop2_lb_ub +
01649 offset[inner_most_level] - step[inner_most_level];
01650 if (loop2_lb_ub < epilog)
01651 epilog = loop2_lb_ub;
01652 }
01653 *epilog_loop_out = loop_nest2[inner_most_level];
01654 }
01655 if (epilog<0) {
01656 epilog = 0;
01657 *epilog_loop_out = NULL;
01658 } else {
01659 FmtAssert(epilog%step[inner_most_level] ==0, ("misaligned epilog\n"));
01660 epilog = epilog / step[inner_most_level];
01661 epilog ++;
01662 }
01663
01664 *prolog_out = prolog;
01665 *epilog_out = epilog;
01666
01667
01668 }
01669
01670 return Succeeded;
01671
01672 }
01673
01674
01675 void Loop_Stmt_Update(
01676 WN* wn,
01677 WN* old_loop,
01678 WN* new_loop)
01679 {
01680 MEM_POOL_Push(&LNO_local_pool);
01681
01682 REF_LIST_STACK *writes = CXX_NEW(REF_LIST_STACK(&LNO_local_pool),
01683 &LNO_local_pool);
01684 REF_LIST_STACK *reads = CXX_NEW(REF_LIST_STACK(&LNO_local_pool),
01685 &LNO_local_pool);
01686 SCALAR_STACK *scalar_writes = CXX_NEW(SCALAR_STACK(&LNO_local_pool),
01687 &LNO_local_pool);
01688 SCALAR_STACK *scalar_reads = CXX_NEW(SCALAR_STACK(&LNO_local_pool),
01689 &LNO_local_pool);
01690 DOLOOP_STACK *stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01691 &LNO_local_pool);
01692
01693 Build_Doloop_Stack(wn, stack);
01694
01695 SCALAR_REF_STACK *params =
01696 CXX_NEW(SCALAR_REF_STACK(&LNO_local_pool), &LNO_local_pool);
01697
01698
01699 Init_Ref_Stmt_Counter();
01700 New_Gather_References(wn,writes,reads,stack,
01701 scalar_writes,scalar_reads,
01702 params,&LNO_local_pool,Gather_Scalar_Refs);
01703
01704 for (INT sj=0; sj<scalar_reads->Elements(); sj++) {
01705 SCALAR_NODE* sjnode=scalar_reads->Bottom_nth(sj);
01706 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
01707 WN* read=sjnode->Bottom_nth(sjj)->Wn;
01708 if (!Du_Mgr->Ud_Get_Def(read)) {
01709 DevWarn ("Exposed use before def: \n");
01710 Dump_WN (read, stdout, TRUE, 4, 4);
01711 }
01712 else if (Du_Mgr->Ud_Get_Def(read)->Loop_stmt()==old_loop)
01713 Du_Mgr->Ud_Get_Def(read)->Set_loop_stmt(new_loop);
01714 }
01715 }
01716
01717 MEM_POOL_Pop(&LNO_local_pool);
01718
01719 }
01720
01721
01722
01723
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733
01734
01735
01736
01737
01738
01739
01740
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754
01755
01756 extern void Pre_loop_peeling(WN* in_loop, UINT32 iter_count,
01757 BOOL unrolled, BOOL preserve_loop_index)
01758 {
01759
01760 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
01761
01762 FmtAssert(WN_opcode(in_loop)==OPC_DO_LOOP,
01763 ("non-loop input node\n") );
01764
01765
01766 if (iter_count<=0) return;
01767 if (WN_first(WN_do_body(in_loop))==NULL)
01768 return;
01769
01770
01771
01772 if (preserve_loop_index && loop_var_is_live_on_exit(in_loop)) {
01773 Finalize_Index_Variable(in_loop,FALSE);
01774 scalar_rename(WN_start(in_loop));
01775 }
01776
01777 MEM_POOL_Push(&LNO_local_pool);
01778
01779
01780 WN *prev_stmt=WN_prev(in_loop);
01781 WN *parent = LWN_Get_Parent(in_loop);
01782
01783 TYPE_ID index_type = WN_desc(WN_start(in_loop));
01784 SYMBOL index_symbol(WN_start(in_loop));
01785 OPERATOR step_operator =
01786 WN_operator(WN_kid0(WN_step(in_loop)));
01787
01788
01789 if (unrolled==FALSE) {
01790
01791
01792 WN **new_iter=CXX_NEW_ARRAY(WN*, 2, &LNO_local_pool);
01793
01794 new_iter[0]=WN_do_body(in_loop);
01795 BOOL out_of_edge=FALSE;
01796 new_iter[1] = LWN_Copy_Tree(WN_do_body(in_loop),TRUE,LNO_Info_Map);
01797 BOOL all_internal = WN_Rename_Duplicate_Labels(WN_do_body(in_loop),
01798 new_iter[1], Current_Func_Node, &LNO_local_pool);
01799 Is_True(all_internal, ("external labels renamed"));
01800
01801
01802 STACK<WN*> st_old(&LNO_local_pool);
01803 STACK<WN*> st_new(&LNO_local_pool);
01804 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
01805 Prompf_Assign_Ids(new_iter[0], new_iter[1], &st_old, &st_new, FALSE);
01806
01807
01808 if (!adg->Add_Deps_To_Copy_Block(new_iter[0],new_iter[1], FALSE)) {
01809
01810 out_of_edge=TRUE;
01811 LWN_Update_Dg_Delete_Tree(new_iter[1], adg);
01812 }
01813 if (out_of_edge) {
01814 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
01815 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop),adg);
01816 }
01817
01818 Unrolled_DU_Update(new_iter, 2, Do_Loop_Depth(in_loop));
01819 WN* last_stmt=WN_last(new_iter[0]);
01820 LWN_Insert_Block_Before(new_iter[0], NULL, new_iter[1]);
01821 DYN_ARRAY<FF_STMT_LIST> loop(&LNO_local_pool);
01822 loop.Newidx(); loop[0].Init(NULL);
01823 loop.Newidx(); loop[1].Init(NULL);
01824 WN *wn;
01825 for (wn=WN_first(new_iter[0]); wn!=last_stmt; wn=WN_next(wn)) {
01826 loop[0].Append(wn,&LNO_local_pool);
01827 }
01828 loop[0].Append(last_stmt,&LNO_local_pool);
01829 for (wn=WN_next(last_stmt); wn; wn=WN_next(wn)) {
01830 loop[1].Append(wn,&LNO_local_pool);
01831 }
01832 Separate_And_Update(in_loop, loop, 1);
01833 WN* new_loop=WN_next(in_loop);
01834
01835
01836
01837 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01838 INT old_outer_id = WN_MAP32_Get(Prompf_Id_Map, in_loop);
01839 INT new_outer_id = New_Construct_Id();
01840 WN_MAP32_Set(Prompf_Id_Map, new_loop, new_outer_id);
01841 INT nloops = st_old.Elements();
01842 if (nloops >= 0) {
01843 INT* old_ids = CXX_NEW_ARRAY(INT, nloops + 1, &LNO_local_pool);
01844 INT* new_ids = CXX_NEW_ARRAY(INT, nloops + 1, &LNO_local_pool);
01845 old_ids[0] = old_outer_id;
01846 new_ids[0] = new_outer_id;
01847 for (INT i = 1; i < nloops + 1; i++) {
01848 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i-1));
01849 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i-1));
01850 }
01851 Prompf_Info->Pre_Peel(old_ids, new_ids, nloops + 1);
01852 }
01853 }
01854
01855
01856
01857
01858
01859
01860
01861 for (INT i=0; i<WN_kid_count(in_loop); i++) {
01862 wn=WN_kid(in_loop,i);
01863 WN_kid(in_loop,i)=WN_kid(new_loop,i);
01864 LWN_Set_Parent(WN_kid(in_loop,i), in_loop);
01865 WN_kid(new_loop,i)=wn;
01866 LWN_Set_Parent(WN_kid(new_loop,i), new_loop);
01867 }
01868 LWN_Extract_From_Block(new_loop);
01869 LWN_Insert_Block_Before(LWN_Get_Parent(in_loop), in_loop, new_loop);
01870 Loop_Stmt_Update(in_loop,new_loop,in_loop);
01871 Loop_Stmt_Update(new_loop,in_loop,new_loop);
01872 Get_Do_Loop_Info(in_loop)->Est_Num_Iterations= -1;
01873 Get_Do_Loop_Info(new_loop)->Est_Num_Iterations= -1;
01874
01875 WN_kid0(WN_start(in_loop))=
01876 LWN_CreateExp2(
01877 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
01878 WN_kid0(WN_start(in_loop)),
01879 LWN_Make_Icon(index_type,(iter_count)*Step_Size(in_loop)));
01880
01881 wn=LWN_Copy_Tree(WN_kid0(WN_start(new_loop)),TRUE,LNO_Info_Map);
01882 if (!Array_Dependence_Graph->
01883 Add_Deps_To_Copy_Block(WN_kid0(WN_start(in_loop)), wn, FALSE))
01884 LNO_Erase_Dg_From_Here_In(WN_kid0(WN_start(in_loop)),
01885 Array_Dependence_Graph);
01886
01887 LWN_Copy_Def_Use(WN_kid0(WN_start(new_loop)),wn, Du_Mgr);
01888 WN_kid1(WN_end(new_loop))=
01889 LWN_CreateExp2(
01890 OPCODE_make_op(OPR_MIN, index_type, MTYPE_V),
01891 LWN_CreateExp2(
01892 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
01893 wn,
01894 LWN_Make_Icon(index_type,(iter_count-1)*Step_Size(in_loop))),
01895 WN_kid1(WN_end(new_loop)));
01896
01897 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01898 &LNO_local_pool);
01899 Build_Doloop_Stack(LWN_Get_Parent(in_loop), loop_stack);
01900
01901
01902 LWN_Parentize(in_loop);
01903 LWN_Parentize(new_loop);
01904
01905 LNO_Build_Access(new_loop, loop_stack, &LNO_default_pool);
01906 LNO_Build_Do_Access(in_loop, loop_stack);
01907
01908 if (!adg->Build_Region(new_loop,in_loop,loop_stack,TRUE,TRUE)) {
01909 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
01910 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop), adg);
01911 }
01912
01913 } else {
01914
01915 WN *index_expr;
01916 WN **new_iter=CXX_NEW_ARRAY(WN*, iter_count+1, &LNO_local_pool);
01917
01918 new_iter[0]=WN_do_body(in_loop);
01919 BOOL out_of_edge=FALSE;
01920 INT i;
01921 for (i=1; i<=iter_count; i++) {
01922 new_iter[i] = LWN_Copy_Tree(WN_do_body(in_loop),TRUE,LNO_Info_Map);
01923 BOOL all_internal = WN_Rename_Duplicate_Labels(WN_do_body(in_loop),
01924 new_iter[i], Current_Func_Node, &LNO_local_pool);
01925 Is_True(all_internal, ("external labels renamed"));
01926
01927 if (Good_Do_Depth(in_loop)>0 &&
01928 !adg->Add_Deps_To_Copy_Block(new_iter[0],new_iter[i], FALSE)) {
01929
01930 out_of_edge=TRUE;
01931 LWN_Update_Dg_Delete_Tree(new_iter[i], adg);
01932 }
01933 }
01934
01935 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01936 STACK<WN*> st_old(&LNO_local_pool);
01937 STACK<WN*> st_new(&LNO_local_pool);
01938 for (INT i = 1; i <= iter_count; i++) {
01939 st_old.Clear();
01940 st_new.Clear();
01941 Prompf_Assign_Ids(new_iter[0], new_iter[i], &st_old, &st_new, FALSE);
01942 INT nloops = st_old.Elements();
01943 if (nloops > 0) {
01944 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01945 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
01946 for (INT i = 0; i < nloops; i++) {
01947 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
01948 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
01949 }
01950 Prompf_Info->Pre_Peel(old_ids, new_ids, nloops);
01951 CXX_DELETE_ARRAY(old_ids, &LNO_local_pool);
01952 CXX_DELETE_ARRAY(new_ids, &LNO_local_pool);
01953 }
01954 }
01955 }
01956
01957 if (out_of_edge) {
01958 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
01959 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop),adg);
01960 }
01961
01962 Unrolled_DU_Update(new_iter, iter_count+1, Do_Loop_Depth(in_loop));
01963 WN* wn_tmp=LWN_Get_Parent(in_loop);
01964 while (wn_tmp && WN_opcode(wn_tmp)!=OPC_DO_LOOP)
01965 wn_tmp=LWN_Get_Parent(wn_tmp);
01966 for (i=1; i<=iter_count; i++)
01967 Loop_Stmt_Update(new_iter[i],in_loop,wn_tmp);
01968
01969 WN* wn;
01970
01971 index_expr = LWN_Copy_Tree(WN_kid0(WN_start(in_loop)),TRUE,LNO_Info_Map);
01972 if (!Array_Dependence_Graph->
01973 Add_Deps_To_Copy_Block(WN_kid0(WN_start(in_loop)), index_expr, FALSE))
01974 LNO_Erase_Dg_From_Here_In(WN_kid0(WN_start(in_loop)),
01975 Array_Dependence_Graph);
01976
01977 LWN_Copy_Def_Use(WN_kid0(WN_start(in_loop)), index_expr, Du_Mgr);
01978
01979 WN_kid0(WN_start(in_loop))=
01980 LWN_CreateExp2(
01981 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
01982 WN_kid0(WN_start(in_loop)),
01983 LWN_Make_Icon(index_type,(iter_count)*Step_Size(in_loop)));
01984
01985 LWN_Parentize(WN_start(in_loop));
01986
01987 for (i=0; i<iter_count; i++) {
01988
01989 WN* guard= LWN_Copy_Tree(WN_end(in_loop),TRUE,LNO_Info_Map);
01990 if (!Array_Dependence_Graph->
01991 Add_Deps_To_Copy_Block(WN_end(in_loop), guard, FALSE))
01992 LNO_Erase_Dg_From_Here_In(WN_end(in_loop), Array_Dependence_Graph);
01993
01994 LWN_Copy_Def_Use(WN_end(in_loop), guard, Du_Mgr);
01995
01996 Replace_Ldid_With_Exp_Copy(index_symbol, guard, index_expr,
01997 Du_Mgr);
01998
01999
02000 Replace_Ldid_With_Exp_Copy(index_symbol,new_iter[i+1],index_expr,
02001 Du_Mgr);
02002
02003 WN* stride=
02004 LWN_Copy_Tree(WN_kid1(WN_kid0(WN_step(in_loop))),TRUE,LNO_Info_Map);
02005 if (!Array_Dependence_Graph->
02006 Add_Deps_To_Copy_Block(WN_kid1(WN_kid0(WN_step(in_loop))),stride,FALSE))
02007 LNO_Erase_Dg_From_Here_In(WN_kid1(WN_kid0(WN_step(in_loop))),
02008 Array_Dependence_Graph);
02009
02010
02011 index_expr=LWN_CreateExp2(
02012 OPCODE_make_op(step_operator, index_type, MTYPE_V),
02013 index_expr, stride);
02014
02015 WN *if_wn;
02016
02017 if_wn=LWN_CreateIf(
02018 guard,
02019 new_iter[i+1],
02020 WN_CreateBlock());
02021 LWN_Copy_Linenumber(in_loop, if_wn);
02022
02023 IF_INFO* if_info =
02024 CXX_NEW(IF_INFO(&LNO_default_pool,
02025 !Get_Do_Loop_Info(in_loop)->Is_Inner,
02026 Find_SCF_Inside(in_loop,OPC_REGION)!=NULL),
02027 &LNO_default_pool);
02028 WN_MAP_Set(LNO_Info_Map, if_wn, (void*)if_info);
02029
02030 LWN_Copy_Frequency_Tree(if_wn, in_loop);
02031 LWN_Insert_Block_Before(parent, in_loop, if_wn);
02032
02033 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02034 &LNO_local_pool);
02035 Build_Doloop_Stack(LWN_Get_Parent(in_loop), loop_stack);
02036
02037 LNO_Build_If_Access(if_wn, loop_stack);
02038 COND_IF_INFO cond_result=COND_If_Info(if_wn, &LNO_local_pool);
02039
02040 if (cond_result==COND_IF_THEN_ONLY) {
02041
02042 LWN_Extract_From_Block(if_wn);
02043 LWN_Insert_Block_Before(parent, in_loop, new_iter[i+1]);
02044 WN_then(if_wn)=WN_CreateBlock();
02045 LWN_Update_Def_Use_Delete_Tree(if_wn,Du_Mgr);
02046 LWN_Delete_Tree(if_wn);
02047
02048 } else if (cond_result==COND_IF_ELSE_ONLY) {
02049
02050 LWN_Delete_Tree(if_wn);
02051 LWN_Update_Def_Use_Delete_Tree(if_wn,Du_Mgr);
02052
02053
02054 }
02055 if (i==iter_count-1) {
02056 LWN_Update_Def_Use_Delete_Tree(index_expr,Du_Mgr);
02057 LWN_Delete_Tree(index_expr);
02058 }
02059
02060
02061 }
02062
02063
02064 WN* start_stmt;
02065
02066 if (prev_stmt)
02067 start_stmt = WN_next(prev_stmt);
02068 else
02069 start_stmt = WN_first(parent);
02070
02071 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02072 &LNO_local_pool);
02073 Build_Doloop_Stack(LWN_Get_Parent(start_stmt), loop_stack);
02074
02075 for (wn=start_stmt; wn!=in_loop; wn=WN_next(wn)) {
02076 LNO_Build_Access(wn, loop_stack, &LNO_default_pool);
02077 Remark_Depth(wn, loop_stack->Elements());
02078 }
02079 Get_Do_Loop_Info(in_loop)->Est_Num_Iterations= -1;
02080 LNO_Build_Do_Access(in_loop, loop_stack);
02081
02082 if (!adg->Build_Region(start_stmt,in_loop,loop_stack,TRUE,TRUE)) {
02083 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
02084 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop), adg);
02085 }
02086
02087 LWN_Adjust_Frequency_Tree(WN_step(in_loop), -iter_count);
02088 LWN_Adjust_Frequency_Tree(WN_end(in_loop), -iter_count);
02089
02090 }
02091
02092 MEM_POOL_Pop(&LNO_local_pool);
02093
02094 SRCPOS srcpos=WN_Get_Linenum(in_loop);
02095 if (LNO_Verbose)
02096 pre_peeling_verbose_info(srcpos,iter_count);
02097 if (LNO_Analysis)
02098 pre_peeling_analysis_info(srcpos,iter_count);
02099 if (LNO_Tlog)
02100 pre_peeling_tlog_info(in_loop,iter_count);
02101 }
02102
02103 static void Build_Regions(WN* in_loop,
02104 WN* wn_tree,
02105 DOLOOP_STACK* loop_stack)
02106 {
02107 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
02108 if (WN_operator(wn_tree) == OPR_DO_LOOP) {
02109 if (!adg->Build_Region(wn_tree,wn_tree,loop_stack,TRUE,TRUE)) {
02110 DevWarn("Array dependence graph overflowed in Post_loop_peeling()");
02111 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop), adg);
02112 }
02113 return;
02114 }
02115
02116 if (WN_operator(wn_tree) == OPR_BLOCK) {
02117 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
02118 Build_Regions(in_loop, wn, loop_stack);
02119 } else {
02120 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
02121 Build_Regions(in_loop, WN_kid(wn_tree, i), loop_stack);
02122 }
02123 }
02124
02125
02126
02127
02128
02129
02130
02131
02132
02133
02134
02135
02136
02137
02138
02139
02140
02141
02142
02143
02144
02145
02146
02147
02148
02149
02150
02151
02152
02153
02154
02155
02156
02157
02158 extern void Post_loop_peeling(WN* in_loop, UINT32 iter_count,
02159 BOOL unrolled, BOOL preserve_loop_index)
02160 {
02161 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
02162
02163 FmtAssert(WN_opcode(in_loop)==OPC_DO_LOOP,
02164 ("non-loop input node\n") );
02165
02166
02167 if (iter_count<=0) return;
02168
02169
02170
02171 if (preserve_loop_index && loop_var_is_live_on_exit(in_loop)) {
02172 Finalize_Index_Variable(in_loop,FALSE);
02173 scalar_rename(WN_start(in_loop));
02174 }
02175
02176 MEM_POOL_Push(&LNO_local_pool);
02177
02178 WN *wn;
02179
02180 WN *next_stmt=WN_next(in_loop);
02181 WN *parent = LWN_Get_Parent(in_loop);
02182
02183 SYMBOL loop_index_symbol(WN_start(in_loop));
02184 TYPE_ID index_type = WN_desc(WN_start(in_loop));
02185 OPERATOR step_operator =
02186 WN_operator(WN_kid0(WN_step(in_loop)));
02187 OPERATOR inverse_step_operator;
02188 if (step_operator == OPR_ADD)
02189 inverse_step_operator = OPR_SUB;
02190 else
02191 inverse_step_operator = OPR_ADD;
02192
02193 if (unrolled==FALSE) {
02194
02195
02196 WN **new_iter=CXX_NEW_ARRAY(WN*, 2, &LNO_local_pool);
02197
02198 new_iter[0]=WN_do_body(in_loop);
02199 BOOL out_of_edge=FALSE;
02200 new_iter[1] = LWN_Copy_Tree(WN_do_body(in_loop),TRUE,LNO_Info_Map);
02201 BOOL all_internal = WN_Rename_Duplicate_Labels(WN_do_body(in_loop),
02202 new_iter[1], Current_Func_Node, &LNO_local_pool);
02203 Is_True(all_internal, ("external labels renamed"));
02204
02205
02206 STACK<WN*> st_old(&LNO_local_pool);
02207 STACK<WN*> st_new(&LNO_local_pool);
02208 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
02209 Prompf_Assign_Ids(new_iter[0], new_iter[1], &st_old, &st_new, FALSE);
02210
02211
02212 if (!adg->Add_Deps_To_Copy_Block(new_iter[0],new_iter[1], FALSE)) {
02213
02214 out_of_edge=TRUE;
02215 LWN_Update_Dg_Delete_Tree(new_iter[1], adg);
02216 }
02217 if (out_of_edge) {
02218 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
02219 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop),adg);
02220 }
02221
02222 Unrolled_DU_Update(new_iter, 2, Do_Loop_Depth(in_loop));
02223 WN* last_stmt=WN_last(new_iter[0]);
02224 LWN_Insert_Block_Before(new_iter[0], NULL, new_iter[1]);
02225 DYN_ARRAY<FF_STMT_LIST> loop(&LNO_local_pool);
02226 loop.Newidx(); loop[0].Init(NULL);
02227 loop.Newidx(); loop[1].Init(NULL);
02228 for (wn=WN_first(new_iter[0]); wn!=last_stmt; wn=WN_next(wn)) {
02229 loop[0].Append(wn,&LNO_local_pool);
02230 }
02231 loop[0].Append(last_stmt,&LNO_local_pool);
02232 for (wn=WN_next(last_stmt); wn; wn=WN_next(wn)) {
02233 loop[1].Append(wn,&LNO_local_pool);
02234 }
02235 Separate_And_Update(in_loop, loop, 1, FALSE);
02236 WN* new_loop=WN_next(in_loop);
02237
02238
02239
02240 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02241 INT old_outer_id = WN_MAP32_Get(Prompf_Id_Map, in_loop);
02242 INT new_outer_id = New_Construct_Id();
02243 WN_MAP32_Set(Prompf_Id_Map, new_loop, new_outer_id);
02244 INT nloops = st_old.Elements();
02245 if (nloops >= 0) {
02246 INT* old_ids = CXX_NEW_ARRAY(INT, nloops + 1, &LNO_local_pool);
02247 INT* new_ids = CXX_NEW_ARRAY(INT, nloops + 1, &LNO_local_pool);
02248 old_ids[0] = old_outer_id;
02249 new_ids[0] = new_outer_id;
02250 for (INT i = 1; i < nloops + 1; i++) {
02251 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i-1));
02252 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i-1));
02253 }
02254 Prompf_Info->Post_Peel(old_ids, new_ids, nloops + 1);
02255 }
02256 }
02257
02258 scalar_rename(WN_start(new_loop));
02259 Get_Do_Loop_Info(new_loop)->Est_Num_Iterations= -1;
02260 Get_Do_Loop_Info(in_loop)->Est_Num_Iterations= -1;
02261
02262
02263
02264
02265
02266 WN_kid1(WN_end(in_loop))=
02267 LWN_CreateExp2(OPCODE_make_op(OPR_SUB, index_type, MTYPE_V),
02268 WN_kid1(WN_end(in_loop)),
02269 LWN_Make_Icon(index_type,iter_count*Step_Size(in_loop)));
02270
02271 if (abs(Step_Size(new_loop)==1)) {
02272 WN* loop_end=LWN_Copy_Tree(WN_kid1(WN_end(new_loop)),TRUE,LNO_Info_Map);
02273
02274 if (!Array_Dependence_Graph->
02275 Add_Deps_To_Copy_Block(WN_kid1(WN_end(new_loop)), loop_end, FALSE))
02276 LNO_Erase_Dg_From_Here_In(WN_kid1(WN_end(new_loop)),
02277 Array_Dependence_Graph);
02278
02279 LWN_Copy_Def_Use(WN_kid1(WN_end(new_loop)),loop_end, Du_Mgr);
02280 WN_kid0(WN_start(new_loop))=
02281 LWN_CreateExp2(
02282 OPCODE_make_op(OPR_MAX, index_type, MTYPE_V),
02283 WN_kid0(WN_start(new_loop)),
02284 LWN_CreateExp2(
02285 OPCODE_make_op(OPR_SUB, index_type, MTYPE_V),
02286 loop_end,
02287 LWN_Make_Icon(index_type,(iter_count-1)*Step_Size(new_loop))));
02288 } else {
02289 LWN_Update_Def_Use_Delete_Tree(WN_kid0(WN_start(new_loop)),Du_Mgr);
02290 LWN_Delete_Tree(WN_kid0(WN_start(new_loop)));
02291 WN* wn_orig = Find_Node(WN_index(in_loop), WN_end(in_loop));
02292 wn=LWN_Copy_Tree(wn_orig);
02293 LWN_Copy_Def_Use(wn_orig, wn, Du_Mgr);
02294 Du_Mgr->Ud_Get_Def(wn)->Set_loop_stmt(NULL);
02295 WN_kid0(WN_start(new_loop))=wn;
02296 }
02297
02298
02299
02300 DOLOOP_STACK* loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02301 &LNO_local_pool);
02302 Build_Doloop_Stack(LWN_Get_Parent(in_loop), loop_stack);
02303 LWN_Parentize(in_loop);
02304 LWN_Parentize(new_loop);
02305 LNO_Build_Do_Access(in_loop, loop_stack);
02306 LNO_Build_Access(new_loop, loop_stack, &LNO_default_pool);
02307
02308 if (!adg->Build_Region(in_loop,new_loop,loop_stack,TRUE,TRUE)) {
02309 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
02310 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop), adg);
02311 }
02312 } else {
02313
02314 WN *index_expr;
02315 WN **new_iter=CXX_NEW_ARRAY(WN*, iter_count+1, &LNO_local_pool);
02316
02317 new_iter[0]=WN_do_body(in_loop);
02318 BOOL out_of_edge=FALSE;
02319 INT i;
02320 for (i=1; i<=iter_count; i++) {
02321 new_iter[i] = LWN_Copy_Tree(WN_do_body(in_loop),TRUE,LNO_Info_Map);
02322 BOOL all_internal = WN_Rename_Duplicate_Labels(WN_do_body(in_loop),
02323 new_iter[i], Current_Func_Node, &LNO_local_pool);
02324 Is_True(all_internal, ("external labels renamed"));
02325 if (Good_Do_Depth(in_loop)>0 &&
02326 !adg->Add_Deps_To_Copy_Block(new_iter[0],new_iter[i], FALSE)) {
02327 out_of_edge=TRUE;
02328 LWN_Update_Dg_Delete_Tree(new_iter[i], adg);
02329 }
02330 }
02331
02332 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02333 STACK<WN*> st_old(&LNO_local_pool);
02334 STACK<WN*> st_new(&LNO_local_pool);
02335 for (INT i = 1; i <= iter_count; i++) {
02336 st_old.Clear();
02337 st_new.Clear();
02338 Prompf_Assign_Ids(new_iter[0], new_iter[i], &st_old, &st_new, FALSE);
02339 INT nloops = st_old.Elements();
02340 if (nloops > 0) {
02341 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
02342 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
02343 for (INT i = 0; i < nloops; i++) {
02344 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
02345 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
02346 }
02347 Prompf_Info->Post_Peel(old_ids, new_ids, nloops);
02348 CXX_DELETE_ARRAY(old_ids, &LNO_local_pool);
02349 CXX_DELETE_ARRAY(new_ids, &LNO_local_pool);
02350 }
02351 }
02352 }
02353
02354 if (out_of_edge) {
02355 DevWarn("Array dependence graph overflowed in Pre_loop_peeling()");
02356 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(in_loop),adg);
02357 }
02358 Unrolled_DU_Update(new_iter, iter_count+1, Do_Loop_Depth(in_loop));
02359 WN* wn_tmp=LWN_Get_Parent(in_loop);
02360 while (wn_tmp && WN_opcode(wn_tmp)!=OPC_DO_LOOP)
02361 wn_tmp=LWN_Get_Parent(wn_tmp);
02362 for (i=1; i<=iter_count; i++)
02363 Loop_Stmt_Update(new_iter[i],in_loop,wn_tmp);
02364
02365 SYMBOL kid0_symbol;
02366 SYMBOL kid1_symbol;
02367 if (WN_operator(WN_kid0(WN_end(in_loop)))==OPR_LDID)
02368 kid0_symbol.Init(WN_kid0(WN_end(in_loop)));
02369 if (WN_operator(WN_kid1(WN_end(in_loop)))==OPR_LDID)
02370 kid1_symbol.Init(WN_kid1(WN_end(in_loop)));
02371 OPERATOR opr=WN_operator(WN_end(in_loop));
02372
02373 if (opr==OPR_LT) {
02374 if (loop_index_symbol == kid0_symbol) {
02375 WN_set_opcode(WN_end(in_loop),
02376 OPCODE_make_op(OPR_LE, Boolean_type, index_type));
02377 opr=OPR_LE;
02378 WN_kid1(WN_end(in_loop)) = LWN_CreateExp2(
02379 OPCODE_make_op(OPR_SUB, index_type, MTYPE_V),
02380 WN_kid1(WN_end(in_loop)), LWN_Make_Icon(index_type, 1));
02381 } else if (loop_index_symbol == kid1_symbol) {
02382 WN_set_opcode(WN_end(in_loop),
02383 OPCODE_make_op(OPR_LE, Boolean_type, index_type));
02384 opr=OPR_LE;
02385 WN_kid0(WN_end(in_loop)) = LWN_CreateExp2(
02386 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
02387 WN_kid0(WN_end(in_loop)), LWN_Make_Icon(index_type, 1));
02388 }
02389 } else if (opr==OPR_GT) {
02390 if (loop_index_symbol == kid0_symbol) {
02391 WN_set_opcode(WN_end(in_loop),
02392 OPCODE_make_op(OPR_GE, Boolean_type, index_type));
02393 opr=OPR_GE;
02394 WN_kid1(WN_end(in_loop)) = LWN_CreateExp2(
02395 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
02396 WN_kid1(WN_end(in_loop)), LWN_Make_Icon(index_type, 1));
02397 } else if (loop_index_symbol == kid1_symbol) {
02398 WN_set_opcode(WN_end(in_loop),
02399 OPCODE_make_op(OPR_GE, Boolean_type, index_type));
02400 opr=OPR_GE;
02401 WN_kid0(WN_end(in_loop)) = LWN_CreateExp2(
02402 OPCODE_make_op(OPR_SUB, index_type, MTYPE_V),
02403 WN_kid0(WN_end(in_loop)), LWN_Make_Icon(index_type, 1));
02404 }
02405 }
02406
02407 if ((opr==OPR_GE || opr==OPR_LE) &&
02408 (loop_index_symbol == kid0_symbol || loop_index_symbol == kid1_symbol)) {
02409
02410
02411
02412 WN* UB;
02413 WN* LB=WN_kid0(WN_start(in_loop));
02414 WN* STEP=WN_kid1(WN_kid0(WN_step(in_loop)));
02415 WN* END=WN_end(in_loop);
02416
02417 if (kid0_symbol!=loop_index_symbol) {
02418 OPERATOR new_operator;
02419 switch(opr) {
02420 case OPR_GE: new_operator=OPR_LE; break;
02421 case OPR_LE: new_operator=OPR_GE; break;
02422 }
02423 WN_set_opcode(END,
02424 OPCODE_make_op(new_operator, Boolean_type, index_type));
02425 WN* temp_wn=WN_kid1(END);
02426 WN_kid1(END)=WN_kid0(END);
02427 WN_kid0(END)=temp_wn;
02428 }
02429 UB=WN_kid1(END);
02430
02431 if (WN_operator(STEP)==OPR_INTCONST &&
02432 WN_const_val(STEP)==1) {
02433
02434 wn=LWN_Make_Icon(Promote_Type(index_type), 0);
02435
02436 } else {
02437
02438
02439 WN* kid0=LWN_Copy_Tree(UB,TRUE,LNO_Info_Map);
02440 if (!Array_Dependence_Graph->
02441 Add_Deps_To_Copy_Block(UB, kid0, FALSE))
02442 LNO_Erase_Dg_From_Here_In(UB, Array_Dependence_Graph);
02443 LWN_Copy_Def_Use(UB, kid0, Du_Mgr);
02444
02445 WN* kid1=LWN_Copy_Tree(LB,TRUE,LNO_Info_Map);
02446 if (!Array_Dependence_Graph->
02447 Add_Deps_To_Copy_Block(LB, kid1, FALSE))
02448 LNO_Erase_Dg_From_Here_In(LB, Array_Dependence_Graph);
02449 LWN_Copy_Def_Use(LB, kid1, Du_Mgr);
02450
02451 wn = LWN_CreateExp2(
02452 OPCODE_make_op(OPR_SUB, index_type, MTYPE_V),
02453 kid0, kid1);
02454
02455
02456 wn = LWN_CreateExp1(
02457 OPCODE_make_op(OPR_ABS, index_type, MTYPE_V),
02458 wn);
02459
02460
02461 kid1=LWN_Copy_Tree(STEP,TRUE,LNO_Info_Map);
02462 if (!Array_Dependence_Graph->
02463 Add_Deps_To_Copy_Block(STEP, kid1, FALSE))
02464 LNO_Erase_Dg_From_Here_In(STEP, Array_Dependence_Graph);
02465
02466 LWN_Copy_Def_Use(STEP, kid1, Du_Mgr);
02467 wn = LWN_CreateExp2(
02468 OPCODE_make_op(OPR_MOD, index_type, MTYPE_V),
02469 wn, kid1);
02470 }
02471
02472
02473 WN* kid0=LWN_Copy_Tree(UB,TRUE,LNO_Info_Map);
02474 if (!Array_Dependence_Graph->
02475 Add_Deps_To_Copy_Block(UB, kid0, FALSE))
02476 LNO_Erase_Dg_From_Here_In(UB, Array_Dependence_Graph);
02477 LWN_Copy_Def_Use(UB, kid0, Du_Mgr);
02478 wn = LWN_CreateExp2(
02479 OPCODE_make_op(inverse_step_operator, Promote_Type(index_type),MTYPE_V),
02480 kid0, wn);
02481
02482 } else {
02483
02484
02485
02486
02487 WN* wn1=LWN_Copy_Tree(WN_kid0(WN_kid0(WN_step(in_loop))));
02488 WN* wn2=
02489 LWN_Copy_Tree(WN_kid1(WN_kid0(WN_step(in_loop))),TRUE,LNO_Info_Map);
02490 if (!Array_Dependence_Graph->
02491 Add_Deps_To_Copy_Block(WN_kid1(WN_kid0(WN_step(in_loop))),wn2,FALSE))
02492 LNO_Erase_Dg_From_Here_In(WN_kid1(WN_kid0(WN_step(in_loop))),
02493 Array_Dependence_Graph);
02494
02495 LWN_Copy_Def_Use(WN_kid0(WN_kid0(WN_step(in_loop))), wn1, Du_Mgr);
02496 LWN_Copy_Def_Use(WN_kid1(WN_kid0(WN_step(in_loop))), wn2, Du_Mgr);
02497 #ifndef KEY // bug 6239
02498 Du_Mgr->Ud_Get_Def(wn1)->Set_loop_stmt(NULL);
02499 #else
02500 if (WN_operator(wn1) != OPR_CVT)
02501 Du_Mgr->Ud_Get_Def(wn1)->Set_loop_stmt(NULL);
02502 else
02503 Du_Mgr->Ud_Get_Def(WN_kid0(wn1))->Set_loop_stmt(NULL);
02504 #endif
02505
02506
02507 wn = LWN_CreateExp2(
02508 OPCODE_make_op(OPR_ADD, index_type, MTYPE_V),
02509 wn1,
02510 LWN_CreateExp2(OPCODE_make_op(OPR_MPY, index_type, MTYPE_V),
02511 wn2,
02512 LWN_Make_Icon(index_type, iter_count-1))
02513 );
02514
02515 }
02516
02517 LWN_Adjust_Frequency_Tree(WN_end(in_loop), -iter_count);
02518 LWN_Adjust_Frequency_Tree(WN_step(in_loop), -iter_count);
02519
02520
02521 index_expr = wn;
02522 for (i=0; i<iter_count; i++) {
02523
02524
02525 WN* kid0=LWN_Copy_Tree(WN_kid0(WN_start(in_loop)),TRUE,LNO_Info_Map);
02526 if (!Array_Dependence_Graph->
02527 Add_Deps_To_Copy_Block(WN_kid0(WN_start(in_loop)),kid0,FALSE))
02528 LNO_Erase_Dg_From_Here_In(WN_kid0(WN_start(in_loop)),
02529 Array_Dependence_Graph);
02530 LWN_Copy_Def_Use(WN_kid0(WN_start(in_loop)), kid0, Du_Mgr);
02531 WN* kid1=LWN_Copy_Tree(index_expr,TRUE,LNO_Info_Map);
02532 if (!Array_Dependence_Graph->
02533 Add_Deps_To_Copy_Block(index_expr,kid1,FALSE))
02534 LNO_Erase_Dg_From_Here_In(index_expr,Array_Dependence_Graph);
02535 LWN_Copy_Def_Use(index_expr, kid1, Du_Mgr);
02536
02537 WN* guard =
02538 LWN_CreateExp2(
02539 WN_opcode(WN_end(in_loop)),
02540 kid0,
02541 kid1);
02542
02543 Replace_Ldid_With_Exp_Copy(
02544 loop_index_symbol,new_iter[i+1],index_expr,Du_Mgr);
02545
02546 WN* if_wn=LWN_CreateIf(
02547 guard,
02548 new_iter[i+1],
02549 WN_CreateBlock());
02550 LWN_Copy_Linenumber(in_loop, if_wn);
02551
02552 LWN_Copy_Frequency_Tree(if_wn, in_loop);
02553 LWN_Insert_Block_After(parent, in_loop, if_wn);
02554
02555 IF_INFO* if_info =
02556 CXX_NEW(IF_INFO(&LNO_default_pool,
02557 !Get_Do_Loop_Info(in_loop)->Is_Inner,
02558 Find_SCF_Inside(in_loop,OPC_REGION)!=NULL),
02559 &LNO_default_pool);
02560 WN_MAP_Set(LNO_Info_Map, if_wn, (void*)if_info);
02561
02562 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02563 &LNO_local_pool);
02564 Build_Doloop_Stack(LWN_Get_Parent(in_loop), loop_stack);
02565
02566 LNO_Build_If_Access(if_wn, loop_stack);
02567 COND_IF_INFO cond_result=COND_If_Info(if_wn, &LNO_local_pool);
02568
02569 if (cond_result==COND_IF_THEN_ONLY) {
02570
02571 LWN_Insert_Block_After(parent, in_loop, new_iter[i+1]);
02572 WN_then(if_wn)=WN_CreateBlock();
02573 LWN_Extract_From_Block(if_wn);
02574 LWN_Delete_Tree(if_wn);
02575
02576 } else if (cond_result==COND_IF_ELSE_ONLY) {
02577
02578 LWN_Delete_Tree(if_wn);
02579
02580
02581 }
02582
02583 kid1=LWN_Copy_Tree(WN_kid1(WN_kid0(WN_step(in_loop))),TRUE,LNO_Info_Map);
02584 if (!Array_Dependence_Graph->
02585 Add_Deps_To_Copy_Block(WN_kid1(WN_kid0(WN_step(in_loop))),kid1,FALSE))
02586 LNO_Erase_Dg_From_Here_In(WN_kid1(WN_kid0(WN_step(in_loop))),
02587 Array_Dependence_Graph);
02588
02589 LWN_Copy_Def_Use(WN_kid1(WN_kid0(WN_step(in_loop))), kid1, Du_Mgr);
02590
02591 index_expr = LWN_CreateExp2(
02592 OPCODE_make_op(inverse_step_operator, Promote_Type(index_type),MTYPE_V),
02593 index_expr,
02594 kid1);
02595
02596 }
02597
02598 LWN_Delete_Tree(index_expr);
02599
02600 Add_To_Symbol(WN_end(in_loop), iter_count, loop_index_symbol);
02601
02602
02603
02604 Upper_Bound_Standardize(WN_end(in_loop),TRUE);
02605
02606 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
02607 &LNO_local_pool);
02608
02609 Build_Doloop_Stack(LWN_Get_Parent(in_loop), loop_stack);
02610
02611 Get_Do_Loop_Info(in_loop)->Est_Num_Iterations= -1;
02612 LNO_Build_Do_Access(in_loop, loop_stack);
02613
02614 for (wn=WN_next(in_loop); wn!=next_stmt; wn=WN_next(wn)) {
02615 LNO_Build_Access(wn, loop_stack, &LNO_default_pool);
02616 Remark_Depth(wn, loop_stack->Elements());
02617 }
02618
02619 WN *end_stmt;
02620 if (next_stmt) {
02621 end_stmt = WN_prev(next_stmt);
02622 } else {
02623 WN* wnn = NULL;
02624 for (WN* wn = in_loop; wn != NULL; wn = WN_next(wn))
02625 wnn = wn;
02626 end_stmt = wnn;
02627 FmtAssert(end_stmt != NULL,
02628 ("Post_loop_peeling: Could not find last statement"));
02629 }
02630
02631 if (Enclosing_Do_Loop(LWN_Get_Parent(in_loop))) {
02632 adg->Build_Region(in_loop, end_stmt, loop_stack, TRUE, TRUE);
02633 } else {
02634 for (wn = in_loop; wn != WN_next(end_stmt); wn = WN_next(wn))
02635 Build_Regions(in_loop, wn, loop_stack);
02636
02637 }
02638 }
02639
02640 MEM_POOL_Pop(&LNO_local_pool);
02641
02642 SRCPOS srcpos=WN_Get_Linenum(in_loop);
02643 if (LNO_Verbose)
02644 post_peeling_verbose_info(srcpos,iter_count);
02645 if (LNO_Analysis)
02646 post_peeling_analysis_info(srcpos,iter_count);
02647 if (LNO_Tlog)
02648 post_peeling_tlog_info(in_loop,iter_count);
02649 }
02650
02651 extern BOOL Move_Adjacent(WN* stmt1, WN* stmt2) {
02652
02653 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
02654 WN* parent;
02655 if (stmt1 && stmt2) {
02656 if (WN_next(stmt1)==stmt2) return TRUE;
02657 parent=LWN_Get_Parent(stmt1);
02658 if (LWN_Get_Parent(stmt2)!=parent) {
02659 DevWarn("Input statements have different parents in Move_Adjacent()");
02660 return FALSE;
02661 }
02662 } else if (stmt1) {
02663 if (WN_next(stmt1)==stmt2) return TRUE;
02664 else if (!WN_next(stmt1)) return FALSE;
02665 parent=LWN_Get_Parent(stmt1);
02666 } else if (stmt2) {
02667 if (WN_prev(stmt2)==stmt1) return TRUE;
02668 else if (!WN_prev(stmt2)) return FALSE;
02669 parent=LWN_Get_Parent(stmt2);
02670 } else return FALSE;
02671
02672 WN* parent_loop=LWN_Get_Parent(parent);
02673 if (WN_opcode(parent_loop)!=OPC_DO_LOOP || !Do_Loop_Is_Good(parent_loop) ||
02674 Do_Loop_Has_Gotos(parent_loop)) {
02675
02676 return FALSE;
02677 }
02678
02679 MEM_POOL_Push(&FUSION_default_pool);
02680
02681 WN_MAP sdm=WN_MAP_Create(&FUSION_default_pool);
02682
02683 ARRAY_DIRECTED_GRAPH16* sdg=Build_Statement_Dependence_Graph(
02684 parent_loop, red_manager, adg, sdm,
02685 &FUSION_default_pool);
02686 Statement_Dependence_Graph = sdg;
02687 if (sdg==NULL) {
02688 DevWarn("Statement dependence graph problem");
02689 WN_MAP_Delete(sdm);
02690 MEM_POOL_Pop(&FUSION_default_pool);
02691 return FALSE;
02692 }
02693
02694 if (stmt1) {
02695 INT2PTR* up_hash_table=
02696 CXX_NEW(INT2PTR(50, &FUSION_default_pool),&FUSION_default_pool);
02697 up_hash_table->Enter((INT)sdg->Get_Vertex(stmt1), (void*)1);
02698 WN* next_stmt=NULL;
02699 WN* stmt = 0;
02700 for (stmt=WN_next(stmt1); stmt && stmt != stmt2;
02701 stmt=next_stmt) {
02702 next_stmt = WN_next(stmt);
02703 VINDEX16 stmt_v = sdg->Get_Vertex(stmt);
02704 if (stmt_v==0) {
02705
02706 CXX_DELETE(sdg,&FUSION_default_pool);
02707 WN_MAP_Delete(sdm);
02708 MEM_POOL_Pop(&FUSION_default_pool);
02709 return FALSE;
02710 }
02711
02712 EINDEX16 dep_e = sdg->Get_In_Edge(stmt_v);
02713 BOOL can_be_moved_up = TRUE;
02714 while (dep_e) {
02715 if (sdg->Get_Level_Property(dep_e,HAS_ALL_ZERO)) {
02716 VINDEX16 source_v = sdg->Get_Source(dep_e);
02717 if (up_hash_table->Find((INT)source_v)) {
02718 can_be_moved_up = FALSE;
02719 up_hash_table->Enter((INT)stmt_v, (void*)1);
02720 break;
02721 }
02722 }
02723 dep_e = sdg->Get_Next_In_Edge(dep_e);
02724 }
02725 #ifdef KEY
02726
02727 if (WN_operator(stmt) == OPR_PRAGMA &&
02728 WN_pragma(stmt) == WN_PRAGMA_BARRIER)
02729 can_be_moved_up = FALSE;
02730 #endif
02731 if (can_be_moved_up) {
02732 LWN_Insert_Block_Before(parent,stmt1,LWN_Extract_From_Block(stmt));
02733 }
02734 }
02735 Is_True(stmt==stmt2, ("Incorrect order of input in Move_Adjacent()\n"));
02736 if (stmt!=stmt2)
02737 return FALSE;
02738 }
02739
02740 if (stmt2) {
02741 INT2PTR* down_hash_table=
02742 CXX_NEW(INT2PTR(50, &FUSION_default_pool),&FUSION_default_pool);
02743 down_hash_table->Enter((INT)sdg->Get_Vertex(stmt2), (void*)1);
02744 WN* prev_stmt=NULL;
02745 WN* stmt = 0;
02746 for (stmt=WN_prev(stmt2); stmt && stmt != stmt1;
02747 stmt=prev_stmt) {
02748 prev_stmt = WN_prev(stmt);
02749 VINDEX16 stmt_v = sdg->Get_Vertex(stmt);
02750 if (stmt_v==0) {
02751
02752 CXX_DELETE(sdg,&FUSION_default_pool);
02753 WN_MAP_Delete(sdm);
02754 MEM_POOL_Pop(&FUSION_default_pool);
02755 return FALSE;
02756 }
02757 EINDEX16 dep_e = sdg->Get_Out_Edge(stmt_v);
02758 BOOL can_be_moved_down = TRUE;
02759 while (dep_e) {
02760 if (sdg->Get_Level_Property(dep_e,HAS_ALL_ZERO)) {
02761 VINDEX16 sink_v = sdg->Get_Sink(dep_e);
02762 if (down_hash_table->Find((INT)sink_v)) {
02763 can_be_moved_down = FALSE;
02764 down_hash_table->Enter((INT)stmt_v, (void*)1);
02765 break;
02766 }
02767 }
02768 dep_e = sdg->Get_Next_Out_Edge(dep_e);
02769 }
02770 #ifdef KEY
02771
02772 if (WN_operator(stmt) == OPR_PRAGMA &&
02773 WN_pragma(stmt) == WN_PRAGMA_BARRIER)
02774 can_be_moved_down = FALSE;
02775 #endif
02776 if (can_be_moved_down) {
02777 LWN_Insert_Block_After(parent,stmt2,LWN_Extract_From_Block(stmt));
02778 }
02779 }
02780 Is_True(stmt==stmt1, ("Incorrect order of input in Move_Adjacent()\n"));
02781 if (stmt!=stmt1)
02782 return FALSE;
02783 }
02784
02785 Statement_Dependence_Graph = NULL;
02786 CXX_DELETE(sdg,&FUSION_default_pool);
02787 WN_MAP_Delete(sdm);
02788 MEM_POOL_Pop(&FUSION_default_pool);
02789
02790 if (stmt1 && WN_next(stmt1)==stmt2)
02791 return TRUE;
02792 else if (stmt2 && WN_prev(stmt2)==stmt1)
02793 return TRUE;
02794 else
02795 return FALSE;
02796 }
02797
02798
02799
02800
02801
02802
02803
02804
02805
02806
02807
02808
02809
02810
02811
02812
02813
02814
02815
02816
02817
02818
02819
02820
02821
02822
02823
02824
02825
02826
02827
02828
02829
02830
02831
02832
02833
02834
02835
02836
02837
02838
02839
02840
02841
02842
02843
02844
02845
02846
02847
02848
02849
02850
02851
02852
02853
02854
02855
02856 static void Fusion_Du_Update(
02857 WN** loop_nest1,
02858 WN** loop_nest2,
02859 DU_MANAGER* Du_Mgr)
02860 {
02861 MEM_POOL_Push(&FUSION_default_pool);
02862
02863 {
02864
02865 REF_LIST_STACK *writes1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
02866 &FUSION_default_pool);
02867 REF_LIST_STACK *reads1 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
02868 &FUSION_default_pool);
02869 SCALAR_STACK *scalar_writes1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
02870 &FUSION_default_pool);
02871 SCALAR_STACK *scalar_reads1 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
02872 &FUSION_default_pool);
02873 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
02874 &FUSION_default_pool);
02875 Build_Doloop_Stack(loop_nest1[0], stack1);
02876
02877 REF_LIST_STACK *writes2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
02878 &FUSION_default_pool);
02879 REF_LIST_STACK *reads2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
02880 &FUSION_default_pool);
02881 SCALAR_STACK *scalar_writes2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
02882 &FUSION_default_pool);
02883 SCALAR_STACK *scalar_reads2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
02884 &FUSION_default_pool);
02885 SCALAR_REF_STACK *params1 =
02886 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
02887 SCALAR_REF_STACK *params2 =
02888 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
02889 DOLOOP_STACK *stack2=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
02890 &FUSION_default_pool);
02891 Build_Doloop_Stack(loop_nest2[0], stack2);
02892
02893
02894 Init_Ref_Stmt_Counter();
02895 WN* loop_body=WN_do_body(loop_nest1[0]);
02896 WN* wn = 0;
02897 for (wn=WN_first(loop_body); wn; wn=WN_next(wn)) {
02898 New_Gather_References(wn,writes1,reads1,stack1,
02899 scalar_writes1,scalar_reads1,
02900 params1,&FUSION_default_pool,
02901 Gather_Scalar_Refs | Gather_Params);
02902 }
02903
02904
02905 loop_body=WN_do_body(loop_nest2[0]);
02906 for (wn=WN_first(loop_body); wn; wn=WN_next(wn)) {
02907 New_Gather_References(wn,writes2,reads2,stack2,
02908 scalar_writes2,scalar_reads2,
02909 params2,&FUSION_default_pool,
02910 Gather_Scalar_Refs | Gather_Params);
02911 }
02912
02913
02914
02915
02916
02917
02918
02919
02920
02921 INT si;
02922 for (si=0; si<scalar_writes2->Elements(); si++) {
02923 SCALAR_NODE* sinode=scalar_writes2->Bottom_nth(si);
02924 WN* write=sinode->Bottom_nth(0)->Wn;
02925 for (INT sj=0; sj<scalar_reads1->Elements(); sj++) {
02926 SCALAR_NODE* sjnode=scalar_reads1->Bottom_nth(sj);
02927 WN* read=sjnode->Bottom_nth(0)->Wn;
02928 if (Aliased(Alias_Mgr,write,read)!=NOT_ALIASED) {
02929 for (INT sii=0; sii<sinode->Elements(); sii++) {
02930 write=sinode->Bottom_nth(sii)->Wn;
02931 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
02932 read=sjnode->Bottom_nth(sjj)->Wn;
02933 Du_Mgr->Delete_Def_Use(write,read);
02934
02935 Du_Mgr->Add_Def_Use(write,read);
02936 }
02937 }
02938 }
02939 }
02940 }
02941
02942
02943 for (si=0; si<scalar_writes1->Elements(); si++) {
02944 SCALAR_NODE* sinode=scalar_writes1->Bottom_nth(si);
02945 WN* write=sinode->Bottom_nth(0)->Wn;
02946 for (INT sj=0; sj<scalar_reads2->Elements(); sj++) {
02947 SCALAR_NODE* sjnode=scalar_reads2->Bottom_nth(sj);
02948 WN* read=sjnode->Bottom_nth(0)->Wn;
02949 if (Aliased(Alias_Mgr,write,read)!=NOT_ALIASED) {
02950 for (INT sii=0; sii<sinode->Elements(); sii++) {
02951 write=sinode->Bottom_nth(sii)->Wn;
02952 for (INT sjj=0; sjj<sjnode->Elements(); sjj++) {
02953 read=sjnode->Bottom_nth(sjj)->Wn;
02954 Du_Mgr->Delete_Def_Use(write,read);
02955
02956 Du_Mgr->Add_Def_Use(write,read);
02957 }
02958 }
02959 }
02960 }
02961 }
02962 INT sj;
02963 for (sj=0; sj<params1->Elements(); sj++) {
02964 SCALAR_REF sjnode=params1->Bottom_nth(sj);
02965 WN* param=sjnode.Wn;
02966 INT si;
02967 for (si=0; si<scalar_writes1->Elements(); si++) {
02968 SCALAR_NODE* sinode=scalar_writes1->Bottom_nth(si);
02969 WN* write=sinode->Bottom_nth(0)->Wn;
02970 if (Aliased(Alias_Mgr,param,write)!=NOT_ALIASED) {
02971 for (INT sii=0; sii<sinode->Elements(); sii++) {
02972 write=sinode->Bottom_nth(sii)->Wn;
02973 Du_Mgr->Delete_Def_Use(write,param);
02974
02975 Du_Mgr->Add_Def_Use(write,param);
02976 }
02977 }
02978 }
02979 for (si=0; si<scalar_writes2->Elements(); si++) {
02980 SCALAR_NODE* sinode=scalar_writes2->Bottom_nth(si);
02981 WN* write=sinode->Bottom_nth(0)->Wn;
02982 if (Aliased(Alias_Mgr,param,write)!=NOT_ALIASED) {
02983 for (INT sii=0; sii<sinode->Elements(); sii++) {
02984 write=sinode->Bottom_nth(sii)->Wn;
02985 Du_Mgr->Delete_Def_Use(write,param);
02986
02987 Du_Mgr->Add_Def_Use(write,param);
02988 }
02989 }
02990 }
02991 for (si=0; si<scalar_reads1->Elements(); si++) {
02992 SCALAR_NODE* sinode=scalar_reads1->Bottom_nth(si);
02993 WN* read=sinode->Bottom_nth(0)->Wn;
02994 if (Aliased(Alias_Mgr,param,read)!=NOT_ALIASED) {
02995 for (INT sii=0; sii<sinode->Elements(); sii++) {
02996 read=sinode->Bottom_nth(sii)->Wn;
02997 Du_Mgr->Delete_Def_Use(LWN_Get_Parent(param),read);
02998
02999 Du_Mgr->Add_Def_Use(LWN_Get_Parent(param),read);
03000 }
03001 }
03002 }
03003 for (si=0; si<scalar_reads2->Elements(); si++) {
03004 SCALAR_NODE* sinode=scalar_reads2->Bottom_nth(si);
03005 WN* read=sinode->Bottom_nth(0)->Wn;
03006 if (Aliased(Alias_Mgr,param,read)!=NOT_ALIASED) {
03007 for (INT sii=0; sii<sinode->Elements(); sii++) {
03008 read=sinode->Bottom_nth(sii)->Wn;
03009 Du_Mgr->Delete_Def_Use(LWN_Get_Parent(param),read);
03010
03011 Du_Mgr->Add_Def_Use(LWN_Get_Parent(param),read);
03012 }
03013 }
03014 }
03015 }
03016
03017 }
03018
03019 MEM_POOL_Pop(&FUSION_default_pool);
03020 }
03021
03022
03023
03024
03025
03026
03027 static void Fusion_Loop_Stmt_Update(
03028 WN** loop_nest1,
03029 WN** loop_nest2,
03030 UINT fusion_level,
03031 DU_MANAGER* Du_Mgr)
03032 {
03033 MEM_POOL_Push(&FUSION_default_pool);
03034
03035 {
03036
03037 REF_LIST_STACK *writes2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
03038 &FUSION_default_pool);
03039 REF_LIST_STACK *reads2 = CXX_NEW(REF_LIST_STACK(&FUSION_default_pool),
03040 &FUSION_default_pool);
03041 SCALAR_STACK *scalar_writes2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
03042 &FUSION_default_pool);
03043 SCALAR_STACK *scalar_reads2 = CXX_NEW(SCALAR_STACK(&FUSION_default_pool),
03044 &FUSION_default_pool);
03045 SCALAR_REF_STACK *params2 =
03046 CXX_NEW(SCALAR_REF_STACK(&FUSION_default_pool), &FUSION_default_pool);
03047 DOLOOP_STACK *stack2=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
03048 &FUSION_default_pool);
03049 Build_Doloop_Stack(loop_nest2[0], stack2);
03050
03051
03052 Init_Ref_Stmt_Counter();
03053 WN* loop_body=WN_do_body(loop_nest2[0]);
03054 for (WN* wn=WN_first(loop_body); wn; wn=WN_next(wn)) {
03055 New_Gather_References(wn,writes2,reads2,stack2,
03056 scalar_writes2,scalar_reads2,
03057 params2,&FUSION_default_pool,Gather_Scalar_Refs);
03058 }
03059
03060 SYMBOL *loop2_var_symbol=
03061 CXX_NEW_ARRAY(SYMBOL, fusion_level, &FUSION_default_pool);
03062 for (INT i=0; i<fusion_level; i++)
03063 loop2_var_symbol[i].Init(WN_index(loop_nest2[i]));
03064
03065 INT si;
03066 for (si=0; si<scalar_reads2->Elements(); si++) {
03067 SCALAR_NODE* snode=scalar_reads2->Bottom_nth(si);
03068 SYMBOL read_symbol(snode->Bottom_nth(0)->Wn);
03069
03070
03071 INT i;
03072 for (i=0; i<fusion_level; i++)
03073 if (loop2_var_symbol[i]==read_symbol)
03074 break;
03075 if (i!=fusion_level)
03076 continue;
03077
03078 for (INT sj=0; sj<snode->Elements(); sj++) {
03079 WN* read=snode->Bottom_nth(sj)->Wn;
03080 DEF_LIST* def_list=Du_Mgr->Ud_Get_Def(read);
03081 WN* old_loop_stmt=def_list->Loop_stmt();
03082
03083
03084
03085 for (i=0; i<fusion_level; i++)
03086 if (old_loop_stmt==loop_nest2[i])
03087 break;
03088
03089 if (i==fusion_level)
03090 continue;
03091 else if (i!=0)
03092 FmtAssert(0, ("Strange Loop_Stmt for use in the fused loop."));
03093
03094
03095
03096
03097
03098
03099
03100
03101
03102
03103
03104
03105 else
03106 def_list->Set_loop_stmt(loop_nest1[0]);
03107 }
03108 }
03109 }
03110
03111 MEM_POOL_Pop(&FUSION_default_pool);
03112 }
03113
03114 static void Prompf_Record_Eliminations(WN* wn_tree)
03115 {
03116 if (wn_tree == NULL)
03117 return;
03118 INT map_id = WN_MAP32_Get(Prompf_Id_Map, wn_tree);
03119 if (WN_opcode(wn_tree) == OPC_DO_LOOP && map_id != 0) {
03120 Prompf_Info->Elimination(map_id);
03121 WN_MAP32_Set(Prompf_Id_Map, wn_tree, 0);
03122 }
03123 if (WN_operator(wn_tree) == OPR_BLOCK) {
03124 for (WN* wn = WN_first(wn_tree); wn != NULL; wn = WN_next(wn))
03125 Prompf_Record_Eliminations(wn);
03126 } else {
03127 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
03128 Prompf_Record_Eliminations(WN_kid(wn_tree, i));
03129 }
03130 }
03131
03132
03133
03134
03135
03136
03137
03138
03139
03140
03141
03142
03143
03144
03145
03146
03147
03148
03149 extern FISSION_FUSION_STATUS
03150 Fuse(WN* in_loop1, WN* in_loop2, mUINT8 fusion_level,
03151 UINT32 threshold, BOOL peeling_unrolled,
03152 UINT64 *prolog_out, UINT64 *epilog_out,
03153 WN** epilog_loop_out, mINT32 offset_out[])
03154 {
03155 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
03156 WN* loop_nest1[LNO_MAX_DO_LOOP_DEPTH];
03157 WN* loop_nest2[LNO_MAX_DO_LOOP_DEPTH];
03158 INT32 i;
03159
03160 char loop1_var_name[80];
03161 char loop2_var_name[80];
03162 if (strlen(ST_name(WN_st(WN_index(in_loop1))))>=80) {
03163 DevWarn("Loop var name %s too long",ST_name(WN_st(WN_index(in_loop1))));
03164 strcpy(loop1_var_name,"name_too_long");
03165 } else
03166 strcpy(loop1_var_name,ST_name(WN_st(WN_index(in_loop1))));
03167 if (strlen(ST_name(WN_st(WN_index(in_loop2))))>=80) {
03168 DevWarn("Loop var name %s too long",ST_name(WN_st(WN_index(in_loop2))));
03169 strcpy(loop2_var_name,"name_too_long");
03170 } else
03171 strcpy(loop2_var_name,ST_name(WN_st(WN_index(in_loop2))));
03172
03173 SRCPOS srcpos1=WN_Get_Linenum(in_loop1);
03174 SRCPOS srcpos2=WN_Get_Linenum(in_loop2);
03175
03176 FmtAssert(WN_opcode(in_loop1)==OPC_DO_LOOP,
03177 ("non-loop input node in Fuse()\n") );
03178 FmtAssert(WN_opcode(in_loop2)==OPC_DO_LOOP,
03179 ("non-loop input node in Fuse()\n") );
03180 FmtAssert(threshold < MAX_INT32,
03181 ("Alignment offset threshold too large (%d)\n", threshold) );
03182
03183 DO_LOOP_INFO *dli1=Get_Do_Loop_Info(in_loop1);
03184 DO_LOOP_INFO *dli2=Get_Do_Loop_Info(in_loop2);
03185
03186
03187
03188
03189 TYPE_ID ty_index1 = WN_desc(WN_start(in_loop1));
03190 TYPE_ID ty_index2 = WN_desc(WN_start(in_loop2));
03191 if ( ty_index1 != ty_index2 ) {
03192 if (LNO_Verbose)
03193 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03194 "(pv 597324 temp fix) loop index types differ, don't fuse");
03195 if (LNO_Analysis)
03196 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03197 "(pv 597324 temp fix) loop index types differ, don't fuse");
03198 if (LNO_Tlog)
03199 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03200 "(pv 597324 temp fix) loop index types differ, don't fuse");
03201 return Failed;
03202 }
03203
03204 if (dli1->No_Fusion || dli2->No_Fusion
03205 || !Cannot_Concurrentize(in_loop1) && Cannot_Concurrentize(in_loop2)
03206 || Cannot_Concurrentize(in_loop1) && !Cannot_Concurrentize(in_loop2)) {
03207 if (LNO_Verbose)
03208 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03209 "Loops with fission/fusion pragmas cannot be fused.");
03210 if (LNO_Analysis)
03211 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03212 "Loops with fission/fusion pragmas cannot be fused.");
03213 if (LNO_Tlog)
03214 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03215 "Loops with fission/fusion pragmas cannot be fused.");
03216 return Failed;
03217 }
03218
03219
03220
03221
03222
03223
03224
03225
03226
03227
03228
03229
03230
03231
03232
03233
03234 if (!Do_Loop_Is_Good(in_loop1) || !Do_Loop_Is_Good(in_loop2) ||
03235 Do_Loop_Has_Calls(in_loop1) || Do_Loop_Has_Calls(in_loop2)||
03236 Do_Loop_Has_Gotos(in_loop1) || Do_Loop_Has_Gotos(in_loop2)) {
03237 if (LNO_Verbose)
03238 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03239 "Loops with calls, exits, or gotos cannot be fused.");
03240 if (LNO_Analysis)
03241 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03242 "Loops with calls, exits, or gotos cannot be fused.");
03243 if (LNO_Tlog)
03244 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03245 "Loops with calls, exits, or gotos cannot be fused.");
03246 return Failed;
03247 }
03248
03249
03250
03251 if (WN_next(in_loop1)!=in_loop2) {
03252 if (LNO_Verbose)
03253 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03254 "Loops to be fused are not adjacent.");
03255 if (LNO_Analysis)
03256 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03257 "Loops to be fused are not adjacent.");
03258 if (LNO_Tlog)
03259 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03260 "Loops to be fused are not adjacent.");
03261 return Failed;
03262 }
03263
03264 FmtAssert(fusion_level>0, ("Illegal level number."));
03265
03266 if (Upper_Bound_Standardize(WN_end(in_loop1),TRUE)==FALSE) {
03267 if (LNO_Verbose)
03268 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03269 "Upper bound of loop1 is too complicated.");
03270 if (LNO_Analysis)
03271 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03272 "Upper bound of loop1 is too complicated.");
03273 if (LNO_Tlog)
03274 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03275 "Upper bound of loop1 is too complicated.");
03276 return Failed;
03277 }
03278 if (loop_var_is_live_on_exit(in_loop1))
03279 Finalize_Index_Variable(in_loop1,FALSE);
03280 scalar_rename(WN_start(in_loop1));
03281
03282 if (Upper_Bound_Standardize(WN_end(in_loop2),TRUE)==FALSE) {
03283 if (LNO_Verbose)
03284 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03285 "Upper bound of loop2 is too complicated.");
03286 if (LNO_Analysis)
03287 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03288 "Upper bound of loop2 is too complicated.");
03289 if (LNO_Tlog)
03290 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03291 "Upper bound of loop2 is too complicated.");
03292 return Failed;
03293 }
03294 if (loop_var_is_live_on_exit(in_loop2)) {
03295 Finalize_Index_Variable(in_loop2,TRUE);
03296 scalar_rename(WN_start(in_loop2));
03297 }
03298
03299
03300 UINT8 inner_most_level = fusion_level - 1;
03301 loop_nest1[0] = in_loop1;
03302 loop_nest2[0] = in_loop2;
03303 if ( Do_Loop_Is_Backward(loop_nest1[0]) &&
03304 !Do_Loop_Is_Backward(loop_nest2[0]))
03305 if (RV_Is_Legal(loop_nest1[0]))
03306 RV_Reverse_Loop(loop_nest1[0]);
03307 else if (RV_Is_Legal(loop_nest2[0]))
03308 RV_Reverse_Loop(loop_nest2[0]);
03309 else if (!Do_Loop_Is_Backward(loop_nest1[0]) &&
03310 Do_Loop_Is_Backward(loop_nest2[0]))
03311 if (RV_Is_Legal(loop_nest2[0]))
03312 RV_Reverse_Loop(loop_nest2[0]);
03313 else if (RV_Is_Legal(loop_nest1[0]))
03314 RV_Reverse_Loop(loop_nest1[0]);
03315
03316
03317
03318 if (WN_next(in_loop1)!=in_loop2)
03319 if (Good_Do_Depth(in_loop1)>0)
03320 if (Move_Adjacent(in_loop1, in_loop2)==FALSE) {
03321 if (LNO_Verbose)
03322 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03323 "Cannot move statement in between after reversal");
03324 if (LNO_Analysis)
03325 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03326 "Cannot move statement in between after reversal");
03327 if (LNO_Tlog)
03328 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03329 "Cannot move statement in between after reversal");
03330 return Failed;
03331 }
03332 else;
03333 else {
03334 if (LNO_Verbose)
03335 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03336 "Cannot move statement in between after reversal");
03337 if (LNO_Analysis)
03338 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03339 "Cannot move statement in between after reversal");
03340 if (LNO_Tlog)
03341 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03342 "Cannot move statement in between after reversal");
03343 return Failed;
03344 }
03345
03346 for (i=1; i<fusion_level; i++) {
03347 WN* lwn= Get_Only_Loop_Inside(loop_nest1[i-1],FALSE);
03348 Is_True(lwn, ("Non-simply nested loops or not enough level encountered."));
03349 loop_nest1[i] = lwn;
03350 lwn= Get_Only_Loop_Inside(loop_nest2[i-1],FALSE);
03351 Is_True(lwn, ("Non-simply nested loops or not enough level encountered."));
03352 loop_nest2[i] = lwn;
03353
03354 dli1=Get_Do_Loop_Info(loop_nest1[i]);
03355 dli2=Get_Do_Loop_Info(loop_nest2[i]);
03356
03357 if (dli1->No_Fusion || dli2->No_Fusion
03358 || !Cannot_Concurrentize(loop_nest1[i])
03359 && Cannot_Concurrentize(loop_nest2[i])
03360 || Cannot_Concurrentize(loop_nest1[i])
03361 && !Cannot_Concurrentize(loop_nest2[i])) {
03362 if (LNO_Verbose)
03363 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03364 "Loops with fission/fusion pragmas cannot be fused.");
03365 if (LNO_Analysis)
03366 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03367 "Loops with fission/fusion pragmas cannot be fused.");
03368 if (LNO_Tlog)
03369 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03370 "Loops with fission/fusion pragmas cannot be fused.");
03371 return Failed;
03372 }
03373
03374
03375
03376
03377
03378
03379
03380
03381
03382
03383
03384
03385
03386
03387
03388
03389 if ( Do_Loop_Is_Backward(loop_nest1[i]) &&
03390 !Do_Loop_Is_Backward(loop_nest2[i]))
03391 if (RV_Is_Legal(loop_nest1[i]))
03392 RV_Reverse_Loop(loop_nest1[i]);
03393 else if (RV_Is_Legal(loop_nest2[i]))
03394 RV_Reverse_Loop(loop_nest2[i]);
03395 else if (!Do_Loop_Is_Backward(loop_nest1[i]) &&
03396 Do_Loop_Is_Backward(loop_nest2[i]))
03397 if (RV_Is_Legal(loop_nest2[i]))
03398 RV_Reverse_Loop(loop_nest2[i]);
03399 else if (RV_Is_Legal(loop_nest1[i]))
03400 RV_Reverse_Loop(loop_nest1[i]);
03401
03402 if (Upper_Bound_Standardize(WN_end(loop_nest1[i]),TRUE)==FALSE) {
03403 if (LNO_Verbose)
03404 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03405 "Upper bound of loop1 is too complicated.");
03406 if (LNO_Analysis)
03407 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03408 "Upper bound of loop1 is too complicated.");
03409 if (LNO_Tlog)
03410 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03411 "Upper bound of loop1 is too complicated.");
03412 return Failed;
03413 }
03414 if (loop_var_is_live_on_exit(loop_nest1[i]))
03415 Finalize_Index_Variable(loop_nest1[i],FALSE);
03416 scalar_rename(WN_start(loop_nest1[i]));
03417
03418 if (Upper_Bound_Standardize(WN_end(loop_nest2[i]),TRUE)==FALSE) {
03419 if (LNO_Verbose)
03420 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03421 "Upper bound of loop2 is too complicated.");
03422 if (LNO_Analysis)
03423 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03424 "Upper bound of loop2 is too complicated.");
03425 if (LNO_Tlog)
03426 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03427 "Upper bound of loop2 is too complicated.");
03428 return Failed;
03429 }
03430 if (loop_var_is_live_on_exit(loop_nest2[i])) {
03431 Finalize_Index_Variable(loop_nest2[i],TRUE);
03432 scalar_rename(WN_start(loop_nest2[i]));
03433 }
03434
03435 lwn= loop_nest1[i];
03436 if (Move_Adjacent(lwn, NULL)==FALSE) {
03437 if (LNO_Verbose)
03438 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03439 "Statements after the first loop forbid fusion.");
03440 if (LNO_Analysis)
03441 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03442 "Statements after the first loop forbid fusion.");
03443 if (LNO_Tlog)
03444 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03445 "Statements after the first loop forbid fusion.");
03446 if (LNO_Test_Dump) {
03447 Dump_WN(WN_next(lwn), stdout, TRUE, 4, 4);
03448 }
03449 return Try_Level_By_Level;
03450 }
03451
03452 lwn= loop_nest2[i];
03453 if (Move_Adjacent(NULL, lwn)==FALSE) {
03454 if (LNO_Verbose)
03455 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03456 "Statements before the second loop forbid fusion.");
03457 if (LNO_Analysis)
03458 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03459 "Statements before the second loop forbid fusion.");
03460 if (LNO_Tlog)
03461 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03462 "Statements before the second loop forbid fusion.");
03463 if (LNO_Test_Dump) {
03464 Dump_WN(WN_prev(lwn), stdout, TRUE, 4, 4);
03465 }
03466 return Try_Level_By_Level;
03467 }
03468
03469 }
03470
03471
03472 UINT64 prolog = 0;
03473 UINT64 epilog = 0;
03474 WN* epilog_loop = NULL;
03475 mINT32 *offset=CXX_NEW_ARRAY(mINT32, fusion_level, &FUSION_default_pool);
03476
03477 if (!offset_out || (offset_out)[inner_most_level]==MAX_INT32) {
03478
03479
03480
03481 FISSION_FUSION_STATUS status=
03482 Fuse_Test(in_loop1, in_loop2, fusion_level, threshold, &prolog, &epilog,
03483 &epilog_loop, offset);
03484
03485 if (status!=Succeeded)
03486 return status;
03487
03488 if (prolog > threshold) {
03489 if (prolog<MAX_INT32) {
03490 if (LNO_Verbose)
03491 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03492 "Failed because of too many pre-peeled iterations required after fusion.");
03493 if (LNO_Analysis)
03494 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03495 "Failed because of too many pre-peeled iterations required after fusion.");
03496 if (LNO_Tlog)
03497 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03498 "Failed because of too many pre-peeled iterations required after fusion.");
03499 return (Failed);
03500 } else
03501 return Try_Level_By_Level;
03502 }
03503
03504 if (epilog > threshold) {
03505 if (epilog<MAX_INT32) {
03506 if (LNO_Verbose)
03507 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03508 "Failed because of too many post-peeled iterations required after fusion.");
03509 if (LNO_Analysis)
03510 fusion_analysis_info(FALSE,srcpos1,srcpos2,fusion_level,
03511 "Failed because of too many post-peeled iterations required after fusion.");
03512 if (LNO_Tlog)
03513 fusion_tlog_info(Failed,in_loop1,in_loop2,fusion_level,
03514 "Failed because of too many post-peeled iterations required after fusion.");
03515 return (Failed);
03516 } else
03517 return Try_Level_By_Level;
03518 }
03519
03520 if (prolog_out) *prolog_out = prolog;
03521 if (epilog_out) *epilog_out = epilog;
03522 if (epilog_loop_out) *epilog_loop_out = epilog_loop;
03523
03524 if (offset_out)
03525 for (i=0; i<fusion_level; i++)
03526 (offset_out)[i] = offset[i];
03527
03528 } else {
03529
03530
03531 offset = CXX_NEW_ARRAY(mINT32, fusion_level, &FUSION_default_pool);
03532 for (i=0; i<fusion_level; i++)
03533 offset[i] = (offset_out)[i];
03534 prolog = *prolog_out;
03535 epilog = *epilog_out;
03536 epilog_loop = *epilog_loop_out;
03537
03538 }
03539
03540 SYMBOL loop1_var_symbol[LNO_MAX_DO_LOOP_DEPTH];
03541 SYMBOL loop2_var_symbol[LNO_MAX_DO_LOOP_DEPTH];
03542 SYMBOL new_loop_var_symbol[LNO_MAX_DO_LOOP_DEPTH];
03543 SYMBOL pre_loop_var_symbol[LNO_MAX_DO_LOOP_DEPTH];
03544
03545 WN* wn;
03546
03547 TYPE_ID index_type;
03548 OPERATOR step_operator;
03549
03550 for (i=0; i<fusion_level; i++) {
03551
03552
03553 loop1_var_symbol[i].Init(WN_start(loop_nest1[i]));
03554 loop2_var_symbol[i].Init(WN_start(loop_nest2[i]));
03555
03556 index_type=WN_desc(WN_start(loop_nest1[i]));
03557 step_operator =
03558 WN_operator(WN_kid0(WN_step(loop_nest1[i])));
03559
03560 char new_loop_var_name[80];
03561
03562 ST *st;
03563 const char* name;
03564 st=WN_st(WN_start(loop_nest1[i]));
03565
03566 if (ST_class(st) == CLASS_PREG)
03567 name=Preg_Name(WN_offset(WN_start(loop_nest1[i])));
03568 else
03569 name=ST_name(st);
03570
03571
03572 if (strlen(name)>=70) {
03573 DevWarn("Loop var %s name too long",name);
03574 strcpy(new_loop_var_name,"name_too_long");
03575 } else
03576 sprintf(new_loop_var_name, "_%s_%d", name, New_Name_Count++);
03577 new_loop_var_symbol[i] = Create_Preg_Symbol(new_loop_var_name, index_type);
03578
03579
03580 char pre_loop_var_name[80];
03581 #ifdef KEY
03582
03583
03584
03585 if (ST_class(st) == CLASS_PREG)
03586 name=Preg_Name(WN_offset(WN_start(loop_nest1[i])));
03587 else
03588 name=ST_name(st);
03589 #endif
03590 if (strlen(name)>=70) {
03591 DevWarn("Loop var %s name too long",name);
03592 strcpy(pre_loop_var_name,"name_too_long");
03593 } else
03594 sprintf(pre_loop_var_name, "_i%d_tmp", Do_Loop_Depth(loop_nest1[i])+1);
03595 pre_loop_var_symbol[i]=Create_Preg_Symbol(pre_loop_var_name, index_type);
03596
03597 WN* start2=WN_start(loop_nest2[i]);
03598 WN* step2=WN_step(loop_nest2[i]);
03599
03600 WN* alias1 = NULL;
03601
03602 Replace_Symbol(WN_index(loop_nest1[i]),
03603 loop1_var_symbol[i], new_loop_var_symbol[i], alias1);
03604 Replace_Symbol(WN_start(loop_nest1[i]),
03605 loop1_var_symbol[i], new_loop_var_symbol[i], alias1);
03606 Replace_Symbol(WN_end(loop_nest1[i]),
03607 loop1_var_symbol[i], new_loop_var_symbol[i], alias1);
03608 Replace_Symbol(WN_step(loop_nest1[i]),
03609 loop1_var_symbol[i], new_loop_var_symbol[i], alias1);
03610
03611 Replace_Symbol(WN_do_body(loop_nest1[i]),
03612 loop1_var_symbol[i], new_loop_var_symbol[i], alias1);
03613
03614 WN* alias2 = NULL;
03615
03616 Replace_Symbol(WN_index(loop_nest2[i]),
03617 loop2_var_symbol[i], new_loop_var_symbol[i], alias2);
03618 Replace_Symbol(WN_start(loop_nest2[i]),
03619 loop2_var_symbol[i], new_loop_var_symbol[i], alias2);
03620 Replace_Symbol(WN_end(loop_nest2[i]),
03621 loop2_var_symbol[i], new_loop_var_symbol[i], alias2);
03622 Replace_Symbol(WN_step(loop_nest2[i]),
03623 loop2_var_symbol[i], new_loop_var_symbol[i], alias2);
03624
03625 OPERATOR inverse_step_operator;
03626 if (step_operator==OPR_ADD)
03627 inverse_step_operator=OPR_SUB;
03628 else
03629 inverse_step_operator=OPR_ADD;
03630
03631 WN* ldid_wn;
03632
03633 WN* tmp_wn=LWN_Copy_Tree(WN_kid0(start2),TRUE,LNO_Info_Map);
03634 if (!Array_Dependence_Graph->
03635 Add_Deps_To_Copy_Block(WN_kid0(start2),tmp_wn,FALSE))
03636 LNO_Erase_Dg_From_Here_In(WN_kid0(start2), Array_Dependence_Graph);
03637
03638 LWN_Copy_Def_Use(WN_kid0(start2), tmp_wn, Du_Mgr);
03639 WN* new_start2=LWN_CreateStid(
03640 WN_opcode(start2),
03641 WN_start(loop_nest1[i]),
03642 tmp_wn);
03643 LWN_Copy_Linenumber(loop_nest2[i], new_start2);
03644 LWN_Copy_Frequency_Tree(new_start2, start2);
03645
03646 tmp_wn=LWN_Copy_Tree(WN_kid0(step2),TRUE,LNO_Info_Map);
03647 if (!Array_Dependence_Graph->
03648 Add_Deps_To_Copy_Block(WN_kid0(step2),tmp_wn,FALSE))
03649 LNO_Erase_Dg_From_Here_In(WN_kid0(step2), Array_Dependence_Graph);
03650
03651 LWN_Copy_Def_Use(WN_kid0(step2), tmp_wn, Du_Mgr);
03652 WN* new_step2=LWN_CreateStid(
03653 WN_opcode(step2),
03654 WN_start(loop_nest1[i]),
03655 tmp_wn);
03656 LWN_Copy_Linenumber(loop_nest2[i], new_step2);
03657 LWN_Copy_Frequency_Tree(new_step2, step2);
03658
03659 ldid_wn=LWN_CreateLdid(
03660 OPCODE_make_op(OPR_LDID, Promote_Type(index_type), index_type),
03661 new_start2);
03662 LWN_Copy_Frequency(ldid_wn, new_start2);
03663
03664 Du_Mgr->Add_Def_Use(new_start2,ldid_wn);
03665 Du_Mgr->Add_Def_Use(new_step2,ldid_wn);
03666 Du_Mgr->Ud_Get_Def(ldid_wn)->Set_loop_stmt(loop_nest1[i]);
03667
03668 Replace_Ldid_With_Exp_Copy(
03669 new_loop_var_symbol[i],
03670 new_step2,
03671 ldid_wn,
03672 Du_Mgr);
03673
03674
03675
03676
03677 wn = LWN_CreateExp2(
03678 OPCODE_make_op(inverse_step_operator, Promote_Type(index_type), MTYPE_V),
03679 ldid_wn,
03680 LWN_Make_Icon(Promote_Type(index_type), offset[i]));
03681 LWN_Copy_Frequency_Tree(wn, ldid_wn);
03682
03683
03684 WN_kid0(new_start2)=
03685 LWN_CreateExp2(OPCODE_make_op(step_operator,
03686 Promote_Type(index_type), MTYPE_V), WN_kid0(new_start2),
03687 LWN_Make_Icon(Promote_Type(index_type), offset[i]));
03688
03689 LWN_Copy_Frequency_Tree(WN_kid0(new_start2), new_start2);
03690
03691 WN* old_end2=WN_end(loop_nest2[i]);
03692 SYMBOL kid0_symbol;
03693 SYMBOL kid1_symbol;
03694 if (WN_operator(WN_kid0(old_end2))==OPR_LDID)
03695 kid0_symbol.Init(WN_kid0(old_end2));
03696 if (WN_operator(WN_kid1(old_end2))==OPR_LDID)
03697 kid1_symbol.Init(WN_kid1(old_end2));
03698
03699 if (kid0_symbol==new_loop_var_symbol[i]) {
03700 WN_kid1(old_end2)=
03701 LWN_CreateExp2(OPCODE_make_op(step_operator,
03702 Promote_Type(index_type), MTYPE_V), WN_kid1(old_end2),
03703 LWN_Make_Icon(Promote_Type(index_type), offset[i]));
03704 LWN_Copy_Frequency_Tree(WN_kid1(old_end2), old_end2);
03705
03706 } else if (kid1_symbol==new_loop_var_symbol[i]) {
03707 WN_kid0(old_end2)=
03708 LWN_CreateExp2(OPCODE_make_op(step_operator,
03709 Promote_Type(index_type), MTYPE_V),
03710 WN_kid0(old_end2),
03711 LWN_Make_Icon(Promote_Type(index_type), offset[i]));
03712 LWN_Copy_Frequency_Tree(WN_kid0(old_end2), old_end2);
03713
03714 } else {
03715 Replace_Ldid_With_Exp_Copy(
03716 new_loop_var_symbol[i],
03717 old_end2,
03718 wn,
03719 Du_Mgr);
03720 }
03721
03722 Replace_Ldid_With_Exp_Copy(
03723 new_loop_var_symbol[i],
03724 old_end2,
03725 ldid_wn,
03726 Du_Mgr);
03727
03728 Replace_Ldid_With_Exp_Copy(
03729 loop2_var_symbol[i],
03730 WN_do_body(loop_nest2[i]),
03731 wn,
03732 Du_Mgr);
03733
03734 LWN_Delete_Tree(wn);
03735
03736
03737
03738 LWN_Delete_Tree(start2);
03739 LWN_Delete_Tree(step2);
03740
03741 WN_start(loop_nest2[i])=new_start2;
03742 LWN_Set_Parent(new_start2,loop_nest2[i]);
03743 LWN_Parentize(new_start2);
03744 WN_step(loop_nest2[i])=new_step2;
03745 LWN_Set_Parent(new_step2,loop_nest2[i]);
03746 LWN_Parentize(new_step2);
03747
03748 if (i!=fusion_level-1) {
03749
03750 WN* start1=WN_start(loop_nest1[i]);
03751 WN* step1=WN_step(loop_nest1[i]);
03752
03753
03754 wn = LWN_CreateLdid(
03755 OPCODE_make_op(OPR_LDID, index_type, index_type),
03756 start1);
03757
03758 LWN_Copy_Frequency(wn,start1);
03759 Du_Mgr->Add_Def_Use(start1,wn);
03760 Du_Mgr->Add_Def_Use(step1,wn);
03761 Du_Mgr->Ud_Get_Def(wn)->Set_loop_stmt(loop_nest1[i]);
03762
03763 Replace_Ldid_With_Exp_Copy(
03764 new_loop_var_symbol[i],
03765 WN_do_body(loop_nest2[i]),
03766 wn,
03767 Du_Mgr);
03768
03769 LWN_Delete_Tree(wn);
03770 }
03771
03772 }
03773
03774 Fusion_Du_Update(loop_nest1, loop_nest2, Du_Mgr);
03775 Fusion_Loop_Stmt_Update(loop_nest1, loop_nest2, fusion_level-1, Du_Mgr);
03776
03777 WN* inner_loop1 = loop_nest1[inner_most_level];
03778 WN* inner_loop2 = loop_nest2[inner_most_level];
03779
03780
03781
03782
03783
03784 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
03785 INT old_ids[2];
03786 for (i = fusion_level - 2; i >= 0; i--) {
03787 old_ids[0] = WN_MAP32_Get(Prompf_Id_Map, loop_nest1[i]);
03788 old_ids[1] = WN_MAP32_Get(Prompf_Id_Map, loop_nest2[i]);
03789 INT new_id = WN_MAP32_Get(Prompf_Id_Map, loop_nest1[i]);
03790
03791 WN_MAP32_Set(Prompf_Id_Map, loop_nest2[i], 0);
03792 Prompf_Info->Fusion(old_ids, new_id);
03793 }
03794 }
03795
03796 for (i=fusion_level-2; i>=0; i--) {
03797
03798 LWN_Insert_Block_Before(LWN_Get_Parent(loop_nest1[i+1]),
03799 NULL, WN_do_body(loop_nest2[i]));
03800
03801 LWN_Delete_Tree(WN_start(loop_nest2[i]));
03802 LWN_Delete_Tree(WN_step(loop_nest2[i]));
03803 LWN_Delete_Tree(WN_end(loop_nest2[i]));
03804 LWN_Delete_From_Block(LWN_Get_Parent(loop_nest2[i]), loop_nest2[i]);
03805 }
03806
03807
03808 if (prolog>0){
03809
03810 WN* wn1 = WN_prev(inner_loop1);
03811 if (prolog==1)
03812 Pre_loop_peeling(inner_loop1, prolog, TRUE, FALSE);
03813 else
03814 Pre_loop_peeling(inner_loop1, prolog, peeling_unrolled, FALSE);
03815 for (WN* stmt = WN_prev(inner_loop1); stmt != wn1; stmt=WN_prev(stmt))
03816 Replace_Symbol(stmt,
03817 new_loop_var_symbol[inner_most_level],
03818 pre_loop_var_symbol[inner_most_level], NULL);
03819 Replace_Symbol(WN_kid0(WN_start(inner_loop1)),
03820 new_loop_var_symbol[inner_most_level],
03821 pre_loop_var_symbol[inner_most_level], NULL);
03822 }
03823
03824
03825 if (epilog_loop == inner_loop1 && epilog) {
03826 if (epilog==1)
03827 Post_loop_peeling(inner_loop1, epilog, TRUE, FALSE);
03828 else
03829 Post_loop_peeling(inner_loop1, epilog, peeling_unrolled, FALSE);
03830 } else if (epilog_loop == inner_loop2 && epilog) {
03831 if (epilog==1)
03832 Post_loop_peeling(inner_loop2, epilog, TRUE, FALSE);
03833 else
03834 Post_loop_peeling(inner_loop2, epilog, peeling_unrolled, FALSE);
03835 }
03836
03837 WN* start1=WN_start(inner_loop1);
03838 WN* step1=WN_step(inner_loop1);
03839 index_type=WN_desc(WN_start(inner_loop1));
03840
03841 USE_LIST_ITER u_iter(Du_Mgr->Du_Get_Use(WN_start(inner_loop2)));
03842 for (DU_NODE* un=u_iter.First(); !u_iter.Is_Empty(); ) {
03843 WN* use=un->Wn();
03844 un=u_iter.Next();
03845 Du_Mgr->Add_Def_Use(start1,use);
03846 Du_Mgr->Add_Def_Use(step1,use);
03847 }
03848
03849 Fusion_Loop_Stmt_Update(&inner_loop1, &inner_loop2, 1, Du_Mgr);
03850
03851
03852 LWN_Insert_Block_Before(WN_do_body(inner_loop1), NULL,
03853 WN_do_body(inner_loop2));
03854
03855 DO_LOOP_INFO *loop_info1 =
03856 (DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map, inner_loop1);
03857 DO_LOOP_INFO *loop_info2 =
03858 (DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map, inner_loop2);
03859
03860 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
03861 INT old_ids[2];
03862 old_ids[0] = WN_MAP32_Get(Prompf_Id_Map, inner_loop1);
03863 old_ids[1] = WN_MAP32_Get(Prompf_Id_Map, inner_loop2);
03864 INT new_id = WN_MAP32_Get(Prompf_Id_Map, inner_loop1);
03865
03866 WN_MAP32_Set(Prompf_Id_Map, inner_loop2, 0);
03867 Prompf_Info->Fusion(old_ids, new_id);
03868 }
03869
03870
03871 LWN_Delete_Tree(WN_start(inner_loop2));
03872 LWN_Delete_Tree(WN_step(inner_loop2));
03873 LWN_Delete_Tree(WN_end(inner_loop2));
03874 LWN_Delete_From_Block(LWN_Get_Parent(inner_loop2),inner_loop2);
03875
03876 BOOL inner_loop_removed = FALSE;
03877 WN* inner_loop_parent=LWN_Get_Parent(inner_loop1);
03878
03879
03880
03881 if (wn=WN_SimplifyExp2(WN_opcode(WN_end(inner_loop1)),
03882 LWN_Copy_Tree(WN_kid0(WN_start(inner_loop1))),
03883 LWN_Copy_Tree(WN_kid1(WN_end(inner_loop1))))) {
03884 if (WN_opcode(wn) == OPC_U4INTCONST && WN_const_val(wn) == 0) {
03885
03886 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled())
03887 Prompf_Record_Eliminations(inner_loop1);
03888
03889 WN* tmp=LWN_Copy_Tree(WN_start(inner_loop1),TRUE,LNO_Info_Map);
03890 if (!Array_Dependence_Graph->
03891 Add_Deps_To_Copy_Block(WN_start(inner_loop1),tmp,FALSE))
03892 LNO_Erase_Dg_From_Here_In(WN_start(inner_loop1),
03893 Array_Dependence_Graph);
03894
03895 USE_LIST_ITER u_iter(Du_Mgr->Du_Get_Use(WN_start(inner_loop1)));
03896 Du_Mgr->Create_Use_List(tmp);
03897 for (DU_NODE* un=u_iter.First(); !u_iter.Is_Empty(); ) {
03898 WN* use=un->Wn();
03899 un=u_iter.Next();
03900 Du_Mgr->Add_Def_Use(tmp,use);
03901 }
03902 LWN_Copy_Def_Use(WN_kid0(WN_start(inner_loop1)),
03903 WN_kid0(tmp),Du_Mgr);
03904 LWN_Insert_Block_Before(LWN_Get_Parent(in_loop1),in_loop1,tmp);
03905 LWN_Update_Def_Use_Delete_Tree(inner_loop1,Du_Mgr);
03906 LWN_Update_Dg_Delete_Tree(inner_loop1, adg);
03907 LWN_Delete_From_Block(LWN_Get_Parent(inner_loop1),inner_loop1);
03908 USE_LIST_ITER u1_iter(Du_Mgr->Du_Get_Use(tmp));
03909 if (u1_iter.Is_Empty()) {
03910 LWN_Update_Def_Use_Delete_Tree(tmp,Du_Mgr);
03911 LWN_Delete_From_Block(LWN_Get_Parent(tmp),tmp);
03912 }
03913 inner_loop_removed = TRUE;
03914 if (LNO_Verbose)
03915 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
03916 "Empty innermost loop is removed after fusion!!");
03917 if (LNO_Analysis)
03918 fusion_analysis_info(TRUE,srcpos1,srcpos2,fusion_level,
03919 "Empty innermost loop is removed after fusion!!");
03920 if (LNO_Tlog) {
03921 char in_string[30];
03922 char out_string[30];
03923 sprintf(in_string,"%d %d %d", Srcpos_To_Line(srcpos1),
03924 Srcpos_To_Line(srcpos2), fusion_level);
03925 sprintf(out_string,"%d",Succeeded);
03926 Generate_Tlog("LNO","fusion", Srcpos_To_Line(srcpos1),
03927 ST_name(WN_st(WN_index(in_loop1))),
03928 in_string, out_string,
03929 "Empty innermost loop is removed after fusion!!");
03930 }
03931 }
03932 }
03933
03934 if (!inner_loop_removed || fusion_level>1) {
03935 WN_Simplify_Tree(in_loop1);
03936 LWN_Parentize(LWN_Get_Parent(in_loop1));
03937 }
03938
03939
03940
03941 BOOL has_gotos = loop_info1->Has_Gotos || loop_info2->Has_Gotos;
03942 BOOL has_calls = loop_info1->Has_Calls || loop_info2->Has_Calls;
03943 BOOL has_unsummarized_calls = loop_info1->Has_Unsummarized_Calls ||
03944 loop_info2->Has_Unsummarized_Calls;
03945 BOOL is_inner = loop_info1->Is_Inner && loop_info2->Is_Inner;
03946 if (!inner_loop_removed) {
03947 loop_info1->Has_Gotos = has_gotos;
03948 loop_info1->Has_Calls = has_calls;
03949 loop_info1->Has_Unsummarized_Calls = has_unsummarized_calls;
03950 loop_info1->Is_Inner = is_inner;
03951 loop_info1->Is_Ivdep = FALSE;
03952 loop_info1->Is_Concurrent_Call = FALSE;
03953 loop_info1->Concurrent_Directive = FALSE;
03954 }
03955
03956 for (i=fusion_level-2; i>=0; i--) {
03957 loop_info1 = (DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map, loop_nest1[i]);
03958 loop_info1->Has_Gotos = has_gotos;
03959 loop_info1->Has_Calls = has_calls;
03960 loop_info1->Has_Unsummarized_Calls = has_unsummarized_calls;
03961 loop_info1->Is_Concurrent_Call = FALSE;
03962 loop_info1->Concurrent_Directive = FALSE;
03963 }
03964
03965
03966 wn=inner_loop_parent;
03967 WN* inner_loop_found=NULL;
03968 if (inner_loop_removed)
03969 while (wn) {
03970 OPCODE opc=WN_opcode(wn);
03971 if (opc==OPC_DO_LOOP || opc==OPC_IF) {
03972 if (!inner_loop_found)
03973 inner_loop_found=Find_SCF_Inside(wn,OPC_DO_LOOP);
03974 if (opc==OPC_DO_LOOP) {
03975 Get_Do_Loop_Info(wn)->Is_Inner=(inner_loop_found==NULL);
03976 inner_loop_found=wn;
03977 } else if (opc==OPC_IF)
03978 Get_If_Info(wn)->Contains_Do_Loops=(inner_loop_found!=NULL);
03979 }
03980 wn=LWN_Get_Parent(wn);
03981 }
03982
03983
03984
03985
03986 for (i=0; i<fusion_level; i++)
03987 if ((i<fusion_level-1 &&
03988 (WN_first(WN_do_body(loop_nest1[i]))!=loop_nest1[i+1] ||
03989 WN_last(WN_do_body(loop_nest1[i]))!=loop_nest1[i+1])
03990 ) ||
03991 (i==fusion_level-1 && !inner_loop_removed)) {
03992 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&FUSION_default_pool),
03993 &FUSION_default_pool);
03994 Build_Doloop_Stack(LWN_Get_Parent(loop_nest1[i]), loop_stack);
03995 LNO_Build_Access(loop_nest1[i], loop_stack, &LNO_default_pool);
03996 if (!adg->Build_Region(loop_nest1[i],loop_nest1[i],loop_stack, TRUE))
03997 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(loop_nest1[i]), adg);
03998 break;
03999 }
04000
04001 if (LNO_Test_Dump) adg->Print(stdout);
04002
04003 if (LNO_Verbose)
04004 fusion_verbose_info(srcpos1,srcpos2,fusion_level,
04005 "Successfully fused !!");
04006 if (LNO_Analysis)
04007 fusion_analysis_info(TRUE,srcpos1,srcpos2,fusion_level,
04008 "Successfully fused !!");
04009 if (LNO_Tlog) {
04010 char in_string[30];
04011 char out_string[30];
04012 sprintf(in_string,"%d %d %d", Srcpos_To_Line(srcpos1),
04013 Srcpos_To_Line(srcpos2), fusion_level);
04014 sprintf(out_string,"%d",Succeeded);
04015 Generate_Tlog("LNO","fusion", Srcpos_To_Line(srcpos1),
04016 ST_name(WN_st(WN_index(in_loop1))),
04017 in_string, out_string, "Successfully fused !!");
04018 }
04019 if (inner_loop_removed)
04020 return Succeeded_and_Inner_Loop_Removed;
04021 else
04022 return Succeeded;
04023
04024 }
04025
04026
04027 extern void Fusion_Init()
04028 {
04029 if (!fusion_initialized) {
04030 MEM_POOL_Initialize(&FUSION_default_pool,"FUSION_default_pool",FALSE);
04031 MEM_POOL_Push(&FUSION_default_pool);
04032 fusion_initialized=TRUE;
04033 }
04034 }
04035
04036
04037 extern void Fusion_Finish()
04038 {
04039 if (fusion_initialized) {
04040 MEM_POOL_Pop(&FUSION_default_pool);
04041 MEM_POOL_Delete(&FUSION_default_pool);
04042 fusion_initialized=FALSE;
04043 }
04044 }
04045
04046