00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifdef USE_PCH
00041 #include "lno_pch.h"
00042 #endif // USE_PCH
00043 #pragma hdrstop
00044
00045 #ifdef _KEEP_RCS_ID
00046
00047 static char *rcs_id = "$Source: /home/bos/bk/kpro64-pending/be/lno/SCCS/s.outer.cxx $ $Revision: 1.6 $";
00048 #endif
00049
00050 #include <sys/types.h>
00051 #include <limits.h>
00052 #include "pu_info.h"
00053 #include "fusion.h"
00054 #include "fiz_fuse.h"
00055 #include "name.h"
00056 #include "lwn_util.h"
00057 #include "lno_bv.h"
00058 #include "ff_utils.h"
00059 #include "wn_map.h"
00060 #include "btree.h"
00061 #include "glob.h"
00062 #include "tlog.h"
00063 #include "parallel.h"
00064
00065 typedef HASH_TABLE<WN*,UINT> WN2UINT;
00066
00067 typedef enum { NORMAL, SKIP } OUTER_FUSION_STATUS;
00068
00069 typedef enum { INFO, FAIL, SUCCEED } INFO_TYPE;
00070
00071 static void outer_fusion_verbose_info(
00072 SRCPOS srcpos1,
00073 SRCPOS srcpos2,
00074 const char* message)
00075 {
00076
00077 printf("#### Outer Fusion(%d+%d): %s\n",
00078 Srcpos_To_Line(srcpos1),
00079 Srcpos_To_Line(srcpos2),
00080 message);
00081 }
00082
00083 static void outer_fusion_analysis_info(
00084 INFO_TYPE info_type,
00085 SRCPOS srcpos1,
00086 SRCPOS srcpos2,
00087 UINT32 snl_level1,
00088 UINT32 snl_level2,
00089 const char* message)
00090 {
00091
00092 switch (info_type) {
00093 case INFO:
00094 fprintf(LNO_Analysis,"( LNO_Outer_Fusion_Info ");
00095 break;
00096 case FAIL:
00097 fprintf(LNO_Analysis,"( LNO_Outer_Fusion_Failure ");
00098 break;
00099 case SUCCEED:
00100 fprintf(LNO_Analysis,"( LNO_Outer_Fusion_Success ");
00101 break;
00102 }
00103
00104 fprintf(LNO_Analysis,"(%s %d %d) (%s %d %d) \"%s\" )\n",
00105 Cur_PU_Name, Srcpos_To_Line(srcpos1), snl_level1,
00106 Cur_PU_Name, Srcpos_To_Line(srcpos2), snl_level2,
00107 message);
00108 }
00109
00110 static void outer_fusion_tlog_info(
00111 INFO_TYPE info_type,
00112 SRCPOS srcpos1,
00113 SRCPOS srcpos2,
00114 UINT32 snl_level1,
00115 UINT32 snl_level2,
00116 const char* message)
00117 {
00118
00119 char tmp_string[300];
00120 sprintf(tmp_string,"%d %d %d %d %d",
00121 info_type, Srcpos_To_Line(srcpos1), Srcpos_To_Line(srcpos2),
00122 snl_level1, snl_level2);
00123 Generate_Tlog("LNO","outer_loop_fusion", Srcpos_To_Line(srcpos1), "",
00124 tmp_string, "", message);
00125 }
00126
00127 #ifndef KEY // moved to common/com/config_lno.* and controllable by a flag
00128 #define OLF_size_upperbound 100 // max size allowed for fusion
00129 #define OLF_size_lowerbound 15 // remove several restriction
00130
00131 #endif
00132
00133 static BINARY_TREE<NAME2BIT> *mapping_dictionary;
00134 static UINT Bit_Position_Count=0;
00135 static MEM_POOL OLF_default_pool;
00136
00137 static UINT
00138 Stride_One_Level(REF_LIST_STACK *writes, REF_LIST_STACK *reads,
00139 UINT outer_level, UINT level) {
00140
00141 INT stride_one_level;
00142
00143 UINT* hist=CXX_NEW_ARRAY(UINT,level+1,&OLF_default_pool);
00144 INT i;
00145 for (i=0; i<=level; i++) {
00146 hist[i] = 0;
00147 }
00148
00149 REF_LIST_STACK* ref_list_stack[2];
00150 ref_list_stack[0]=writes;
00151 ref_list_stack[1]=reads;
00152
00153 for (INT list_count=0; list_count<2; list_count++)
00154 for (INT ii=0;ii<ref_list_stack[list_count]->Elements(); ii++) {
00155 REFERENCE_ITER l_iter(ref_list_stack[list_count]->Bottom_nth(ii));
00156 for (REFERENCE_NODE* node=l_iter.First(); !l_iter.Is_Empty();
00157 node=l_iter.Next()) {
00158 WN* array_node=node->Wn;
00159 if (OPCODE_is_load(WN_opcode(array_node))) {
00160 array_node = WN_kid0(array_node);
00161 } else {
00162 array_node = WN_kid1(array_node);
00163 }
00164 if (WN_operator(array_node) == OPR_ADD) {
00165 if (WN_operator(WN_kid0(array_node)) == OPR_ARRAY) {
00166 array_node = WN_kid0(array_node);
00167 } else {
00168 array_node = WN_kid1(array_node);
00169 }
00170 }
00171
00172 ACCESS_ARRAY* aa;
00173 aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,array_node);
00174
00175 if (aa->Too_Messy)
00176 continue;
00177
00178 ACCESS_VECTOR* av=aa->Dim(aa->Num_Vec()-1);
00179 if (av->Too_Messy || av->Non_Lin_Symb)
00180 continue;
00181
00182 stride_one_level= -1;
00183 for (INT i=outer_level; i<=level; i++)
00184 if (av->Loop_Coeff(i)==0)
00185 continue;
00186 else if (av->Loop_Coeff(i)==1 || av->Loop_Coeff(i)==-1)
00187 if (stride_one_level== -1)
00188 stride_one_level=i;
00189 else {
00190 stride_one_level= -2;
00191 break;
00192 }
00193 else {
00194 stride_one_level= -2;
00195 break;
00196 }
00197
00198 if (stride_one_level>=0)
00199 hist[stride_one_level]++;
00200 }
00201 }
00202
00203 UINT max_freq=0;
00204 stride_one_level=-1;
00205 for (i=outer_level; i<=level; i++)
00206 if (max_freq<hist[i]) {
00207 max_freq=hist[i];
00208 stride_one_level=i;
00209 }
00210
00211 return stride_one_level;
00212 }
00213
00214 static BIT_VECTOR*
00215 Array_Names_In_Loop(REF_LIST_STACK *writes, REF_LIST_STACK *reads) {
00216
00217 BIT_VECTOR* bv=CXX_NEW(BIT_VECTOR(256,&OLF_default_pool), &LNO_local_pool);
00218
00219 REF_LIST_STACK* ref_list_stack[2];
00220 ref_list_stack[0]=writes;
00221 ref_list_stack[1]=reads;
00222
00223 for (INT list_count=0; list_count<2; list_count++)
00224 for (INT ii=0;ii<ref_list_stack[list_count]->Elements(); ii++) {
00225 REFERENCE_ITER l_iter(ref_list_stack[list_count]->Bottom_nth(ii));
00226 for (REFERENCE_NODE* node=l_iter.First(); !l_iter.Is_Empty();
00227 node=l_iter.Next()) {
00228
00229 WN* array_node = node->Wn;
00230 if (OPCODE_is_load(WN_opcode(array_node))) {
00231 array_node = WN_kid0(array_node);
00232 } else {
00233 array_node = WN_kid1(array_node);
00234 }
00235 if (WN_operator(array_node) == OPR_ADD) {
00236 if (WN_operator(WN_kid0(array_node)) == OPR_ARRAY) {
00237 array_node = WN_kid0(array_node);
00238 } else if (
00239 WN_operator(WN_kid1(array_node)) == OPR_ARRAY) {
00240 array_node = WN_kid1(array_node);
00241 } else
00242 continue;
00243 }
00244
00245 if (!OPCODE_has_sym(WN_opcode(WN_array_base(array_node))))
00246 continue;
00247
00248 NAME2BIT temp_map(WN_array_base(array_node));
00249
00250 UINT bit_position;
00251 if ((mapping_dictionary->Find(temp_map))==NULL) {
00252 if (Bit_Position_Count==256) {
00253 CXX_DELETE(bv,&LNO_local_pool);
00254 return NULL;
00255 }
00256 bit_position=Bit_Position_Count++;
00257 temp_map.Set_Bit_Position(bit_position);
00258 mapping_dictionary->Enter(temp_map);
00259 } else
00260 bit_position=
00261 mapping_dictionary->Find(temp_map)->Get_Data()->Get_Bit_Position();
00262
00263 bv->Set(bit_position);
00264 }
00265 }
00266 return bv;
00267 }
00268
00269
00270
00271
00272
00273 static BOOL
00274 Any_Loop_In_SNL_Parallelizable(WN* loop, INT depth)
00275 {
00276 if (Get_Do_Loop_Info(loop)->Parallelizable) {
00277 return TRUE;
00278 }
00279 for (INT i = 1; i < depth; ++i) {
00280 loop = Get_Only_Loop_Inside(loop, FALSE);
00281 if (Get_Do_Loop_Info(loop)->Parallelizable) {
00282 return TRUE;
00283 }
00284 }
00285 return FALSE;
00286 }
00287
00288
00289 static FISSION_FUSION_STATUS Fuse_Outer_Loops(WN* loop1, WN* loop2,
00290 FIZ_FUSE_INFO* ffi, WN2UINT *wn2ffi,
00291 UINT* fusion_level_io) {
00292
00293 Is_True(Do_Loop_Is_Good(loop1) && !Do_Loop_Has_Calls(loop1) &&
00294 !Do_Loop_Has_Gotos(loop1),
00295 ("Bad loop passed to Fuse_Outer_Loops()."));
00296 Is_True(Do_Loop_Is_Good(loop2) && !Do_Loop_Has_Calls(loop2) &&
00297 !Do_Loop_Has_Gotos(loop2),
00298 ("Bad loop passed to Fuse_Outer_Loops()."));
00299
00300 char loop1_var_name[80];
00301 char loop2_var_name[80];
00302 if (strlen(ST_name(WN_st(WN_index(loop1))))>=80) {
00303 strcpy(loop1_var_name,"name_too_long");
00304 } else
00305 strcpy(loop1_var_name,ST_name(WN_st(WN_index(loop1))));
00306 if (strlen(ST_name(WN_st(WN_index(loop2))))>=80) {
00307 strcpy(loop2_var_name,"name_too_long");
00308 } else
00309 strcpy(loop2_var_name,ST_name(WN_st(WN_index(loop2))));
00310
00311 SRCPOS srcpos1=WN_Get_Linenum(loop1);
00312 SRCPOS srcpos2=WN_Get_Linenum(loop2);
00313
00314 DO_LOOP_INFO *dli1=Get_Do_Loop_Info(loop1);
00315 DO_LOOP_INFO *dli2=Get_Do_Loop_Info(loop2);
00316
00317 if (dli1->No_Fusion || dli2->No_Fusion
00318 || !Cannot_Concurrentize(loop1) && Cannot_Concurrentize(loop2)
00319 || Cannot_Concurrentize(loop1) && !Cannot_Concurrentize(loop2)) {
00320 if (LNO_Verbose)
00321 outer_fusion_verbose_info(srcpos1,srcpos2,
00322 "Loops with no_fusion pragmas cannot be outer fused.");
00323 if (LNO_Analysis)
00324 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,0,0,
00325 "Loops with no_fusion pragmas cannot be outer fused.");
00326 if (LNO_Tlog)
00327 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,0,0,
00328 "Loops with no_fusion pragmas cannot be outer fused.");
00329 return Failed;
00330 }
00331 if (dli1->LB->Too_Messy || dli1->UB->Too_Messy ||
00332 dli2->LB->Too_Messy || dli2->UB->Too_Messy) {
00333 if (LNO_Verbose)
00334 outer_fusion_verbose_info(srcpos1,srcpos2,
00335 "Loops with messy bounds cannot be outer fused.");
00336 if (LNO_Analysis)
00337 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,0,0,
00338 "Loops with messy bounds cannot be outer fused.");
00339 if (LNO_Tlog)
00340 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,0,0,
00341 "Loops with messy bounds cannot be outer fused.");
00342 return Failed;
00343 }
00344 if (!Do_Loop_Is_Good(loop1) || !Do_Loop_Is_Good(loop2) ||
00345 Do_Loop_Has_Calls(loop1) || Do_Loop_Has_Calls(loop2) ||
00346 Do_Loop_Has_Gotos(loop1) || Do_Loop_Has_Gotos(loop2)) {
00347 if (LNO_Verbose)
00348 outer_fusion_verbose_info(srcpos1,srcpos2,
00349 "Loops with calls, exits, or gotos cannot be outer fused.");
00350 if (LNO_Analysis)
00351 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,0,0,
00352 "Loops with calls, exits, or gotos cannot be outer fused.");
00353 if (LNO_Tlog)
00354 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,0,0,
00355 "Loops with calls, exits, or gotos cannot be outer fused.");
00356 return Failed;
00357 }
00358
00359 UINT ffi_index=wn2ffi->Find(loop1);
00360 Is_True(ffi_index,("Missing SNL info for loop1 in Fuse_Outer_Loops()."));
00361 if (ffi_index == 0) {
00362 SNL_INFO snl_info(loop1);
00363 ffi_index=ffi->New_Snl(snl_info);
00364 wn2ffi->Enter(loop1,ffi_index);
00365 }
00366 UINT snl_level1=ffi->Get_Depth(ffi_index);
00367 SNL_TYPE type1=ffi->Get_Type(ffi_index);
00368
00369 ffi_index=wn2ffi->Find(loop2);
00370 Is_True(ffi_index,("Missing SNL info for loop2 in Fuse_Outer_Loops()."));
00371 if (ffi_index == 0) {
00372 SNL_INFO snl_info(loop2);
00373 ffi_index=ffi->New_Snl(snl_info);
00374 wn2ffi->Enter(loop2,ffi_index);
00375 }
00376 UINT snl_level2=ffi->Get_Depth(ffi_index);
00377 SNL_TYPE type2=ffi->Get_Type(ffi_index);
00378
00379 if (LNO_Verbose)
00380 outer_fusion_verbose_info(srcpos1,srcpos2,
00381 "Attempt to fuse outer loops to improve locality.");
00382 if (LNO_Analysis)
00383 outer_fusion_analysis_info(INFO,srcpos1,srcpos2,snl_level1,snl_level2,
00384 "Attempt to fuse outer loops to improve locality.");
00385 if (LNO_Tlog)
00386 outer_fusion_tlog_info(INFO,srcpos1,srcpos2,snl_level1,snl_level2,
00387 "Attempt to fuse outer loops to improve locality.");
00388
00389 if (snl_level1!=snl_level2 && LNO_Fusion<2) {
00390 if (LNO_Verbose)
00391 outer_fusion_verbose_info(srcpos1,srcpos2,
00392 "Unequal SNL levels.");
00393 if (LNO_Analysis)
00394 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00395 "Unequal SNL levels.");
00396 if (LNO_Tlog)
00397 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00398 "Unequal SNL levels.");
00399 *fusion_level_io=0;
00400 return Failed;
00401 }
00402
00403 if (type1!=type2 && LNO_Fusion<2) {
00404 if (LNO_Verbose)
00405 outer_fusion_verbose_info(srcpos1,srcpos2,
00406 "Fusing SNL with non-SNL.");
00407 if (LNO_Analysis)
00408 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00409 "Fusing SNL with non-SNL.");
00410 if (LNO_Tlog)
00411 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00412 "Fusing SNL with non-SNL.");
00413 *fusion_level_io=0;
00414 return Failed;
00415 }
00416
00417 BOOL parallel1 = (Run_autopar && LNO_Run_AP > 0 &&
00418 Any_Loop_In_SNL_Parallelizable(loop1, snl_level1));
00419 BOOL parallel2 = (Run_autopar && LNO_Run_AP > 0 &&
00420 Any_Loop_In_SNL_Parallelizable(loop2, snl_level2));
00421 if (parallel1 != parallel2 && LNO_Fusion) {
00422 if (LNO_Verbose)
00423 outer_fusion_verbose_info(srcpos1,srcpos2,
00424 "Do not fuse parallel with serial loop.");
00425 if (LNO_Analysis)
00426 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,0,0,
00427 "Do not fuse parallel with serial loop.");
00428 if (LNO_Tlog)
00429 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,0,0,
00430 "Do not fuse parallel with serial loop.");
00431 return Failed;
00432 }
00433
00434 mapping_dictionary =
00435 CXX_NEW(BINARY_TREE<NAME2BIT>(&OLF_default_pool),&OLF_default_pool);
00436 Bit_Position_Count=0;
00437
00438 REF_LIST_STACK *writes1 = CXX_NEW(REF_LIST_STACK(&OLF_default_pool),
00439 &OLF_default_pool);
00440 REF_LIST_STACK *reads1 = CXX_NEW(REF_LIST_STACK(&OLF_default_pool),
00441 &OLF_default_pool);
00442 SCALAR_STACK *scalar_writes1 = CXX_NEW(SCALAR_STACK(&OLF_default_pool),
00443 &OLF_default_pool);
00444 SCALAR_STACK *scalar_reads1 = CXX_NEW(SCALAR_STACK(&OLF_default_pool),
00445 &OLF_default_pool);
00446 SCALAR_REF_STACK *params1 =
00447 CXX_NEW(SCALAR_REF_STACK(&OLF_default_pool), &OLF_default_pool);
00448 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&OLF_default_pool),
00449 &OLF_default_pool);
00450 Build_Doloop_Stack(loop1, stack1);
00451 Init_Ref_Stmt_Counter();
00452 UINT array_ref_count1=New_Gather_References(
00453 WN_do_body(loop1),writes1,reads1,stack1,
00454 scalar_writes1,scalar_reads1,params1,&OLF_default_pool);
00455
00456 if (array_ref_count1 == -1)
00457 return Failed;
00458 REF_LIST_STACK *writes2 = CXX_NEW(REF_LIST_STACK(&OLF_default_pool),
00459 &OLF_default_pool);
00460 REF_LIST_STACK *reads2 = CXX_NEW(REF_LIST_STACK(&OLF_default_pool),
00461 &OLF_default_pool);
00462 SCALAR_STACK *scalar_writes2 = CXX_NEW(SCALAR_STACK(&OLF_default_pool),
00463 &OLF_default_pool);
00464 SCALAR_STACK *scalar_reads2 = CXX_NEW(SCALAR_STACK(&OLF_default_pool),
00465 &OLF_default_pool);
00466 SCALAR_REF_STACK *params2 =
00467 CXX_NEW(SCALAR_REF_STACK(&OLF_default_pool), &OLF_default_pool);
00468 DOLOOP_STACK *stack2=CXX_NEW(DOLOOP_STACK(&OLF_default_pool),
00469 &OLF_default_pool);
00470 Build_Doloop_Stack(loop2, stack2);
00471 UINT array_ref_count2=New_Gather_References(
00472 WN_do_body(loop2),writes2,reads2,stack2,
00473 scalar_writes2,scalar_reads2,params2,&OLF_default_pool);
00474 if (array_ref_count2 == -1)
00475 return Failed;
00476
00477 if (array_ref_count1+array_ref_count2>OLF_size_upperbound && LNO_Fusion<2) {
00478 if (LNO_Verbose)
00479 outer_fusion_verbose_info(srcpos1,srcpos2,
00480 "Number of array references after merge is too big (>100)!!.");
00481 if (LNO_Analysis)
00482 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00483 "Number of array references after merge is too big (>100)!!.");
00484 if (LNO_Tlog)
00485 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00486 "Number of array references after merge is too big (>100)!!.");
00487 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00488 *fusion_level_io=0;
00489 return Failed;
00490 }
00491
00492 INT outer_level1 = dli1->Depth;
00493 INT outer_level2 = dli2->Depth;
00494 if (Stride_One_Level(writes1,reads1,outer_level1,snl_level1)
00495 !=Stride_One_Level(writes2,reads2,outer_level2,snl_level2)
00496 && LNO_Fusion<2) {
00497 if (LNO_Verbose)
00498 outer_fusion_verbose_info(srcpos1,srcpos2,
00499 "Stride-1 level differs.");
00500 if (LNO_Analysis)
00501 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00502 "Stride-1 level differs.");
00503 if (LNO_Tlog)
00504 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00505 "Stride-1 level differs.");
00506 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00507 *fusion_level_io=0;
00508 return Failed;
00509 }
00510
00511 BIT_VECTOR* bv1=Array_Names_In_Loop(writes1,reads1);
00512 BIT_VECTOR* bv2=Array_Names_In_Loop(writes2,reads2);
00513
00514 if (!bv1 || !bv2) {
00515 if (LNO_Verbose)
00516 outer_fusion_verbose_info(srcpos1,srcpos2,
00517 "Too many (>256) array names in loops.");
00518 if (LNO_Analysis)
00519 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00520 "Too many (>256) array names in loops.");
00521 if (LNO_Tlog)
00522 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00523 "Too many (>256) array names in loops.");
00524 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00525 *fusion_level_io=0;
00526 return Failed;
00527 }
00528
00529 if (LNO_Fusion<2)
00530 if (array_ref_count1+array_ref_count2>OLF_size_lowerbound &&
00531 2*((*bv1) & (*bv2)).Pop_Count() < (~(*bv1) & (*bv2)).Pop_Count() &&
00532 2*((*bv1) & (*bv2)).Pop_Count() < ((*bv1) & ~(*bv2)).Pop_Count()) {
00533 if (LNO_Verbose)
00534 outer_fusion_verbose_info(srcpos1,srcpos2,
00535 "Too few common array names or too many new array names in loop2.");
00536 if (LNO_Analysis)
00537 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00538 "Too few common array names or too many new array names in loop2.");
00539 if (LNO_Tlog)
00540 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00541 "Too few common array names or too many new array names in loop2.");
00542 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00543 *fusion_level_io=0;
00544 return Failed;
00545 }
00546
00547 UINT fusion_level=snl_level1;
00548 if (snl_level2<snl_level1)
00549 fusion_level=snl_level2;
00550 UINT fusion_level_tmp=fusion_level;
00551 UINT peeling_limit=LNO_Fusion_Peeling_Limit;
00552 if (dli1->Est_Num_Iterations>=0)
00553 peeling_limit=MIN(peeling_limit,dli1->Est_Num_Iterations);
00554 if (dli2->Est_Num_Iterations>=0)
00555 peeling_limit=MIN(peeling_limit,dli2->Est_Num_Iterations);
00556 if (snl_level1!=snl_level2 || type1!=Inner || type2!=Inner)
00557 peeling_limit=0;
00558 FISSION_FUSION_STATUS level_fusion_status;
00559 FISSION_FUSION_STATUS status=
00560 Fuse(loop1,loop2,fusion_level,peeling_limit,TRUE);
00561 if (status==Succeeded || status==Succeeded_and_Inner_Loop_Removed) {
00562 if (LNO_Verbose)
00563 outer_fusion_verbose_info(srcpos1,srcpos2,
00564 "Successfully fused outer loops.");
00565 if (LNO_Analysis)
00566 outer_fusion_analysis_info(SUCCEED,srcpos1,srcpos2,snl_level1,snl_level2,
00567 "Successfully fused outer loops.");
00568 if (LNO_Tlog)
00569 outer_fusion_tlog_info(SUCCEED,srcpos1,srcpos2,snl_level1,snl_level2,
00570 "Successfully fused outer loops.");
00571 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00572 if (status==Succeeded_and_Inner_Loop_Removed)
00573 fusion_level--;
00574 *fusion_level_io=fusion_level;
00575 return status;
00576 } else if (((snl_level1>1 && status==Try_Level_By_Level) || LNO_Fusion==2) &&
00577 ((level_fusion_status=Fuse_Level_By_Level(loop1,loop2,
00578 &fusion_level_tmp,peeling_limit,
00579 LNO_Fusion==2,TRUE,ffi))==Succeeded ||
00580 (level_fusion_status==Partially_fused && LNO_Fusion==2) ||
00581 level_fusion_status==Succeeded_and_Inner_Loop_Removed)) {
00582 if (LNO_Verbose)
00583 outer_fusion_verbose_info(srcpos1,srcpos2,
00584 "Successfully fused outer loops.");
00585 if (LNO_Analysis)
00586 outer_fusion_analysis_info(SUCCEED,srcpos1,srcpos2,snl_level1,snl_level2,
00587 "Successfully fused outer loops.");
00588 if (LNO_Tlog)
00589 outer_fusion_tlog_info(SUCCEED,srcpos1,srcpos2,snl_level1,snl_level2,
00590 "Successfully fused outer loops.");
00591 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00592 *fusion_level_io=fusion_level_tmp;
00593 return level_fusion_status;
00594 } else if (status==Partially_fused ||
00595 (status==Try_Level_By_Level &&
00596 level_fusion_status==Partially_fused)) {
00597 DevWarn("Partially fused loop is not restored");
00598 *fusion_level_io=fusion_level_tmp;
00599 return Partially_fused;
00600 } else {
00601 if (LNO_Verbose)
00602 outer_fusion_verbose_info(srcpos1,srcpos2,
00603 "Failed to fuse outer loops.");
00604 if (LNO_Analysis)
00605 outer_fusion_analysis_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00606 "Failed to fuse outer loops.");
00607 if (LNO_Tlog)
00608 outer_fusion_tlog_info(FAIL,srcpos1,srcpos2,snl_level1,snl_level2,
00609 "Failed to fuse outer loops.");
00610 CXX_DELETE(mapping_dictionary, &OLF_default_pool);
00611 *fusion_level_io=0;
00612 return Failed;
00613 }
00614 }
00615
00616 #ifdef KEY
00617
00618 static BOOL Same_Loop_Body ( WN* wn, WN* copy,
00619 SYMBOL loop_index, SYMBOL copy_loop_index )
00620 {
00621 if (!wn || !copy)
00622 return FALSE;
00623
00624 OPERATOR wn_opr = WN_operator(wn);
00625 OPERATOR copy_opr = WN_operator(copy);
00626
00627 if (wn_opr != wn_opr || WN_kid_count(wn) != WN_kid_count(copy))
00628 return FALSE;
00629
00630 if (wn_opr == OPR_LDID || wn_opr == OPR_STID) {
00631 SYMBOL wn_sym(wn);
00632 SYMBOL copy_sym(copy);
00633 if (wn_sym != copy_sym &&
00634 wn_sym != loop_index && copy_sym != copy_loop_index)
00635 return FALSE;
00636 }
00637 else if (wn_opr == OPR_BLOCK) {
00638 WN* stmt_wn;
00639 WN* stmt_copy;
00640 for (stmt_wn=WN_first(wn), stmt_copy = WN_first(copy);
00641 stmt_wn && stmt_copy;
00642 stmt_wn= WN_next(stmt_wn), stmt_copy = WN_next(stmt_copy))
00643 if (!Same_Loop_Body(stmt_wn, stmt_copy, loop_index, copy_loop_index))
00644 return FALSE;
00645 if (stmt_wn == NULL && stmt_copy != NULL ||
00646 stmt_wn != NULL && stmt_copy == NULL)
00647 return FALSE;
00648 }
00649 else if (wn_opr == OPR_DO_LOOP) {
00650 return Same_Loop_Body(WN_do_body(wn), WN_do_body(copy),
00651 loop_index, copy_loop_index);
00652 }
00653
00654 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
00655 if (!Same_Loop_Body(WN_kid(wn, kid), WN_kid(copy, kid),
00656 loop_index, copy_loop_index))
00657 return FALSE;
00658
00659 return TRUE;
00660 }
00661 #endif
00662 static OUTER_FUSION_STATUS Outer_Loop_Fusion_Walk(WN* wn,
00663 FIZ_FUSE_INFO* ffi, WN2UINT *wn2ffi) {
00664 OPCODE opc=WN_opcode(wn);
00665
00666 if (!OPCODE_is_scf(opc))
00667 return NORMAL;
00668 else if (opc==OPC_DO_LOOP) {
00669 if (Do_Loop_Is_Good(wn) && !Do_Loop_Has_Calls(wn) &&
00670 !Do_Loop_Has_Gotos(wn)) {
00671 WN* next_wn=WN_next(wn);
00672
00673
00674 while (next_wn &&
00675 ((WN_operator(next_wn) == OPR_PRAGMA &&
00676 (WN_pragma(next_wn) == WN_PRAGMA_INLINE_BODY_START ||
00677 WN_pragma(next_wn) == WN_PRAGMA_INLINE_BODY_END ||
00678 WN_pragma(next_wn) == WN_PRAGMA_CLIST_SKIP_BEGIN ||
00679 WN_pragma(next_wn) == WN_PRAGMA_FLIST_SKIP_BEGIN ||
00680 WN_pragma(next_wn) == WN_PRAGMA_CLIST_SKIP_END ||
00681 WN_pragma(next_wn) == WN_PRAGMA_FLIST_SKIP_END))
00682 ||
00683 (WN_operator(next_wn) == OPR_XPRAGMA &&
00684 WN_pragma(next_wn) == WN_PRAGMA_COPYIN_BOUND))) {
00685 LWN_Delete_Tree_From_Block(next_wn);
00686 next_wn=WN_next(wn);
00687 }
00688
00689 OUTER_FUSION_STATUS state=SKIP;
00690 while (next_wn && WN_opcode(next_wn)==OPC_DO_LOOP &&
00691 Do_Loop_Is_Good(next_wn) && !Do_Loop_Has_Calls(next_wn) &&
00692 !Do_Loop_Has_Gotos(next_wn)) {
00693 INT wn_index=wn2ffi->Find(wn);
00694 if (wn_index == 0) {
00695
00696 SNL_INFO snl_info(wn);
00697 wn_index=ffi->New_Snl(snl_info);
00698 wn2ffi->Enter(wn,wn_index);
00699 }
00700 INT next_wn_index=wn2ffi->Find(next_wn);
00701 if (next_wn_index == 0) {
00702
00703 SNL_INFO snl_info(next_wn);
00704 next_wn_index=ffi->New_Snl(snl_info);
00705 wn2ffi->Enter(next_wn,next_wn_index);
00706 }
00707 INT d1=ffi->Get_Depth(wn_index);
00708 INT d2=ffi->Get_Depth(next_wn_index);
00709 UINT fused_level;
00710 #ifdef KEY
00711
00712 WN* next_next_copy;
00713 if (LNO_Fusion == 2)
00714 next_next_copy =
00715 LWN_Copy_Tree(WN_next(next_wn), TRUE, LNO_Info_Map);
00716 #endif
00717 FISSION_FUSION_STATUS fusion_status=
00718 Fuse_Outer_Loops(wn,next_wn,ffi,wn2ffi,&fused_level);
00719 if (fusion_status==Succeeded || fusion_status==Partially_fused) {
00720 WN* wn1=wn;
00721 WN* wn2=wn;
00722 INT level=1;
00723 while (wn1=Get_Only_Loop_Inside(wn1,FALSE)) {
00724 level++;
00725 wn2=wn1;
00726 }
00727 if (d1>level) {
00728 INT i=ffi->Copy_Snl(ffi,wn_index);
00729 wn1=WN_first(WN_do_body(wn2));
00730 while (WN_opcode(wn1)!=OPC_DO_LOOP) wn1=WN_next(wn1);
00731 ffi->Set_Depth(i,d1-level);
00732 ffi->Set_Wn(i,wn1);
00733 wn2ffi->Enter(wn1,i);
00734 }
00735 if (d2>level) {
00736 INT j=ffi->Copy_Snl(ffi,next_wn_index);
00737 wn1=WN_last(WN_do_body(wn2));
00738 while (WN_opcode(wn1)!=OPC_DO_LOOP) wn1=WN_prev(wn1);
00739 ffi->Set_Depth(j,d2-level);
00740 ffi->Set_Wn(j,wn1);
00741 wn2ffi->Enter(wn1,j);
00742 }
00743 if (ffi->Get_Type(wn_index)==Inner &&
00744 ffi->Get_Type(next_wn_index)==Inner &&
00745 fusion_status==Succeeded)
00746 ffi->Set_Type(wn_index,Inner);
00747 else
00748 ffi->Set_Type(wn_index,Not_Inner);
00749 ffi->Set_Depth(wn_index,level);
00750 ffi->Set_Type(next_wn_index,Invalid);
00751 wn2ffi->Enter(next_wn, 0);
00752 } else if (fusion_status==Succeeded_and_Inner_Loop_Removed) {
00753 ffi->Set_Type(wn2ffi->Find(next_wn),Invalid);
00754 wn2ffi->Enter(next_wn, 0);
00755 if (fused_level>0)
00756 ffi->Set_Depth(wn2ffi->Find(wn),fused_level);
00757 else {
00758 ffi->Set_Type(wn2ffi->Find(wn),Invalid);
00759 wn2ffi->Enter(wn, 0);
00760 return state;
00761 }
00762 } else {
00763 WN* new_next_wn=WN_next(wn);
00764 if (next_wn!=new_next_wn) {
00765 ffi->Set_Wn(wn2ffi->Find(next_wn),new_next_wn);
00766 wn2ffi->Enter(new_next_wn,wn2ffi->Find(next_wn));
00767 wn2ffi->Enter(next_wn,0);
00768 }
00769 return state;
00770 }
00771 next_wn=WN_next(wn);
00772 state=NORMAL;
00773 #ifdef KEY
00774
00775 if (LNO_Fusion == 2 && next_wn &&
00776 WN_operator(next_wn) == OPR_DO_LOOP &&
00777 (!next_next_copy || WN_operator(next_next_copy) != OPR_DO_LOOP ||
00778
00779
00780
00781
00782 (!Same_Loop_Body(next_wn, next_next_copy,
00783 WN_index(next_wn), WN_index(next_next_copy)))))
00784 return SKIP;
00785 #endif
00786
00787
00788 while (next_wn &&
00789 ((WN_operator(next_wn) == OPR_PRAGMA &&
00790 (WN_pragma(next_wn) == WN_PRAGMA_INLINE_BODY_START ||
00791 WN_pragma(next_wn) == WN_PRAGMA_INLINE_BODY_END ||
00792 WN_pragma(next_wn) == WN_PRAGMA_CLIST_SKIP_BEGIN ||
00793 WN_pragma(next_wn) == WN_PRAGMA_FLIST_SKIP_BEGIN ||
00794 WN_pragma(next_wn) == WN_PRAGMA_CLIST_SKIP_END ||
00795 WN_pragma(next_wn) == WN_PRAGMA_FLIST_SKIP_END))
00796 ||
00797 (WN_operator(next_wn) == OPR_XPRAGMA &&
00798 WN_pragma(next_wn) == WN_PRAGMA_COPYIN_BOUND))) {
00799 LWN_Delete_Tree_From_Block(next_wn);
00800 next_wn=WN_next(wn);
00801 }
00802
00803 }
00804 } else
00805 (void)Outer_Loop_Fusion_Walk(WN_do_body(wn),ffi,wn2ffi);
00806 } else if (opc==OPC_BLOCK) {
00807 for (WN* stmt=WN_first(wn); stmt; ) {
00808 WN* prev_stmt=WN_prev(stmt);
00809 WN* next_stmt=WN_next(stmt);
00810 OUTER_FUSION_STATUS status=Outer_Loop_Fusion_Walk(stmt,ffi,wn2ffi);
00811 if (!prev_stmt)
00812 if (WN_first(wn)!=stmt && status==NORMAL)
00813 stmt=WN_first(wn);
00814
00815 else if (status==SKIP)
00816 stmt=WN_next(WN_first(wn));
00817 else
00818 if (WN_next(stmt)==next_stmt)
00819 stmt=next_stmt;
00820 else;
00821 else
00822 if (WN_next(prev_stmt)!=stmt && status==NORMAL)
00823 stmt=prev_stmt;
00824
00825
00826
00827 else if (status==SKIP)
00828 stmt=WN_next(WN_next(prev_stmt));
00829 else
00830 if (WN_next(stmt)==next_stmt)
00831 stmt=next_stmt;
00832 else;
00833 }
00834 } else
00835 for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++) {
00836 (void)Outer_Loop_Fusion_Walk(WN_kid(wn,kidno),ffi,wn2ffi);
00837 }
00838
00839 return NORMAL;
00840 }
00841
00842 void Outer_Loop_Fusion_Phase(WN* func_nd, FIZ_FUSE_INFO* ffi) {
00843
00844 MEM_POOL_Initialize(&OLF_default_pool,"OLF_default_pool",FALSE);
00845 MEM_POOL_Push(&OLF_default_pool);
00846
00847 WN2UINT* wn2ffi=CXX_NEW(WN2UINT(256,&OLF_default_pool),&OLF_default_pool);
00848
00849 ffi->Copy_Snl(ffi,0);
00850 ffi->Set_Type(0,Invalid);
00851
00852
00853
00854 for (INT i=1; i<ffi->Num_Snl(); i++) {
00855 WN* loop=ffi->Get_Wn(i);
00856 wn2ffi->Enter(loop, i);
00857 }
00858
00859 Outer_Loop_Fusion_Walk(func_nd,ffi,wn2ffi);
00860
00861 CXX_DELETE(wn2ffi, &OLF_default_pool);
00862
00863 MEM_POOL_Pop(&OLF_default_pool);
00864 MEM_POOL_Delete(&OLF_default_pool);
00865
00866 }
00867
00868