00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063 #define __STDC_LIMIT_MACROS
00064 #include <stdint.h>
00065 #ifdef USE_PCH
00066 #include "lno_pch.h"
00067 #endif // USE_PCH
00068 #pragma hdrstop
00069
00070 #ifdef _KEEP_RCS_ID
00071
00072 static char *rcs_id = "$Source: /home/bos/bk/kpro64-pending/be/lno/SCCS/s.inner_fission.cxx $ $Revision: 1.6 $";
00073 #endif
00074
00075 #include <sys/types.h>
00076 #include <stdlib.h>
00077 #include "defs.h"
00078 #include "config_targ_opt.h"
00079 #include "wn.h"
00080 #include "wn_map.h"
00081 #include "model.h"
00082 #include "mempool.h"
00083 #include "cxx_memory.h"
00084 #include "lwn_util.h"
00085 #include "ff_utils.h"
00086 #include "scalar_expand.h"
00087 #include "fission.h"
00088 #include "lnopt_main.h"
00089 #include "opt_du.h"
00090 #include "dep_graph.h"
00091 #include "access_vector.h"
00092 #include "btree.h"
00093 #include "reduc.h"
00094 #include "lno_bv.h"
00095 #include "snl.h"
00096 #include "name.h"
00097 #include "inner_fission.h"
00098 #include "glob.h"
00099 #include "snl_trans.h"
00100 #include "tlog.h"
00101 #include "sxlist.h"
00102 #include "sxlimit.h"
00103 #include "prompf.h"
00104 #include "anl_driver.h"
00105
00106 typedef HASH_TABLE<WN*,VINDEX16> WN2VINDEX;
00107 typedef HASH_TABLE<WN*,UINT> WN2UINT;
00108 typedef HASH_TABLE<WN*,INT> WN2INT;
00109 typedef DYN_ARRAY<UINT> UINT_DYN_ARRAY;
00110
00111 #define TWO_PASS 1 // run two pass merging algorithm if
00112
00113
00114 #define ESTIMATED_SIZE 100 // used to initialized hash table, etc.
00115 #define Iteration_Count_Threshold 10 // threshold to determine if a loop
00116
00117
00118 extern REDUCTION_MANAGER *red_manager;
00119 extern MEM_POOL SNL_local_pool;
00120 static MEM_POOL INNER_FISSION_default_pool;
00121
00122 static ARRAY_DIRECTED_GRAPH16 *adg;
00123
00124 static INT Total_FP_Register_Usage=0;
00125 static INT Total_INT_Register_Usage=0;
00126 static INT Total_TLB_Usage=0;
00127
00128 static ACCESS_VECTOR Dummy_Too_Messy_Access_Vector;
00129 static ACCESS_VECTOR Dummy_Access_Vector;
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139 static void inner_fission_verbose_info(
00140 SRCPOS srcpos,
00141 const char* message)
00142 {
00143 printf("#### Inner Fission(%d): %s\n", Srcpos_To_Line(srcpos), message);
00144 }
00145
00146 static void inner_fission_tlog_info(
00147 INT new_loop_number,
00148 SRCPOS srcpos,
00149 WN* innerloop,
00150 char* message)
00151 {
00152 char in_string[30];
00153 char out_string[30];
00154 sprintf(in_string,"%d", Srcpos_To_Line(srcpos));
00155 sprintf(out_string,"%d", new_loop_number);
00156 Generate_Tlog("LNO","inner_loop_fission",
00157 Srcpos_To_Line(WN_Get_Linenum(innerloop)),
00158 ST_name(WN_st(WN_index(innerloop))),
00159 in_string, out_string, message);
00160 }
00161
00162
00163 extern WN* Find_Stmt_Under(WN* stmt,WN* body) {
00164
00165 if (WN_opcode(stmt)==OPC_FUNC_ENTRY)
00166 return NULL;
00167
00168 if (LWN_Get_Parent(stmt)==body)
00169 return stmt;
00170
00171 WN* parent;
00172 #ifndef KEY
00173 while ((parent=LWN_Get_Parent(stmt))!=body)
00174 if (WN_opcode(parent)==OPC_FUNC_ENTRY)
00175 return NULL;
00176 else
00177 stmt=parent;
00178 #else
00179
00180
00181
00182
00183
00184
00185
00186
00187 while ((parent=LWN_Get_Parent(stmt))!=body && parent)
00188 if (WN_opcode(parent)==OPC_FUNC_ENTRY)
00189 return NULL;
00190 else
00191 stmt=parent;
00192 if (!parent)
00193 return NULL;
00194 #endif
00195 return stmt;
00196 }
00197
00198
00199 static BOOL Is_Invariant(ACCESS_ARRAY* array, WN* loop) {
00200
00201 INT loopno=Do_Loop_Depth(loop);
00202 if (array->Non_Const_Loops() > loopno)
00203 return FALSE;
00204 for (INT i=0; i<array->Num_Vec(); i++) {
00205
00206 ACCESS_VECTOR *av = array->Dim(i);
00207 if (av->Too_Messy)
00208 return FALSE;
00209 for (INT j=loopno; j<av->Nest_Depth(); j++)
00210 if ((av->Loop_Coeff(j) != 0))
00211 return FALSE;
00212 }
00213
00214 return TRUE;
00215 }
00216
00217
00218 static INT Get_Limit() {
00219 if (LNO_Fission_Inner_Register_Limit>0)
00220 return LNO_Fission_Inner_Register_Limit;
00221 else
00222 return Target_FPRs;
00223 }
00224
00225
00226
00227
00228 static void reg_used_if_merged(
00229 FF_STMT_LIST* base_loop,
00230 FF_STMT_LIST* test_loop,
00231 WN* parent_loop,
00232 INT* num_fp_regs,
00233 INT* num_int_regs,
00234 INT* num_tlb) {
00235
00236 REGISTER_MODEL merge_reg_model(&INNER_FISSION_default_pool);
00237
00238 FF_STMT_ITER s_iter(base_loop);
00239 FF_STMT_NODE *stmt_node;
00240 for (stmt_node=s_iter.First(); !s_iter.Is_Empty();
00241 stmt_node=s_iter.Next()) {
00242 WN* stmt=stmt_node->Get_Stmt();
00243 merge_reg_model.Add_Statement(stmt);
00244 }
00245
00246 FF_STMT_ITER t_iter(test_loop);
00247 for (stmt_node=t_iter.First(); !t_iter.Is_Empty();
00248 stmt_node=t_iter.Next()) {
00249 WN* stmt=stmt_node->Get_Stmt();
00250 merge_reg_model.Add_Statement(stmt);
00251 }
00252
00253 merge_reg_model.Calculate_Register_Usage(
00254 parent_loop,num_fp_regs,num_int_regs,num_tlb);
00255 }
00256
00257
00258
00259
00260 extern UINT inner_fission_2(
00261 WN* loop,
00262 SCALAR_STACK* scalar_reads,
00263 SCALAR_STACK* scalar_writes,
00264 REF_LIST_STACK* reads,
00265 REF_LIST_STACK* writes,
00266 BINARY_TREE<NAME2BIT> *mapping_dictionary,
00267
00268
00269 FF_STMT_LIST& expandable_ref_list,
00270
00271 MEM_POOL* mpool)
00272 {
00273
00274 UINT bit_position=0;
00275
00276 REF_LIST_STACK *array_ref_list[2];
00277 array_ref_list[0]=reads;
00278 array_ref_list[1]=writes;
00279
00280
00281 INT i;
00282 for (i=0; i<2; i++) {
00283
00284 for (INT j=0; j<array_ref_list[i]->Elements(); j++) {
00285
00286 REFERENCE_ITER ref_iter(array_ref_list[i]->Bottom_nth(j));
00287 for (REFERENCE_NODE *n1=ref_iter.First(); !ref_iter.Is_Empty();
00288 n1=ref_iter.Next()) {
00289 WN* array_node=n1->Wn;
00290 if (OPCODE_is_load(WN_opcode(array_node))) {
00291 array_node = WN_kid0(array_node);
00292 } else {
00293 array_node = WN_kid1(array_node);
00294 }
00295 if (WN_operator(array_node) == OPR_ADD) {
00296 if (WN_operator(WN_kid0(array_node)) == OPR_ARRAY) {
00297 array_node = WN_kid0(array_node);
00298 } else {
00299 array_node = WN_kid1(array_node);
00300 }
00301 }
00302
00303 if (!OPCODE_has_sym(WN_opcode(WN_array_base(array_node))))
00304 continue;
00305
00306 NAME2BIT temp_map;
00307
00308 temp_map.Set_Symbol(WN_array_base(array_node));
00309 ACCESS_ARRAY* ar=
00310 (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,array_node);
00311 if (Is_Invariant(ar, loop))
00312 temp_map.Set_Access_Array(ar);
00313 else {
00314 ACCESS_ARRAY *ar_new=CXX_NEW(ACCESS_ARRAY(ar,mpool),mpool);
00315 Dummy_Too_Messy_Access_Vector.Const_Offset=INT64_MAX;
00316 Dummy_Too_Messy_Access_Vector.Too_Messy=0;
00317 Dummy_Access_Vector.Too_Messy=0;
00318
00319
00320
00321 for (INT k=0; k<ar->Num_Vec(); k++) {
00322 ACCESS_VECTOR *av=ar_new->Dim(k);
00323 if (av->Too_Messy || av->Contains_Non_Lin_Symb())
00324 ar_new->Dim(k)->Init(&Dummy_Too_Messy_Access_Vector, mpool);
00325 else if (av->Loop_Coeff(av->Nest_Depth()-1)!=0)
00326 ar_new->Dim(k)->Init(&Dummy_Access_Vector, mpool);
00327 }
00328 temp_map.Set_Access_Array(ar_new);
00329 }
00330
00331
00332
00333
00334 if (mapping_dictionary->Find(temp_map)==NULL) {
00335
00336 if (LNO_Test_Dump) {
00337 temp_map.Get_Symbol().Print(stdout);
00338 ACCESS_ARRAY* ar;
00339 if ((ar=temp_map.Get_Access_Array())) {
00340 ar->Print(stdout);
00341 }
00342 printf("\t\tat bit %d\n", bit_position);
00343 }
00344 temp_map.Set_Bit_Position(bit_position);
00345 mapping_dictionary->Enter(temp_map);
00346 bit_position++;
00347 }
00348 }
00349 }
00350 }
00351
00352 SCALAR_STACK *scalar_ref_list[2];
00353 scalar_ref_list[0]=scalar_reads;
00354 scalar_ref_list[1]=scalar_writes;
00355
00356
00357 for (i=0; i<2; i++) {
00358
00359 for (INT j=0; j<scalar_ref_list[i]->Elements(); j++) {
00360
00361 WN* scalar_ref=scalar_ref_list[i]->Bottom_nth(j)->Bottom_nth(0)->Wn;
00362 NAME2BIT temp_map;
00363
00364 temp_map.Set_Symbol(scalar_ref);
00365
00366
00367
00368
00369 if (mapping_dictionary->Find(temp_map)==NULL) {
00370
00371 if (LNO_Test_Dump) {
00372 temp_map.Get_Symbol().Print(stdout);
00373 printf("\t\tat bit %d\n", bit_position);
00374 }
00375 temp_map.Set_Bit_Position(bit_position);
00376 mapping_dictionary->Enter(temp_map);
00377 bit_position++;
00378 }
00379
00380 if (i==1) {
00381 SE_RESULT se_result = Scalar_Expandable(scalar_ref,loop, Du_Mgr);
00382 if (!Get_Trace(TP_LNOPT2, TT_LNO_DISABLE_SEFIN)
00383 && se_result != SE_NONE || se_result == SE_EASY)
00384 expandable_ref_list.Append(scalar_ref,mpool);
00385 }
00386
00387 }
00388 }
00389 return bit_position;
00390 }
00391
00392
00393
00394
00395
00396 extern void Register_Name_To_Statement(
00397 WN* loop,
00398 SCALAR_STACK* scalar_reads,
00399 SCALAR_STACK* scalar_writes,
00400 REF_LIST_STACK* reads,
00401 REF_LIST_STACK* writes,
00402 HASH_TABLE<WN*,UINT>* stmt_id,
00403
00404 BIT_VECTOR* stmt_name_set,
00405
00406 BINARY_TREE<NAME2BIT> *bit_pos_mapping)
00407
00408
00409 {
00410
00411 MEM_POOL_Push(&LNO_local_pool);
00412
00413 WN* body=WN_do_body(loop);
00414 REF_LIST_STACK *array_ref_list[2];
00415 array_ref_list[0]=reads;
00416 array_ref_list[1]=writes;
00417
00418
00419 INT i;
00420 for (i=0; i<2; i++) {
00421
00422 for (INT j=0; j<array_ref_list[i]->Elements(); j++) {
00423
00424 REFERENCE_ITER ref_iter(array_ref_list[i]->Bottom_nth(j));
00425 for (REFERENCE_NODE *n1=ref_iter.First(); !ref_iter.Is_Empty();
00426 n1=ref_iter.Next()) {
00427 WN* array_node=n1->Wn;
00428 if (OPCODE_is_load(WN_opcode(array_node))) {
00429 array_node = WN_kid0(array_node);
00430 } else {
00431 array_node = WN_kid1(array_node);
00432 }
00433 if (WN_operator(array_node) == OPR_ADD) {
00434 if (WN_operator(WN_kid0(array_node)) == OPR_ARRAY) {
00435 array_node = WN_kid0(array_node);
00436 } else {
00437 array_node = WN_kid1(array_node);
00438 }
00439 }
00440
00441 if (!OPCODE_has_sym(WN_opcode(WN_array_base(array_node))))
00442 continue;
00443
00444
00445
00446 NAME2BIT temp_map;
00447
00448 temp_map.Set_Symbol(WN_array_base(array_node));
00449 ACCESS_ARRAY* ar=
00450 (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,array_node);
00451 if (Is_Invariant(ar, loop))
00452 temp_map.Set_Access_Array(ar);
00453 else {
00454 ACCESS_ARRAY *ar_new=
00455 CXX_NEW(ACCESS_ARRAY(ar,&LNO_local_pool),&LNO_local_pool);
00456 Dummy_Too_Messy_Access_Vector.Const_Offset=INT64_MAX;
00457 Dummy_Too_Messy_Access_Vector.Too_Messy=0;
00458 Dummy_Access_Vector.Too_Messy=0;
00459
00460
00461
00462 for (INT k=0; k<ar->Num_Vec(); k++) {
00463 ACCESS_VECTOR *av=ar_new->Dim(k);
00464 if (av->Too_Messy || av->Contains_Non_Lin_Symb())
00465 ar_new->Dim(k)->Init(&Dummy_Too_Messy_Access_Vector,
00466 &LNO_local_pool);
00467 else if (av->Loop_Coeff(av->Nest_Depth()-1)!=0)
00468 ar_new->Dim(k)->Init(&Dummy_Access_Vector, &LNO_local_pool);
00469 }
00470 temp_map.Set_Access_Array(ar_new);
00471 }
00472
00473
00474 BINARY_TREE_NODE<NAME2BIT> *ptr=bit_pos_mapping->Find(temp_map);
00475 const NAME2BIT *name_map=ptr->Get_Data();
00476
00477
00478 WN* wn=Find_Stmt_Under(array_node,body);
00479 UINT temp_stmt_id=stmt_id->Find(wn);
00480
00481 stmt_name_set[temp_stmt_id].Set(name_map->Get_Bit_Position());
00482 }
00483 }
00484 }
00485
00486 SCALAR_STACK *scalar_ref_list[2];
00487 scalar_ref_list[0]=scalar_reads;
00488 scalar_ref_list[1]=scalar_writes;
00489
00490
00491 for (i=0; i<2; i++) {
00492
00493 for (INT j=0; j<scalar_ref_list[i]->Elements(); j++) {
00494 SCALAR_NODE* sjnode=scalar_ref_list[i]->Bottom_nth(j);
00495
00496 for (INT k=0; k<sjnode->Elements(); k++) {
00497
00498 WN* scalar_ref=sjnode->Bottom_nth(k)->Wn;
00499 NAME2BIT temp_map;
00500
00501
00502
00503 temp_map.Set_Symbol(scalar_ref);
00504 const NAME2BIT *name_map=bit_pos_mapping->Find(temp_map)->Get_Data();
00505
00506
00507 WN* wn=Find_Stmt_Under(scalar_ref,body);
00508 UINT temp_stmt_id=stmt_id->Find(wn);
00509
00510 stmt_name_set[temp_stmt_id].Set(name_map->Get_Bit_Position());
00511 }
00512 }
00513 }
00514 MEM_POOL_Pop(&LNO_local_pool);
00515 }
00516
00517 static UINT_DYN_ARRAY* merge_scc_to_form_new_loop(
00518 UINT total_scc,
00519 FF_STMT_LIST* scc,
00520 BIT_VECTOR* scc_name_set,
00521 REGISTER_MODEL* reg_model,
00522 BIT_VECTOR Expandable_Scalar_Set,
00523
00524
00525 WN* loop,
00526 SCC_DIRECTED_GRAPH16 *scc_dep_g
00527 )
00528 {
00529
00530
00531 INT* scc_queue=CXX_NEW_ARRAY(INT,total_scc+1,&INNER_FISSION_default_pool);
00532 INT* scc_score=CXX_NEW_ARRAY(INT,total_scc+1,&INNER_FISSION_default_pool);
00533 UINT head=0;
00534 UINT tail=0;
00535
00536
00537 UINT i;
00538 for (i=1; i<=total_scc; i++) {
00539 if (scc_dep_g->Get_In_Edge(i) == 0)
00540 scc_queue[head++]=i;
00541 }
00542
00543 UINT fp_limit=Get_Limit();
00544 UINT int_limit=Target_INTRs;
00545 UINT fp_threshold=fp_limit;
00546
00547
00548 UINT int_threshold=int_limit;
00549 UINT tlb_limit=Mhd.L[0].TLB_Entries;
00550 UINT tlb_threshold=tlb_limit;
00551
00552
00553 UINT_DYN_ARRAY *seed_scc=CXX_NEW(UINT_DYN_ARRAY(&INNER_FISSION_default_pool),
00554 &INNER_FISSION_default_pool);
00555
00556 while (tail<head) {
00557
00558
00559
00560
00561 UINT best_score=0;
00562 UINT best_scc=tail;
00563 for (i=tail; i!=head; i++) {
00564 UINT score=0;
00565
00566 EINDEX16 e=scc_dep_g->Get_Out_Edge(scc_queue[i]);
00567 while (e) {
00568
00569 VINDEX16 v=scc_dep_g->Get_Sink(e);
00570 if ((scc_dep_g->Get_In_Edge(v)==e) &&
00571 (scc_dep_g->Get_Next_In_Edge(e)==0)) {
00572 score++;
00573 }
00574 e=scc_dep_g->Get_Next_Out_Edge(e);
00575 }
00576 if (score>best_score) {
00577 best_score=score;
00578 best_scc=i;
00579 }
00580 }
00581
00582 UINT seed_scc_id=scc_queue[best_scc];
00583 scc_queue[best_scc]=scc_queue[--head];
00584 if (LNO_Test_Dump)
00585 printf("Choose scc %d as seed\n", seed_scc_id);
00586 UINT loop_id=seed_scc->Newidx();
00587 (*seed_scc)[loop_id]=seed_scc_id;
00588
00589
00590 EINDEX16 e=scc_dep_g->Get_Out_Edge(seed_scc_id);
00591 while (e) {
00592
00593 VINDEX16 v=scc_dep_g->Get_Sink(e);
00594 scc_dep_g->Delete_Edge(e);
00595 if (scc_dep_g->Get_In_Edge(v)==0) {
00596 scc_queue[head++]=v;
00597 }
00598 e=scc_dep_g->Get_Next_Out_Edge(e);
00599 }
00600
00601 INT fpr,intr,tlb;
00602 reg_model[seed_scc_id].Calculate_Register_Usage(loop,&fpr,&intr,&tlb);
00603 if (fpr<=fp_threshold && intr<=int_threshold && tlb <=tlb_threshold) {
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616 INT best_fp_reg_usage=fp_limit;
00617 INT best_int_reg_usage=int_limit;
00618 INT best_tlb_usage=tlb_limit;
00619 do {
00620
00621 INT32 best_score;
00622 INT j;
00623 for (j=tail; j<head; j++) {
00624 BIT_VECTOR name_set= scc_name_set[seed_scc_id];
00625 BIT_VECTOR k= scc_name_set[scc_queue[j]];
00626 scc_score[j]=
00627 10* (k & name_set & Expandable_Scalar_Set).Pop_Count()
00628 -20* (k & ~name_set & Expandable_Scalar_Set).Pop_Count()
00629 + 1* (k & name_set & ~Expandable_Scalar_Set).Pop_Count()
00630 - 2* (k & ~name_set & ~Expandable_Scalar_Set).Pop_Count();
00631 }
00632 for (j=tail; j<head; j++) {
00633 best_score=scc_score[j];
00634 UINT best_score_j1=j;
00635 for (INT j1=j+1; j1<head; j1++) {
00636 if (scc_score[j1]>best_score) {
00637 best_score=scc_score[j1];
00638 best_score_j1=j1;
00639 }
00640 }
00641 if (best_score_j1!=j) {
00642 INT t=scc_score[j];
00643 scc_score[j]=scc_score[best_score_j1];
00644 scc_score[best_score_j1]=t;
00645 t=scc_queue[j];
00646 scc_queue[j]=scc_queue[best_score_j1];
00647 scc_queue[best_score_j1]=t;
00648 }
00649 }
00650
00651 INT best_j= -1;
00652 for (j=tail; j<head; j++) {
00653 if (LNO_Test_Dump) {
00654 printf("Test scc %d for merging, score=%d",
00655 scc_queue[j],scc_score[j]);
00656 }
00657 INT temp_fp_reg_usage;
00658 INT temp_int_reg_usage;
00659 INT temp_tlb_usage;
00660 reg_used_if_merged(
00661 &scc[seed_scc_id], &scc[scc_queue[j]], loop,
00662 &temp_fp_reg_usage, &temp_int_reg_usage, &temp_tlb_usage);
00663 if (temp_fp_reg_usage<=fp_limit && temp_int_reg_usage<=int_limit &&
00664 temp_tlb_usage<=tlb_limit) {
00665
00666 best_j=j;
00667 best_score=scc_score[j];
00668 best_fp_reg_usage=temp_fp_reg_usage;
00669 best_int_reg_usage=temp_int_reg_usage;
00670 best_tlb_usage=temp_tlb_usage;
00671 if (LNO_Test_Dump)
00672 printf(" is the best.\n");
00673 break;
00674 } else
00675 if (LNO_Test_Dump)
00676 printf(" but uses too many (%d+%d+%d) registers.\n",
00677 temp_fp_reg_usage, temp_int_reg_usage,temp_tlb_usage);
00678 else;
00679 }
00680
00681 if (LNO_Run_P3==TWO_PASS && best_score <= -20) {
00682 if (LNO_Test_Dump)
00683 printf("Best score is too low. Stop merging with the seed scc.\n");
00684 break;
00685 } else if (best_j != -1) {
00686
00687 INT merged_scc=scc_queue[best_j];
00688 if (LNO_Test_Dump) {
00689 printf("Merge with scc %d\n", merged_scc);
00690 }
00691 scc_queue[best_j]=scc_queue[--head];
00692
00693
00694 FF_STMT_ITER s_iter(&scc[merged_scc]);
00695 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00696 stmt_node=s_iter.Next())
00697 reg_model[seed_scc_id].Add_Statement(stmt_node->Get_Stmt());
00698 scc[seed_scc_id].Append_List(&scc[merged_scc]);
00699
00700 scc_name_set[seed_scc_id]|=scc_name_set[merged_scc];
00701
00702
00703
00704 EINDEX16 e=scc_dep_g->Get_Out_Edge(merged_scc);
00705 while (e) {
00706
00707 VINDEX16 v=scc_dep_g->Get_Sink(e);
00708 scc_dep_g->Delete_Edge(e);
00709 if (scc_dep_g->Get_In_Edge(v)==0) {
00710 scc_queue[head++]=v;
00711 }
00712 e=scc_dep_g->Get_Next_Out_Edge(e);
00713 }
00714 } else break;
00715 } while (best_fp_reg_usage<=fp_threshold &&
00716 best_int_reg_usage<=int_threshold &&
00717 best_tlb_usage<=tlb_threshold);
00718
00719
00720 }
00721 }
00722
00723 if (LNO_Run_P3==TWO_PASS) {
00724
00725 STACK<UINT> scc_stack(&INNER_FISSION_default_pool);
00726
00727 for (INT j=seed_scc->Lastidx()-1; j>=0; j--) {
00728 UINT scc1=(*seed_scc)[j];
00729 UINT scc2=(*seed_scc)[j+1];
00730 INT temp_fp_reg_usage, temp_int_reg_usage, temp_tlb_usage;
00731 reg_used_if_merged(
00732 &scc[scc1], &scc[scc2], loop,
00733 &temp_fp_reg_usage, &temp_int_reg_usage, &temp_tlb_usage);
00734 if (temp_fp_reg_usage<=fp_limit && temp_int_reg_usage<=int_limit &&
00735 temp_tlb_usage <= tlb_limit) {
00736
00737 if (LNO_Test_Dump) {
00738 printf("Re-Merge scc %d with scc %d\n", scc1, scc2);
00739 }
00740
00741 FF_STMT_ITER s_iter(&scc[scc2]);
00742 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00743 stmt_node=s_iter.Next())
00744 reg_model[scc1].Add_Statement(stmt_node->Get_Stmt());
00745 scc[scc1].Append_List(&scc[scc2]);
00746 scc_name_set[scc1]|=scc_name_set[scc2];
00747
00748 } else
00749 scc_stack.Push(scc2);
00750 }
00751 UINT seed_scc0 = (*seed_scc)[0];
00752 scc_stack.Push(seed_scc0);
00753
00754 seed_scc->Resetidx();
00755 for (i=0; i<scc_stack.Elements(); i++) {
00756 seed_scc->Newidx();
00757 (*seed_scc)[i]=scc_stack.Top_nth(i);
00758 }
00759 }
00760
00761 return (seed_scc);
00762 }
00763
00764 static BOOL fission_is_better(
00765 UINT_DYN_ARRAY* new_loops,
00766 FF_STMT_LIST* scc,
00767 REGISTER_MODEL* reg_model,
00768 WN* loop,
00769 double orig_cycles,
00770 FF_STMT_LIST& expandable_ref_list)
00771 {
00772 WN* body=WN_do_body(loop);
00773 UINT total_loops=new_loops->Lastidx()+1;
00774
00775 if (total_loops<2)
00776 return FALSE;
00777
00778
00779 WN2INT *stmt_to_loop=
00780 CXX_NEW(WN2INT(ESTIMATED_SIZE, &INNER_FISSION_default_pool),&INNER_FISSION_default_pool);
00781
00782 UINT i;
00783 for (i=0; i<total_loops; i++) {
00784
00785 UINT seed_scc=(*new_loops)[i];
00786 FF_STMT_ITER s_iter(&scc[seed_scc]);
00787 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00788 stmt_node=s_iter.Next()) {
00789 WN* stmt=stmt_node->Get_Stmt();
00790 stmt_to_loop->Enter(stmt,i);
00791 }
00792 }
00793
00794 WN2INT *se_needed=
00795 CXX_NEW(WN2INT(ESTIMATED_SIZE, &INNER_FISSION_default_pool),
00796 &INNER_FISSION_default_pool);
00797 FF_STMT_ITER r_iter(&expandable_ref_list);
00798 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
00799 ref_node=r_iter.Next()) {
00800 WN* ref=ref_node->Get_Stmt();
00801 WN* stmt0=Find_Stmt_Under(ref,body);
00802 WN* wn_eq_loop = NULL;
00803 STACK<WN*>* equivalence_class=
00804 Scalar_Equivalence_Class(ref, Du_Mgr, &INNER_FISSION_default_pool,
00805 TRUE, &wn_eq_loop);
00806 BOOL need_expansion=FALSE;
00807 while (!equivalence_class->Is_Empty() && !need_expansion) {
00808 WN* ref1=equivalence_class->Pop();
00809 WN* stmt1=Find_Stmt_Under(ref1,body);
00810 if (stmt_to_loop->Find(stmt0)!=stmt_to_loop->Find(stmt1))
00811 need_expansion=TRUE;
00812 }
00813 if (need_expansion) {
00814 se_needed->Enter(ref,1);
00815 }
00816 }
00817
00818 double total_cycles=0.0;
00819 for (i=0; i<total_loops; i++) {
00820
00821 INT fpr;
00822 INT intr;
00823 INT tlb;
00824 double cycles;
00825 UINT seed_scc=(*new_loops)[i];
00826 reg_model[seed_scc].Evaluate(loop,se_needed,NULL,&cycles,&fpr,&intr,&tlb);
00827
00828 if (fpr>Target_FPRs || intr>Target_FPRs || tlb>Mhd.L[0].TLB_Entries)
00829 return FALSE;
00830
00831 total_cycles += cycles;
00832
00833 if (total_cycles>orig_cycles)
00834 return FALSE;
00835
00836 }
00837
00838 return TRUE;
00839
00840 }
00841
00842 static void separate_loop_and_scalar_expand(
00843 UINT_DYN_ARRAY* new_loops,
00844 FF_STMT_LIST* scc,
00845 REGISTER_MODEL* reg_model,
00846 WN* loop,
00847 FF_STMT_LIST& expandable_ref_list,
00848 WN2UINT* stmt_id,
00849 DYN_ARRAY<UINT>* line_number)
00850 {
00851 char tlog_message[1024];
00852 tlog_message[0]='\0';
00853 WN* body=WN_do_body(loop);
00854 UINT total_loops=new_loops->Lastidx()+1;
00855 UINT *loop_size=CXX_NEW_ARRAY(UINT,total_loops,&INNER_FISSION_default_pool);
00856
00857 WN2INT *stmt_to_loop=
00858 CXX_NEW(WN2INT(ESTIMATED_SIZE, &INNER_FISSION_default_pool),&INNER_FISSION_default_pool);
00859
00860 SRCPOS srcpos=Srcpos_To_Line(WN_Get_Linenum(loop));
00861 BOOL fission_ok = (total_loops>1);
00862 UINT i;
00863 for (i=0; i<total_loops; i++) {
00864
00865 UINT seed_scc=(*new_loops)[i];
00866 UINT total_stmt=0;
00867 FF_STMT_ITER s_iter(&scc[seed_scc]);
00868 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00869 stmt_node=s_iter.Next()) {
00870 WN* stmt=stmt_node->Get_Stmt();
00871 stmt_to_loop->Enter(stmt,i);
00872 LWN_Insert_Block_Before(body,NULL,LWN_Extract_From_Block(stmt));
00873 total_stmt++;
00874 }
00875 loop_size[i]=total_stmt;
00876 INT fpr;
00877 INT intr;
00878 INT tlb;
00879 reg_model[seed_scc].Calculate_Register_Usage(loop,&fpr,&intr,&tlb);
00880 if (LNO_Verbose) {
00881 char message[256];
00882 sprintf(message, "New loop %d", i);
00883 sprintf(message+strlen(message),
00884 " has %d stmts and uses %d FP and %d INT and %d TLB registers\n",
00885 total_stmt, fpr,intr,tlb);
00886 inner_fission_verbose_info(srcpos, message);
00887 }
00888 if (LNO_Tlog) {
00889 char message[256];
00890 sprintf(message, "%d (%d %d %d) ", total_stmt, fpr,intr,tlb);
00891 if (strlen(tlog_message)+strlen(message)<1024)
00892 strcpy(tlog_message+strlen(tlog_message), message);
00893 }
00894
00895 }
00896
00897 if (LNO_Tlog && fission_ok)
00898 inner_fission_tlog_info(total_loops, srcpos, loop, tlog_message);
00899
00900 if (LNO_Analysis) {
00901
00902 if (fission_ok) {
00903 fprintf(LNO_Analysis, "( LNO_Inner_Loop_Fission_Success (%d %d %d %d) %d ",
00904 Srcpos_To_Line(WN_Get_Linenum(loop)),
00905 Total_FP_Register_Usage,
00906 Total_INT_Register_Usage, Total_TLB_Usage, total_loops);
00907
00908 BOOL reg_pressure=FALSE;
00909
00910 for (UINT i=0; i<total_loops; i++) {
00911 fprintf(LNO_Analysis,"( ");
00912
00913 UINT seed_scc=(*new_loops)[i];
00914 UINT total_stmt=0;
00915 FF_STMT_ITER s_iter(&scc[seed_scc]);
00916 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00917 stmt_node=s_iter.Next()) {
00918 WN* stmt=stmt_node->Get_Stmt();
00919 total_stmt++;
00920
00921 UINT sid=stmt_id->Find(stmt);
00922 if (fission_ok)
00923 fprintf(LNO_Analysis,"( %d %d ) ",
00924 (*line_number)[sid], (*line_number)[sid]);
00925 }
00926
00927 INT fp_reg_used;
00928 INT int_reg_used;
00929 INT tlb_used;
00930 reg_model[seed_scc].Calculate_Register_Usage(loop,
00931 &fp_reg_used, &int_reg_used,&tlb_used);
00932
00933 fprintf(LNO_Analysis,"%d) ", fp_reg_used);
00934
00935 if (fp_reg_used>Get_Limit() || int_reg_used>Target_INTRs ||
00936 tlb_used>Mhd.L[0].TLB_Entries)
00937 reg_pressure=TRUE;
00938 }
00939
00940 if (reg_pressure==FALSE)
00941 fprintf(LNO_Analysis,
00942 "\"Register pressure eased by fission.\")\n");
00943 else
00944 fprintf(LNO_Analysis,
00945 "\"Register pressure partially eased by fission.\")\n");
00946
00947 } else {
00948
00949 fprintf(LNO_Analysis, "( LNO_Inner_Loop_Fission_Failure (%s %d %d %d %d) ",
00950 Cur_PU_Name, Srcpos_To_Line(WN_Get_Linenum(loop)),
00951 Total_FP_Register_Usage, Total_INT_Register_Usage, Total_TLB_Usage);
00952 fprintf(LNO_Analysis,
00953 "\"Fission failed due to strong dependences of statements.\")\n");
00954 }
00955
00956 }
00957
00958 if (total_loops>1) {
00959 BOOL has_calls_or_gotos_or_inner_loops = FALSE;
00960 DO_LOOP_INFO* loop_info=Get_Do_Loop_Info(loop, FALSE);
00961 if (loop_info->Has_Calls || loop_info->Has_Gotos || !loop_info->Is_Inner) {
00962 has_calls_or_gotos_or_inner_loops = TRUE;
00963 }
00964
00965 STACK<WN*> se_stack(&INNER_FISSION_default_pool);
00966 STACK<BOOL> finalize_stack(&INNER_FISSION_default_pool);
00967 FF_STMT_ITER r_iter(&expandable_ref_list);
00968 BOOL need_expansion = FALSE;
00969 BOOL need_finalization = FALSE;
00970 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
00971 ref_node=r_iter.Next()) {
00972 WN* ref=ref_node->Get_Stmt();
00973 WN* stmt0=Find_Stmt_Under(ref,body);
00974 WN* wn_eq_loop = NULL;
00975 STACK<WN*>* equivalence_class=
00976 Scalar_Equivalence_Class(ref, Du_Mgr, &INNER_FISSION_default_pool,
00977 TRUE, &wn_eq_loop);
00978 FmtAssert(wn_eq_loop == NULL || Wn_Is_Inside(loop, wn_eq_loop),
00979 ("separate_loop_and_scalar_expand: not really scalar expandable"));
00980 BOOL expand = FALSE;
00981 while (!equivalence_class->Is_Empty() && !expand) {
00982 WN* ref1=equivalence_class->Pop();
00983 WN* stmt1=Find_Stmt_Under(ref1,body);
00984 if (stmt_to_loop->Find(stmt0)!=stmt_to_loop->Find(stmt1))
00985 expand = TRUE;
00986 }
00987 if (expand) {
00988 need_expansion = TRUE;
00989 se_stack.Push(ref);
00990 BOOL finalize = wn_eq_loop != NULL;
00991 finalize_stack.Push(finalize);
00992 if (finalize)
00993 need_finalization = TRUE;
00994 }
00995 }
00996
00997 WN* guard_tests[1];
00998 guard_tests[0] = NULL;
00999 if (need_finalization)
01000 SE_Guard_Tests(loop, 1, guard_tests, Do_Loop_Depth(loop));
01001 WN* tile_loop = NULL;
01002 if (need_expansion && !Get_Trace(TP_LNOPT, TT_LNO_BIG_SCALAR_TILES)) {
01003 tile_loop = SE_Tile_Inner_Loop(loop, &LNO_default_pool);
01004 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01005 if (tile_loop != NULL) {
01006 INT old_id = WN_MAP32_Get(Prompf_Id_Map, loop);
01007 INT new_id = New_Construct_Id();
01008 WN_MAP32_Set(Prompf_Id_Map, tile_loop, new_id);
01009 Prompf_Info->Se_Tile(old_id, new_id);
01010 }
01011 }
01012 }
01013
01014 while (!se_stack.Is_Empty()) {
01015 WN* ref=se_stack.Pop();
01016 BOOL finalize = finalize_stack.Pop();
01017 if (tile_loop != NULL) {
01018 SYMBOL sym(ref);
01019 WN* loops[1];
01020 loops[0] = loop;
01021 WN* tile_loops[1];
01022 tile_loops[0] = tile_loop;
01023 INT strip_sizes[1];
01024 strip_sizes[0] = (INT) Step_Size(tile_loop, 0);
01025 INT order[1];
01026 order[0] = 0;
01027 Scalar_Expand(tile_loop, loop, ref, sym, loops, order, 1, TRUE,
01028 finalize, FALSE, guard_tests, NULL, tile_loops, strip_sizes, 1);
01029 } else {
01030 INT dummy[1]={0};
01031 SYMBOL sym(ref);
01032 Scalar_Expand(loop, loop, ref, sym, &loop, dummy, 1, TRUE, finalize,
01033 FALSE, guard_tests);
01034 }
01035 }
01036
01037 WN* tmp_loop1=loop;
01038 WN** wn_starts=CXX_NEW_ARRAY(WN*, total_loops, &INNER_FISSION_default_pool);
01039 WN** wn_ends=CXX_NEW_ARRAY(WN*, total_loops, &INNER_FISSION_default_pool);
01040 WN** wn_steps=CXX_NEW_ARRAY(WN*, total_loops, &INNER_FISSION_default_pool);
01041 WN** new_loops=CXX_NEW_ARRAY(WN*, total_loops, &INNER_FISSION_default_pool);
01042
01043 wn_starts[0]=WN_kid0(WN_start(tmp_loop1));
01044 wn_ends[0]=WN_end(tmp_loop1);
01045 wn_steps[0]=WN_kid0(WN_step(tmp_loop1));
01046 new_loops[0]=loop;
01047 WN* stmt=WN_first(body);
01048 PROMPF_LINES* old_lines = NULL;
01049 for (i=0; i<total_loops-1; i++) {
01050
01051 INT size=loop_size[i];
01052
01053 for (INT j=0; j<size; j++)
01054 stmt=WN_next(stmt);
01055
01056 WN* tmp_loop2;
01057
01058 Separate(tmp_loop1, WN_prev(stmt), 1, &tmp_loop2);
01059 LWN_Parentize(tmp_loop2);
01060 DO_LOOP_INFO* new_loop_info =
01061 CXX_NEW(DO_LOOP_INFO(loop_info,&LNO_default_pool), &LNO_default_pool);
01062 Set_Do_Loop_Info(tmp_loop2,new_loop_info);
01063 if (has_calls_or_gotos_or_inner_loops) {
01064
01065
01066 }
01067 wn_starts[i+1]=WN_kid0(WN_start(tmp_loop2));
01068 wn_ends[i+1]=WN_end(tmp_loop2);
01069 wn_steps[i+1]=WN_kid0(WN_step(tmp_loop2));
01070 new_loops[i+1]=tmp_loop2;
01071
01072 tmp_loop1=tmp_loop2;
01073 }
01074 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01075 INT old_id = WN_MAP32_Get(Prompf_Id_Map, new_loops[0]);
01076 PROMPF_LINES* old_lines = CXX_NEW(PROMPF_LINES(new_loops[0],
01077 &PROMPF_pool), &PROMPF_pool);
01078 INT* new_ids = CXX_NEW_ARRAY(INT, total_loops - 1, &PROMPF_pool);
01079 PROMPF_LINES** new_lines = CXX_NEW_ARRAY(PROMPF_LINES*, total_loops - 1,
01080 &PROMPF_pool);
01081 INT i;
01082 for (i = 0; i < total_loops - 1; i++) {
01083 new_ids[i] = New_Construct_Id();
01084 WN_MAP32_Set(Prompf_Id_Map, new_loops[i + 1], new_ids[i]);
01085 }
01086 for (i = 0; i < total_loops - 1; i++)
01087 new_lines[i] = CXX_NEW(PROMPF_LINES(new_loops[i + 1], &PROMPF_pool),
01088 &PROMPF_pool);
01089 Prompf_Info->Inner_Fission(old_id, old_lines, new_ids, new_lines,
01090 total_loops - 1);
01091 }
01092
01093 Fission_DU_Update(Du_Mgr,red_manager,wn_starts,wn_ends,wn_steps,
01094 total_loops,new_loops);
01095 for (i=0; i<total_loops-1; i++) {
01096 scalar_rename(LWN_Get_Parent(wn_starts[i]));
01097 }
01098
01099 adg->Fission_Dep_Update(new_loops[0],total_loops);
01100 }
01101
01102 }
01103
01104
01105
01106 extern INT Fission_Inner_Loop(WN* innerloop,
01107 BOOL known_to_have_register_allocation_problem)
01108 {
01109
01110 if (!Do_Loop_Is_Good(innerloop) || Do_Loop_Has_Calls(innerloop) ||
01111 Do_Loop_Has_Gotos(innerloop)) {
01112 Is_True(0, ("Bad loop passed to Fission_Inner_Loop().\n"));
01113 return 0;
01114 }
01115 if (!Do_Loop_Is_Inner(innerloop)) {
01116 Is_True(0, ("Non-innermost loop passed to Fission_Inner_Loop().\n"));
01117 return 0;
01118 }
01119 DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
01120 if (dli->Has_Gotos || dli->Has_Calls) {
01121 Is_True(0, ("Loop with gotos or calls passed to Fission().\n"));
01122 return 0;
01123 }
01124
01125 char source_line[80];
01126 if (strlen(Cur_PU_Name)>=65) {
01127 sprintf(source_line,"%s:%d", "name_too_long",
01128 Srcpos_To_Line(WN_Get_Linenum(innerloop)));
01129 } else
01130 sprintf(source_line,"%s:%d", Cur_PU_Name,
01131 Srcpos_To_Line(WN_Get_Linenum(innerloop)));
01132 SRCPOS srcpos=Srcpos_To_Line(WN_Get_Linenum(innerloop));
01133
01134
01135 if (!dli->Aggressive_Inner_Fission &&
01136 (dli->Est_Num_Iterations < Iteration_Count_Threshold)) {
01137 if (LNO_Verbose)
01138 inner_fission_verbose_info(srcpos,
01139 "Loops with too few iterations for fission\n");
01140 return 1;
01141 }
01142
01143 MEM_POOL_Push(&INNER_FISSION_default_pool);
01144 {
01145
01146 WN2INT *se_needed=
01147 CXX_NEW(WN2INT(ESTIMATED_SIZE, &INNER_FISSION_default_pool),
01148 &INNER_FISSION_default_pool);
01149
01150 REGISTER_MODEL *orig_reg_model=
01151 CXX_NEW(REGISTER_MODEL(&INNER_FISSION_default_pool),
01152 &INNER_FISSION_default_pool);
01153 WN *stmt;
01154 for (stmt=WN_first(WN_do_body(innerloop)); stmt; stmt=WN_next(stmt)) {
01155 orig_reg_model->Add_Statement(stmt);
01156 }
01157 double orig_cycles;
01158 INT orig_fpr;
01159 INT orig_intr;
01160 INT orig_tlb;
01161 orig_reg_model->Evaluate(
01162 innerloop,se_needed,NULL,&orig_cycles,&orig_fpr,&orig_intr,&orig_tlb);
01163 if (LNO_Verbose)
01164 printf("Before inner_fission: %f cycles, %d fpr, %d intr, %d tlb\n",
01165 orig_cycles,orig_fpr,orig_intr,orig_tlb);
01166
01167 if (dli->Est_Register_Usage.Est_Fp_Regs() >= 0 ||
01168 dli->Est_Register_Usage.Est_Int_Regs() >= 0 ||
01169 dli->Est_Register_Usage.Est_TLB() >= 0) {
01170 Total_FP_Register_Usage = dli->Est_Register_Usage.Est_Fp_Regs();
01171 Total_INT_Register_Usage = dli->Est_Register_Usage.Est_Int_Regs();
01172 Total_TLB_Usage= dli->Est_Register_Usage.Est_TLB();
01173 }
01174
01175
01176
01177
01178 if (!known_to_have_register_allocation_problem) {
01179 REGISTER_MODEL *test_reg_model=
01180 CXX_NEW(REGISTER_MODEL(&INNER_FISSION_default_pool),
01181 &INNER_FISSION_default_pool);
01182 WN* body=WN_do_body(innerloop);
01183 WN* stmt;
01184 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)) {
01185 test_reg_model->Add_Statement(stmt);
01186 }
01187 test_reg_model->Calculate_Register_Usage(innerloop,
01188 &Total_FP_Register_Usage,&Total_INT_Register_Usage,&Total_TLB_Usage);
01189 if (LNO_Verbose) {
01190 char message[256];
01191 sprintf(message,"Model estimates %d FP and %d INT and %d TLB registers required",
01192 Total_FP_Register_Usage, Total_INT_Register_Usage,Total_TLB_Usage);
01193 inner_fission_verbose_info(srcpos, message);
01194 }
01195 if (!dli->Aggressive_Inner_Fission &&
01196 Total_FP_Register_Usage<=Get_Limit() &&
01197 Total_INT_Register_Usage<=Target_INTRs &&
01198 Total_TLB_Usage<=Mhd.L[0].TLB_Entries) {
01199 CXX_DELETE(test_reg_model,&INNER_FISSION_default_pool);
01200 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01201 if (LNO_Verbose) {
01202 char message[256];
01203 sprintf(message,
01204 "Est. %d FP (<=%d) and %d INT (<=%d) and %d TLB (<=%d) regs, no fission is needed",
01205 Total_FP_Register_Usage, Get_Limit(),
01206 Total_INT_Register_Usage, Target_INTRs,
01207 Total_TLB_Usage,Mhd.L[0].TLB_Entries);
01208 inner_fission_verbose_info(srcpos, message);
01209 }
01210 return 1;
01211 }
01212 CXX_DELETE(test_reg_model,&INNER_FISSION_default_pool);
01213 }
01214 if (LNO_Verbose) {
01215 char message[256];
01216 sprintf(message,"Model estimates %d FP and %d INT registers required",
01217 Total_FP_Register_Usage, Total_INT_Register_Usage);
01218 inner_fission_verbose_info(srcpos, message);
01219 }
01220
01221 INT Split_Region(WN *region, ARRAY_DIRECTED_GRAPH16 *dep_graph);
01222 if (!Split_Region(innerloop,adg)) {
01223 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01224 return 0;
01225 }
01226
01227
01228 BINARY_TREE<NAME2BIT> *mapping_dictionary =
01229 CXX_NEW(BINARY_TREE<NAME2BIT>(&INNER_FISSION_default_pool),&INNER_FISSION_default_pool);
01230
01231
01232
01233
01234 SCC_DIRECTED_GRAPH16 *dep_g_p =
01235 CXX_NEW(SCC_DIRECTED_GRAPH16(ESTIMATED_SIZE,ESTIMATED_SIZE),
01236 &INNER_FISSION_default_pool);
01237
01238
01239
01240 WN2VINDEX *stmt_to_vertex=
01241 CXX_NEW(WN2VINDEX(ESTIMATED_SIZE, &INNER_FISSION_default_pool),
01242 &INNER_FISSION_default_pool);
01243
01244
01245 WN2UINT *stmt_id=
01246 CXX_NEW(WN2UINT(ESTIMATED_SIZE, &INNER_FISSION_default_pool),&INNER_FISSION_default_pool);
01247
01248 DYN_ARRAY<UINT32> line_number(&INNER_FISSION_default_pool);
01249
01250
01251 REF_LIST_STACK *writes = CXX_NEW(REF_LIST_STACK(&INNER_FISSION_default_pool),
01252 &INNER_FISSION_default_pool);
01253 REF_LIST_STACK *reads = CXX_NEW(REF_LIST_STACK(&INNER_FISSION_default_pool),
01254 &INNER_FISSION_default_pool);
01255
01256 SCALAR_STACK *scalar_writes = CXX_NEW(SCALAR_STACK(&INNER_FISSION_default_pool),
01257 &INNER_FISSION_default_pool);
01258 SCALAR_STACK *scalar_reads = CXX_NEW(SCALAR_STACK(&INNER_FISSION_default_pool),
01259 &INNER_FISSION_default_pool);
01260 SCALAR_REF_STACK *params =
01261 CXX_NEW(SCALAR_REF_STACK(&INNER_FISSION_default_pool), &INNER_FISSION_default_pool);
01262
01263
01264 DOLOOP_STACK *stack=CXX_NEW(DOLOOP_STACK(&INNER_FISSION_default_pool),
01265 &INNER_FISSION_default_pool);
01266 Build_Doloop_Stack(innerloop, stack);
01267
01268
01269
01270
01271 UINT stmt_count=0;
01272 INT32 gather_status=0;
01273 Init_Ref_Stmt_Counter();
01274 WN* body=WN_do_body(innerloop);
01275 for (stmt=WN_first(body); stmt && gather_status!= -1; stmt=WN_next(stmt)) {
01276 UINT i=line_number.Newidx();
01277 line_number[i]=Srcpos_To_Line(WN_Get_Linenum(stmt));
01278 VINDEX16 v=dep_g_p->Add_Vertex();
01279 if (v==0) {
01280 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01281 return(0);
01282 }
01283 stmt_to_vertex->Enter(stmt, v);
01284 stmt_id->Enter(stmt, stmt_count++);
01285 gather_status=New_Gather_References(stmt,writes,reads,stack,
01286 scalar_writes,scalar_reads,
01287 params,&INNER_FISSION_default_pool) ;
01288 }
01289 if (gather_status == -1) {
01290 DevWarn("Error in gathering references");
01291 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01292 return 0;
01293 }
01294
01295 line_number[line_number.Newidx()]=
01296 line_number[stmt_count-1]+1;
01297
01298
01299
01300 FF_STMT_LIST expandable_ref_list;
01301
01302
01303
01304
01305
01306 UINT sym_count=inner_fission_2(innerloop, scalar_reads, scalar_writes,
01307 reads, writes,
01308 mapping_dictionary, expandable_ref_list, &INNER_FISSION_default_pool);
01309
01310
01311
01312
01313 BIT_VECTOR* stmt_name_set=
01314 CXX_NEW_ARRAY(BIT_VECTOR, stmt_count, &INNER_FISSION_default_pool);
01315
01316
01317
01318 UINT i;
01319 for (i=0; i<stmt_count; i++)
01320 stmt_name_set[i].Init(sym_count, &INNER_FISSION_default_pool);
01321
01322
01323 BIT_VECTOR Expandable_Scalar_Set(sym_count, &INNER_FISSION_default_pool);
01324
01325
01326
01327 FF_STMT_ITER e_iter(&expandable_ref_list);
01328 for (FF_STMT_NODE* ref_node=e_iter.First(); !e_iter.Is_Empty();
01329 ref_node=e_iter.Next()) {
01330 NAME2BIT temp_map;
01331 temp_map.Set_Symbol(ref_node->Get_Stmt());
01332 Expandable_Scalar_Set.Set(mapping_dictionary->Find(temp_map)->
01333 Get_Data()->Get_Bit_Position());
01334 }
01335
01336 if (LNO_Test_Dump) {
01337 printf("Expandable_Scalar_Set=\n");
01338 Expandable_Scalar_Set.Print(stdout);
01339 }
01340
01341 Register_Name_To_Statement(
01342 innerloop,scalar_reads,scalar_writes,reads,writes,
01343 stmt_id,stmt_name_set,mapping_dictionary);
01344
01345 WN_MAP sdm=WN_MAP_Create(&INNER_FISSION_default_pool);
01346 ARRAY_DIRECTED_GRAPH16 *sdg =
01347 CXX_NEW(ARRAY_DIRECTED_GRAPH16(100,500,sdm,LEVEL_ARRAY_GRAPH),
01348 &INNER_FISSION_default_pool);
01349
01350 for (stmt = WN_first(body); stmt; stmt = WN_next(stmt)) {
01351 if (!Map_Stmt_To_Level_Graph(stmt,sdg)) {
01352 DevWarn("Statement dependence graph problem");
01353 WN_MAP_Delete(sdm);
01354 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01355 return(0);
01356 }
01357 }
01358
01359 BOOL status=Generate_Scalar_Dependence_For_Statement_Dependence_Graph(
01360 innerloop, scalar_reads, scalar_writes, params, sdg, red_manager,
01361 &Expandable_Scalar_Set, mapping_dictionary);
01362 if (status==FALSE) {
01363 DevWarn("Statement dependence graph problem");
01364 WN_MAP_Delete(sdm);
01365 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01366 return(0);
01367 }
01368
01369 status=Generate_Array_Dependence_For_Statement_Dependence_Graph(
01370 innerloop, reads, writes, sdg, red_manager, adg);
01371 if (status==FALSE) {
01372 DevWarn("Statement dependence graph problem");
01373 WN_MAP_Delete(sdm);
01374 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01375 return(0);
01376 }
01377
01378 if (LNO_Test_Dump) {
01379 fprintf(TFile,"Statement Dependence Graph of Inner_Fission Phase:\n");
01380 sdg->Print(TFile);
01381 }
01382
01383
01384
01385 EINDEX16 e=sdg->Get_Edge();
01386 while (e) {
01387 WN* source=sdg->Get_Wn(sdg->Get_Source(e));
01388 WN* sink=sdg->Get_Wn(sdg->Get_Sink(e));
01389 if (LWN_Get_Parent(source) == body || LWN_Get_Parent(sink) == body)
01390
01391 dep_g_p->Add_Unique_Edge(
01392 stmt_to_vertex->Find(source),
01393 stmt_to_vertex->Find(sink));
01394 e=sdg->Get_Next_Edge(e);
01395 }
01396
01397 if (LNO_Test_Dump) {
01398 fprintf(TFile,
01399 "Innerloop Statement Dependence Graph of Inner_Fission Phase:\n");
01400 dep_g_p->Print(TFile);
01401 }
01402
01403
01404
01405 SCC_DIRECTED_GRAPH16 *ac_g;
01406 ac_g = dep_g_p->Acyclic_Condensation(&INNER_FISSION_default_pool);
01407
01408 if (LNO_Test_Dump) {
01409 fprintf(TFile,"SCC Dependence Graph of Inner_Fission Phase:\n");
01410 ac_g->Print(TFile);
01411 }
01412
01413 VINDEX16 total_scc = dep_g_p->Get_Scc_Count();
01414
01415
01416 FF_STMT_LIST *scc;
01417 scc = CXX_NEW_ARRAY(FF_STMT_LIST, total_scc+1, &INNER_FISSION_default_pool);
01418
01419
01420 REGISTER_MODEL* reg_model=
01421 CXX_NEW_ARRAY(REGISTER_MODEL,total_scc+1, &INNER_FISSION_default_pool);
01422
01423
01424 BIT_VECTOR* scc_name_set=
01425 CXX_NEW_ARRAY(BIT_VECTOR, total_scc+1, &INNER_FISSION_default_pool);
01426
01427 UINT *scc_size=CXX_NEW_ARRAY(UINT, total_scc+1, &INNER_FISSION_default_pool);
01428
01429
01430 for (i=1; i<=total_scc; i++) {
01431 reg_model[i].Init(&INNER_FISSION_default_pool);
01432 scc_name_set[i].Init(sym_count,&INNER_FISSION_default_pool);
01433 scc_size[i]=0;
01434 }
01435
01436
01437
01438
01439 for (stmt = WN_first(WN_do_body(innerloop)); stmt; stmt = WN_next(stmt)) {
01440 VINDEX16 scc_id;
01441 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
01442 scc[scc_id].Append(stmt, &INNER_FISSION_default_pool);
01443 reg_model[scc_id].Add_Statement(stmt);
01444
01445 scc_name_set[scc_id]|=stmt_name_set[stmt_id->Find(stmt)];
01446 scc_size[scc_id]++;
01447 }
01448
01449 if (LNO_Test_Dump)
01450 for (i=1; i<=total_scc; i++) {
01451
01452 printf("INNER_FISSION:scc %d:", i);
01453 FF_STMT_ITER s_iter(&scc[i]);
01454 INT j=0;
01455 for (FF_STMT_NODE *stmt_node=s_iter.First(); !s_iter.Is_Empty();
01456 stmt_node=s_iter.Next()) {
01457 stmt=stmt_node->Get_Stmt();
01458 Dump_WN(stmt,stdout,TRUE,4,4);
01459 j++;
01460 }
01461 INT fpr, intr,tlb;
01462 reg_model[i].Calculate_Register_Usage(innerloop,&fpr,&intr,&tlb);
01463 printf(" has %d stmts and uses %d FP and %d INT and %d TLB rgisters\n",
01464 j, fpr,intr,tlb);
01465 printf("name_set=\n");
01466 scc_name_set[i].Print(stdout);
01467
01468 }
01469
01470 if (LNO_Analysis) {
01471 for (i=1; i<=total_scc; i++) {
01472 fprintf(LNO_Analysis,
01473 "( LNO_Inner_Loop_Fission_Scc (%s %d %d %d) ( ",
01474 Cur_PU_Name, Srcpos_To_Line(WN_Get_Linenum(innerloop)), i,scc_size[i]);
01475 FF_STMT_ITER s_iter(&scc[i]);
01476 for (FF_STMT_NODE *stmt_node=s_iter.First(); !s_iter.Is_Empty();
01477 stmt_node=s_iter.Next()) {
01478 stmt=stmt_node->Get_Stmt();
01479 fprintf(LNO_Analysis, "%d ", Srcpos_To_Line(WN_Get_Linenum(stmt)));
01480 }
01481 fprintf(LNO_Analysis, "))\n");
01482 }
01483 }
01484
01485 UINT_DYN_ARRAY* new_loops;
01486
01487 if (((DO_LOOP_INFO*)Get_Do_Loop_Info(innerloop))->Aggressive_Inner_Fission) {
01488
01489 new_loops=CXX_NEW(UINT_DYN_ARRAY(&INNER_FISSION_default_pool),
01490 &INNER_FISSION_default_pool);
01491 for (INT j=1; j<=total_scc; j++) {
01492 UINT i=new_loops->Newidx();
01493 (*new_loops)[i]=j;
01494 }
01495 } else {
01496
01497 new_loops=merge_scc_to_form_new_loop(
01498 total_scc,scc,scc_name_set,reg_model,
01499 Expandable_Scalar_Set,innerloop, ac_g);
01500 }
01501
01502
01503
01504
01505 if (dli->Aggressive_Inner_Fission ||
01506 fission_is_better(new_loops,scc,reg_model,
01507 innerloop,orig_cycles,expandable_ref_list)) {
01508
01509
01510 separate_loop_and_scalar_expand(new_loops,scc,reg_model,
01511 innerloop,expandable_ref_list,stmt_id,&line_number);
01512
01513 WN* loop_wn=innerloop;
01514 if (LNO_Verbose) {
01515 printf("After inner_fission: ");
01516 for (i=0; i<new_loops->Elements(); i++) {
01517
01518 REGISTER_MODEL *tmp_reg_model=
01519 CXX_NEW(REGISTER_MODEL(&INNER_FISSION_default_pool),
01520 &INNER_FISSION_default_pool);
01521 for (WN* stmt=WN_first(WN_do_body(loop_wn)); stmt; stmt=WN_next(stmt)) {
01522 tmp_reg_model->Add_Statement(stmt);
01523 }
01524 double tmp_cycles;
01525 INT tmp_fpr;
01526 INT tmp_intr;
01527 INT tmp_tlb;
01528 tmp_reg_model->Evaluate(
01529 loop_wn,se_needed,NULL,&tmp_cycles,&tmp_fpr,&tmp_intr,&tmp_tlb);
01530 printf("(%f cycles, %d fpr, %d intr, %d tlb) ",
01531 tmp_cycles,tmp_fpr,tmp_intr, tmp_tlb);
01532
01533 loop_wn=WN_next(loop_wn);
01534 }
01535 printf(" cycles\n");
01536 }
01537 } else {
01538 if (LNO_Verbose)
01539 printf ("Loop is not fissioned after evaluation\n");
01540 }
01541
01542
01543
01544 CXX_DELETE(dep_g_p, &INNER_FISSION_default_pool);
01545 CXX_DELETE(ac_g, &INNER_FISSION_default_pool);
01546 CXX_DELETE(sdg, &INNER_FISSION_default_pool);
01547 new_loops->Free_array();
01548 line_number.Free_array();
01549 WN_MAP_Delete(sdm);
01550 }
01551 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01552
01553 return 1;
01554
01555 }
01556
01557
01558 static void Inner_Fission_Phase_Walk(WN* wn) {
01559 OPCODE opc=WN_opcode(wn);
01560
01561 if (!OPCODE_is_scf(opc))
01562 return;
01563 else if (opc==OPC_DO_LOOP) {
01564 if (Do_Loop_Is_Good(wn) && Do_Loop_Is_Inner(wn) &&
01565 !Do_Loop_Has_Calls(wn) && !Do_Loop_Has_Gotos(wn)) {
01566 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn);
01567 if (WN_first(WN_do_body(wn)) &&
01568 (dli->Aggressive_Inner_Fission ||
01569 (!dli->No_Fission && !dli->Est_Register_Usage.Fits())))
01570 Fission_Inner_Loop(wn,dli->Est_Register_Usage.Does_Not_Fit());
01571 } else
01572 Inner_Fission_Phase_Walk(WN_do_body(wn));
01573 } else if (opc==OPC_BLOCK)
01574 for (WN* stmt=WN_first(wn); stmt;) {
01575 WN* next_stmt=WN_next(stmt);
01576 Inner_Fission_Phase_Walk(stmt);
01577 stmt=next_stmt;
01578 }
01579 else
01580 for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++) {
01581 Inner_Fission_Phase_Walk(WN_kid(wn,kidno));
01582 }
01583 }
01584
01585 void Inner_Fission(
01586 WN* func_nd,
01587 ARRAY_DIRECTED_GRAPH16* Array_Dependence_Graph
01588 )
01589 {
01590
01591
01592
01593
01594 MEM_POOL_Initialize(&INNER_FISSION_default_pool,"INNER_FISSION_default_pool",FALSE);
01595 MEM_POOL_Push(&INNER_FISSION_default_pool);
01596
01597 adg=Array_Dependence_Graph;
01598
01599 Inner_Fission_Phase_Walk(func_nd);
01600
01601 MEM_POOL_Pop(&INNER_FISSION_default_pool);
01602 MEM_POOL_Delete(&INNER_FISSION_default_pool);
01603
01604 }
01605
01606
01607