00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060 #ifdef USE_PCH
00061 #include "lno_pch.h"
00062 #endif // USE_PCH
00063 #pragma hdrstop
00064
00065 #ifdef _KEEP_RCS_ID
00066
00067 static char *rcs_id = "$Source: be/lno/SCCS/s.vintr_fis.cxx $ $Revision: 1.18 $";
00068 #endif
00069
00070 #include <sys/types.h>
00071 #include <stdlib.h>
00072 #include "defs.h"
00073 #include "glob.h"
00074 #include "wn.h"
00075 #include "wn_map.h"
00076 #include "cxx_memory.h"
00077 #include "lwn_util.h"
00078 #include "ff_utils.h"
00079 #include "lnoutils.h"
00080 #include "lnopt_main.h"
00081 #include "scalar_expand.h"
00082 #include "fission.h"
00083 #include "opt_du.h"
00084 #include "dep_graph.h"
00085 #include "btree.h"
00086 #include "reduc.h"
00087 #include "lno_bv.h"
00088 #include "snl.h"
00089 #include "name.h"
00090 #include "inner_fission.h"
00091 #include "lno_scc.h"
00092 #include "config_targ.h"
00093 #include "tlog.h"
00094 #include "sxlimit.h"
00095 #include "prompf.h"
00096 #include "anl_driver.h"
00097 #include "intrn_info.h"
00098 #ifdef KEY
00099 #include "wn_simp.h"
00100 #include "config_opt.h"
00101 #endif
00102
00103 #pragma weak New_Construct_Id
00104
00105 BOOL Variant_Array(WN *store, WN *split_point,
00106 ARRAY_DIRECTED_GRAPH16 *dep_graph);
00107 INT Split_Array(WN *statement, WN *split_point,
00108 ARRAY_DIRECTED_GRAPH16 *dep_graph);
00109 extern WN *Split_Using_Preg(WN* stmt, WN* intrinsic_op,
00110 ARRAY_DIRECTED_GRAPH16* dep_graph,
00111 BOOL recursive=TRUE);
00112 typedef STACK<WN*> STACK_OF_WN;
00113 typedef HASH_TABLE<WN*,VINDEX16> WN2VINDEX;
00114 typedef HASH_TABLE<WN*,UINT> WN2UINT;
00115 typedef HASH_TABLE<WN*,INT> WN2INT;
00116 typedef DYN_ARRAY<UINT> UINT_DYN_ARRAY;
00117
00118 #define ESTIMATED_SIZE 100 // used to initialized hash table, etc.
00119 #define Iteration_Count_Threshold 10 // threshold to determine if a loop
00120
00121
00122 extern REDUCTION_MANAGER *red_manager;
00123 extern MEM_POOL SNL_local_pool;
00124 static MEM_POOL VINTR_FIS_default_pool;
00125 static ARRAY_DIRECTED_GRAPH16 *adg;
00126
00127 static void vintr_fission_verbose_info(
00128 SRCPOS srcpos,
00129 char* message)
00130 {
00131
00132 printf("#### Vintr Fission(%d): %s\n", Srcpos_To_Line(srcpos), message);
00133
00134 }
00135
00136 static void vintr_fission_analysis_info(
00137 BOOL success,
00138 SRCPOS srcpos,
00139 char* message)
00140 {
00141
00142 if (success)
00143 fprintf(LNO_Analysis,"( LNO_Vintr_Fission_Success ");
00144 else
00145 fprintf(LNO_Analysis,"( LNO_Vintr_Fission_Failure ");
00146
00147 fprintf(LNO_Analysis,"(%s %d) \"%s\" )\n",
00148 Cur_PU_Name, Srcpos_To_Line(srcpos), message);
00149 }
00150
00151 static void vintr_fission_tlog_info(
00152 FISSION_FUSION_STATUS status,
00153 SRCPOS srcpos,
00154 char* index_name,
00155 char* message)
00156 {
00157
00158 char in_string[30];
00159 char out_string[30];
00160 sprintf(in_string,"%d", Srcpos_To_Line(srcpos));
00161 sprintf(out_string,"%d", status);
00162 Generate_Tlog("LNO","vintr_fission",
00163 Srcpos_To_Line(srcpos),
00164 index_name,
00165 in_string, out_string, message);
00166 }
00167
00168
00169 static INT64 MTYPE_size(TYPE_ID mtype)
00170 {
00171 INT64 sz;
00172 switch (mtype) {
00173 case MTYPE_B: sz=1; break;
00174 case MTYPE_I1: sz=1; break;
00175 case MTYPE_I2: sz=2; break;
00176 case MTYPE_I4: sz=4; break;
00177 case MTYPE_I8: sz=8; break;
00178 case MTYPE_U1: sz=1; break;
00179 case MTYPE_U2: sz=2; break;
00180 case MTYPE_U4: sz=4; break;
00181 case MTYPE_U8: sz=8; break;
00182 case MTYPE_F4: sz=4; break;
00183 case MTYPE_F8: sz=8; break;
00184 #if defined(TARG_IA64)
00185 case MTYPE_F10: sz=16; break;
00186 #else
00187 case MTYPE_F10: sz=10; break;
00188 #endif
00189 case MTYPE_F16: sz=16; break;
00190 case MTYPE_STR: sz=0; break;
00191 case MTYPE_FQ: sz=16; break;
00192 case MTYPE_M: sz=0; break;
00193 case MTYPE_C4: sz=8; break;
00194 case MTYPE_C8: sz=16; break;
00195 #if defined(TARG_IA64)
00196 case MTYPE_C10: sz=32; break;
00197 #endif
00198 case MTYPE_CQ: sz=32; break;
00199 case MTYPE_V: sz=0; break;
00200 default: sz=0; break;
00201 }
00202
00203 return sz;
00204 }
00205
00206 static BOOL
00207 Is_Proper_Descendant(WN *node, WN *ancestor)
00208 {
00209 do {
00210 node = LWN_Get_Parent(node);
00211 } while (node && node != ancestor);
00212 return (node != NULL);
00213 }
00214
00215
00216
00217
00218 extern UINT vintr_fis_2(
00219 WN* loop,
00220 SCALAR_STACK* scalar_reads,
00221 SCALAR_STACK* scalar_writes,
00222 BINARY_TREE<NAME2BIT> *mapping_dictionary,
00223
00224
00225 FF_STMT_LIST& expandable_ref_list)
00226
00227 {
00228
00229 UINT bit_position=0;
00230
00231 SCALAR_STACK *scalar_ref_list[2];
00232 scalar_ref_list[0]=scalar_reads;
00233 scalar_ref_list[1]=scalar_writes;
00234
00235
00236 for (INT i=0; i<2; i++) {
00237
00238 for (INT j=0; j<scalar_ref_list[i]->Elements(); j++) {
00239
00240 WN* scalar_ref=scalar_ref_list[i]->Bottom_nth(j)->Bottom_nth(0)->Wn;
00241 NAME2BIT temp_map;
00242
00243 temp_map.Set_Symbol(scalar_ref);
00244
00245
00246
00247
00248 const BINARY_TREE_NODE<NAME2BIT> *tree_node;
00249 if (mapping_dictionary->Find(temp_map)==NULL) {
00250
00251 if (LNO_Test_Dump) {
00252 temp_map.Get_Symbol().Print(stdout);
00253 printf("\t\tat bit %d\n", bit_position);
00254 }
00255 temp_map.Set_Bit_Position(bit_position);
00256 mapping_dictionary->Enter(temp_map);
00257 }
00258
00259 if (i==1) {
00260 SE_RESULT se_result = Scalar_Expandable(scalar_ref,loop, Du_Mgr);
00261 if (!Get_Trace(TP_LNOPT2, TT_LNO_DISABLE_SEFIN)
00262 && se_result != SE_NONE || se_result == SE_EASY)
00263 expandable_ref_list.Append(scalar_ref,&VINTR_FIS_default_pool);
00264 }
00265
00266 bit_position++;
00267 }
00268 }
00269 return bit_position;
00270 }
00271
00272 static BOOL is_call_by_value(WN* intrinsic_wn) {
00273 INTRINSIC intrinsic=(INTRINSIC)WN_intrinsic(intrinsic_wn);
00274 if (INTRN_by_value(intrinsic))
00275 return TRUE;
00276 else
00277 return FALSE;
00278 }
00279
00280 static INTRINSIC get_vec_intrinsic(INTRINSIC intr_id) {
00281
00282 switch (intr_id) {
00283 case INTRN_F4ACOS: return INTRN_F4VACOS;
00284 case INTRN_F8ACOS: return INTRN_F8VACOS;
00285 case INTRN_F4ASIN: return INTRN_F4VASIN;
00286 case INTRN_F8ASIN: return INTRN_F8VASIN;
00287 case INTRN_F4ATAN: return INTRN_F4VATAN;
00288 case INTRN_F8ATAN: return INTRN_F8VATAN;
00289 case INTRN_F4COS: return INTRN_F4VCOS ;
00290 case INTRN_F8COS: return INTRN_F8VCOS ;
00291 case INTRN_F4EXP: return INTRN_F4VEXP ;
00292 case INTRN_F8EXP: return INTRN_F8VEXP ;
00293 #ifndef KEY
00294 case INTRN_F4LOG: return INTRN_F4VLOG ;
00295 case INTRN_F8LOG: return INTRN_F8VLOG ;
00296 #else
00297 case INTRN_F4LOG:
00298 if (LNO_Run_Vintr == 2) return INTRN_F4VLOG ;
00299 else return INTRINSIC_INVALID;
00300
00301 case INTRN_F8LOG:
00302 if (LNO_Run_Vintr == 2) return INTRN_F8VLOG ;
00303 else return INTRINSIC_INVALID;
00304 #endif
00305 case INTRN_F4LOG10: return INTRN_F4VLOG10;
00306 case INTRN_F8LOG10: return INTRN_F8VLOG10;
00307 case INTRN_F4SIN: return INTRN_F4VSIN ;
00308 case INTRN_F8SIN: return INTRN_F8VSIN ;
00309
00310
00311 case INTRN_F4TAN: return INTRN_F4VTAN ;
00312 case INTRN_F8TAN: return INTRN_F8VTAN ;
00313 default: return INTRINSIC_INVALID;
00314 }
00315
00316 }
00317
00318 #ifdef KEY
00319 WN* find_loop_var_in_simple_ub(WN* loop) {
00320 #else
00321 static WN* find_loop_var_in_simple_ub(WN* loop) {
00322 #endif
00323 WN* start=WN_start(loop);
00324 WN* end=WN_end(loop);
00325 WN* step;
00326 WN* add=WN_kid0(WN_step(loop));
00327
00328 if (WN_operator(WN_kid0(add))==OPR_INTCONST)
00329 step=WN_kid0(add);
00330 else if (WN_operator(WN_kid1(add))==OPR_INTCONST)
00331 step=WN_kid1(add);
00332 else
00333 return NULL;
00334
00335 if (WN_const_val(step)!=1)
00336 return NULL;
00337
00338 ACCESS_ARRAY* aa=Get_Do_Loop_Info(loop)->UB;
00339
00340 if (aa->Too_Messy)
00341 return NULL;
00342
00343 WN* loop_index=NULL;
00344 USE_LIST* use_list=Du_Mgr->Du_Get_Use(start);
00345 if (use_list->Incomplete())
00346 return NULL;
00347 USE_LIST_ITER uiter(use_list);
00348 for (DU_NODE* u=uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
00349 WN* use=u->Wn();
00350 if (use==loop_index)
00351 continue;
00352 WN* tmp=use;
00353 while (tmp && tmp!=end)
00354 tmp=LWN_Get_Parent(tmp);
00355 if (tmp==end)
00356 if (loop_index)
00357 return NULL;
00358 else
00359 loop_index=use;
00360 }
00361
00362 #ifdef KEY
00363 FmtAssert(loop_index,
00364 ("The index variable does not appear in the loop end (try compiling without -WOPT:copy=off)!"));
00365 #endif
00366 WN* tmp=LWN_Get_Parent(loop_index);
00367 OPERATOR opr=WN_operator(tmp);
00368 if (opr==OPR_MPY) {
00369 WN* coeff_wn=WN_kid0(tmp);
00370 if (coeff_wn==loop_index)
00371 coeff_wn=WN_kid1(tmp);
00372 if (WN_operator(coeff_wn)!=OPR_INTCONST)
00373 return NULL;
00374 tmp=LWN_Get_Parent(tmp);
00375 } else if (opr==OPR_ADD) {
00376 tmp=LWN_Get_Parent(tmp);
00377 } else if (opr==OPR_GE || opr==OPR_LE ||
00378 opr==OPR_GT || opr==OPR_LT) {
00379 } else
00380 return NULL;
00381
00382 while (tmp!=end) {
00383 if (WN_operator(tmp)!=OPR_ADD)
00384 return NULL;
00385 tmp=LWN_Get_Parent(tmp);
00386 }
00387 return loop_index;
00388 }
00389
00390 typedef enum {
00391 Invariant=0,
00392 Reference=1,
00393 Simple=2,
00394 Complex=3
00395 } INTRINSIC_OPERAND_KIND;
00396
00397 static INTRINSIC_OPERAND_KIND intrinsic_operand_kind(WN* wn, WN* loop) {
00398
00399 OPERATOR opr=WN_operator(wn);
00400 if (opr==OPR_PARM) {
00401 if (WN_Parm_By_Reference(wn))
00402 return Reference;
00403 wn=WN_kid0(wn);
00404 opr=WN_operator(wn);
00405 }
00406
00407 if (opr==OPR_CONST || opr==OPR_INTCONST) {
00408 return Invariant;
00409 } else if (opr==OPR_LDA) {
00410 return Reference;
00411 } else if (opr==OPR_LDID) {
00412 SYMBOL symbol1(wn);
00413 SYMBOL symbol2(WN_index(loop));
00414 if (symbol1==symbol2)
00415 return Complex;
00416 DEF_LIST* def_list=Du_Mgr->Ud_Get_Def(wn);
00417 WN* loop_stmt=def_list->Loop_stmt();
00418 WN* body=WN_do_body(loop);
00419 DEF_LIST_ITER d_iter(def_list);
00420 for (DU_NODE* dnode=d_iter.First(); !d_iter.Is_Empty();
00421 dnode=d_iter.Next()) {
00422 WN* def=dnode->Wn();
00423 WN* stmt=Find_Stmt_Under(def,body);
00424 if (stmt!=NULL)
00425 return Complex;
00426 }
00427 return Invariant;
00428 } else if (opr==OPR_ILOAD) {
00429 if (WN_kid_count(wn) != 1 || WN_offset(wn) != 0 ||
00430 WN_operator(WN_kid0(wn)) != OPR_ARRAY)
00431 return Complex;
00432
00433 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,WN_kid0(wn));
00434
00435 if (aa->Too_Messy)
00436 return Complex;
00437
00438 ACCESS_VECTOR* av;
00439 INT loopno=Do_Loop_Depth(loop);
00440
00441 BOOL seen_non_zero=FALSE;
00442 for (INT i=0; i<aa->Num_Vec(); i++) {
00443 av=aa->Dim(i);
00444 if (av->Too_Messy || av->Non_Lin_Symb)
00445 return Complex;
00446 if ((av->Non_Const_Loops() > loopno))
00447 return Complex;
00448 if (av->Loop_Coeff(loopno)!=0)
00449 if (seen_non_zero)
00450 return Complex;
00451 else
00452 seen_non_zero=TRUE;
00453 }
00454 if (!seen_non_zero)
00455 return Invariant;
00456 return Simple;
00457 }
00458
00459 return Complex;
00460
00461 }
00462
00463 static BOOL is_vectorizable_intrinsic_op_stmt(WN* stmt, WN* loop) {
00464
00465 OPERATOR opr=WN_operator(stmt);
00466 if (opr==OPR_STID || opr==OPR_ISTORE) {
00467 WN* rhs=WN_kid0(stmt);
00468 opr=WN_operator(rhs);
00469 if (opr==OPR_INTRINSIC_OP) {
00470 INTRINSIC intrinsic_id=(INTRINSIC)WN_intrinsic(rhs);
00471 if (get_vec_intrinsic(intrinsic_id)==INTRINSIC_INVALID)
00472 return FALSE;
00473 WN* operand=WN_kid0(rhs);
00474 INTRINSIC_OPERAND_KIND kind= intrinsic_operand_kind(operand, loop);
00475 if (kind!=Invariant && kind!=Reference)
00476
00477 return TRUE;
00478 }
00479 }
00480 return FALSE;
00481 }
00482
00483 static void copy_propagation(WN* def, WN* use) {
00484
00485 FmtAssert(WN_operator(def)==OPR_STID,
00486 ("Expecting an STID"));
00487 FmtAssert(WN_operator(use)==OPR_LDID,
00488 ("Expecting an LDID"));
00489 LWN_Copy_Frequency_Tree(def,use);
00490 WN* parent=LWN_Get_Parent(use);
00491 if (WN_kid0(parent)==use) {
00492 WN_kid0(parent)=WN_kid0(def);
00493 LWN_Set_Parent(WN_kid0(parent),parent);
00494 } else {
00495 WN_kid1(parent)=WN_kid0(def);
00496 LWN_Set_Parent(WN_kid1(parent),parent);
00497 }
00498 WN_kid0(def)=use;
00499 Du_Mgr->Delete_Def_Use(def,use);
00500 LWN_Delete_From_Block(LWN_Get_Parent(def),def);
00501 }
00502
00503 static UINT_DYN_ARRAY* vintr_fis_merge_scc_to_form_new_loop(
00504 UINT total_scc,
00505 FF_STMT_LIST* scc,
00506 UINT* scc_size,
00507 WN* loop,
00508 SCC_DIRECTED_GRAPH16 *scc_dep_g
00509 )
00510 {
00511
00512
00513 UINT_DYN_ARRAY *seed_scc=CXX_NEW(UINT_DYN_ARRAY(&VINTR_FIS_default_pool),
00514 &VINTR_FIS_default_pool);
00515
00516
00517
00518
00519 INT* scc_queue[2];
00520 UINT head0, head1, tail0, tail1;
00521
00522 INT scc_remained=total_scc;
00523 UINT intrinsic=0;
00524 UINT non_intrinsic=1;
00525
00526 UINT i;
00527 for (i=0; i<2; i++) {
00528 scc_queue[i]= CXX_NEW_ARRAY(INT,total_scc+1,&VINTR_FIS_default_pool);
00529 }
00530 head0=tail0=0;
00531 head1=tail1=0;
00532
00533
00534 for (i=1; i<=total_scc; i++) {
00535
00536 if (scc_size[i]>0 && scc_dep_g->Get_In_Edge(i)==0) {
00537
00538
00539 if (scc_size[i]==1) {
00540 WN* stmt=scc[i].Head()->Get_Stmt();
00541 if (is_vectorizable_intrinsic_op_stmt(stmt,loop))
00542 scc_queue[intrinsic][head0++]=i;
00543 else
00544 scc_queue[non_intrinsic][head1++]=i;
00545 } else
00546 scc_queue[non_intrinsic][head1++]=i;
00547 } else if (scc_size[i]==0)
00548 scc_remained--;
00549 }
00550
00551 INT kind=intrinsic;
00552 INT last_loop_kind=intrinsic;
00553 WN* body=WN_do_body(loop);
00554 while (1) {
00555 UINT current_scc;
00556 if (kind==intrinsic && head0!=tail0) {
00557 current_scc=scc_queue[intrinsic][tail0++];
00558 UINT loop_id=seed_scc->Newidx();
00559 (*seed_scc)[loop_id]=current_scc;
00560 last_loop_kind=intrinsic;
00561 scc_remained--;
00562 } else if (kind==non_intrinsic && head1!=tail1) {
00563 current_scc=scc_queue[non_intrinsic][tail1++];
00564
00565 if (last_loop_kind!=non_intrinsic) {
00566 UINT loop_id=seed_scc->Newidx();
00567 (*seed_scc)[loop_id]=current_scc;
00568 } else {
00569 scc[(*seed_scc)[seed_scc->Lastidx()]].Append_List(&scc[current_scc]);
00570 }
00571 last_loop_kind=non_intrinsic;
00572 scc_remained--;
00573 } else {
00574 if (head0!=tail0)
00575 kind=intrinsic;
00576 else if (head1!=tail1)
00577 kind=non_intrinsic;
00578 else
00579 break;
00580 continue;
00581 }
00582
00583
00584 EINDEX16 e=scc_dep_g->Get_Out_Edge(current_scc);
00585 while (e) {
00586
00587 VINDEX16 v=scc_dep_g->Get_Sink(e);
00588 scc_dep_g->Delete_Edge(e);
00589 if (scc_dep_g->Get_In_Edge(v)==0) {
00590 if (scc_size[v]==1) {
00591 WN* stmt=scc[v].Head()->Get_Stmt();
00592 if (is_vectorizable_intrinsic_op_stmt(stmt,loop))
00593 scc_queue[intrinsic][head0++]=v;
00594 else
00595 scc_queue[non_intrinsic][head1++]=v;
00596 } else
00597 scc_queue[non_intrinsic][head1++]=v;
00598 }
00599 e=scc_dep_g->Get_Next_Out_Edge(e);
00600 }
00601 }
00602 FmtAssert(scc_remained==0,("Merging not finished in vfission"));
00603 return seed_scc;
00604
00605 }
00606
00607 static void vintr_fis_separate_loop_and_scalar_expand(
00608 UINT_DYN_ARRAY* new_loops,
00609 FF_STMT_LIST* scc,
00610 WN* loop,
00611 FF_STMT_LIST& expandable_ref_list)
00612 {
00613 WN* body=WN_do_body(loop);
00614 UINT total_loops=new_loops->Lastidx()+1;
00615 UINT *loop_size=CXX_NEW_ARRAY(UINT,total_loops,&VINTR_FIS_default_pool);
00616
00617 WN2INT *stmt_to_loop=
00618 CXX_NEW(WN2INT(ESTIMATED_SIZE, &VINTR_FIS_default_pool),&VINTR_FIS_default_pool);
00619
00620 BOOL fission_ok = (total_loops>1);
00621 UINT i;
00622 for (i=0; i<total_loops; i++) {
00623
00624 UINT seed_scc=(*new_loops)[i];
00625 UINT total_stmt=0;
00626 FF_STMT_ITER s_iter(&scc[seed_scc]);
00627 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
00628 stmt_node=s_iter.Next()) {
00629 WN* stmt=stmt_node->Get_Stmt();
00630 stmt_to_loop->Enter(stmt,i);
00631 LWN_Insert_Block_Before(body,NULL,LWN_Extract_From_Block(stmt));
00632 total_stmt++;
00633 }
00634 loop_size[i]=total_stmt;
00635
00636 }
00637
00638 if (total_loops>1) {
00639 BOOL has_calls_or_gotos_or_inner_loops = FALSE;
00640 DO_LOOP_INFO* loop_info=Get_Do_Loop_Info(loop, FALSE);
00641 if (loop_info->Has_Calls || loop_info->Has_Gotos || !loop_info->Is_Inner) {
00642 has_calls_or_gotos_or_inner_loops = TRUE;
00643 }
00644
00645 BOOL need_expansion = FALSE;
00646 BOOL need_finalization = FALSE;
00647 STACK<WN*> se_stack(&VINTR_FIS_default_pool);
00648 STACK<BOOL> finalize_stack(&VINTR_FIS_default_pool);
00649 FF_STMT_ITER r_iter(&expandable_ref_list);
00650 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
00651 ref_node=r_iter.Next()) {
00652 WN* ref=ref_node->Get_Stmt();
00653 WN* stmt0=Find_Stmt_Under(ref,body);
00654 WN* wn_eq_loop = NULL;
00655 STACK<WN*>* equivalence_class=
00656 Scalar_Equivalence_Class(ref, Du_Mgr, &VINTR_FIS_default_pool,
00657 TRUE, &wn_eq_loop);
00658 BOOL expand = FALSE;
00659 BOOL finalize = FALSE;
00660 while (!equivalence_class->Is_Empty() && !expand) {
00661 WN* ref1=equivalence_class->Pop();
00662 WN* stmt1=Find_Stmt_Under(ref1,body);
00663 if (stmt_to_loop->Find(stmt0)!=stmt_to_loop->Find(stmt1)) {
00664 expand = TRUE;
00665 need_expansion = TRUE;
00666 if (wn_eq_loop != NULL) {
00667 finalize = TRUE;
00668 need_finalization = TRUE;
00669 }
00670 }
00671 }
00672
00673
00674 if (expand) {
00675 se_stack.Push(ref);
00676 finalize_stack.Push(finalize);
00677 }
00678 }
00679 WN* guard_tests[1];
00680 guard_tests[0] = NULL;
00681 if (need_finalization)
00682 SE_Guard_Tests(loop, 1, guard_tests, Do_Loop_Depth(loop));
00683 WN* tile_loop = NULL;
00684 if (need_expansion && !Get_Trace(TP_LNOPT, TT_LNO_BIG_SCALAR_TILES)) {
00685 tile_loop = SE_Tile_Inner_Loop(loop, &LNO_default_pool);
00686 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00687 if (tile_loop != NULL) {
00688 INT old_id = WN_MAP32_Get(Prompf_Id_Map, loop);
00689 INT new_id = New_Construct_Id();
00690 WN_MAP32_Set(Prompf_Id_Map, tile_loop, new_id);
00691 Prompf_Info->Se_Tile(old_id, new_id);
00692 }
00693 }
00694 }
00695 for (i=0; i<se_stack.Elements(); i++) {
00696 WN* wn_ref = se_stack.Top_nth(i);
00697 SYMBOL sym(wn_ref);
00698 if (tile_loop != NULL) {
00699 BOOL finalize = finalize_stack.Top_nth(i);
00700 WN* loops[1];
00701 loops[0] = loop;
00702 WN* tile_loops[1];
00703 tile_loops[0] = tile_loop;
00704 INT strip_sizes[1];
00705 strip_sizes[0] = (INT) Step_Size(tile_loop, 0);
00706 INT order[1];
00707 order[0] = 0;
00708 Scalar_Expand(tile_loop, loop, NULL, sym, loops, order, 1, TRUE,
00709 finalize, FALSE, guard_tests, NULL, tile_loops, strip_sizes, 1);
00710 } else {
00711 INT dummy[1]={0};
00712 BOOL finalize = finalize_stack.Top_nth(i);
00713 Scalar_Expand(loop, loop, NULL, sym, &loop, dummy, 1, FALSE,
00714 finalize, FALSE, guard_tests);
00715 }
00716 }
00717
00718 WN* tmp_loop1=loop;
00719 WN** wn_starts=CXX_NEW_ARRAY(WN*, total_loops, &VINTR_FIS_default_pool);
00720 WN** wn_ends=CXX_NEW_ARRAY(WN*, total_loops, &VINTR_FIS_default_pool);
00721 WN** wn_steps=CXX_NEW_ARRAY(WN*, total_loops, &VINTR_FIS_default_pool);
00722 WN** new_loops=CXX_NEW_ARRAY(WN*, total_loops, &VINTR_FIS_default_pool);
00723
00724 wn_starts[0]=WN_kid0(WN_start(tmp_loop1));
00725 wn_ends[0]=WN_end(tmp_loop1);
00726 wn_steps[0]=WN_kid0(WN_step(tmp_loop1));
00727 new_loops[0]=loop;
00728 WN* stmt=WN_first(body);
00729
00730 for (i=0; i<total_loops-1; i++) {
00731
00732 INT size=loop_size[i];
00733
00734 for (INT j=0; j<size; j++)
00735 stmt=WN_next(stmt);
00736
00737 WN* tmp_loop2;
00738
00739 Separate(tmp_loop1, WN_prev(stmt), 1, &tmp_loop2);
00740 LWN_Parentize(tmp_loop2);
00741 DO_LOOP_INFO* new_loop_info =
00742 CXX_NEW(DO_LOOP_INFO(loop_info,&LNO_default_pool), &LNO_default_pool);
00743 Set_Do_Loop_Info(tmp_loop2,new_loop_info);
00744 if (has_calls_or_gotos_or_inner_loops) {
00745
00746
00747 }
00748 wn_starts[i+1]=WN_kid0(WN_start(tmp_loop2));
00749 wn_ends[i+1]=WN_end(tmp_loop2);
00750 wn_steps[i+1]=WN_kid0(WN_step(tmp_loop2));
00751 new_loops[i+1]=tmp_loop2;
00752
00753 tmp_loop1=tmp_loop2;
00754 }
00755
00756 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00757 INT old_id = WN_MAP32_Get(Prompf_Id_Map, new_loops[0]);
00758 PROMPF_LINES* old_lines = CXX_NEW(PROMPF_LINES(new_loops[0],
00759 &PROMPF_pool), &PROMPF_pool);
00760 INT* new_ids = CXX_NEW_ARRAY(INT, total_loops - 1, &PROMPF_pool);
00761 PROMPF_LINES** new_lines = CXX_NEW_ARRAY(PROMPF_LINES*, total_loops - 1,
00762 &PROMPF_pool);
00763 INT i;
00764 for (i = 0; i < total_loops - 1; i++) {
00765 new_ids[i] = New_Construct_Id();
00766 WN_MAP32_Set(Prompf_Id_Map, new_loops[i + 1], new_ids[i]);
00767 }
00768 for (i = 0; i < total_loops - 1; i++)
00769 new_lines[i] = CXX_NEW(PROMPF_LINES(new_loops[i + 1], &PROMPF_pool),
00770 &PROMPF_pool);
00771 Prompf_Info->Vintr_Fission(old_id, old_lines, new_ids, new_lines,
00772 total_loops - 1);
00773 }
00774
00775 Fission_DU_Update(Du_Mgr,red_manager,wn_starts,wn_ends,wn_steps,total_loops,new_loops);
00776 for (i=0; i<total_loops-1; i++)
00777 scalar_rename(LWN_Get_Parent(wn_starts[i]));
00778
00779 adg->Fission_Dep_Update(new_loops[0],total_loops);
00780 }
00781
00782 }
00783
00784 void Gather_Intrinsic_Ops(
00785 WN* wn, SCALAR_REF_STACK* intrinsic_ops, MEM_POOL *pool)
00786 {
00787 if (WN_opcode(wn) == OPC_BLOCK) {
00788 WN* kid = WN_first (wn);
00789 while (kid) {
00790 Gather_Intrinsic_Ops(kid,intrinsic_ops,pool);
00791 kid = WN_next(kid);
00792 }
00793 return;
00794 }
00795 if (WN_operator(wn)==OPR_INTRINSIC_OP) {
00796 SCALAR_REF scalar_ref(wn,0);
00797 intrinsic_ops->Push(scalar_ref);
00798 }
00799 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
00800 WN* kid = WN_kid(wn,kidno);
00801 Gather_Intrinsic_Ops(kid,intrinsic_ops,pool);
00802 }
00803 }
00804
00805 #ifdef KEY
00806 WN * Loop_being_replaced=NULL;
00807
00808 typedef enum {
00809 SIN = 0,
00810 COS = 1,
00811 OTHER = 2
00812 } INTRN_TYPE;
00813
00814 INTRN_TYPE Intrinsic_Type (INTRINSIC intr_id) {
00815 switch(intr_id) {
00816 case INTRN_F4COS:
00817 case INTRN_F8COS:
00818 return COS;
00819 case INTRN_F4SIN:
00820 case INTRN_F8SIN:
00821 return SIN;
00822 default:
00823 return OTHER;
00824 }
00825 }
00826
00827
00828 static BOOL UB_Defined_In_Loop(WN * wn, WN* loop,
00829 SYMBOL loop_var,
00830 DU_MANAGER* du)
00831 {
00832 if (WN_operator(wn) == OPR_LDID && loop_var != SYMBOL(wn)){
00833 DEF_LIST *def_list = du->Ud_Get_Def(wn);
00834 DEF_LIST_ITER iter(def_list);
00835 const DU_NODE* node = NULL;
00836 for (node = iter.First(); !iter.Is_Empty(); node = iter.Next()) {
00837 WN* def = node->Wn();
00838 if (Wn_Is_Inside(def, loop))
00839 return TRUE;
00840 }
00841 }
00842 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
00843 if (UB_Defined_In_Loop(WN_kid(wn, kid), loop,loop_var, du))
00844 return TRUE;
00845
00846 return FALSE;
00847 }
00848 #endif
00849
00850 static char fail_msg[128];
00851
00852
00853
00854 static INT Vintrinsic_Fission(WN* innerloop)
00855 {
00856 if(!Do_Loop_Is_Good(innerloop)){
00857 sprintf(fail_msg, "Do loop has unmapped loads/stores/calls");
00858 return 0;
00859 }
00860 if (Do_Loop_Has_Calls(innerloop)){
00861 sprintf(fail_msg, "Do loop has calls");
00862 return 0;
00863 }
00864
00865 if(Do_Loop_Has_Gotos(innerloop)){
00866 sprintf(fail_msg, "Do loop has gotos");
00867 return 0;
00868 }
00869
00870 if(Do_Loop_Is_Mp(innerloop)){
00871 sprintf(fail_msg, "Do loop is a MP loop");
00872 return 0;
00873 }
00874
00875 DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
00876 if (dli->Has_Gotos || dli->Has_Calls) {
00877 sprintf(fail_msg, "Do loop has calls or gotos");
00878 return 0;
00879 }
00880
00881 #ifdef TARG_X8664
00882
00883 if (dli->Is_Generally_Unimportant()) {
00884 sprintf(fail_msg, "It is a reminder loop from SIMD vectorizer");
00885 return 0;
00886 }
00887 #endif
00888
00889 if (dli->Est_Num_Iterations < Iteration_Count_Threshold) {
00890 sprintf(fail_msg, "Too few iterations");
00891 return 0;
00892 }
00893
00894
00895
00896 if (Index_Variable_Live_At_Exit(innerloop)) {
00897
00898 if (Upper_Bound_Standardize(WN_end(innerloop),TRUE)==FALSE) {
00899 sprintf(fail_msg, "Upper bound could not be standarized");
00900 return 0;
00901 }
00902 Finalize_Index_Variable(innerloop,FALSE);
00903 scalar_rename(WN_start(innerloop));
00904 }
00905
00906
00907 if (find_loop_var_in_simple_ub(innerloop)==NULL){
00908 sprintf(fail_msg, "Loop upper bound is too complicated");
00909 return 0;
00910 }
00911
00912 #ifdef KEY
00913
00914 if(UB_Defined_In_Loop(WN_end(innerloop), innerloop,
00915 SYMBOL(WN_index(innerloop)),
00916 Du_Mgr)){
00917 sprintf(fail_msg, "Loop upper bound has a definition in the loop body");
00918 return 0;
00919 }
00920 #endif
00921
00922 MEM_POOL_Push(&VINTR_FIS_default_pool);
00923 {
00924
00925 char loop_index_name[80];
00926 if (strlen(ST_name(WN_st(WN_index(innerloop))))>=80) {
00927 strcpy(loop_index_name,"name_too_long");
00928 } else
00929 strcpy(loop_index_name,ST_name(WN_st(WN_index(innerloop))));
00930
00931
00932 WN* parent_block=LWN_Get_Parent(innerloop);
00933
00934 TYPE_ID index_type=WN_rtype(WN_end(innerloop));
00935 char source_line[80];
00936 if (strlen(Cur_PU_Name)>=65) {
00937
00938 sprintf(source_line,"%s:%d", "name_too_long",
00939 Srcpos_To_Line(WN_Get_Linenum(innerloop)));
00940 } else
00941 sprintf(source_line,"%s:%d", Cur_PU_Name,
00942 Srcpos_To_Line(WN_Get_Linenum(innerloop)));
00943
00944
00945 SCC_DIRECTED_GRAPH16 *dep_g_p =
00946 CXX_NEW(SCC_DIRECTED_GRAPH16(ESTIMATED_SIZE,ESTIMATED_SIZE),
00947 &VINTR_FIS_default_pool);
00948
00949
00950
00951 WN2VINDEX *stmt_to_vertex=
00952 CXX_NEW(WN2VINDEX(ESTIMATED_SIZE, &VINTR_FIS_default_pool),
00953 &VINTR_FIS_default_pool);
00954
00955 SCALAR_REF_STACK *intrinsic_ops =
00956 CXX_NEW(SCALAR_REF_STACK(&VINTR_FIS_default_pool),
00957 &VINTR_FIS_default_pool);
00958
00959 WN* stmt;
00960 UINT stmt_count=0;
00961 WN* body=WN_do_body(innerloop);
00962
00963
00964
00965
00966
00967
00968
00969 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)) {
00970 Gather_Intrinsic_Ops(stmt,intrinsic_ops,&VINTR_FIS_default_pool) ;
00971 }
00972
00973 if (intrinsic_ops->Elements()==0) {
00974 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
00975 MEM_POOL_Pop(&VINTR_FIS_default_pool);
00976 sprintf(fail_msg, "No intrinsic ops in the loop");
00977 return 0;
00978 }
00979
00980 #ifdef KEY
00981 if (CIS_Allowed) {
00982
00983
00984
00985
00986 for (INT i=0; i<intrinsic_ops->Elements(); i++) {
00987 WN* intrini = intrinsic_ops->Top_nth(i).Wn;
00988 INTRN_TYPE typei = Intrinsic_Type(WN_intrinsic(intrini));
00989 if (typei != SIN && typei != COS)
00990 continue;
00991 WN* arg_intrini = WN_kid0(intrini);
00992 for (INT j=i+1; j<intrinsic_ops->Elements(); j++) {
00993 WN* intrinj = intrinsic_ops->Top_nth(j).Wn;
00994 INTRN_TYPE typej = Intrinsic_Type(WN_intrinsic(intrinj));
00995 if (typej != SIN && typej != COS)
00996 continue;
00997 if (typei == typej)
00998 continue;
00999 WN* arg_intrinj = WN_kid0(intrinj);
01000 if (WN_Simp_Compare_Trees(arg_intrini, arg_intrinj) == 0) {
01001 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01002 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01003 sprintf(fail_msg, "Vectorization may inhibit converting to SINCOS");
01004 return 0;
01005 }
01006 }
01007 }
01008 }
01009 #endif
01010 STACK_OF_WN *vec_intrinsic_ops=
01011 CXX_NEW(STACK_OF_WN(&VINTR_FIS_default_pool),&VINTR_FIS_default_pool);
01012
01013 INT i;
01014 for (i=0; i<intrinsic_ops->Elements(); i++) {
01015
01016 WN* intrinsic_op=intrinsic_ops->Top_nth(i).Wn;
01017
01018 WN* stmt=intrinsic_op;
01019 WN* stmt1;
01020 BOOL under_scf=FALSE;
01021 while ((stmt1=LWN_Get_Parent(stmt))!=body) {
01022 stmt=stmt1;
01023 if (WN_opcode(stmt)==OPC_BLOCK) {
01024 under_scf=TRUE;
01025 break;
01026 }
01027 }
01028 if (under_scf)
01029 continue;
01030 if (get_vec_intrinsic((INTRINSIC)WN_intrinsic(intrinsic_op))==
01031 INTRINSIC_INVALID)
01032 continue;
01033
01034 UINT kid_no;
01035 BOOL splitted=FALSE;
01036 for (kid_no=0; kid_no<WN_kid_count(intrinsic_op); kid_no++) {
01037 WN* tmp=WN_kid0(WN_kid(intrinsic_op,kid_no));
01038 INTRINSIC_OPERAND_KIND kind=intrinsic_operand_kind(tmp,innerloop);
01039 if (kind==Simple || kind==Complex) {
01040 tmp = Split_Using_Preg(stmt,tmp,adg,FALSE);
01041 FmtAssert(WN_operator(tmp)==OPR_STID,
01042 ("Expecting STID after splitting"));
01043 USE_LIST* use_list=Du_Mgr->Du_Get_Use(tmp);
01044 DU_NODE* node=use_list->Head();
01045 FmtAssert(use_list->Tail()==node, ("Too many uses after splitting"));
01046 splitted=TRUE;
01047 }
01048 }
01049 if (!splitted)
01050 continue;
01051
01052 vec_intrinsic_ops->Push(intrinsic_op);
01053 WN_OFFSET offset=WN_offset(WN_prev(stmt));
01054
01055 WN *intrinsic_root = Split_Using_Preg(stmt,intrinsic_op,adg,FALSE);
01056 FmtAssert(WN_operator(intrinsic_root)==OPR_STID,
01057 ("Expecting STID after splitting"));
01058 USE_LIST* use_list=Du_Mgr->Du_Get_Use(intrinsic_root);
01059 DU_NODE* node=use_list->Head();
01060 FmtAssert(use_list->Tail()==node, ("Too many uses after splitting"));
01061 WN* use=node->Wn();
01062 WN_offset(intrinsic_root)=WN_offset(use)=offset;
01063
01064
01065 if (WN_operator(stmt)==OPR_ISTORE &&
01066
01067 WN_desc(stmt)==WN_desc(use) &&
01068 Variant_Array(stmt,use,adg) &&
01069 Is_Proper_Descendant(use, WN_kid0(stmt))) {
01070 if (!Split_Array(stmt,use,adg)) {
01071 #ifndef TARG_X8664
01072 FmtAssert(0, ("Failed to split using array"));
01073 #else
01074
01075
01076
01077 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01078 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01079 sprintf(fail_msg, "Failed to split using array");
01080 return 0;
01081 #endif
01082 }
01083 }
01084
01085 }
01086
01087 if (vec_intrinsic_ops->Elements()==0) {
01088
01089 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01090 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01091 sprintf(fail_msg, "No vectorizable intrinsic operations");
01092 return 0;
01093 }
01094
01095 REF_LIST_STACK* writes = CXX_NEW(REF_LIST_STACK(&VINTR_FIS_default_pool),
01096 &VINTR_FIS_default_pool);
01097 REF_LIST_STACK* reads = CXX_NEW(REF_LIST_STACK(&VINTR_FIS_default_pool),
01098 &VINTR_FIS_default_pool);
01099
01100 SCALAR_STACK* scalar_writes = CXX_NEW(SCALAR_STACK(&VINTR_FIS_default_pool),
01101 &VINTR_FIS_default_pool);
01102 SCALAR_STACK* scalar_reads = CXX_NEW(SCALAR_STACK(&VINTR_FIS_default_pool),
01103 &VINTR_FIS_default_pool);
01104 SCALAR_REF_STACK* params = CXX_NEW(SCALAR_REF_STACK(&VINTR_FIS_default_pool),
01105 &VINTR_FIS_default_pool);
01106
01107
01108 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&VINTR_FIS_default_pool),
01109 &VINTR_FIS_default_pool);
01110 Build_Doloop_Stack(innerloop, stack1);
01111
01112
01113 Init_Ref_Stmt_Counter();
01114 INT32 gather_status = 0;
01115 for (stmt=WN_first(body); stmt && gather_status!= -1; stmt=WN_next(stmt)) {
01116 gather_status=New_Gather_References(stmt,writes,reads,stack1,
01117 scalar_writes,scalar_reads,
01118 params,&VINTR_FIS_default_pool) ;
01119 }
01120 if (gather_status == -1) {
01121 DevWarn("Error in gathering references");
01122 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01123 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01124 sprintf(fail_msg, "Error in gathering references");
01125 return 0;
01126 }
01127
01128 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)) {
01129 VINDEX16 v=dep_g_p->Add_Vertex();
01130 if (v==0) {
01131 DevWarn("Statement dependence graph problem");
01132 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01133 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01134 sprintf(fail_msg, "Statement dependence graph problem");
01135 return 0;
01136 }
01137 stmt_to_vertex->Enter(stmt, v);
01138 }
01139
01140 BINARY_TREE<NAME2BIT> *mapping_dictionary =
01141 CXX_NEW(BINARY_TREE<NAME2BIT>(&VINTR_FIS_default_pool),
01142 &VINTR_FIS_default_pool);
01143
01144
01145 FF_STMT_LIST expandable_ref_list;
01146
01147
01148
01149
01150
01151 UINT sym_count=vintr_fis_2(innerloop, scalar_reads, scalar_writes,
01152 mapping_dictionary, expandable_ref_list);
01153
01154
01155 BIT_VECTOR Expandable_Scalar_Set(sym_count, &VINTR_FIS_default_pool);
01156
01157
01158
01159 FF_STMT_ITER e_iter(&expandable_ref_list);
01160 for (FF_STMT_NODE* ref_node=e_iter.First(); !e_iter.Is_Empty();
01161 ref_node=e_iter.Next()) {
01162 NAME2BIT temp_map;
01163 temp_map.Set_Symbol(ref_node->Get_Stmt());
01164 Expandable_Scalar_Set.Set(mapping_dictionary->Find(temp_map)->
01165 Get_Data()->Get_Bit_Position());
01166 }
01167
01168 if (LNO_Test_Dump) {
01169 printf("Expandable_Scalar_Set=\n");
01170 Expandable_Scalar_Set.Print(stdout);
01171 }
01172
01173 WN_MAP sdm=WN_MAP_Create(&VINTR_FIS_default_pool);
01174 ARRAY_DIRECTED_GRAPH16 *sdg =
01175 CXX_NEW(ARRAY_DIRECTED_GRAPH16(100,500,sdm,LEVEL_ARRAY_GRAPH),
01176 &VINTR_FIS_default_pool);
01177
01178 for (stmt = WN_first(body); stmt; stmt = WN_next(stmt)) {
01179 if (!Map_Stmt_To_Level_Graph(stmt,sdg)) {
01180 FmtAssert(0, ("Error in mapping stmt to level graph\n"));
01181 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01182 CXX_DELETE(sdg, &VINTR_FIS_default_pool);
01183 WN_MAP_Delete(sdm);
01184 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01185 sprintf(fail_msg, "Error in mapping stmt to level graph");
01186 return(0);
01187 }
01188 }
01189
01190 BOOL status=Generate_Scalar_Dependence_For_Statement_Dependence_Graph(
01191 innerloop, scalar_reads, scalar_writes, params, sdg, red_manager,
01192 &Expandable_Scalar_Set, mapping_dictionary);
01193 if (status==FALSE) {
01194 DevWarn("Statement dependence graph problem");
01195 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01196 CXX_DELETE(sdg, &VINTR_FIS_default_pool);
01197 WN_MAP_Delete(sdm);
01198 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01199 sprintf(fail_msg, "Statement dependence graph problem");
01200 return(0);
01201 }
01202
01203 status=Generate_Array_Dependence_For_Statement_Dependence_Graph(
01204 innerloop, reads, writes, sdg, red_manager, adg);
01205 if (status==FALSE) {
01206 DevWarn("Statement dependence graph problem");
01207 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01208 CXX_DELETE(sdg, &VINTR_FIS_default_pool);
01209 WN_MAP_Delete(sdm);
01210 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01211 sprintf(fail_msg, "Statement dependence graph problem");
01212 return(0);
01213 }
01214
01215
01216
01217
01218 EINDEX16 e=sdg->Get_Edge();
01219 while (e) {
01220 WN* source=sdg->Get_Wn(sdg->Get_Source(e));
01221 WN* sink=sdg->Get_Wn(sdg->Get_Sink(e));
01222 if (LWN_Get_Parent(source) == body || LWN_Get_Parent(sink) == body)
01223
01224 dep_g_p->Add_Unique_Edge(
01225 stmt_to_vertex->Find(source),
01226 stmt_to_vertex->Find(sink));
01227 e=sdg->Get_Next_Edge(e);
01228
01229 }
01230
01231
01232
01233 SCC_DIRECTED_GRAPH16 *ac_g;
01234 ac_g = dep_g_p->Acyclic_Condensation(&VINTR_FIS_default_pool);
01235
01236 VINDEX16 total_scc = dep_g_p->Get_Scc_Count();
01237
01238
01239 FF_STMT_LIST *scc;
01240 scc = CXX_NEW_ARRAY(FF_STMT_LIST, total_scc+1, &VINTR_FIS_default_pool);
01241
01242 UINT *scc_size=CXX_NEW_ARRAY(UINT, total_scc+1, &VINTR_FIS_default_pool);
01243
01244 for (i=1; i<=total_scc; i++) {
01245 scc_size[i]=0;
01246 }
01247
01248
01249 for (stmt = WN_first(WN_do_body(innerloop)); stmt; stmt = WN_next(stmt)) {
01250 VINDEX16 scc_id;
01251 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
01252 scc_size[scc_id]++;
01253 }
01254
01255 for (i=0; i<vec_intrinsic_ops->Elements(); i++) {
01256 WN* intrinsic_op=vec_intrinsic_ops->Top_nth(i);
01257 stmt=Find_Stmt_Under(intrinsic_op,body);
01258 VINDEX16 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
01259 if (scc_size[scc_id]!=1) {
01260
01261
01262 continue;
01263 } else {
01264 FmtAssert(WN_operator(stmt)==OPR_STID,
01265 ("Expecting an STID"));
01266 UINT kid_no;
01267 for (kid_no=0; kid_no<WN_kid_count(intrinsic_op); kid_no++) {
01268 WN* tmp=WN_kid0(WN_kid(intrinsic_op,kid_no));
01269 if (WN_operator(tmp)==OPR_LDID) {
01270 DEF_LIST* def_list=Du_Mgr->Ud_Get_Def(tmp);
01271 WN* def=def_list->Head()->Wn();
01272 WN* source_stmt=def;
01273 if (def_list->Head()!=def_list->Tail() || stmt!=WN_next(source_stmt))
01274 continue;
01275 VINDEX16 source_scc_id =
01276 dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(source_stmt));
01277 if (scc_size[source_scc_id]==1 &&
01278 intrinsic_operand_kind(WN_kid0(def),innerloop)==Simple) {
01279
01280
01281 EINDEX16 e=ac_g->Get_In_Edge(source_scc_id);
01282 while (e) {
01283 VINDEX16 source_source_scc_id=ac_g->Get_Source(e);
01284 if (source_source_scc_id!=scc_id)
01285 ac_g->Add_Unique_Edge(source_source_scc_id,scc_id);
01286 e=ac_g->Get_Next_In_Edge(e);
01287 }
01288 e=ac_g->Get_Out_Edge(source_scc_id);
01289 while (e) {
01290 VINDEX16 sink_of_source_scc_id=ac_g->Get_Sink(e);
01291 if (scc_id!=sink_of_source_scc_id)
01292 ac_g->Add_Unique_Edge(scc_id,sink_of_source_scc_id);
01293 e=ac_g->Get_Next_Out_Edge(e);
01294 }
01295 ac_g->Delete_Vertex(source_scc_id);
01296 FF_STMT_ITER r_iter(&expandable_ref_list);
01297 FF_STMT_NODE* prev=NULL;
01298 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
01299 ref_node=r_iter.Next()) {
01300 if (ref_node->Get_Stmt()==source_stmt) {
01301 SYMBOL symbol1(source_stmt);
01302 SYMBOL symbol2(stmt);
01303 if (symbol1==symbol2)
01304 ref_node->Set_Stmt(stmt);
01305 else
01306 expandable_ref_list.Remove(prev,ref_node);
01307 break;
01308 }
01309 prev=ref_node;
01310 }
01311 copy_propagation(source_stmt,tmp);
01312 }
01313 }
01314 }
01315 USE_LIST* use_list=Du_Mgr->Du_Get_Use(stmt);
01316 WN* use=use_list->Head()->Wn();
01317 WN* sink_stmt=Find_Stmt_Under(use,body);
01318 if (use_list->Head()!=use_list->Tail() || sink_stmt!=WN_next(stmt))
01319 continue;
01320 VINDEX16 sink_scc_id =
01321 dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(sink_stmt));
01322 if (scc_size[sink_scc_id]==1 && LWN_Get_Parent(use)==sink_stmt &&
01323 Variant_Array(sink_stmt,use,adg)) {
01324
01325
01326 EINDEX16 e=ac_g->Get_In_Edge(scc_id);
01327 while (e) {
01328 VINDEX16 source_scc_id=ac_g->Get_Source(e);
01329 if (source_scc_id!=sink_scc_id)
01330 ac_g->Add_Unique_Edge(source_scc_id,sink_scc_id);
01331 e=ac_g->Get_Next_In_Edge(e);
01332 }
01333 e=ac_g->Get_Out_Edge(scc_id);
01334 while (e) {
01335 VINDEX16 sink_of_scc_id=ac_g->Get_Sink(e);
01336 if (sink_scc_id!=sink_of_scc_id)
01337 ac_g->Add_Unique_Edge(sink_scc_id,sink_of_scc_id);
01338 e=ac_g->Get_Next_Out_Edge(e);
01339 }
01340 ac_g->Delete_Vertex(scc_id);
01341 FF_STMT_ITER r_iter(&expandable_ref_list);
01342 FF_STMT_NODE* prev=NULL;
01343 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
01344 ref_node=r_iter.Next()) {
01345 if (ref_node->Get_Stmt()==stmt) {
01346 expandable_ref_list.Remove(prev,ref_node);
01347 break;
01348 }
01349 prev=ref_node;
01350 }
01351 copy_propagation(stmt,use);
01352 }
01353 }
01354 }
01355
01356 for (i=1; i<=total_scc; i++) {
01357 scc_size[i]=0;
01358 }
01359
01360
01361 for (stmt = WN_first(WN_do_body(innerloop)); stmt; stmt = WN_next(stmt)) {
01362 VINDEX16 scc_id;
01363 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
01364 scc[scc_id].Append(stmt, &VINTR_FIS_default_pool);
01365 scc_size[scc_id]++;
01366 }
01367
01368 if (LNO_Test_Dump)
01369 for (i=1; i<=total_scc; i++) {
01370
01371 printf("Vintr_Fis:scc %d:", i);
01372 FF_STMT_ITER s_iter(&scc[i]);
01373 INT j=0;
01374 for (FF_STMT_NODE *stmt_node=s_iter.First(); !s_iter.Is_Empty();
01375 stmt_node=s_iter.Next()) {
01376 stmt=stmt_node->Get_Stmt();
01377 Dump_WN(stmt,stdout,TRUE,4,4);
01378 j++;
01379 }
01380 printf(" has %d stmts\n", j);
01381
01382 }
01383
01384 if (total_scc==1 && scc_size[1]>1) {
01385 CXX_DELETE(ac_g, &VINTR_FIS_default_pool);
01386 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01387 CXX_DELETE(sdg, &VINTR_FIS_default_pool);
01388 WN_MAP_Delete(sdm);
01389 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01390 sprintf(fail_msg, "Dependence cycle exists");
01391 return(0);
01392 }
01393
01394 UINT_DYN_ARRAY* new_loops;
01395
01396 new_loops=vintr_fis_merge_scc_to_form_new_loop(total_scc,scc,scc_size,
01397 innerloop,ac_g);
01398
01399
01400
01401
01402
01403 vintr_fis_separate_loop_and_scalar_expand(new_loops,scc,
01404 innerloop,expandable_ref_list);
01405
01406 BOOL does_vectorization=FALSE;
01407 for (i=0; i<vec_intrinsic_ops->Elements(); i++) {
01408 WN* intrinsic_op=vec_intrinsic_ops->Top_nth(i);
01409
01410 if (!is_call_by_value(intrinsic_op))
01411 continue;
01412
01413 INTRINSIC intr_id=(INTRINSIC)WN_intrinsic(intrinsic_op);
01414 INTRINSIC vec_intr_id=get_vec_intrinsic(intr_id);
01415
01416
01417
01418 if (vec_intr_id==INTRINSIC_INVALID)
01419 continue;
01420
01421 WN* iload=WN_kid0(WN_kid0(intrinsic_op));
01422 WN* istore=LWN_Get_Parent(intrinsic_op);
01423 WN* new_body=LWN_Get_Parent(istore);
01424 WN* new_loop=LWN_Get_Parent(new_body);
01425 if (WN_opcode(new_body)!= OPC_BLOCK ||
01426 WN_first(new_body)!=istore || WN_last(new_body)!=istore)
01427 continue;
01428
01429 if (WN_operator(iload)!=OPR_ILOAD) {
01430 DevWarn("Expect an ILOAD in Vintrinsic_Fission");
01431 continue;
01432 } else if (WN_operator(WN_kid0(iload))!=OPR_ARRAY ||
01433 WN_kid_count(iload) != 1) {
01434 DevWarn("Expect a good ARRAY in Vintrinsic_Fission");
01435 continue;
01436 }
01437
01438 TYPE_ID rtype=WN_rtype(intrinsic_op);
01439 TYPE_ID ptr_type= Pointer_Size == 8 ? MTYPE_U8 : MTYPE_U4;
01440 TYPE_ID long_type= Pointer_Size == 8 ? MTYPE_I8 : MTYPE_I4;
01441 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
01442 OPCODE add_opc= OPCODE_make_op(OPR_ADD,index_type, MTYPE_V);
01443 OPCODE sub_opc= OPCODE_make_op(OPR_SUB,index_type, MTYPE_V);
01444 OPCODE mpy_opc= OPCODE_make_op(OPR_MPY,index_type, MTYPE_V);
01445 OPCODE addr_add_opc= OPCODE_make_op(OPR_ADD,ptr_type, MTYPE_V);
01446 OPCODE addr_intconst_opc= OPCODE_make_op(OPR_INTCONST,ptr_type, MTYPE_V);
01447 OPCODE addr_mpy_opc= OPCODE_make_op(OPR_MPY,ptr_type, MTYPE_V);
01448 OPCODE addr_div_opc= OPCODE_make_op(OPR_DIV,ptr_type, MTYPE_V);
01449 TY_IDX ptr_ty=Make_Pointer_Type(WN_ty(istore));
01450 TYPE_ID rtype1=WN_desc(iload);
01451 if (rtype!=rtype1) {
01452 DevWarn("Load storage size differs in Vintrinsic_Fission: %s %s",
01453 MTYPE_name(rtype),MTYPE_name(rtype1));
01454 }
01455 TYPE_ID rtype2=WN_desc(istore);
01456 if (rtype!=rtype2) {
01457 DevWarn("Store storage size differs in Vintrinsic_Fission: %s %s",
01458 MTYPE_name(rtype),MTYPE_name(rtype2));
01459 }
01460
01461 if (WN_operator(istore)!=OPR_ISTORE) {
01462 DevWarn("Expect an ISTORE in Vintrinsic_Fission");
01463 continue;
01464 } else if (WN_operator(WN_kid1(istore))!=OPR_ARRAY ||
01465 WN_kid_count(istore) != 2) {
01466 DevWarn("Expect a good ARRAY in Vintrinsic_Fission");
01467 continue;
01468 }
01469
01470 char intr_op_name[80];
01471 SRCPOS srcpos=Srcpos_To_Line(WN_Get_Linenum(istore));
01472 sprintf(intr_op_name,"%s",
01473 INTRN_rt_name((INTRINSIC)WN_intrinsic(intrinsic_op)));
01474
01475 INT loopno=Do_Loop_Depth(new_loop);
01476
01477 WN* arrayx=WN_kid0(iload);
01478 WN* startx=LWN_Copy_Tree(arrayx);
01479 LWN_Copy_Def_Use(arrayx,startx,Du_Mgr);
01480 if (WN_operator(WN_kid0(arrayx))==OPR_LDA) {
01481 ST* arrayx_st=WN_st(WN_kid0(arrayx));
01482 #ifdef _NEW_SYMTAB
01483 Clear_ST_addr_not_passed(arrayx_st);
01484 #else
01485 Set_ST_addr_taken_passed(arrayx_st);
01486 #endif
01487 }
01488 startx = LWN_CreateExp2(addr_add_opc, startx,
01489 WN_CreateIntconst(addr_intconst_opc, WN_offset(iload)));
01490 WN* stridex=WN_CreateIntconst(addr_intconst_opc,0);
01491 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,arrayx);
01492 INT64 esize=WN_element_size(arrayx);
01493 INT64 ty_size=MTYPE_size(rtype);
01494
01495 if (esize > 0) {
01496 FmtAssert(esize%ty_size==0,
01497 ("Input array element size not multiple of size of operation type"));
01498 for (INT j=0; j<aa->Num_Vec(); j++) {
01499 if (j!=0) {
01500 WN* size=WN_array_dim(arrayx,j);
01501 WN* tmp=LWN_Copy_Tree(size);
01502 LWN_Copy_Def_Use(size,tmp,Du_Mgr);
01503 stridex = LWN_CreateExp2(addr_mpy_opc, stridex, tmp);
01504 }
01505 stridex = LWN_CreateExp2(addr_add_opc, stridex,
01506 WN_CreateIntconst(addr_intconst_opc,
01507 aa->Dim(j)->Loop_Coeff(loopno)));
01508 }
01509 stridex = LWN_CreateExp2(addr_mpy_opc, stridex,
01510 WN_CreateIntconst(addr_intconst_opc,esize/ty_size));
01511 } else {
01512
01513 for (INT j=0; j<aa->Num_Vec(); j++) {
01514 WN* size=WN_array_dim(arrayx,j);
01515 WN* tmp=LWN_Copy_Tree(size);
01516 WN* tmp1;
01517 LWN_Copy_Def_Use(size,tmp,Du_Mgr);
01518 tmp1 = LWN_CreateExp2(addr_mpy_opc, tmp,
01519 WN_CreateIntconst(addr_intconst_opc,
01520 aa->Dim(j)->Loop_Coeff(loopno)));
01521 stridex = LWN_CreateExp2(addr_add_opc,stridex,tmp1);
01522 }
01523
01524 stridex = LWN_CreateExp2(addr_div_opc,stridex,
01525 WN_CreateIntconst(addr_intconst_opc,-ty_size/esize));
01526 }
01527
01528 SYMBOL index_symbol(WN_start(new_loop));
01529
01530 WN* arrayy=WN_kid1(istore);
01531 WN* starty=LWN_Copy_Tree(arrayy);
01532 LWN_Copy_Def_Use(arrayy,starty,Du_Mgr);
01533 if (WN_operator(WN_kid0(arrayy))==OPR_LDA) {
01534 ST* arrayy_st=WN_st(WN_kid0(arrayy));
01535 #ifdef _NEW_SYMTAB
01536 Clear_ST_addr_not_passed(arrayy_st);
01537 #else
01538 Set_ST_addr_taken_passed(arrayy_st);
01539 #endif
01540 }
01541 starty = LWN_CreateExp2(addr_add_opc, starty,
01542 WN_CreateIntconst(addr_intconst_opc, WN_offset(istore)));
01543 WN* stridey=WN_CreateIntconst(addr_intconst_opc,0);
01544 aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,arrayy);
01545 esize=WN_element_size(arrayy);
01546
01547 if (esize > 0) {
01548 FmtAssert(esize%ty_size==0,
01549 ("Output array element size not multiple of size of operation type"));
01550 for (INT j=0; j<aa->Num_Vec(); j++) {
01551 if (j!=0) {
01552 WN* size=WN_array_dim(arrayy,j);
01553 WN* tmp=LWN_Copy_Tree(size);
01554 LWN_Copy_Def_Use(size,tmp,Du_Mgr);
01555 stridey = LWN_CreateExp2(addr_mpy_opc, stridey, tmp);
01556 }
01557 stridey = LWN_CreateExp2(addr_add_opc, stridey,
01558 WN_CreateIntconst(addr_intconst_opc,
01559 aa->Dim(j)->Loop_Coeff(loopno)));
01560 }
01561 stridey = LWN_CreateExp2(addr_mpy_opc, stridey,
01562 WN_CreateIntconst(addr_intconst_opc,esize/ty_size));
01563 } else {
01564
01565 for (INT j=0; j<aa->Num_Vec(); j++) {
01566 WN* size=WN_array_dim(arrayy,j);
01567 WN* tmp=LWN_Copy_Tree(size);
01568 WN* tmp1;
01569 LWN_Copy_Def_Use(size,tmp,Du_Mgr);
01570 tmp1 = LWN_CreateExp2(addr_mpy_opc, tmp,
01571 WN_CreateIntconst(addr_intconst_opc,
01572 aa->Dim(j)->Loop_Coeff(loopno)));
01573 stridey = LWN_CreateExp2(addr_add_opc, stridey, tmp1);
01574 }
01575
01576 stridey = LWN_CreateExp2(addr_div_opc,stridey,
01577 WN_CreateIntconst(addr_intconst_opc,-ty_size/esize));
01578 }
01579
01580 WN* loop_end=WN_end(new_loop);
01581 WN* loop_index=find_loop_var_in_simple_ub(new_loop);
01582 WN* coeff_wn;
01583 WN* tmp=LWN_Get_Parent(loop_index);
01584 if (WN_operator(tmp)==OPR_MPY) {
01585 coeff_wn=WN_kid0(tmp);
01586 if (coeff_wn==loop_index)
01587 coeff_wn=WN_kid1(tmp);
01588 } else
01589 coeff_wn=WN_CreateIntconst(intconst_opc,1);
01590
01591 if (tmp==loop_end)
01592 tmp=loop_index;
01593 else
01594 while (LWN_Get_Parent(tmp)!=loop_end) {
01595 tmp=LWN_Get_Parent(tmp);
01596 }
01597
01598 UINT kid_id;
01599 if (WN_kid0(loop_end)==tmp)
01600 kid_id=0;
01601 else
01602 kid_id=1;
01603
01604 OPCODE opc=WN_opcode(loop_end);
01605 OPERATOR opr=OPCODE_operator(opc);
01606 OPCODE new_opcode;
01607 if (opr==OPR_LE) {
01608 new_opcode=OPCODE_make_op(OPR_LT,OPCODE_rtype(opc),
01609 OPCODE_desc(opc));
01610 WN_kid1(loop_end)=LWN_CreateExp2(add_opc, WN_kid1(loop_end),
01611 WN_CreateIntconst(intconst_opc,1));
01612 opr=OPR_LT;
01613 WN_set_opcode(loop_end,new_opcode);
01614 LWN_Parentize(loop_end);
01615 } else if (opr==OPR_GE) {
01616 new_opcode=OPCODE_make_op(OPR_GT,OPCODE_rtype(opc),
01617 OPCODE_desc(opc));
01618 WN_kid1(loop_end)=LWN_CreateExp2(sub_opc, WN_kid1(loop_end),
01619 WN_CreateIntconst(intconst_opc,1));
01620 opr=OPR_GT;
01621 WN_set_opcode(loop_end,new_opcode);
01622 LWN_Parentize(loop_end);
01623 }
01624
01625 WN* count=NULL;
01626 if (opr==OPR_LT || opr==OPR_GT) {
01627 #ifdef KEY
01628
01629
01630
01631
01632
01633
01634
01635
01636
01637
01638 Loop_being_replaced = new_loop;
01639 #endif
01640 WN* tmp0=LWN_Copy_Tree(WN_kid(loop_end,1-kid_id));
01641 WN* tmp1=LWN_Copy_Tree(WN_kid(loop_end,kid_id));
01642 LWN_Copy_Def_Use(WN_kid(loop_end,1-kid_id),tmp0,Du_Mgr);
01643 LWN_Copy_Def_Use(WN_kid(loop_end,kid_id),tmp1,Du_Mgr);
01644
01645 if (WN_const_val(coeff_wn)==1)
01646 count=LWN_CreateExp2(sub_opc,tmp0,tmp1);
01647 else {
01648 WN* kids[2];
01649 kids[0]=LWN_CreateExp2(sub_opc,tmp0,tmp1);
01650 kids[1]=LWN_Copy_Tree(coeff_wn);
01651 LWN_Copy_Def_Use(coeff_wn,kids[1],Du_Mgr);
01652 count=WN_Create_Intrinsic(
01653 OPCODE_make_op(OPR_INTRINSIC_OP,long_type, MTYPE_V),
01654 (long_type == MTYPE_I8 ?
01655 INTRN_I8DIVCEIL : INTRN_I4DIVCEIL),2,kids);
01656 }
01657
01658
01659 WN* kids[5];
01660 kids[0]=startx;
01661 kids[1]=starty;
01662 kids[2]=count;
01663 kids[3]=stridex;
01664 kids[4]=stridey;
01665
01666 if (LNO_Use_Parm) {
01667 kids[0]=WN_CreateParm(ptr_type, kids[0], ptr_ty, WN_PARM_BY_REFERENCE);
01668 Create_vector_alias(Alias_Mgr,iload,kids[0]);
01669 kids[1]=WN_CreateParm(ptr_type, kids[1], ptr_ty, WN_PARM_BY_REFERENCE);
01670 Create_vector_alias(Alias_Mgr,istore,kids[1]);
01671 kids[2]=WN_CreateParm(long_type, kids[2], Be_Type_Tbl(long_type),
01672 WN_PARM_BY_VALUE);
01673 kids[3]=WN_CreateParm(long_type, kids[3], Be_Type_Tbl(long_type),
01674 WN_PARM_BY_VALUE);
01675 kids[4]=WN_CreateParm(long_type, kids[4], Be_Type_Tbl(long_type),
01676 WN_PARM_BY_VALUE);
01677 }
01678
01679 WN* vintr_wn=WN_Create_Intrinsic(OPC_VINTRINSIC_CALL,
01680 vec_intr_id,5,kids);
01681 Replace_Ldid_With_Exp_Copy(
01682 index_symbol,startx,WN_kid0(WN_start(new_loop)),Du_Mgr);
01683 Replace_Ldid_With_Exp_Copy(
01684 index_symbol,starty,WN_kid0(WN_start(new_loop)),Du_Mgr);
01685 Replace_Ldid_With_Exp_Copy(
01686 index_symbol,count,WN_kid0(WN_start(new_loop)),Du_Mgr);
01687 LWN_Parentize(vintr_wn);
01688 LWN_Copy_Frequency_Tree(new_loop,new_loop);
01689 LWN_Copy_Frequency_Tree(vintr_wn,new_loop);
01690 LWN_Insert_Block_Before(LWN_Get_Parent(new_loop),new_loop,vintr_wn);
01691
01692
01693 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&VINTR_FIS_default_pool),
01694 &VINTR_FIS_default_pool);
01695 Build_Doloop_Stack(LWN_Get_Parent(vintr_wn), loop_stack);
01696
01697 LNO_Build_Access(vintr_wn, loop_stack, &LNO_default_pool);
01698
01699 VINDEX16 v=adg->Add_Vertex(vintr_wn);
01700 VINDEX16 vl=adg->Get_Vertex(iload);
01701 VINDEX16 vs=adg->Get_Vertex(istore);
01702
01703 DEPV_LIST* depv_list=NULL;
01704 e=adg->Get_Edge(vs,vs);
01705 if (e) {
01706 DEPV_ARRAY* depv_array1=adg->Depv_Array(e);
01707 if (depv_list==NULL)
01708 depv_list==CXX_NEW(DEPV_LIST(depv_array1,&VINTR_FIS_default_pool),
01709 &VINTR_FIS_default_pool);
01710 else
01711 for (INT ii=0; ii<depv_array1->Num_Vec(); ii++)
01712 depv_list->Append(depv_array1->Depv(ii),0);
01713 adg->Delete_Edge(e);
01714 }
01715
01716 e=adg->Get_Edge(vs,vl);
01717 if (e) {
01718 DEPV_ARRAY* depv_array1=adg->Depv_Array(e);
01719 if (depv_list==NULL)
01720 depv_list==CXX_NEW(DEPV_LIST(depv_array1,&VINTR_FIS_default_pool),
01721 &VINTR_FIS_default_pool);
01722 else
01723 for (INT ii=0; ii<depv_array1->Num_Vec(); ii++)
01724 depv_list->Append(depv_array1->Depv(ii),0);
01725 adg->Delete_Edge(e);
01726 }
01727 e=adg->Get_Edge(vl,vs);
01728 if (e) {
01729 DEPV_ARRAY* depv_array1=adg->Depv_Array(e);
01730 if (depv_list==NULL)
01731 depv_list==CXX_NEW(DEPV_LIST(depv_array1,&VINTR_FIS_default_pool),
01732 &VINTR_FIS_default_pool);
01733 else
01734 for (INT ii=0; ii<depv_array1->Num_Vec(); ii++)
01735 depv_list->Append(depv_array1->Depv(ii),0);
01736 adg->Delete_Edge(e);
01737 }
01738
01739 if (depv_list) {
01740 DEPV_ARRAY* depv_array=Create_DEPV_ARRAY(depv_list,&LNO_default_pool);
01741 UINT num_dim=depv_array->Num_Dim();
01742 if (num_dim>1) {
01743 depv_array=depv_array->Shorten(num_dim-1,&LNO_default_pool);
01744 adg->Add_Edge(v,v,depv_array);
01745 }
01746 }
01747
01748 e=adg->Get_In_Edge(vs);
01749 while (e) {
01750 DEPV_ARRAY* depv_array=adg->Depv_Array(e);
01751 adg->Set_Depv_Array(e,NULL);
01752 VINDEX16 from=adg->Get_Source(e);
01753 adg->Delete_Edge(e);
01754 EINDEX16 e1=adg->Get_Edge(from,vl);
01755 if (e1) {
01756 DEPV_LIST* depv_list=
01757 CXX_NEW(DEPV_LIST(depv_array,&VINTR_FIS_default_pool),
01758 &VINTR_FIS_default_pool);
01759 DEPV_ARRAY* depv_array1=adg->Depv_Array(e1);
01760 for (INT ii=0; ii<depv_array1->Num_Vec(); ii++)
01761 depv_list->Append(depv_array1->Depv(ii),0);
01762 depv_array=Create_DEPV_ARRAY(depv_list,&LNO_default_pool);
01763 adg->Delete_Edge(e1);
01764 }
01765 if (from==vl || from==vs) {
01766 Is_True(0,("Duplicate edge in dep. graph"));
01767 } else
01768 adg->Add_Edge(from,v,depv_array);
01769 e=adg->Get_In_Edge(vs);
01770 }
01771
01772 e=adg->Get_Out_Edge(vs);
01773 while (e) {
01774 DEPV_ARRAY* depv_array=adg->Depv_Array(e);
01775 adg->Set_Depv_Array(e,NULL);
01776 VINDEX16 to=adg->Get_Sink(e);
01777 adg->Delete_Edge(e);
01778 EINDEX16 e1=adg->Get_Edge(vl,to);
01779 if (e1) {
01780 DEPV_LIST* depv_list=
01781 CXX_NEW(DEPV_LIST(depv_array,&VINTR_FIS_default_pool),
01782 &VINTR_FIS_default_pool);
01783 DEPV_ARRAY* depv_array1=adg->Depv_Array(e1);
01784 for (INT ii=0; ii<depv_array1->Num_Vec(); ii++)
01785 depv_list->Append(depv_array1->Depv(ii),0);
01786 depv_array=Create_DEPV_ARRAY(depv_list,&LNO_default_pool);
01787 adg->Delete_Edge(e1);
01788 }
01789 adg->Add_Edge(v,to,depv_array);
01790 e=adg->Get_Out_Edge(vs);
01791 }
01792
01793 adg->Delete_Vertex(vs);
01794 adg->Delete_Vertex(vl);
01795
01796 WN* parent=LWN_Get_Parent(vintr_wn);
01797 while (parent) {
01798 if (WN_opcode(parent)==OPC_DO_LOOP) {
01799 if (Do_Loop_Is_Good(parent) && !Do_Loop_Has_Gotos(parent))
01800 break;
01801 else {
01802 parent=NULL;
01803 break;
01804 }
01805 }
01806 parent=LWN_Get_Parent(parent);
01807 }
01808 if (parent==NULL)
01809 adg->Delete_Vertex(v);
01810
01811 #ifdef KEY
01812 if (LNO_Vintr_Verbose) {
01813 printf("(%s:%d) ",
01814 Src_File_Name,
01815 Srcpos_To_Line(WN_Get_Linenum(new_loop)));
01816 printf("LOOP WAS VECTORIZED FOR VECTOR INTRINSIC ROUTINE(S).\n");
01817 }
01818 #endif
01819 LWN_Update_Def_Use_Delete_Tree(new_loop,Du_Mgr);
01820 LWN_Update_Dg_Delete_Tree(new_loop,adg);
01821 LWN_Delete_Tree(new_loop);
01822
01823 does_vectorization=TRUE;
01824
01825 #ifdef KEY
01826 Loop_being_replaced = NULL;
01827 #endif
01828
01829 if (LNO_Verbose)
01830 vintr_fission_verbose_info(srcpos,intr_op_name);
01831 if (LNO_Analysis)
01832 vintr_fission_analysis_info(TRUE, srcpos,intr_op_name);
01833 if ( LNO_Tlog ) {
01834 vintr_fission_tlog_info(Succeeded,srcpos,loop_index_name,intr_op_name);
01835 }
01836 } else {
01837 DevWarn("Strange loop upper bound");
01838 LWN_Delete_Tree(startx);
01839 LWN_Delete_Tree(stridex);
01840 LWN_Delete_Tree(starty);
01841 LWN_Delete_Tree(stridey);
01842 }
01843 }
01844
01845 if (does_vectorization) {
01846
01847 WN* wn=parent_block;
01848 WN* inner_loop_found=NULL;
01849 while (wn) {
01850
01851 OPCODE opc=WN_opcode(wn);
01852 if (opc==OPC_DO_LOOP || opc==OPC_IF) {
01853
01854 if (!inner_loop_found)
01855 inner_loop_found=Find_SCF_Inside(wn,OPC_DO_LOOP);
01856 if (opc==OPC_DO_LOOP) {
01857
01858 Get_Do_Loop_Info(wn)->Is_Inner=(inner_loop_found==NULL);
01859 Get_Do_Loop_Info(wn)->Has_Calls=TRUE;
01860 inner_loop_found=wn;
01861 } else if (opc==OPC_IF)
01862 Get_If_Info(wn)->Contains_Do_Loops=(inner_loop_found!=NULL);
01863 }
01864 wn=LWN_Get_Parent(wn);
01865 }
01866 }
01867
01868 CXX_DELETE(dep_g_p, &VINTR_FIS_default_pool);
01869 CXX_DELETE(ac_g, &VINTR_FIS_default_pool);
01870 CXX_DELETE(sdg, &VINTR_FIS_default_pool);
01871 new_loops->Free_array();
01872 WN_MAP_Delete(sdm);
01873 }
01874 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01875
01876 return 1;
01877
01878 }
01879
01880 #ifdef KEY //14182
01881 static BOOL Tree_Contains_Intrinsic(WN *wn)
01882 {
01883 if(WN_operator(wn)==OPR_INTRINSIC_OP)
01884 return TRUE;
01885 else if(WN_operator(wn)==OPR_BLOCK){
01886 for(WN* stmt=WN_first(wn); stmt; stmt=WN_next(stmt)){
01887 if(Tree_Contains_Intrinsic(stmt))
01888 return TRUE;
01889 }
01890 }
01891 for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++){
01892 if(Tree_Contains_Intrinsic(WN_kid(wn,kidno)))
01893 return TRUE;
01894 }
01895 return FALSE;
01896 }
01897 #endif
01898
01899 static void Vintrinsic_Fission_Walk(WN* wn) {
01900 OPCODE opc=WN_opcode(wn);
01901
01902 if (!OPCODE_is_scf(opc))
01903 return;
01904 else if (opc==OPC_DO_LOOP) {
01905 #ifndef KEY
01906 if (Do_Loop_Is_Good(wn) && Do_Loop_Is_Inner(wn) && !Do_Loop_Has_Calls(wn)
01907 && !Do_Loop_Is_Mp(wn) && !Do_Loop_Has_Gotos(wn))
01908 Vintrinsic_Fission(wn);
01909 #else //bug 14182: report non-vectorizable reasons only for an inner loop that
01910
01911
01912 if(Do_Loop_Is_Inner(wn) && Tree_Contains_Intrinsic(wn)){
01913 sprintf(fail_msg, "Unknown reason");
01914 if(Vintrinsic_Fission(wn)==0){
01915 if (LNO_Vintr_Verbose) {
01916 printf("(%s:%d) %s, ",
01917 Src_File_Name,
01918 Srcpos_To_Line(WN_Get_Linenum(wn)),
01919 fail_msg);
01920 printf("loop was not intrinsic vectorized!\n");
01921 }
01922 }
01923 }
01924 #endif
01925 else
01926 Vintrinsic_Fission_Walk(WN_do_body(wn));
01927 } else if (opc==OPC_BLOCK)
01928 for (WN* stmt=WN_first(wn); stmt;) {
01929 WN* next_stmt=WN_next(stmt);
01930 Vintrinsic_Fission_Walk(stmt);
01931 stmt=next_stmt;
01932 }
01933 else
01934 for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++) {
01935 Vintrinsic_Fission_Walk(WN_kid(wn,kidno));
01936 }
01937 }
01938
01939 void Vintrinsic_Fission_Phase(WN* func_nd) {
01940
01941 MEM_POOL_Initialize(&VINTR_FIS_default_pool,"VINTR_FIS_default_pool",FALSE);
01942 MEM_POOL_Push(&VINTR_FIS_default_pool);
01943
01944 adg=Array_Dependence_Graph;
01945
01946 Vintrinsic_Fission_Walk(func_nd);
01947
01948 MEM_POOL_Pop(&VINTR_FIS_default_pool);
01949 MEM_POOL_Delete(&VINTR_FIS_default_pool);
01950
01951 }
01952
01953
01954