00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 #ifdef _KEEP_RCS_ID
00054
00055 static char *rcs_id = "$Source: be/lno/SCCS/s.simd.cxx $ $Revision: 1.244 $";
00056 #endif
00057
00058 #include "defs.h"
00059 #include "config_asm.h"
00060 #include "glob.h"
00061 #include "wn.h"
00062 #include "wn_map.h"
00063 #include "cxx_memory.h"
00064 #include "lwn_util.h"
00065 #include "ff_utils.h"
00066 #include "lnoutils.h"
00067 #include "lnopt_main.h"
00068 #include "scalar_expand.h"
00069 #include "fission.h"
00070 #include "opt_du.h"
00071 #include "dep_graph.h"
00072 #include "reduc.h"
00073 #include "snl.h"
00074 #include "name.h"
00075 #include "inner_fission.h"
00076 #include "lno_scc.h"
00077 #include "config_targ.h"
00078 #include "ir_reader.h"
00079 #include "wn_simp.h"
00080 #include "const.h"
00081 #include "data_layout.h"
00082 #include "cond.h"
00083 #include "config_opt.h"
00084 #include "region_main.h"
00085 #include "lego_util.h"
00086 #include "minvariant.h"
00087 #include "prompf.h"
00088
// Absolute value of `a`.
// Every use of the argument is parenthesized so that an argument built from
// low-precedence operators (e.g. `x | y`) is compared against zero as a
// whole.  The argument is still evaluated twice, so it must not have side
// effects.
#define ABS(a) (((a) < 0) ? -(a) : (a))
00090
// When set, diagnostic information is recorded; it is checked together with
// LNO_Simd_Verbose by the reporting code in this file.
00091 BOOL debug;
00092
// Defined outside this part of the file; `recursive` defaults to TRUE.
00093 extern WN *Split_Using_Preg(WN* stmt, WN* simd_op,
00094 ARRAY_DIRECTED_GRAPH16* dep_graph,
00095 BOOL recursive=TRUE);
// Container shorthands used throughout this file.
00096 typedef STACK<WN*> STACK_OF_WN;
00097 typedef HASH_TABLE<WN*,VINDEX16> WN2VINDEX;
00098 typedef HASH_TABLE<WN*,UINT> WN2UINT;
00099 typedef HASH_TABLE<WN*,INT> WN2INT;
00100 typedef DYN_ARRAY<UINT> UINT_DYN_ARRAY;
00101
00102 #define ESTIMATED_SIZE 100 // initial size used when constructing hash tables, etc.
00103 #define Iteration_Count_Threshold 10 // loops with fewer iterations than this are treated as too short by Too_Few_Iterations()
00104
00105
00106 extern REDUCTION_MANAGER *red_manager;
00107 extern MEM_POOL SNL_local_pool;
// Memory pool from which this file allocates its working data.
00108 static MEM_POOL SIMD_default_pool;
00109 static ARRAY_DIRECTED_GRAPH16 *adg;
00110
00111
// Reduction managers.  curr_simd_red_manager is the one queried by
// Is_Well_Formed_Simd(); NOTE(review): how the other two are selected is not
// visible in this part of the file.
00112 static REDUCTION_MANAGER *simd_red_manager;
00113 static REDUCTION_MANAGER *depanal_red_manager;
00114 static REDUCTION_MANAGER *curr_simd_red_manager;
00115
// Forward declaration.
00116 static void Simd_Mark_Code (WN* wn);
00117
00118 static INT Last_Vectorizable_Loop_Id = 0;
00119
00120 static BOOL Too_Few_Iterations(INT64 iters, WN *body)
00121 {
00122 if(iters < Iteration_Count_Threshold)
00123 return TRUE;
00124 if(iters >= 16)
00125 return FALSE;
00126
00127
00128 for(WN *stmt = WN_first(body); stmt; stmt = WN_next(stmt)){
00129 switch(WN_desc(stmt)){
00130 case MTYPE_I1: case MTYPE_U1:
00131 return TRUE;
00132 case MTYPE_I2: case MTYPE_U2:
00133 if(iters < 8)
00134 return TRUE;
00135 break;
00136 case MTYPE_I4: case MTYPE_U4: case MTYPE_F4:
00137 if(iters < 4)
00138 return TRUE;
00139 break;
00140 case MTYPE_I8: case MTYPE_U8: case MTYPE_F8: case MTYPE_C4:
00141 if(iters < 2)
00142 return TRUE;
00143 break;
00144 }
00145 }
00146 return FALSE;
00147 }
00148
00149
00150
00151 static void Count_Invariant(STACK_OF_WN *invars, WN *ops)
00152 {
00153 for(INT i=0; i<invars->Elements(); i++){
00154 WN *tmp = invars->Top_nth(i);
00155 if(Tree_Equiv(tmp, ops)) return;
00156 }
00157 invars->Push(ops);
00158 }
00159
00160
00161
00162
00163 extern UINT simd_2(
00164 WN* loop,
00165 SCALAR_STACK* scalar_reads,
00166 SCALAR_STACK* scalar_writes,
00167 BINARY_TREE<NAME2BIT> *mapping_dictionary,
00168
00169
00170 FF_STMT_LIST& expandable_ref_list)
00171
00172 {
00173
00174 UINT bit_position=0;
00175
00176 SCALAR_STACK *scalar_ref_list[2];
00177 scalar_ref_list[0]=scalar_reads;
00178 scalar_ref_list[1]=scalar_writes;
00179
00180
00181 for (INT i=0; i<2; i++) {
00182
00183 for (INT j=0; j<scalar_ref_list[i]->Elements(); j++) {
00184
00185 WN* scalar_ref=scalar_ref_list[i]->Bottom_nth(j)->Bottom_nth(0)->Wn;
00186 NAME2BIT temp_map;
00187
00188 temp_map.Set_Symbol(scalar_ref);
00189
00190
00191
00192
00193 const BINARY_TREE_NODE<NAME2BIT> *tree_node;
00194 if (mapping_dictionary->Find(temp_map)==NULL) {
00195
00196 if (LNO_Test_Dump) {
00197 temp_map.Get_Symbol().Print(stdout);
00198 printf("\t\tat bit %d\n", bit_position);
00199 }
00200 temp_map.Set_Bit_Position(bit_position);
00201 mapping_dictionary->Enter(temp_map);
00202 }
00203
00204 if (i==1) {
00205 SE_RESULT se_result = Scalar_Expandable(scalar_ref,loop, Du_Mgr);
00206 if (!Get_Trace(TP_LNOPT2, TT_LNO_DISABLE_SEFIN)
00207 && se_result != SE_NONE || se_result == SE_EASY)
00208 expandable_ref_list.Append(scalar_ref,&SIMD_default_pool);
00209 }
00210
00211 bit_position++;
00212 }
00213 }
00214 return bit_position;
00215 }
00216
00217 static BOOL is_vectorizable_op (OPERATOR opr, TYPE_ID rtype, TYPE_ID desc) {
00218
00219 switch (opr) {
00220 case OPR_SELECT:
00221 if (MTYPE_is_float(rtype))
00222 return TRUE;
00223 else
00224 return FALSE;
00225 case OPR_EQ: case OPR_NE:
00226 case OPR_LT: case OPR_GT: case OPR_LE: case OPR_GE:
00227 if (MTYPE_is_float(desc) && MTYPE_is_integral(rtype))
00228 return TRUE;
00229 else
00230 return FALSE;
00231 case OPR_TRUNC:
00232 if (rtype == MTYPE_I4 && desc == MTYPE_F4)
00233 return TRUE;
00234 else
00235 return FALSE;
00236 case OPR_CVT:
00237 if ((rtype == MTYPE_F8 || rtype == MTYPE_F4) &&
00238 (desc == MTYPE_I4 || desc == MTYPE_F4))
00239 return TRUE;
00240 else
00241 return FALSE;
00242 case OPR_INTRINSIC_OP:
00243 return TRUE;
00244 case OPR_PAREN:
00245 return TRUE;
00246 case OPR_ABS:
00247 if (rtype == MTYPE_F4 || rtype == MTYPE_F8)
00248 return TRUE;
00249 else
00250 return FALSE;
00251
00252 case OPR_NEG:
00253 if (rtype == MTYPE_C4)
00254 return FALSE;
00255 else
00256 return TRUE;
00257 case OPR_ADD:
00258 case OPR_SUB:
00259 return TRUE;
00260 case OPR_MPY:
00261 if (rtype == MTYPE_F8 || rtype == MTYPE_F4 ||
00262 #ifdef TARG_X8664
00263 ((rtype == MTYPE_C4 || rtype == MTYPE_C8) && Is_Target_SSE3()) ||
00264 #endif
00265
00266
00267 rtype == MTYPE_I4)
00268 return TRUE;
00269 else
00270 return FALSE;
00271 case OPR_DIV:
00272
00273 if (rtype == MTYPE_F8 || rtype == MTYPE_F4
00274 #ifdef TARG_X8664
00275 || (rtype == MTYPE_C4 && Is_Target_SSE3())
00276 #endif
00277 )
00278 return TRUE;
00279 else
00280 return FALSE;
00281 case OPR_MAX:
00282 case OPR_MIN:
00283 if (rtype == MTYPE_F4 || rtype == MTYPE_F8 || rtype == MTYPE_I4)
00284 return TRUE;
00285 else
00286 return FALSE;
00287 #if 0 // bug 8885
00288 case OPR_BAND:
00289
00290 case OPR_BXOR:
00291 if (rtype != MTYPE_F4 && rtype != MTYPE_F8)
00292 return TRUE;
00293 else
00294 return FALSE;
00295 #endif
00296 case OPR_SQRT:
00297 if (rtype == MTYPE_F4 || rtype == MTYPE_F8)
00298 return TRUE;
00299 else
00300 return FALSE;
00301 case OPR_RSQRT:
00302
00303 #ifdef TARG_X8664
00304 case OPR_ATOMIC_RSQRT:
00305 #endif
00306 if (rtype == MTYPE_F4)
00307 return TRUE;
00308 else
00309 return FALSE;
00310
00311
00312 case OPR_RECIP:
00313 if (rtype == MTYPE_F4 || rtype == MTYPE_F8)
00314 return TRUE;
00315 else
00316 return FALSE;
00317
00318 case OPR_PARM:
00319 return TRUE;
00320 default:
00321 return FALSE;
00322 }
00323 }
00324
00325 extern WN *find_loop_var_in_simple_ub(WN* loop);
00326
// Classification of an operand relative to a loop, as produced by
// simd_operand_kind().
00327 typedef enum {
// Constant, scalar load with no definition inside the loop body, or array
// access whose subscripts have a zero coefficient for the loop.
00328 Invariant=0,
// Address (LDA), by-reference parameter, or array access in which a
// dimension other than the last one varies with the loop.
00329 Reference=1,
// Array access in which only the last dimension varies with the loop.
00330 Simple=2,
// Anything else, including uses of the loop index variable itself.
00331 Complex=3
00332 } SIMD_OPERAND_KIND;
00333
00334 static SIMD_OPERAND_KIND simd_operand_kind(WN* wn, WN* loop) {
00335 OPERATOR opr=WN_operator(wn);
00336
00337 if (opr==OPR_PARM) {
00338 if (WN_Parm_By_Reference(wn))
00339 return Reference;
00340 wn=WN_kid0(wn);
00341 opr=WN_operator(wn);
00342 }
00343
00344 if (opr==OPR_CONST || opr==OPR_INTCONST) {
00345 return Invariant;
00346 } else if (opr==OPR_LDA) {
00347 return Reference;
00348 } else if (opr==OPR_LDID) {
00349 SYMBOL symbol1(wn);
00350 SYMBOL symbol2(WN_index(loop));
00351 if (symbol1==symbol2)
00352 return Complex;
00353 DEF_LIST* def_list=Du_Mgr->Ud_Get_Def(wn);
00354 WN* loop_stmt=def_list->Loop_stmt();
00355 WN* body=WN_do_body(loop);
00356 DEF_LIST_ITER d_iter(def_list);
00357 for (DU_NODE* dnode=d_iter.First(); !d_iter.Is_Empty();
00358 dnode=d_iter.Next()) {
00359 WN* def=dnode->Wn();
00360 WN* stmt=Find_Stmt_Under(def,body);
00361 if (stmt!=NULL)
00362 return Complex;
00363 }
00364 return Invariant;
00365 } else if (opr==OPR_ILOAD) {
00366 if (WN_kid_count(wn) != 1 || WN_offset(wn) != 0 ||
00367 WN_operator(WN_kid0(wn)) != OPR_ARRAY)
00368 return Complex;
00369
00370 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,WN_kid0(wn));
00371
00372 if (aa->Too_Messy)
00373 return Complex;
00374
00375 ACCESS_VECTOR* av;
00376 INT loopno=Do_Loop_Depth(loop);
00377
00378 BOOL seen_non_zero=FALSE;
00379 for (INT i=0; i<aa->Num_Vec(); i++) {
00380 av=aa->Dim(i);
00381 if (av->Too_Messy || av->Non_Lin_Symb)
00382 return Complex;
00383 if ((av->Non_Const_Loops() > loopno))
00384 return Complex;
00385 if (av->Loop_Coeff(loopno)!=0 && i != aa->Num_Vec()-1)
00386 return Reference;
00387 if (av->Loop_Coeff(loopno)!=0)
00388 if (seen_non_zero)
00389 return Complex;
00390 else
00391 seen_non_zero=TRUE;
00392 }
00393 if (!seen_non_zero)
00394 return Invariant;
00395 return Simple;
00396 } else if (opr==OPR_ISTORE) {
00397 if (WN_offset(wn) != 0 ||
00398 WN_operator(WN_kid1(wn)) != OPR_ARRAY)
00399 return Complex;
00400
00401 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,WN_kid1(wn));
00402
00403 if (aa->Too_Messy)
00404 return Complex;
00405
00406 ACCESS_VECTOR* av;
00407 INT loopno=Do_Loop_Depth(loop);
00408
00409 BOOL seen_non_zero=FALSE;
00410 for (INT i=0; i<aa->Num_Vec(); i++) {
00411 av=aa->Dim(i);
00412 if (av->Too_Messy || av->Non_Lin_Symb)
00413 return Complex;
00414 if ((av->Non_Const_Loops() > loopno))
00415 return Complex;
00416 if (av->Loop_Coeff(loopno)!=0 && i != aa->Num_Vec()-1)
00417 return Reference;
00418 if (av->Loop_Coeff(loopno)!=0)
00419 if (seen_non_zero)
00420 return Complex;
00421 else
00422 seen_non_zero=TRUE;
00423 }
00424 if (!seen_non_zero)
00425 return Invariant;
00426 return Simple;
00427 }
00428
00429 return Complex;
00430 }
00431
00432
00433
00434 BOOL Is_Vectorizable_Intrinsic(WN *wn);
00435
00436
00437 static BOOL Is_Under_Array(WN *wn)
00438 {
00439 WN* parent = LWN_Get_Parent(wn);
00440 while(parent && WN_operator(parent) != OPR_DO_LOOP) {
00441 if (WN_operator(parent) == OPR_ARRAY)
00442 return TRUE;
00443 parent = LWN_Get_Parent(parent);
00444 }
00445 return FALSE;
00446 }
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457 static BOOL Simd_Benefit (WN* wn) {
00458
00459 if (LNO_Run_Simd == 0)
00460 return FALSE;
00461 else if (LNO_Run_Simd == 2)
00462 return TRUE;
00463
00464 if (wn == NULL)
00465 return FALSE;
00466
00467 OPERATOR opr = WN_operator(wn);
00468
00469
00470
00471 if (opr == OPR_CVT &&
00472 (!Is_Under_Array(wn) || is_vectorizable_op(opr, WN_rtype(wn), WN_desc(wn))))
00473 return TRUE;
00474
00475 if((opr == OPR_RECIP && WN_rtype(wn) == MTYPE_F4) ||
00476 opr == OPR_SQRT || opr == OPR_TRUNC)
00477 return TRUE;
00478
00479
00480
00481
00482 if(Is_Vectorizable_Intrinsic(wn))
00483 return TRUE;
00484
00485 if (OPCODE_is_store(WN_opcode(wn)) &&
00486 (MTYPE_byte_size(WN_desc(wn)) < 8 ||
00487 MTYPE_is_complex(WN_desc(wn)) || opr == OPR_STID))
00488 return TRUE;
00489
00490 if (WN_operator(wn) == OPR_ARRAY &&
00491 WN_has_sym(WN_array_base(wn)) &&
00492
00493 WN_operator(WN_array_base(wn)) != OPR_LDID &&
00494 ST_sclass(WN_st(WN_array_base(wn))) != SCLASS_FORMAL)
00495 return TRUE;
00496
00497 if (WN_opcode(wn) == OPC_BLOCK)
00498 for (WN* stmt=WN_first(wn); stmt;) {
00499 WN* next_stmt=WN_next(stmt);
00500 if (Simd_Benefit(stmt))
00501 return TRUE;
00502 stmt=next_stmt;
00503 }
00504
00505 for (UINT kidno = 0; kidno < WN_kid_count(wn); kidno ++) {
00506 if (Simd_Benefit(WN_kid(wn, kidno)))
00507 return TRUE;
00508 }
00509
00510 return FALSE;
00511 }
00512
00513
00514
00515
00516
00517
00518
00519
00520
// Externally visible entry point: forwards to Simd_Benefit() and returns its
// result for the tree rooted at `wn`.
00521 extern BOOL Is_Vectorization_Beneficial (WN* wn)
00522 {
00523 return Simd_Benefit(wn);
00524 }
00525
00526 static BOOL Is_Vectorizable_Tree (WN* tree)
00527 {
00528 if(tree == NULL) return TRUE;
00529
00530 OPERATOR opr = WN_operator(tree);
00531 if(opr == OPR_ILOAD || opr == OPR_LDID ||
00532 opr == OPR_CONST || opr == OPR_INTCONST)
00533 return TRUE;
00534
00535 if (!is_vectorizable_op(opr, WN_rtype(tree), WN_desc(tree)))
00536 return FALSE;
00537
00538 if (WN_kid_count(tree) > 2)
00539 return FALSE;
00540
00541 for(INT i=0; i<WN_kid_count(tree); i++){
00542 if(!Is_Vectorizable_Tree(WN_kid(tree, i)))
00543 return FALSE;
00544 }
00545 return TRUE;
00546 }
00547
00548 static BOOL Array_Subscript_Uses_IV (WN *wn, SYMBOL loop_var)
00549 {
00550 if (WN_operator(wn) == OPR_LDID) {
00551 SYMBOL symbol(wn);
00552 if (symbol == loop_var)
00553 return TRUE;
00554 }
00555 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
00556 if (Array_Subscript_Uses_IV(WN_kid(wn, kid), loop_var))
00557 return TRUE;
00558 return FALSE;
00559 }
00560
00561
00562
00563
00564
00565
00566 static BOOL Identify_Messy_Array_Subscript (WN* array, WN* loop,
00567 ACCESS_ARRAY* aa,
00568 INT i )
00569 {
00570 WN* index;
00571 if (WN_num_dim(array) == aa->Num_Vec()) {
00572 Is_True(i + 1 + WN_num_dim(array) < WN_kid_count(array), ("NYI"));
00573 if (i + 1 + WN_num_dim(array) < WN_kid_count(array)) {
00574 index = WN_kid(array, i + 1 + WN_num_dim(array));
00575 SYMBOL symbol(WN_index(loop));
00576 if (Array_Subscript_Uses_IV(index, symbol))
00577 return TRUE;
00578 }
00579 }
00580 return FALSE;
00581 }
00582
00583
// Name of the array symbol for which Unit_Stride_Reference() last reported a
// non-unit-stride access; only written when diagnostics are enabled.
00584 static const char *non_unit_stride;
// Opcode name recorded by Report_Non_Vectorizable_Op(); that function leaves
// it untouched once it is non-NULL.
00585 static char *non_vect_op;
00586
00587
00588
00589
00590
00591
00592
00593
00594 static BOOL Possible_Contiguous_Dope(WN *wn)
00595 {
00596 if(WN_element_size(wn) < 0 &&
00597 WN_operator(WN_array_base(wn)) == OPR_LDID){
00598 if (WN_offset(LWN_Get_Parent(wn)) > 0)
00599 return FALSE;
00600 TY_IDX ty_high = WN_ty(WN_array_base(wn));
00601 if(TY_kind(ty_high) == KIND_POINTER){
00602 TY_IDX ty_point_to = TY_pointed(ty_high);
00603 if(TY_kind(ty_point_to) == KIND_ARRAY){
00604 TY_IDX ty_ele = TY_etype(ty_point_to);
00605 if(TY_kind(ty_ele) == KIND_STRUCT){
00606 UINT fld_id = 0;
00607 if (!FLD_last_field(FLD_get_to_field(ty_ele, 1, fld_id)))
00608 return FALSE;
00609 }
00610 }
00611 }
00612 }
00613 return TRUE;
00614 }
00615
00616 static BOOL Unit_Stride_Reference(
00617 WN *wn,
00618 WN *loop,
00619 BOOL in_simd)
00620 {
00621
00622 BOOL ok = TRUE;
00623
00624 if (WN_opcode(wn) == OPC_BLOCK){
00625 WN* kid = WN_first (wn);
00626 while (kid) {
00627 if(!Unit_Stride_Reference(kid, loop, in_simd))
00628 return FALSE;
00629 kid = WN_next(kid);
00630 }
00631 return TRUE;
00632 }
00633
00634 if(WN_operator(wn) == OPR_ARRAY &&
00635 (in_simd || !Is_Loop_Invariant_Exp(wn, loop))){
00636
00637 ACCESS_ARRAY* aa = (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map, wn);
00638 ACCESS_VECTOR* av;
00639 INT loopno = Do_Loop_Depth(loop);
00640
00641 for (INT i = 0; i < aa->Num_Vec(); i ++) {
00642 av = aa->Dim(i);
00643 if (av->Loop_Coeff(loopno)!=0 && i != aa->Num_Vec()-1)
00644 ok = FALSE;
00645 if (i == aa->Num_Vec()-1 && av->Loop_Coeff(loopno) != 1 &&
00646 av->Loop_Coeff(loopno) != -1)
00647 ok = FALSE;
00648
00649 if (i == aa->Num_Vec()-1 && av->Loop_Coeff(loopno) == -1 &&
00650 ABS(WN_element_size(wn)) == 1)
00651 ok = FALSE;
00652
00653 if (i == aa->Num_Vec()-1 && av->Contains_Non_Lin_Symb())
00654 ok = FALSE;
00655
00656
00657 if (av->Too_Messy &&
00658 Identify_Messy_Array_Subscript(wn, loop, aa, i))
00659 ok = FALSE;
00660 if(!ok) break;
00661 }
00662
00663 if (ok && PU_src_lang(Get_Current_PU()) == PU_F90_LANG)
00664 ok = Possible_Contiguous_Dope(wn);
00665 if(!ok){
00666 if(in_simd && (debug || LNO_Simd_Verbose)){
00667 if(WN_has_sym(WN_array_base(wn))){
00668 SYMBOL array_symbol(WN_array_base(wn));
00669 non_unit_stride = array_symbol.Name();
00670 }
00671 }
00672 return FALSE;
00673 }
00674 }
00675
00676 for (UINT kidno = 0; kidno < WN_kid_count(wn); kidno ++) {
00677 if(!Unit_Stride_Reference(WN_kid(wn, kidno), loop, in_simd))
00678 return FALSE;
00679 }
00680
00681 return TRUE;
00682 }
00683
00684 static void Report_Non_Vectorizable_Op(WN *wn)
00685 {
00686 if(non_vect_op) return;
00687
00688 if(debug || LNO_Simd_Verbose){
00689 if(WN_operator(wn) == OPR_PAREN)
00690 non_vect_op = OPCODE_name(WN_opcode(WN_kid0(wn)));
00691 else non_vect_op = OPCODE_name(WN_opcode(wn));
00692 }
00693 }
00694
00695
00696 static BOOL Is_Unroll_Statement(WN *stmt, WN *body)
00697 {
00698 for(WN *tmp=WN_first(body); tmp; tmp=WN_next(tmp)){
00699 if(WN_operator(tmp)==OPR_ISTORE || WN_operator(tmp)==OPR_STID){
00700 if(MTYPE_byte_size(WN_desc(stmt)) > MTYPE_byte_size(WN_desc(tmp)))
00701 return TRUE;
00702 }
00703 }
00704 return FALSE;
00705 }
00706
00707
00708
00709
00710
00711
// State shared across calls to Is_Well_Formed_Simd() about uses of the loop
// index variable as an operand.
// Byte size of the type in which the loop index was most recently accepted.
00712 INT Induction_Type_Size;
// TRUE once a use of the loop index has been accepted.
00713 BOOL Induction_Seen;
// Set to TRUE when a later use of the loop index has a different byte size
// than the recorded one.
00714 BOOL Inconsistent_Induction;
00715
00716
00717 static BOOL Is_Well_Formed_Simd ( WN* wn, WN* loop)
00718 {
00719
00720 if (WN_operator(wn) == OPR_ILOAD) {
00721 if (WN_operator(LWN_Get_Parent(wn)) == OPR_ISTORE)
00722 return TRUE;
00723 else
00724 return FALSE;
00725 }
00726
00727 WN* parent = LWN_Get_Parent(wn);
00728 WN* kid0 = WN_kid0(wn);
00729 WN* kid1 = WN_kid1(wn);
00730
00731 if (WN_kid_count(wn) > 2 && WN_operator(wn) != OPR_SELECT)
00732 return FALSE;
00733
00734 if (WN_operator(wn) == OPR_SELECT) {
00735 if (!OPCODE_is_compare(WN_opcode(kid0)) ||
00736 !MTYPE_is_float(WN_desc(kid0)) ||
00737 !MTYPE_is_integral(WN_rtype(kid0)))
00738 return FALSE;
00739 kid0 = WN_kid1(wn);
00740 kid1 = WN_kid2(wn);
00741 }
00742
00743 if (OPCODE_is_compare(WN_opcode(wn)) && WN_operator(parent) != OPR_SELECT)
00744 return FALSE;
00745
00746
00747
00748
00749
00750 if(!Is_Target_EM64T() &&
00751 !Is_Target_Core() &&
00752 !Is_Target_Wolfdale() &&
00753 !Is_Target_Barcelona() &&
00754 WN_operator(wn) == OPR_RECIP && WN_rtype(wn) == MTYPE_F8
00755 && WN_operator(parent) == OPR_MPY)
00756 return FALSE;
00757
00758 if (!LNO_Simd_Reduction) {
00759 if (WN_operator(parent) == OPR_STID)
00760 return FALSE;
00761
00762 if (WN_operator(kid0) == OPR_LDID &&
00763 (WN_desc(kid0) == MTYPE_I1 ||
00764 WN_desc(kid0) == MTYPE_I2 ||
00765 WN_desc(parent) == MTYPE_I1 ||
00766 WN_desc(parent) == MTYPE_I2))
00767 return FALSE;
00768
00769 if (kid1 && WN_operator(kid1) == OPR_LDID &&
00770 (WN_desc(kid1) == MTYPE_I1 ||
00771 WN_desc(kid1) == MTYPE_I2 ||
00772 WN_desc(parent) == MTYPE_I1 ||
00773 WN_desc(parent) == MTYPE_I2))
00774 return FALSE;
00775
00776 }
00777
00778 if (WN_operator(wn) == OPR_MPY && WN_rtype(wn) == MTYPE_I4 &&
00779 WN_desc(parent) != MTYPE_I2){
00780 if((WN_operator(kid0) == OPR_INTCONST && WN_const_val(kid0) == 2 &&
00781 WN_operator(kid1) == OPR_ILOAD && WN_operator(WN_kid0(kid1)) == OPR_ARRAY)
00782 ||(WN_operator(kid1) == OPR_INTCONST && WN_const_val(kid1) == 2 &&
00783 WN_operator(kid0) == OPR_ILOAD && WN_operator(WN_kid0(kid0)) == OPR_ARRAY)
00784 );
00785 else return FALSE;
00786 }
00787
00788
00789
00790
00791
00792
00793
00794 if (MTYPE_is_float(WN_desc(parent)) && WN_operator(parent) == OPR_STID &&
00795 LNO_Run_Simd != 2 && Roundoff_Level == ROUNDOFF_NONE &&
00796
00797
00798
00799 ((WN_operator(wn) == OPR_MPY &&
00800 curr_simd_red_manager->Which_Reduction(parent) == RED_MPY) ||
00801 (WN_operator(wn) == OPR_ADD &&
00802 curr_simd_red_manager->Which_Reduction(parent) == RED_ADD)))
00803 return FALSE;
00804
00805
00806
00807 if (Do_Loop_Is_Mp(loop) &&
00808 WN_operator(parent) == OPR_STID && curr_simd_red_manager != NULL &&
00809 curr_simd_red_manager->Which_Reduction(parent) != RED_NONE)
00810 return FALSE;
00811
00812 if (WN_operator(parent) != OPR_ISTORE && WN_operator(parent) != OPR_STID &&
00813 !is_vectorizable_op(WN_operator(parent),
00814 WN_rtype(parent), WN_desc(parent)))
00815 return FALSE;
00816
00817 if (WN_operator(kid0) == OPR_ILOAD) {
00818 WN* array0 = WN_kid0(kid0);
00819 if (WN_operator(array0) == OPR_ARRAY &&
00820 WN_operator(WN_kid0(array0)) != OPR_LDID &&
00821 WN_operator(WN_kid0(array0)) != OPR_LDA) {
00822
00823
00824
00825 if (WN_operator(WN_kid0(array0)) == OPR_ADD) {
00826 WN* opnd0 = WN_kid0(WN_kid0(array0));
00827 WN* opnd1 = WN_kid1(WN_kid0(array0));
00828 if (((WN_operator(opnd0) == OPR_LDID || WN_operator(opnd0) == OPR_ARRAY) &&
00829 WN_operator(opnd1) == OPR_INTCONST) ||
00830 ((WN_operator(opnd1) == OPR_LDID||WN_operator(opnd0) == OPR_ARRAY) &&
00831 WN_operator(opnd0) == OPR_INTCONST))
00832 ;
00833 else
00834 return FALSE;
00835 } else
00836 return FALSE;
00837 }
00838 }
00839 if (WN_kid_count(wn) > 1 &&
00840 WN_operator(kid1) == OPR_ILOAD) {
00841 WN* array1 = WN_kid0(kid1);
00842 if (WN_operator(array1) == OPR_ARRAY &&
00843 WN_operator(WN_kid0(array1)) != OPR_LDID &&
00844 WN_operator(WN_kid0(array1)) != OPR_LDA) {
00845
00846
00847
00848 if (WN_operator(WN_kid0(array1)) == OPR_ADD) {
00849 WN* opnd0 = WN_kid0(WN_kid0(array1));
00850 WN* opnd1 = WN_kid1(WN_kid0(array1));
00851 if (((WN_operator(opnd0) == OPR_LDID || WN_operator(opnd0) == OPR_ARRAY) &&
00852 WN_operator(opnd1) == OPR_INTCONST) ||
00853 ((WN_operator(opnd1) == OPR_LDID || WN_operator(opnd0) == OPR_ARRAY) &&
00854 WN_operator(opnd0) == OPR_INTCONST))
00855 ;
00856 else
00857 return FALSE;
00858 } else
00859 return FALSE;
00860 }
00861 }
00862
00863
00864 if(WN_operator(kid0) == OPR_ILOAD && WN_operator(WN_kid0(kid0)) == OPR_ARRAY){
00865 WN * stmt = Find_Stmt_Under(wn, WN_do_body(loop));
00866 if(stmt && WN_operator(stmt)==OPR_STID && Is_Unroll_Statement(stmt, WN_do_body(loop))){
00867 if(WN_element_size(WN_kid0(kid0)) != MTYPE_byte_size(WN_desc(stmt)))
00868 return FALSE;
00869 }
00870 }
00871 if(kid1 && WN_operator(kid1) == OPR_ILOAD && WN_operator(WN_kid0(kid1)) == OPR_ARRAY){
00872 WN * stmt = Find_Stmt_Under(wn, WN_do_body(loop));
00873 if(stmt && WN_operator(stmt)==OPR_STID && Is_Unroll_Statement(stmt, WN_do_body(loop))){
00874 if(WN_element_size(WN_kid0(kid1)) != MTYPE_byte_size(WN_desc(stmt)))
00875 return FALSE;
00876 }
00877 }
00878
00879 if(!Is_Vectorizable_Tree(kid0)||!Is_Vectorizable_Tree(kid1))
00880 return FALSE;
00881
00882
00883 if (WN_kid_count(wn) == 2 &&
00884 ((WN_operator(kid0) == OPR_CONST || WN_operator(kid0) == OPR_INTCONST) &&
00885 (WN_operator(kid1) == OPR_CONST || WN_operator(kid1) == OPR_INTCONST)))
00886 return FALSE;
00887
00888 if (WN_operator(kid0) == OPR_LDID) {
00889 SYMBOL symbol1(kid0);
00890 SYMBOL symbol2(WN_index(loop));
00891 if (symbol1 == symbol2) {
00892
00893 if (Do_Loop_Is_Mp(loop))
00894 return FALSE;
00895 INT Type_Size = MTYPE_byte_size(WN_rtype(wn));
00896 if (WN_operator(wn) == OPR_CVT)
00897 Type_Size = MTYPE_byte_size(WN_desc(wn));
00898 if (Induction_Seen &&
00899 Type_Size != Induction_Type_Size) {
00900 Inconsistent_Induction = TRUE;
00901 return FALSE;
00902 }
00903 Induction_Seen = TRUE;
00904 Induction_Type_Size = Type_Size;
00905 }
00906 }
00907
00908 if (kid1 && WN_operator(kid1) == OPR_LDID) {
00909 SYMBOL symbol1(kid1);
00910 SYMBOL symbol2(WN_index(loop));
00911 if (symbol1 == symbol2) {
00912
00913 if (Do_Loop_Is_Mp(loop))
00914 return FALSE;
00915 INT Type_Size = MTYPE_byte_size(WN_rtype(wn));
00916 if (WN_operator(wn) == OPR_CVT)
00917 Type_Size = MTYPE_byte_size(WN_desc(wn));
00918 if (Induction_Seen &&
00919 Type_Size != Induction_Type_Size) {
00920 Inconsistent_Induction = TRUE;
00921 return FALSE;
00922 }
00923 Induction_Seen = TRUE;
00924 Induction_Type_Size = Type_Size;
00925 }
00926 }
00927
00928 if ((WN_operator(kid0) == OPR_ILOAD && WN_field_id(kid0) != 0) ||
00929 (kid1 && WN_operator(kid1) == OPR_ILOAD && WN_field_id(kid1) != 0) ||
00930 (WN_operator(parent) == OPR_ISTORE && WN_field_id(parent) != 0))
00931 return FALSE;
00932
00933
00934
00935 WN* stmt = parent;
00936 while(stmt && !OPCODE_is_store(WN_opcode(stmt)) &&
00937 WN_operator(stmt) != OPR_DO_LOOP){
00938 stmt = LWN_Get_Parent(stmt);
00939 }
00940 if (stmt && WN_operator(stmt) != OPR_DO_LOOP &&
00941 (WN_operator(kid0) == OPR_ILOAD && WN_rtype(kid0) != WN_desc(kid0) &&
00942 WN_desc(kid0) != WN_desc(stmt)) ||
00943 (kid1 && WN_operator(kid1) == OPR_ILOAD &&
00944 WN_rtype(kid1) != WN_desc(kid1) &&
00945 WN_desc(kid1) != WN_desc(stmt)))
00946 return FALSE;
00947
00948
00949
00950
00951
00952
00953
00954
00955 if (WN_operator(wn) != OPR_INTRINSIC_OP &&
00956
00957 !OPCODE_is_compare(WN_opcode(wn)) &&
00958 WN_operator(wn) != OPR_SELECT) {
00959 INT oper_size = -1, opnd_size = -1;
00960 WN* address;
00961
00962 if (WN_operator(parent) == OPR_ISTORE) {
00963 address = WN_kid1(parent);
00964 if (WN_operator(address) != OPR_ARRAY)
00965 return FALSE;
00966 else {
00967 if (WN_element_size(address) > 8)
00968 return FALSE;
00969 else
00970 oper_size = ABS(WN_element_size(address));
00971 }
00972 } else {
00973 oper_size = MTYPE_byte_size(WN_rtype(wn));
00974 if (WN_rtype(wn) == MTYPE_V)
00975 oper_size = MTYPE_byte_size(WN_desc(wn));
00976 }
00977
00978 for (INT kid_count = 0; kid_count < WN_kid_count(wn);
00979 kid_count ++) {
00980 WN* kid = WN_kid(wn, kid_count);
00981
00982 if (WN_operator(kid) == OPR_ILOAD) {
00983 address = WN_kid0(kid);
00984 if (WN_operator(address) != OPR_ARRAY)
00985 return FALSE;
00986 else {
00987 if (WN_element_size(address) > 8)
00988 return FALSE;
00989 else
00990 opnd_size = ABS(WN_element_size(address));
00991 }
00992 } else {
00993 opnd_size = MTYPE_byte_size(WN_rtype(kid));
00994 if (WN_rtype(kid) == MTYPE_V)
00995 opnd_size = MTYPE_byte_size(WN_desc(kid));
00996 }
00997
00998 if (opnd_size != oper_size && WN_operator(wn) != OPR_PARM &&
00999 WN_operator(wn) != OPR_CVT && WN_operator(wn) != OPR_TRUNC)
01000 return FALSE;
01001 if (WN_operator(wn) == OPR_CVT || WN_operator(wn) == OPR_TRUNC) {
01002 INT rsize = MTYPE_byte_size(WN_rtype(wn));
01003 INT dsize = MTYPE_byte_size(WN_desc(wn));
01004 if (rsize != oper_size || dsize != opnd_size)
01005 return FALSE;
01006 }
01007 }
01008 }
01009
01010
01011
01012
01013 if (WN_operator(parent) == OPR_ISTORE &&
01014 WN_operator(WN_kid1(parent)) == OPR_ARRAY &&
01015 ABS(WN_element_size(WN_kid1(parent))) !=
01016 MTYPE_byte_size(WN_desc(parent)))
01017 return FALSE;
01018
01019 return TRUE;
01020 }
01021
01022 static WN* Find_Do_Body (WN* simd_op)
01023 {
01024 WN* parent = LWN_Get_Parent(LWN_Get_Parent(simd_op));
01025 WN* body = LWN_Get_Parent(simd_op);
01026
01027 while (parent) {
01028 if (WN_operator(parent) == OPR_DO_LOOP)
01029 break;
01030 parent = LWN_Get_Parent(parent);
01031 body = LWN_Get_Parent(body);
01032 }
01033 return body;
01034 }
01035
01036 static BOOL is_vectorizable_op_stmt(WN* stmt, WN* loop) {
01037
01038 OPERATOR opr=WN_operator(stmt);
01039 if (opr==OPR_STID || opr==OPR_ISTORE) {
01040 WN* rhs=WN_kid0(stmt);
01041 opr=WN_operator(rhs);
01042 TYPE_ID rtype = WN_rtype(rhs);
01043 TYPE_ID desc = WN_desc(rhs);
01044 if (is_vectorizable_op(opr, rtype, desc)) {
01045 if (Is_Well_Formed_Simd(rhs, loop)) {
01046 return TRUE;
01047 }
01048 }
01049 }
01050 return FALSE;
01051 }
01052
01053 static UINT_DYN_ARRAY* simd_fis_merge_scc_to_form_new_loop(
01054 UINT total_scc,
01055 FF_STMT_LIST* scc,
01056 UINT* scc_size,
01057 WN* loop,
01058 SCC_DIRECTED_GRAPH16 *scc_dep_g
01059 )
01060 {
01061
01062
01063
01064 UINT_DYN_ARRAY *seed_scc=CXX_NEW(UINT_DYN_ARRAY(&SIMD_default_pool),
01065 &SIMD_default_pool);
01066
01067
01068
01069
01070 INT* scc_queue[2];
01071 UINT head0, head1, tail0, tail1;
01072
01073 INT scc_remained=total_scc;
01074 UINT simd=0;
01075 UINT non_simd=1;
01076
01077 UINT i;
01078 for (i=0; i<2; i++) {
01079 scc_queue[i]= CXX_NEW_ARRAY(INT,total_scc+1,&SIMD_default_pool);
01080 }
01081 head0=tail0=0;
01082 head1=tail1=0;
01083
01084
01085 for (i=1; i<=total_scc; i++) {
01086
01087 if (scc_size[i]>0 && scc_dep_g->Get_In_Edge(i)==0) {
01088
01089
01090 if (scc_size[i]==1) {
01091 WN* stmt=scc[i].Head()->Get_Stmt();
01092 if (is_vectorizable_op_stmt(stmt,loop))
01093 scc_queue[simd][head0++]=i;
01094 else
01095 scc_queue[non_simd][head1++]=i;
01096 } else
01097 scc_queue[non_simd][head1++]=i;
01098 } else if (scc_size[i]==0)
01099 scc_remained--;
01100 }
01101
01102 INT kind=simd;
01103 INT last_loop_kind=simd;
01104 WN* body=WN_do_body(loop);
01105 UINT entry_loop_id = seed_scc->Newidx();
01106 BOOL entry = TRUE;
01107 while (1) {
01108 UINT current_scc;
01109 if (kind==simd && head0!=tail0) {
01110 current_scc=scc_queue[simd][tail0++];
01111
01112 if (entry) {
01113 entry = FALSE;
01114 (*seed_scc)[entry_loop_id]=current_scc;
01115 } else {
01116 if (last_loop_kind!= simd) {
01117 UINT loop_id=seed_scc->Newidx();
01118 (*seed_scc)[loop_id]=current_scc;
01119 } else {
01120 scc[(*seed_scc)[seed_scc->Lastidx()]].Append_List(&scc[current_scc]);
01121 }
01122 }
01123 last_loop_kind=simd;
01124 scc_remained--;
01125 } else if (kind==non_simd && head1!=tail1) {
01126 current_scc=scc_queue[non_simd][tail1++];
01127
01128 if (entry) {
01129 entry = FALSE;
01130 (*seed_scc)[entry_loop_id]=current_scc;
01131 } else {
01132 if (last_loop_kind!=non_simd) {
01133 UINT loop_id=seed_scc->Newidx();
01134 (*seed_scc)[loop_id]=current_scc;
01135 } else {
01136 scc[(*seed_scc)[seed_scc->Lastidx()]].Append_List(&scc[current_scc]);
01137 }
01138 }
01139 last_loop_kind=non_simd;
01140 scc_remained--;
01141 } else {
01142 if (head0!=tail0)
01143 kind=simd;
01144 else if (head1!=tail1)
01145 kind=non_simd;
01146 else
01147 break;
01148 continue;
01149 }
01150
01151
01152 EINDEX16 e=scc_dep_g->Get_Out_Edge(current_scc);
01153 while (e) {
01154
01155 VINDEX16 v=scc_dep_g->Get_Sink(e);
01156 scc_dep_g->Delete_Edge(e);
01157 if (scc_dep_g->Get_In_Edge(v)==0) {
01158 if (scc_size[v]==1) {
01159 WN* stmt=scc[v].Head()->Get_Stmt();
01160 if (is_vectorizable_op_stmt(stmt,loop))
01161 scc_queue[simd][head0++]=v;
01162 else
01163 scc_queue[non_simd][head1++]=v;
01164 } else
01165 scc_queue[non_simd][head1++]=v;
01166 }
01167 e=scc_dep_g->Get_Next_Out_Edge(e);
01168 }
01169 }
01170 FmtAssert(scc_remained==0,("Merging not finished in simd phase"));
01171 return seed_scc;
01172 }
01173
01174 static void simd_fis_separate_loop_and_scalar_expand(
01175 UINT_DYN_ARRAY* new_loops,
01176 FF_STMT_LIST* scc,
01177 WN* loop,
01178 FF_STMT_LIST& expandable_ref_list)
01179 {
01180 WN* body=WN_do_body(loop);
01181 UINT total_loops=new_loops->Lastidx()+1;
01182 UINT *loop_size=CXX_NEW_ARRAY(UINT,total_loops,&SIMD_default_pool);
01183
01184 WN2INT *stmt_to_loop=
01185 CXX_NEW(WN2INT(ESTIMATED_SIZE, &SIMD_default_pool),
01186 &SIMD_default_pool);
01187
01188 BOOL fission_ok = (total_loops>1);
01189 UINT i;
01190 for (i=0; i<total_loops; i++) {
01191
01192 UINT seed_scc=(*new_loops)[i];
01193 UINT total_stmt=0;
01194 FF_STMT_ITER s_iter(&scc[seed_scc]);
01195 for (FF_STMT_NODE* stmt_node=s_iter.First(); !s_iter.Is_Empty();
01196 stmt_node=s_iter.Next()) {
01197 WN* stmt=stmt_node->Get_Stmt();
01198 stmt_to_loop->Enter(stmt,i);
01199 LWN_Insert_Block_Before(body,NULL,LWN_Extract_From_Block(stmt));
01200 total_stmt++;
01201 }
01202 loop_size[i]=total_stmt;
01203
01204 }
01205
01206 if (total_loops>=1) {
01207 BOOL has_calls_or_gotos_or_inner_loops = FALSE;
01208 DO_LOOP_INFO* loop_info=Get_Do_Loop_Info(loop, FALSE);
01209 if (loop_info->Has_Calls || loop_info->Has_Gotos || !loop_info->Is_Inner) {
01210 has_calls_or_gotos_or_inner_loops = TRUE;
01211 }
01212
01213 BOOL need_expansion = FALSE;
01214 BOOL need_finalization = FALSE;
01215 STACK<WN*> se_stack(&SIMD_default_pool);
01216 STACK<BOOL> finalize_stack(&SIMD_default_pool);
01217 FF_STMT_ITER r_iter(&expandable_ref_list);
01218 for (FF_STMT_NODE* ref_node=r_iter.First(); !r_iter.Is_Empty();
01219 ref_node=r_iter.Next()) {
01220 WN* ref=ref_node->Get_Stmt();
01221 WN* stmt0=Find_Stmt_Under(ref,body);
01222 WN* wn_eq_loop = NULL;
01223 STACK<WN*>* equivalence_class=
01224 Scalar_Equivalence_Class(ref, Du_Mgr, &SIMD_default_pool,
01225 TRUE, &wn_eq_loop);
01226 BOOL expand = FALSE;
01227 BOOL finalize = FALSE;
01228 while (!equivalence_class->Is_Empty() && !expand) {
01229 WN* ref1=equivalence_class->Pop();
01230 WN* stmt1=Find_Stmt_Under(ref1,body);
01231 if (1) {
01232 expand = TRUE;
01233 need_expansion = TRUE;
01234 if (wn_eq_loop != NULL) {
01235 finalize = TRUE;
01236 need_finalization = TRUE;
01237 }
01238 }
01239 }
01240
01241
01242 if (expand) {
01243 se_stack.Push(ref);
01244 finalize_stack.Push(finalize);
01245 }
01246 }
01247 WN* guard_tests[1];
01248 guard_tests[0] = NULL;
01249 if (need_finalization)
01250 SE_Guard_Tests(loop, 1, guard_tests, Do_Loop_Depth(loop));
01251 for (i=0; i<se_stack.Elements(); i++) {
01252 WN* wn_ref = se_stack.Top_nth(i);
01253 SYMBOL sym(wn_ref);
01254 INT dummy[1]={0};
01255 BOOL finalize = finalize_stack.Top_nth(i);
01256 Scalar_Expand(loop, loop, NULL, sym, &loop, dummy, 1, FALSE,
01257 finalize, FALSE, guard_tests);
01258 }
01259
01260 WN* tmp_loop1=loop;
01261 WN** wn_starts=CXX_NEW_ARRAY(WN*, total_loops, &SIMD_default_pool);
01262 WN** wn_ends=CXX_NEW_ARRAY(WN*, total_loops, &SIMD_default_pool);
01263 WN** wn_steps=CXX_NEW_ARRAY(WN*, total_loops, &SIMD_default_pool);
01264 WN** new_loops=CXX_NEW_ARRAY(WN*, total_loops, &SIMD_default_pool);
01265
01266 wn_starts[0]=WN_kid0(WN_start(tmp_loop1));
01267 wn_ends[0]=WN_end(tmp_loop1);
01268 wn_steps[0]=WN_kid0(WN_step(tmp_loop1));
01269 new_loops[0]=loop;
01270 WN* stmt=WN_first(body);
01271
01272 for (i=0; i<total_loops-1; i++) {
01273
01274 INT size=loop_size[i];
01275
01276 for (INT j=0; j<size; j++)
01277 stmt=WN_next(stmt);
01278
01279 WN* tmp_loop2;
01280
01281 Separate(tmp_loop1, WN_prev(stmt), 1, &tmp_loop2);
01282 LWN_Parentize(tmp_loop2);
01283 DO_LOOP_INFO* new_loop_info =
01284 CXX_NEW(DO_LOOP_INFO(loop_info,&LNO_default_pool), &LNO_default_pool);
01285 Set_Do_Loop_Info(tmp_loop2,new_loop_info);
01286 if (has_calls_or_gotos_or_inner_loops) {
01287
01288
01289 }
01290 wn_starts[i+1]=WN_kid0(WN_start(tmp_loop2));
01291 wn_ends[i+1]=WN_end(tmp_loop2);
01292 wn_steps[i+1]=WN_kid0(WN_step(tmp_loop2));
01293 new_loops[i+1]=tmp_loop2;
01294
01295 tmp_loop1=tmp_loop2;
01296 }
01297
01298 Fission_DU_Update(Du_Mgr,red_manager,wn_starts,wn_ends,wn_steps,
01299 total_loops,new_loops);
01300 for (i=0; i<total_loops-1; i++)
01301 scalar_rename(LWN_Get_Parent(wn_starts[i]));
01302
01303 adg->Fission_Dep_Update(new_loops[0],total_loops);
01304 }
01305 }
01306 // SIMD_KIND enumerators are compared with '>' in Find_Simd_Kind, so their declaration order matters; INVALID is the greatest.
01307 typedef enum { V16I1, V16I2, V16I4, V16I8, V16C8, INVALID } SIMD_KIND;
01308 #define V16F4 V16I4 // F4 shares the enumerator used for I4
01309 #define V16F8 V16I8 // F8 shares the enumerator used for I8
01310 #define V16C4 V16F8 // C4 maps (through V16F8) to V16I8
01311 INT Vec_Unit_Size[6] = { 1, 2, 4, 8, 16, -1 }; // one entry per SIMD_KIND enumerator, in declaration order (presumably bytes per element -- TODO confirm)
01312 // Find_Simd_Kind: returns the smallest SIMD_KIND among the ops in vec_simd_ops, or INVALID if the stack is empty or any op has an unhandled type.
01313 static SIMD_KIND
01314 Find_Simd_Kind ( STACK_OF_WN *vec_simd_ops )
01315 {
01316   SIMD_KIND smallest_kind = INVALID;
01317
01318   for (INT i=0; i<vec_simd_ops->Elements(); i++){
01319     WN* simd_op=vec_simd_ops->Top_nth(i);
01320     // Climb from the op to the nearest enclosing store; the climb also stops at a DO_LOOP.
01321     WN* istore=LWN_Get_Parent(simd_op);
01322
01323     while(istore && !OPCODE_is_store(WN_opcode(istore)) &&
01324           WN_operator(istore) != OPR_DO_LOOP)
01325       istore = LWN_Get_Parent(istore);
01326     FmtAssert(istore != NULL, ("NYI"));
01327     // The assert must not touch istore when it is NULL; a DO_LOOP reached here falls into 'default' below.
01328     TYPE_ID type;
01329     if (WN_desc(istore) == MTYPE_V)
01330       type = WN_rtype(istore);
01331     else
01332       type = WN_desc(istore);
01333     // Keep the minimum kind seen so far (INVALID compares greater than every real kind).
01334     switch(type) {
01335     case MTYPE_C4:
01336       if (smallest_kind > V16C4)
01337         smallest_kind = V16C4;
01338       break;
01339     case MTYPE_C8:
01340       if (smallest_kind > V16C8)
01341         smallest_kind = V16C8;
01342       break;
01343     case MTYPE_F4:
01344       if (smallest_kind > V16F4)
01345         smallest_kind = V16F4;
01346       break;
01347     case MTYPE_F8:
01348       if (smallest_kind > V16F8)
01349         smallest_kind = V16F8;
01350       break;
01351     case MTYPE_I1: case MTYPE_U1:
01352       smallest_kind = V16I1;   // V16I1 is the first enumerator, so no comparison is needed
01353       break;
01354     case MTYPE_I2: case MTYPE_U2:
01355       if (smallest_kind > V16I2)
01356         smallest_kind = V16I2;
01357       break;
01358     case MTYPE_I4: case MTYPE_U4:
01359       if (smallest_kind > V16I4)
01360         smallest_kind = V16I4;
01361       break;
01362     case MTYPE_I8: case MTYPE_U8:
01363       if (smallest_kind > V16I8)
01364         smallest_kind = V16I8;
01365       break;
01366     default:
01367       smallest_kind = INVALID;
01368       break;
01369     }
01370     if (smallest_kind == INVALID)   // unhandled type: give up on the whole stack
01371       break;
01372   }
01373   return smallest_kind;
01374 }
01375 // Is_Vectorizable_Intrinsic: TRUE when the intrinsic carried by wn is one of the accepted ones.
01376 BOOL Is_Vectorizable_Intrinsic (WN *wn)
01377 {
01378   const INTRINSIC id = WN_intrinsic(wn);
01379   BOOL vectorizable = FALSE;
01380
01381   // These three are accepted without looking at OPT_Fast_Math or the target.
01382   switch (id) {
01383   case INTRN_SUBSU2: case INTRN_F4SIGN: case INTRN_F8SIGN:
01384     return TRUE;
01385   default:
01386     break;
01387   }
01388
01389   // Everything else requires OPT_Fast_Math and a target for which Is_Target_32bit() is false.
01390   if (!(OPT_Fast_Math && !Is_Target_32bit()))
01391     return FALSE;
01392
01393   switch (id) {
01394   case INTRN_F4EXP: case INTRN_F8EXP:
01395   case INTRN_F4LOG: case INTRN_F8LOG:
01396   case INTRN_F4SIN: case INTRN_F8SIN:
01397   case INTRN_F4COS: case INTRN_F8COS:
01398   case INTRN_F4EXPEXPR: case INTRN_F8EXPEXPR:
01399 #if 0 // for Bug 8931, single vector sinh and cosh not ready
01400   case INTRN_F4SINH: case INTRN_F4COSH:
01401 #endif
01402   case INTRN_F8SINH: case INTRN_F8COSH:
01403   case INTRN_F4LOG10: case INTRN_F8LOG10:
01404     vectorizable = TRUE;
01405     break;
01406   default:
01407     break;
01408   }
01409
01410   return vectorizable;
01411 }
01412 // Gather_Vectorizable_Ops: scans wn recursively, pushing accepted ops onto simd_ops; returns FALSE as soon as a construct that blocks vectorizing 'loop' is found. 'pool' is only forwarded to the recursive calls.
01413 BOOL Gather_Vectorizable_Ops(
01414 WN* wn, SCALAR_REF_STACK* simd_ops, MEM_POOL *pool, WN *loop)
01415 {
01416 if (WN_opcode(wn) == OPC_BLOCK){
01417 WN* kid = WN_first (wn);
01418 while(kid){
01419 if (!Gather_Vectorizable_Ops(kid,simd_ops,pool,loop))
01420 return FALSE;
01421 kid = WN_next(kid);
01422 }
01423 return TRUE;
01424 }
01425 // Non-block node: classify it by operator, result type and descriptor type.
01426 OPERATOR opr=WN_operator(wn);
01427 TYPE_ID rtype = WN_rtype(wn);
01428 TYPE_ID desc = WN_desc(wn);
01429 // IF and REGION nodes always reject the loop.
01430 if (opr == OPR_IF || opr == OPR_REGION){
01431 Report_Non_Vectorizable_Op(wn);
01432 return FALSE;
01433 }
01434 if (is_vectorizable_op(opr, rtype, desc)){
01435 if ((opr != OPR_INTRINSIC_OP &&
01436 Is_Well_Formed_Simd(wn, loop)) ||
01437 (opr == OPR_INTRINSIC_OP &&
01438 Is_Vectorizable_Intrinsic(wn))) {
01439 SCALAR_REF scalar_ref(wn,0);
01440 simd_ops->Push(scalar_ref);
01441 }else{
01442 // A candidate op that fails the check above is tolerated when Is_Under_Array(wn) holds:
01443 // the scan of this subtree stops here without rejecting the loop.
01444 if(Is_Under_Array(wn)) return TRUE;
01445
01446 Report_Non_Vectorizable_Op(wn);
01447 return FALSE;
01448 }
01449 } else if(OPCODE_is_store(WN_opcode(LWN_Get_Parent(wn)))&&
01450 WN_operator(wn) != OPR_ARRAY){
01451 Report_Non_Vectorizable_Op(wn);
01452 return FALSE;
01453 }
01454 // A CVT that is_vectorizable_op does not accept rejects the loop,
01455 // again unless Is_Under_Array(wn) holds.
01456 if (opr == OPR_CVT && !is_vectorizable_op(opr, rtype, desc)){
01457
01458
01459 if(Is_Under_Array(wn)) return TRUE;
01460 Report_Non_Vectorizable_Op(wn);
01461 return FALSE;
01462 }
01463 // Scan the kids; accepted ops are scanned too, so nested ops are also collected.
01464 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++){
01465 WN* kid = WN_kid(wn,kidno);
01466 if (!Gather_Vectorizable_Ops(kid,simd_ops,pool,loop))
01467 return FALSE;
01468 }
01469
01470 // Check 1: STID that the current reduction manager classifies as a reduction.
01471
01472
01473 if (WN_operator(wn) == OPR_STID && curr_simd_red_manager &&
01474 curr_simd_red_manager->Which_Reduction(wn) != RED_NONE) {
01475
01476 // Reductions whose descriptor type is complex are rejected.
01477 if(MTYPE_is_complex(WN_desc(wn))){
01478 Report_Non_Vectorizable_Op(wn);
01479 return FALSE;
01480 }
01481
01482 // Each use inside the loop must sit in a statement that is itself a reduction and,
01483 // when that statement is an STID, it must store to the same symbol and offset as wn.
01484
01485 if (!Du_Mgr)
01486 return FALSE;
01487 USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
01488 if (!use_list)
01489 return FALSE;
01490 WN *body = WN_do_body(loop);
01491 USE_LIST_ITER uiter(use_list);
01492 INT num_reuse = 0;
01493 for (DU_NODE* u = uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
01494 WN* use=u->Wn();
01495 if (Wn_Is_Inside(use, loop)) {
01496 WN* stmt = Find_Stmt_Under(use, body);
01497 if (curr_simd_red_manager->Which_Reduction(stmt) == RED_NONE ||
01498 (WN_operator(stmt) == OPR_STID &&
01499 (WN_st(stmt) != WN_st(wn) ||
01500 WN_store_offset(stmt) != WN_store_offset(wn)))){
01501 Report_Non_Vectorizable_Op(wn);
01502 return FALSE;
01503 }
01504 }
01505
01506 // At most one use may be located inside this statement itself.
01507 if (Wn_Is_Inside(use, wn)) {
01508 if (num_reuse > 0){
01509 Report_Non_Vectorizable_Op(wn);
01510 return FALSE;
01511 }
01512 else num_reuse ++;
01513 }
01514 }
01515 }
01516
01517 // Check 2: non-reduction STID. Needs a complete use list; a use whose body statement
01518 // is reached at or before wn while walking the loop body rejects the loop.
01519 if (WN_operator(wn) == OPR_STID && curr_simd_red_manager &&
01520 curr_simd_red_manager->Which_Reduction(wn) == RED_NONE) {
01521 if (!Du_Mgr)
01522 return FALSE;
01523 USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
01524 if (!use_list || use_list->Incomplete())
01525 return FALSE;
01526 USE_LIST_ITER uiter(use_list);
01527 for (DU_NODE* u = uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
01528 WN* use=u->Wn();
01529 WN* body = WN_do_body(loop);
01530 WN* stmt = Find_Stmt_Under(use, body);
01531
01532
01533
01534
01535
01536 // A use that is kid0 of a COPYIN_BOUND xpragma rejects the loop
01537 // (this path returns without calling Report_Non_Vectorizable_Op).
01538 if(stmt && WN_operator(stmt)==OPR_XPRAGMA &&
01539 WN_pragma(stmt) == WN_PRAGMA_COPYIN_BOUND &&
01540 WN_kid0(stmt) == use)
01541 return FALSE;
01542
01543 // Walk the body in order: meeting the use's statement before (or at) wn rejects the loop.
01544 WN* loop_stmt = WN_first(body);
01545 for (; loop_stmt; loop_stmt = WN_next(loop_stmt)) {
01546 if (loop_stmt == stmt){
01547 Report_Non_Vectorizable_Op(wn);
01548 return FALSE;
01549 }
01550 if (loop_stmt == wn)
01551 break;
01552 }
01553 }
01554 }
01555
01556 // Check 3: any STID. A use inside the loop that has an ARRAY ancestor before reaching
01557 // a load or store ancestor rejects the loop.
01558 if (WN_operator(wn) == OPR_STID) {
01559 if (!Du_Mgr)
01560 return FALSE;
01561 USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
01562 if (!use_list)
01563 return FALSE;
01564 USE_LIST_ITER uiter(use_list);
01565 for (DU_NODE* u = uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
01566 WN* use=u->Wn();
01567 if (Wn_Is_Inside(use, loop)) {
01568 WN* parent = LWN_Get_Parent(use);
01569 while(parent &&
01570 !OPCODE_is_load(WN_opcode(parent)) &&
01571 !OPCODE_is_store(WN_opcode(parent))) {
01572 if (WN_operator(parent) == OPR_ARRAY){
01573 Report_Non_Vectorizable_Op(wn);
01574 return FALSE;
01575 }
01576 parent = LWN_Get_Parent(parent);
01577 }
01578 }
01579 }
01580 }
01581
01582 // Check 4: non-reduction STID. The value must have a use inside the loop, and no
01583 // examined use may lie inside this statement; iteration stops after the first in-loop use.
01584 if (WN_operator(wn) == OPR_STID && curr_simd_red_manager &&
01585 curr_simd_red_manager->Which_Reduction(wn) == RED_NONE) {
01586 if (!Du_Mgr)
01587 return FALSE;
01588 USE_LIST* use_list=Du_Mgr->Du_Get_Use(wn);
01589 if (!use_list)
01590 return FALSE;
01591 USE_LIST_ITER uiter(use_list);
01592 BOOL used_in_loop = FALSE;
01593 for (DU_NODE* u = uiter.First(); !uiter.Is_Empty() && !used_in_loop;
01594 u=uiter.Next()) {
01595 WN* use=u->Wn();
01596 if (Wn_Is_Inside(use, loop))
01597 used_in_loop = TRUE;
01598
01599
01600
01601 if (Wn_Is_Inside(use, wn)){
01602 Report_Non_Vectorizable_Op(wn);
01603 return FALSE;
01604 }
01605 }
01606 if (used_in_loop == FALSE){
01607 Report_Non_Vectorizable_Op(wn);
01608 return FALSE;
01609 }
01610 }
01611
01612 // Check 5: ISTORE. Reject when a later sibling ISTORE has an equal kid1 tree and a different offset that is at most one element (byte size of this store's desc) away.
01613 if (WN_operator(wn) == OPR_ISTORE) {
01614 WN* stmt_next = WN_next(wn);
01615 while(stmt_next) {
01616 if (WN_operator(stmt_next) == OPR_ISTORE &&
01617 WN_Simp_Compare_Trees(WN_kid1(wn), WN_kid1(stmt_next)) == 0 &&
01618 ABS(WN_offset(wn) - WN_offset(stmt_next)) <=
01619 MTYPE_byte_size(WN_desc(wn)) &&
01620 WN_offset(wn) != WN_offset(stmt_next)){
01621 Report_Non_Vectorizable_Op(wn);
01622 return FALSE;
01623 }
01624 stmt_next = WN_next(stmt_next);
01625 }
01626 }
01627
01628 return TRUE;
01629 }
01630
01631 // Find_Nodes
01632 //   Pushes onto 'stack' every node of the tree rooted at wn_tree whose
01633 //   operator is 'opr' and whose SYMBOL equals 'sym'.  A node is tested
01634 //   before its descendants.  BLOCK nodes are traversed through their
01635 //   statement list, every other node through its kids.
01636
01637
01638 static void Find_Nodes(OPERATOR opr,
01639                        SYMBOL sym,
01640                        WN* wn_tree,
01641                        STACK<WN*>* stack)
01642 {
01643   if (WN_operator(wn_tree) == opr) {
01644     SYMBOL candidate(wn_tree);
01645     if (candidate == sym) stack->Push(wn_tree);
01646   }
01647   if (WN_opcode(wn_tree) != OPC_BLOCK) {
01648     for (INT k = 0; k < WN_kid_count(wn_tree); k++)
01649       Find_Nodes(opr, sym, WN_kid(wn_tree, k), stack);
01650     return;
01651   }
01652   // BLOCK: visit the statements in list order.
01653   for (WN* stmt = WN_first(wn_tree); stmt != NULL; stmt = WN_next(stmt))
01654     Find_Nodes(opr, sym, stmt, stack);
01655 }
01656 // Add_Vertices: calls adg->Add_Vertex for every load and store node in wn_tree (a node is handled before its kids).
01657 static void Add_Vertices(WN *wn_tree)
01658 {
01659   const OPCODE opc = WN_opcode(wn_tree);
01660   if (opc == OPC_BLOCK) {
01661     for (WN* stmt = WN_first(wn_tree); stmt != NULL; stmt = WN_next(stmt))
01662       Add_Vertices(stmt);
01663     return;
01664   }
01665   if (OPCODE_is_load(opc) || OPCODE_is_store(opc))
01666     adg->Add_Vertex(wn_tree);
01667   for (INT k = 0; k < WN_kid_count(wn_tree); k++)
01668     Add_Vertices(WN_kid(wn_tree, k));
01669 }
01670 // Delete_Def_Use: for every LDID found in wn_tree, removes through Du_Mgr each def-use arc that ends at that LDID; kids are visited recursively.
01671 static void Delete_Def_Use (WN *wn_tree)
01672 {
01673 if (WN_operator(wn_tree) == OPR_LDID) {
01674 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_tree);
01675 if(def_list==NULL) return; // no def list: nothing to delete (this also skips the kid loop below)
01676 DEF_LIST_ITER iter(def_list);
01677 const DU_NODE *node = iter.First();
01678 const DU_NODE *next;
01679 Is_True(!iter.Is_Empty(),("Empty def list in Delete_Def_Use"));
01680 for(next = iter.Next(); node; node=next, next=iter.Next()){ // the successor is fetched before the current arc is deleted
01681 WN *def = (WN *) node->Wn();
01682 Du_Mgr->Delete_Def_Use(def,wn_tree);
01683 }
01684 }
01685 for (INT i = 0; i < WN_kid_count(wn_tree); i++)
01686 Delete_Def_Use(WN_kid(wn_tree, i));
01687 }
01688 // Copy_Def_Use: walks from_tree and to_tree in parallel; for each LDID whose symbol differs from sym, every definition of the from_tree LDID is also attached to the to_tree LDID.
01689 static void Copy_Def_Use (WN *from_tree,
01690 WN *to_tree,
01691 SYMBOL sym,
01692 BOOL no_synch)
01693 {
01694 FmtAssert(WN_operator(from_tree) == WN_operator(to_tree) ||
01695 no_synch,
01696 ("from_tree and to_tree not in synch"));
01697 if (WN_operator(from_tree) != WN_operator(to_tree) &&
01698 // With no_synch, a mismatching pair ends the walk of this subtree, except LT (from) against LE (to).
01699 !(WN_operator(from_tree) == OPR_LT &&
01700 WN_operator(to_tree) == OPR_LE))
01701 return;
01702
01703 if (WN_operator(from_tree) == OPR_LDID) {
01704 SYMBOL currsym = SYMBOL(from_tree);
01705 FmtAssert(SYMBOL(to_tree) == currsym,
01706 ("from_tree and to_tree have different symbols"));
01707
01708 // References to sym itself are left without copied definitions;
01709 // only LDIDs of other symbols get the arcs of their counterpart.
01710 if (currsym != sym) {
01711 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(from_tree);
01712 DEF_LIST_ITER iter(def_list);
01713 const DU_NODE *node = iter.First();
01714 Is_True(!iter.Is_Empty(),("Empty def list in Copy_Def_Use"));
01715 for(; !iter.Is_Empty();node=iter.Next()){
01716 WN *def = (WN *) node->Wn();
01717 Du_Mgr->Add_Def_Use(def, to_tree);
01718 DEF_LIST *def_list_to = Du_Mgr->Ud_Get_Def(to_tree);
01719 DEF_LIST *def_list_from = Du_Mgr->Ud_Get_Def(from_tree);
01720 def_list_to->Set_loop_stmt(def_list_from->Loop_stmt()); // carry over the loop_stmt recorded on the source def list
01721 }
01722 }
01723 }
01724 // Recurse pairwise; the kid count is taken from from_tree.
01725 for (INT i = 0; i < WN_kid_count(from_tree); i ++) {
01726 Copy_Def_Use(WN_kid(from_tree, i), WN_kid(to_tree, i), sym, no_synch);
01727 }
01728 }
01729 // Simd_Replace_With_Constant: replaces every LDID of 'sym' in the tree 'copy' by a new INTCONST of type index_type carrying the value of 'cons'.
01730 static void
01731 Simd_Replace_With_Constant(WN *copy, SYMBOL sym, WN *cons, TYPE_ID index_type)
01732 {
01733   FmtAssert(WN_operator(cons) == OPR_INTCONST, ("Handle this"));
01734
01735   if (WN_operator(copy) == OPR_LDID && SYMBOL(copy) == sym) {
01736     WN *owner = LWN_Get_Parent(copy);
01737     // Locate the kid slot of the parent that holds this LDID.
01738     INT slot = 0;
01739     while (slot < WN_kid_count(owner) && WN_kid(owner, slot) != copy)
01740       slot++;
01741     // Build the constant and hook it into that slot.
01742     WN *literal =
01743       WN_CreateIntconst(OPCODE_make_op(OPR_INTCONST, index_type, MTYPE_V),
01744                         WN_const_val(cons));
01745     WN_kid(owner, slot) = literal;
01746     LWN_Set_Parent(literal, owner);
01747   }
01748
01749   // The recursion walks the kids of the node that was passed in.
01750   for (INT k = 0; k < WN_kid_count(copy); k++) {
01751     Simd_Replace_With_Constant(WN_kid(copy, k), sym, cons, index_type);
01752   }
01753   return;
01754 }
01755
01756 // Update_Symbol_Use_Def: for each LDID in src (all of them when flag is FALSE, otherwise only those whose
01757 // symbol differs from 'symbol'), attaches every definition of that LDID to each LDID of the same symbol found in dest.
01758
01759 static void Update_Symbol_Use_Def (WN *src, WN *dest, SYMBOL symbol, BOOL flag)
01760 {
01761 if (WN_operator(src) == OPR_LDID) {
01762 SYMBOL currsym = SYMBOL(src);
01763 if (!flag || currsym != symbol) {
01764
01765 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(src);
01766 DEF_LIST_ITER iter(def_list);
01767 const DU_NODE *node = iter.First();
01768 Is_True(!iter.Is_Empty(),("Empty def list in Update_Symbol_Use_Def"));
01769
01770 // Collect the LDIDs of currsym in dest (the stack type is DOLOOP_STACK, but it is
01771 // filled by Find_Nodes with LDID nodes).
01772 DOLOOP_STACK sym_stack(&LNO_local_pool);
01773 Find_Nodes(OPR_LDID, currsym, dest, &sym_stack);
01774 for(; !iter.Is_Empty();node=iter.Next()){
01775 for (INT k = 0; k < sym_stack.Elements(); k++) {
01776 WN* wn_use = sym_stack.Bottom_nth(k);
01777 // Link this definition to the use in dest and copy the loop_stmt of src's def list.
01778 WN *def = (WN *) node->Wn();
01779 Du_Mgr->Add_Def_Use(def, wn_use);
01780 DEF_LIST *def_list_to = Du_Mgr->Ud_Get_Def(wn_use);
01781 DEF_LIST *def_list_from = Du_Mgr->Ud_Get_Def(src);
01782 def_list_to->Set_loop_stmt(def_list_from->Loop_stmt());
01783 }
01784 }
01785 }
01786 } else {
01787 for (INT i = 0; i < WN_kid_count(src); i ++)
01788 Update_Symbol_Use_Def(WN_kid(src, i), dest, symbol, flag);
01789 }
01790 }
01791 // Analyse_Dependencies: returns TRUE when innerloop should not be vectorized (no candidate op, an analysis step failed, or a statement
01792 // holding a candidate op shares its SCC with other statements); FALSE otherwise. It splits expressions into pregs, so it modifies innerloop.
01793 BOOL Analyse_Dependencies(WN* innerloop)
01794 {
01795 WN* body=WN_do_body(innerloop);
01796 WN* stmt;
01797 // Statement-level dependence graph; SCCs are computed on it further down.
01798 SCC_DIRECTED_GRAPH16 *dep_g_p =
01799 CXX_NEW(SCC_DIRECTED_GRAPH16(ESTIMATED_SIZE,ESTIMATED_SIZE),
01800 &SIMD_default_pool);
01801
01802
01803 // Maps each statement directly under the loop body to its vertex in dep_g_p.
01804 WN2VINDEX *stmt_to_vertex=
01805 CXX_NEW(WN2VINDEX(ESTIMATED_SIZE, &SIMD_default_pool),
01806 &SIMD_default_pool);
01807
01808 SCALAR_REF_STACK *simd_ops =
01809 CXX_NEW(SCALAR_REF_STACK(&SIMD_default_pool),
01810 &SIMD_default_pool);
01811 // With reductions enabled, build a reduction manager for this loop and make it the current one.
01812 if (LNO_Simd_Reduction) {
01813 depanal_red_manager = CXX_NEW
01814 (REDUCTION_MANAGER(&SIMD_default_pool), &SIMD_default_pool);
01815 depanal_red_manager->Build(innerloop,TRUE,FALSE);
01816 curr_simd_red_manager = depanal_red_manager;
01817 }
01818 // NOTE(review): the BOOL result of Gather_Vectorizable_Ops is ignored here; only the collected ops are used.
01819 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)) {
01820 Gather_Vectorizable_Ops(stmt,simd_ops,&SIMD_default_pool, innerloop) ;
01821 }
01822 // Drop the local manager and point curr_simd_red_manager back at simd_red_manager.
01823 if (LNO_Simd_Reduction && depanal_red_manager) {
01824 CXX_DELETE(depanal_red_manager,&SIMD_default_pool);
01825 curr_simd_red_manager = simd_red_manager;
01826 }
01827 // Nothing collected: report TRUE.
01828 if (simd_ops->Elements()==0) {
01829 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01830 return TRUE;
01831 }
01832 // Ops that were actually split out into pregs.
01833 STACK_OF_WN *vec_simd_ops=
01834 CXX_NEW(STACK_OF_WN(&SIMD_default_pool),&SIMD_default_pool);
01835
01836 for (INT i=0; i<simd_ops->Elements(); i++) {
01837 // Climb to the statement directly under the loop body; crossing a BLOCK on the way means the op is nested and is skipped.
01838 WN* simd_op=simd_ops->Top_nth(i).Wn;
01839 WN* stmt=simd_op;
01840 WN* stmt1;
01841 BOOL under_scf=FALSE;
01842 while ((stmt1=LWN_Get_Parent(stmt))!=body) {
01843 stmt=stmt1;
01844 if (WN_opcode(stmt)==OPC_BLOCK) {
01845 under_scf=TRUE;
01846 break;
01847 }
01848 }
01849 if (under_scf)
01850 continue;
01851 TYPE_ID rtype = WN_rtype(simd_op); // NOTE(review): rtype and desc are not read in this function
01852 TYPE_ID desc = WN_desc(simd_op);
01853 UINT kid_no;
01854 BOOL splitted=FALSE;
01855 // Split every operand of the op; each split is expected to yield an STID with a single use.
01856 for (kid_no=0; kid_no<WN_kid_count(simd_op); kid_no++) {
01857 WN* tmp=WN_kid(simd_op,kid_no);
01858 tmp = Split_Using_Preg(stmt,tmp,adg,FALSE);
01859 FmtAssert(WN_operator(tmp)==OPR_STID,
01860 ("Expecting STID after splitting"));
01861 USE_LIST* use_list=Du_Mgr->Du_Get_Use(tmp);
01862 DU_NODE* node=use_list->Head();
01863 FmtAssert(use_list->Tail()==node, ("Too many uses after splitting"));
01864 splitted=TRUE;
01865 }
01866 if (!splitted)
01867 continue;
01868 // Only ops with at least one kid reach this point.
01869 vec_simd_ops->Push(simd_op);
01870
01871 WN_OFFSET offset=WN_offset(WN_prev(stmt)); // NOTE(review): 'offset' is not read afterwards
01872 // Finally split the op itself out of its statement.
01873 WN *simd_root = Split_Using_Preg(stmt,simd_op,adg,FALSE);
01874 FmtAssert(WN_operator(simd_root)==OPR_STID,
01875 ("Expecting STID after splitting"));
01876 USE_LIST* use_list=Du_Mgr->Du_Get_Use(simd_root);
01877 DU_NODE* node=use_list->Head();
01878 FmtAssert(use_list->Tail()==node, ("Too many uses after splitting"));
01879 WN* use=node->Wn(); // NOTE(review): 'use' is not read afterwards
01880
01881
01882
01883 }
01884
01885 if (vec_simd_ops->Elements()==0) {
01886 // No op was split: report TRUE.
01887 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01888 return TRUE;
01889 }
01890 // Containers filled by New_Gather_References below.
01891 REF_LIST_STACK* writes = CXX_NEW(REF_LIST_STACK(&SIMD_default_pool),
01892 &SIMD_default_pool);
01893 REF_LIST_STACK* reads = CXX_NEW(REF_LIST_STACK(&SIMD_default_pool),
01894 &SIMD_default_pool);
01895
01896 SCALAR_STACK* scalar_writes = CXX_NEW(SCALAR_STACK(&SIMD_default_pool),
01897 &SIMD_default_pool);
01898 SCALAR_STACK* scalar_reads = CXX_NEW(SCALAR_STACK(&SIMD_default_pool),
01899 &SIMD_default_pool);
01900 SCALAR_REF_STACK* params = CXX_NEW(SCALAR_REF_STACK(&SIMD_default_pool),
01901 &SIMD_default_pool);
01902
01903 // Stack of loops built from innerloop by Build_Doloop_Stack.
01904 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&SIMD_default_pool),
01905 &SIMD_default_pool);
01906 Build_Doloop_Stack(innerloop, stack1);
01907
01908 // Gather references statement by statement; a status of -1 aborts the analysis.
01909 Init_Ref_Stmt_Counter();
01910 INT32 gather_status = 0;
01911 for (stmt=WN_first(body); stmt && gather_status!= -1; stmt=WN_next(stmt)) {
01912 gather_status=New_Gather_References(stmt,writes,reads,stack1,
01913 scalar_writes,scalar_reads,
01914 params,&SIMD_default_pool) ;
01915 }
01916 if (gather_status == -1) {
01917 DevWarn("Error in gathering references");
01918 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01919 return TRUE;
01920 }
01921 // One vertex per statement of the loop body; a vertex index of 0 is treated as failure.
01922 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)) {
01923 VINDEX16 v=dep_g_p->Add_Vertex();
01924 if (v==0) {
01925 DevWarn("Statement dependence graph problem");
01926 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01927 return TRUE;
01928 }
01929 stmt_to_vertex->Enter(stmt, v);
01930 }
01931 // Dictionary queried below to obtain the bit position of each expandable scalar.
01932 BINARY_TREE<NAME2BIT> *mapping_dictionary =
01933 CXX_NEW(BINARY_TREE<NAME2BIT>(&SIMD_default_pool),
01934 &SIMD_default_pool);
01935
01936 // List of expandable references, passed by reference to simd_2.
01937 FF_STMT_LIST expandable_ref_list;
01938
01939
01940
01941
01942 // simd_2 returns the symbol count used to size the bit vector below.
01943 UINT sym_count=simd_2(innerloop, scalar_reads, scalar_writes,
01944 mapping_dictionary, expandable_ref_list);
01945
01946
01947 BIT_VECTOR Expandable_Scalar_Set(sym_count, &SIMD_default_pool);
01948
01949
01950 // Set the bit of every symbol that appears in expandable_ref_list.
01951 FF_STMT_ITER e_iter(&expandable_ref_list);
01952 for (FF_STMT_NODE* ref_node=e_iter.First(); !e_iter.Is_Empty();
01953 ref_node=e_iter.Next()) {
01954 NAME2BIT temp_map;
01955 temp_map.Set_Symbol(ref_node->Get_Stmt());
01956 Expandable_Scalar_Set.Set(mapping_dictionary->Find(temp_map)->
01957 Get_Data()->Get_Bit_Position());
01958 }
01959
01960 if (LNO_Test_Dump) {
01961 printf("Expandable_Scalar_Set=\n");
01962 Expandable_Scalar_Set.Print(stdout);
01963 }
01964 // Level array graph (sdg) that receives the scalar and array dependences.
01965 WN_MAP sdm=WN_MAP_Create(&SIMD_default_pool);
01966 ARRAY_DIRECTED_GRAPH16 *sdg =
01967 CXX_NEW(ARRAY_DIRECTED_GRAPH16(100,500,sdm,LEVEL_ARRAY_GRAPH),
01968 &SIMD_default_pool);
01969
01970 for (stmt = WN_first(body); stmt; stmt = WN_next(stmt)) {
01971 if (!Map_Stmt_To_Level_Graph(stmt,sdg)) {
01972 FmtAssert(0, ("Error in mapping stmt to level graph\n")); // NOTE(review): the cleanup below only runs if FmtAssert returns
01973 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01974 CXX_DELETE(sdg, &SIMD_default_pool);
01975 WN_MAP_Delete(sdm);
01976 return TRUE;
01977 }
01978 }
01979 // Scalar dependences first, then array dependences; a FALSE status from either aborts with TRUE.
01980 BOOL status=Generate_Scalar_Dependence_For_Statement_Dependence_Graph(
01981 innerloop, scalar_reads, scalar_writes, params, sdg, red_manager,
01982 &Expandable_Scalar_Set, mapping_dictionary);
01983 if (status==FALSE) {
01984 DevWarn("Statement dependence graph problem");
01985 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01986 CXX_DELETE(sdg, &SIMD_default_pool);
01987 WN_MAP_Delete(sdm);
01988 return TRUE;
01989 }
01990
01991 status=Generate_Array_Dependence_For_Statement_Dependence_Graph(
01992 innerloop, reads, writes, sdg, red_manager, adg);
01993 if (status==FALSE) {
01994 DevWarn("Statement dependence graph problem");
01995 CXX_DELETE(dep_g_p, &SIMD_default_pool);
01996 CXX_DELETE(sdg, &SIMD_default_pool);
01997 WN_MAP_Delete(sdm);
01998 return TRUE;
01999 }
02000
02001
02002 // Copy the edges of sdg into dep_g_p, using the vertices recorded in stmt_to_vertex.
02003
02004 EINDEX16 e=sdg->Get_Edge();
02005 while (e) {
02006 WN* source=sdg->Get_Wn(sdg->Get_Source(e));
02007 WN* sink=sdg->Get_Wn(sdg->Get_Sink(e));
02008 if (LWN_Get_Parent(source) == body || LWN_Get_Parent(sink) == body)
02009 // The 'if' above guards only the Add_Unique_Edge call; the edge advance below always runs.
02010 dep_g_p->Add_Unique_Edge(
02011 stmt_to_vertex->Find(source),
02012 stmt_to_vertex->Find(sink));
02013 e=sdg->Get_Next_Edge(e);
02014
02015 }
02016
02017
02018 // NOTE(review): ac_g, and scc allocated below, are not read again in this function.
02019 SCC_DIRECTED_GRAPH16 *ac_g;
02020 ac_g = dep_g_p->Acyclic_Condensation(&SIMD_default_pool);
02021
02022 VINDEX16 total_scc = dep_g_p->Get_Scc_Count();
02023
02024
02025 FF_STMT_LIST *scc;
02026 scc = CXX_NEW_ARRAY(FF_STMT_LIST, total_scc+1, &SIMD_default_pool);
02027
02028 UINT *scc_size=CXX_NEW_ARRAY(UINT, total_scc+1, &SIMD_default_pool);
02029 // Entries 1..total_scc are cleared; entry 0 is left untouched.
02030 for (INT i=1; i<=total_scc; i++) {
02031 scc_size[i]=0;
02032 }
02033
02034 // Count the statements that fall into each SCC.
02035 for (stmt = WN_first(WN_do_body(innerloop)); stmt; stmt = WN_next(stmt)) {
02036 VINDEX16 scc_id;
02037 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
02038 scc_size[scc_id]++;
02039 }
02040 // A statement holding a split op must be alone in its SCC; otherwise report TRUE.
02041 for (INT i=0; i<vec_simd_ops->Elements(); i++) {
02042 WN* simd_op=vec_simd_ops->Top_nth(i);
02043 stmt=Find_Stmt_Under(simd_op,body);
02044 VINDEX16 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
02045 if (scc_size[scc_id]!=1) {
02046 CXX_DELETE(dep_g_p, &SIMD_default_pool);
02047 CXX_DELETE(sdg, &SIMD_default_pool);
02048 WN_MAP_Delete(sdm);
02049 return TRUE;
02050 }
02051 }
02052 // Every check passed.
02053 CXX_DELETE(dep_g_p, &SIMD_default_pool);
02054 CXX_DELETE(sdg, &SIMD_default_pool);
02055 WN_MAP_Delete(sdm);
02056 return FALSE;
02057 }
02058 // Loop_Has_Asm: TRUE when the tree walk over the body of 'loop' meets an OPR_ASM_STMT node.
02059 static BOOL Loop_Has_Asm (WN* loop)
02060 {
02061   LWN_ITER* walker = LWN_WALK_TreeIter(WN_do_body(loop));
02062   // NOTE(review): the early return leaves the walk unfinished -- confirm the iterator needs no explicit release.
02063   while (walker != NULL) {
02064     if (WN_operator(walker->wn) == OPR_ASM_STMT)
02065       return TRUE;
02066     walker = LWN_WALK_TreeNext(walker);
02067   }
02068   return FALSE;
02069 }
02070
02071 // Contain_Vectorizable_Intrinsic
02072 //   TRUE when the tree rooted at wn holds at least one INTRINSIC_OP node
02073 //   accepted by Is_Vectorizable_Intrinsic.  The node itself is tested
02074 //   first, then its statements (for a BLOCK) or its kids, in order; the
02075 //   search stops at the first hit.
02076 static BOOL Contain_Vectorizable_Intrinsic(WN *wn)
02077 {
02078   // A vectorizable intrinsic at this node settles the question.
02079   if (WN_operator(wn) == OPR_INTRINSIC_OP &&
02080       Is_Vectorizable_Intrinsic(wn))
02081     return TRUE;
02082
02083   // BLOCK: look through the statement list.
02084   if (WN_opcode(wn) == OPC_BLOCK) {
02085     for (WN* stmt = WN_first(wn); stmt != NULL; stmt = WN_next(stmt)) {
02086       if (Contain_Vectorizable_Intrinsic(stmt))
02087         return TRUE;
02088     }
02089     return FALSE;
02090   }
02091
02092   // Any other node: look through the kids until one reports a hit.
02093   BOOL found = FALSE;
02094   INT k = 0;
02095   while (!found && k < WN_kid_count(wn)) {
02096     found = Contain_Vectorizable_Intrinsic(WN_kid(wn, k));
02097     k++;
02098   }
02099
02100   return found;
02101 }
02102
02103 // Is_Aggressive_Vintr_Loop
02104 //   TRUE when LNO_Run_Vintr is at least 2, innerloop is a DO_LOOP without
02105 //   asm statements that passes the Do_Loop_* checks below, and its body
02106 //   contains a vectorizable intrinsic.
02107
02108 extern BOOL Is_Aggressive_Vintr_Loop(WN* innerloop)
02109 {
02110   if (LNO_Run_Vintr < 2)
02111     return FALSE;
02112
02113   // Test the opcode first: Loop_Has_Asm reads WN_do_body(), which must only be applied to a DO_LOOP.
02114   if (WN_opcode(innerloop) != OPC_DO_LOOP || Loop_Has_Asm(innerloop))
02115     return FALSE;
02116
02117   if (!Do_Loop_Is_Good(innerloop) ||
02118       Do_Loop_Has_Calls(innerloop) ||
02119       Do_Loop_Has_Gotos(innerloop) ||
02120       Do_Loop_Is_Mp(innerloop) ||
02121       !Do_Loop_Is_Inner(innerloop))
02122     return FALSE;
02123
02124   WN* body = WN_do_body(innerloop);
02125   return Contain_Vectorizable_Intrinsic(body);
02126 }
02127 // Is_Vectorizable_Loop: TRUE when innerloop passes the eligibility checks, the op and unit-stride scans,
02128 // and Analyse_Dependencies (run on a copy of the loop) returns FALSE. Simd_Mark_Code is applied to the body of innerloop itself.
02129 extern BOOL Is_Vectorizable_Loop (WN* innerloop)
02130 {
02131   if (LNO_Run_Simd == 0)
02132     return FALSE;
02133   // Test the opcode first: Loop_Has_Asm reads WN_do_body(), which must only be applied to a DO_LOOP.
02134   if (WN_opcode(innerloop) != OPC_DO_LOOP || Loop_Has_Asm(innerloop))
02135     return FALSE;
02136
02137   // Remaining eligibility checks, in their original order.
02138   if (!Do_Loop_Is_Good(innerloop) ||
02139       Do_Loop_Has_Calls(innerloop) ||
02140       Do_Loop_Has_Gotos(innerloop) ||
02141       Do_Loop_Is_Mp(innerloop) ||
02142       !Do_Loop_Is_Inner(innerloop))
02143     return FALSE;
02144
02145   WN* body = WN_do_body(innerloop);
02146   WN* stmt;
02147   MEM_POOL SIMD_tmp_pool;
02148   MEM_POOL_Initialize(&SIMD_tmp_pool,"SIMD_tmp_pool",FALSE);
02149   MEM_POOL_Push(&SIMD_tmp_pool);
02150   // Scratch stack that receives the ops found by Gather_Vectorizable_Ops.
02151   SCALAR_REF_STACK *simd_ops =
02152     CXX_NEW(SCALAR_REF_STACK(&SIMD_tmp_pool),
02153             &SIMD_tmp_pool);
02154   // Mark the body with the WN simplifier switched off, then restore the previous simplifier state.
02155   BOOL save_simp_state = WN_Simplifier_Enable(FALSE);
02156   Simd_Mark_Code(WN_do_body(innerloop));
02157   WN_Simplifier_Enable(save_simp_state);
02158   // With reductions enabled, build a reduction manager over the enclosing FUNC_ENTRY and make it current.
02159   if (LNO_Simd_Reduction) {
02160     WN* func_nd = LWN_Get_Parent(innerloop);
02161     while(func_nd && WN_opcode(func_nd) != OPC_FUNC_ENTRY)
02162       func_nd = LWN_Get_Parent(func_nd);
02163     simd_red_manager = CXX_NEW
02164       (REDUCTION_MANAGER(&SIMD_tmp_pool), &SIMD_tmp_pool);
02165     simd_red_manager->Build(func_nd,TRUE,FALSE);
02166     curr_simd_red_manager = simd_red_manager;
02167   }
02168
02169   Induction_Seen = FALSE;
02170   BOOL _stop = FALSE;
02171   for (stmt=WN_first(body); stmt; stmt=WN_next(stmt))
02172     if (!Gather_Vectorizable_Ops(stmt, simd_ops,&SIMD_tmp_pool, innerloop)){
02173       _stop = TRUE;
02174       break;
02175     }
02176   // NOTE(review): simd_red_manager and curr_simd_red_manager keep pointing at the deleted object after this.
02177   if (LNO_Simd_Reduction && simd_red_manager)
02178     CXX_DELETE(simd_red_manager,&SIMD_tmp_pool);
02179
02180
02181   // The unit-stride scan is only run when the op scan did not already stop.
02182   BOOL move_invar = (!Get_Trace(TP_LNOPT, TT_LNO_GUARD) && LNO_Minvar);
02183   if(!_stop && !Unit_Stride_Reference(body, innerloop, !move_invar))
02184     _stop = TRUE;
02185
02186   if(_stop){
02187     MEM_POOL_Pop(&SIMD_tmp_pool);
02188     MEM_POOL_Delete(&SIMD_tmp_pool);
02189     return FALSE;
02190   }
02191
02192   // Dependence analysis runs on a copy of the loop, with its own DO_LOOP_INFO and copied dependence-graph entries.
02193   WN* loop_copy = LWN_Copy_Tree(innerloop, TRUE, LNO_Info_Map);
02194   DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
02195   DO_LOOP_INFO* new_loop_info =
02196     CXX_NEW(DO_LOOP_INFO(dli,&LNO_default_pool), &LNO_default_pool);
02197   Set_Do_Loop_Info(loop_copy, new_loop_info);
02198   adg=Array_Dependence_Graph;
02199   if (!adg->Add_Deps_To_Copy_Block(innerloop, loop_copy, TRUE)) {
02200     LNO_Erase_Dg_From_Here_In(loop_copy, adg);
02201     MEM_POOL_Pop(&SIMD_tmp_pool);
02202     MEM_POOL_Delete(&SIMD_tmp_pool);
02203     return FALSE;
02204   }
02205   MEM_POOL_Initialize(&SIMD_default_pool,"SIMD_default_pool",FALSE);
02206   MEM_POOL_Push(&SIMD_default_pool);
02207   BOOL Has_Dependencies = Analyse_Dependencies(loop_copy);
02208   LNO_Erase_Dg_From_Here_In(loop_copy, adg);
02209   MEM_POOL_Pop(&SIMD_default_pool);
02210   MEM_POOL_Delete(&SIMD_default_pool);
02211   // NOTE(review): loop_copy itself is not deleted anywhere in this function.
02212   MEM_POOL_Pop(&SIMD_tmp_pool);
02213   MEM_POOL_Delete(&SIMD_tmp_pool);
02214
02215   return !Has_Dependencies;
02216 }
02217 // Mark_Auto_Vectorizable_Loops: walks the structured control flow under wn and sets Vectorizable on the DO_LOOP_INFO of each candidate loop accepted by Is_Vectorizable_Loop.
02218 extern void Mark_Auto_Vectorizable_Loops (WN* wn)
02219 {
02220   const OPCODE opc = WN_opcode(wn);
02221   if (!OPCODE_is_scf(opc))
02222     return;
02223   if (opc == OPC_DO_LOOP) {
02224     const BOOL candidate = Do_Loop_Is_Good(wn) && Do_Loop_Is_Inner(wn) &&
02225       !Do_Loop_Has_Calls(wn) && !Do_Loop_Is_Mp(wn) && !Do_Loop_Has_Gotos(wn);
02226     if (!candidate)
02227       Mark_Auto_Vectorizable_Loops(WN_do_body(wn));   // not a candidate: look inside its body
02228     else if (Is_Vectorizable_Loop(wn))
02229       Get_Do_Loop_Info(wn, FALSE)->Vectorizable = TRUE;
02230     return;
02231   }
02232   if (opc == OPC_BLOCK) {
02233     WN* stmt = WN_first(wn);
02234     while (stmt != NULL) {
02235       WN* following = WN_next(stmt);   // read the successor before visiting stmt
02236       Mark_Auto_Vectorizable_Loops(stmt);
02237       stmt = following;
02238     }
02239     return;
02240   }
02241   for (UINT k = 0; k < WN_kid_count(wn); k++)
02242     Mark_Auto_Vectorizable_Loops(WN_kid(wn, k));
02243 }
02244
02245 // Simd_Mark_Code
02246 //   Walks the tree rooted at wn.  When wn is an ILOAD of an ARRAY address, an LDID, a CONST or an INTCONST,
02247 //   and its parent is an STID or an ISTORE whose kid1 is an ARRAY, with a parent descriptor type that is not
02248 //   M, C4, C8 or a vector type, an OPR_PAREN node is inserted between the parent and its kid0.
02249
02250 static void Simd_Mark_Code (WN* wn)
02251 {
02252 if ((WN_operator(wn) == OPR_ILOAD &&
02253 WN_operator(WN_kid0(wn)) == OPR_ARRAY) ||
02254 WN_operator(wn) == OPR_LDID ||
02255 WN_operator(wn) == OPR_CONST ||
02256 WN_operator(wn) == OPR_INTCONST) {
02257 WN* parent = LWN_Get_Parent(wn);
02258 if (((WN_operator(parent) == OPR_ISTORE &&
02259 WN_operator(WN_kid1(parent)) == OPR_ARRAY) ||
02260 WN_operator(parent) == OPR_STID) &&
02261 WN_desc(parent) != MTYPE_M &&
02262 WN_desc(parent) != MTYPE_C4 && WN_desc(parent) != MTYPE_C8 &&
02263 ! MTYPE_is_vector(WN_desc(parent))) {
02264 TYPE_ID desc = WN_rtype(wn);
02265 OPCODE paren_opc;
02266 if (!MTYPE_is_float(desc) && MTYPE_is_unsigned(desc)) { // unsigned integer types are mapped to the signed type of the same width
02267 switch(desc) {
02268 case MTYPE_U1: desc = MTYPE_I1; break;
02269 case MTYPE_U2: desc = MTYPE_I2; break;
02270 case MTYPE_U4: desc = MTYPE_I4; break;
02271 case MTYPE_U8: desc = MTYPE_I8; break;
02272 }
02273 }
02274 paren_opc = OPCODE_make_op(OPR_PAREN, desc, MTYPE_V);
02275 WN* new_parent;
02276 if (WN_operator(wn) == OPR_CONST) { // CONST: the PAREN node is created directly with WN_Create and wn becomes its kid0
02277 new_parent = WN_Create(OPR_PAREN, desc, MTYPE_V, 1);
02278 WN_kid0(new_parent) = wn;
02279 } else
02280 new_parent =
02281 LWN_CreateExp1(paren_opc, WN_kid(parent, 0));
02282 WN_kid0(parent) = new_parent;
02283 LWN_Parentize(parent); // presumably refreshes the parent links below 'parent', including the new PAREN -- TODO confirm
02284 }
02285 }
02286 // Recurse: BLOCK nodes through their statements (successor read first), other nodes through their kids.
02287 if (WN_opcode(wn)==OPC_BLOCK)
02288 for (WN* stmt=WN_first(wn); stmt;) {
02289 WN* next_stmt=WN_next(stmt);
02290 Simd_Mark_Code(stmt);
02291 stmt=next_stmt;
02292 }
02293 else
02294 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
02295 Simd_Mark_Code(WN_kid(wn, kid));
02296 }
02297 // Simd_Compute_Best_Align: with m = (offset + fn*size) % 16, returns 0 when m is 0 and (16 - m) / size otherwise.
02298 static INT Simd_Compute_Best_Align (INT offset, INT fn, INT size)
02299 {
02300   const INT misalign = (offset + fn*size) % 16;
02301   // Already on a 16 boundary: nothing to add.
02302   if (misalign == 0)
02303     return 0;
02304   return (16 - misalign) / size;
02305 }
02306
02307 // Per-unroll-type state used by Create_Unroll_Copy, which maps
02308 // add_to_base values 1, 2, 4 and 8 to indices 0..3:
02309 //   vec_unroll_preg_created[t] is tested before a preg store is built for index t;
02310 //   vec_unroll_preg_store[t]   receives the STID built by AWN_StidIntoSym for index t.
02311
02312 BOOL vec_unroll_preg_created[4];
02313 WN *vec_unroll_preg_store[4];
02314
02315
02316
02317 static void
02318 Create_Unroll_Copy(WN* copy, INT add_to_base,
02319 WN* orig, TYPE_ID index_type,
02320 WN* vec_preg_incr, WN* loop)
02321 {
02322 FmtAssert(WN_operator(copy) == WN_operator(orig), ("Handle this"));
02323 OPCODE add_opc= OPCODE_make_op(OPR_ADD,index_type, MTYPE_V);
02324 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
02325 INT aa_num, dim_max;
02326 WN* array_index;
02327
02328 if (WN_operator(copy) == OPR_ARRAY) {
02329 INT kid = WN_num_dim(copy)<<1;
02330 array_index = WN_kid(copy, kid);
02331 WN_kid(copy, kid) =
02332 LWN_CreateExp2(add_opc, array_index,
02333 WN_CreateIntconst(intconst_opc, add_to_base));
02334
02335 dim_max = WN_num_dim(copy);
02336 for (aa_num = 0; aa_num < dim_max - 1; aa_num ++) {
02337 LWN_Copy_Def_Use(WN_kid(orig, aa_num + dim_max + 1),
02338 WN_kid(copy, aa_num + dim_max + 1),
02339 Du_Mgr);
02340 }
02341
02342
02343
02344 if (ABS(WN_element_size(copy)) != add_to_base) {
02345 WN* parent = LWN_Get_Parent(copy);
02346 FmtAssert(WN_operator(parent) == OPR_ILOAD ||
02347 WN_operator(parent) == OPR_ISTORE, ("NYI"));
02348 TY_IDX ty_parent;
02349 if (WN_operator(parent) == OPR_ILOAD)
02350 ty_parent = TY_pointed(WN_load_addr_ty(parent));
02351 else
02352 ty_parent = TY_pointed(WN_ty(parent));
02353 Set_TY_align (ty_parent,
02354 ABS(WN_element_size(copy)) ?
02355 TY_log_base2(ABS(WN_element_size(copy))) :
02356 8 );
02357 TY_IDX ty_idx = 0;
02358 TY &ty = New_TY (ty_idx);
02359 TY_Init (ty, Pointer_Size, KIND_POINTER, Pointer_Mtype,
02360 Save_Str ("anon_ptr."));
02361 Set_TY_pointed (ty, ty_parent);
02362 if (WN_operator(parent) == OPR_ILOAD)
02363 WN_set_load_addr_ty (parent, ty_idx);
02364 else
02365 WN_set_ty (parent, ty_idx);
02366 }
02367 return;
02368 }
02369 else if (WN_operator(copy) == OPR_LDID && vec_preg_incr) {
02370 SYMBOL sym1(copy);
02371 SYMBOL sym2(vec_preg_incr);
02372 if (sym1 == sym2) {
02373 FmtAssert(MTYPE_is_vector(WN_desc(copy)), ("Handle this case"));
02374
02375 TYPE_ID vec_type = WN_desc(copy);
02376 INT unroll_type;
02377
02378 switch(add_to_base) {
02379 case 1: unroll_type = 0; break;
02380 case 2: unroll_type = 1; break;
02381 case 4: unroll_type = 2; break;
02382 case 8: unroll_type = 3; break;
02383 default: FmtAssert(FALSE, ("NYI"));
02384 }
02385
02386 if (!vec_unroll_preg_created[unroll_type]) {
02387 WN* body = WN_do_body(loop);
02388
02389 TCON unroll_const_tcon = Host_To_Targ(MTYPE_I4, add_to_base);
02390 ST* unroll_const_symbol =
02391 New_Const_Sym (Enter_tcon(unroll_const_tcon),
02392 Be_Type_Tbl(MTYPE_I4));
02393 WN* unroll_const =
02394 WN_CreateConst (OPR_CONST, vec_type, MTYPE_V,
02395 unroll_const_symbol);
02396 SYMBOL vec_unroll_symbol;
02397 WN* loop_enclosing_block = loop;
02398
02399 vec_unroll_symbol =
02400 Create_Preg_Symbol(sym1.Name(), vec_type);
02401 vec_unroll_preg_store[unroll_type] =
02402 AWN_StidIntoSym(&vec_unroll_symbol, unroll_const);
02403 while (WN_operator(loop_enclosing_block) != OPR_BLOCK)
02404 loop_enclosing_block =
02405 LWN_Get_Parent(loop_enclosing_block);
02406 LWN_Insert_Block_Before(loop_enclosing_block, loop,
02407 vec_unroll_preg_store[unroll_type]);
02408 WN_Set_Linenum ( vec_unroll_preg_store[unroll_type],
02409 WN_Get_Linenum(loop) );
02410 LWN_Parentize(vec_unroll_preg_store[unroll_type]);
02411 LWN_Set_Parent(vec_unroll_preg_store[unroll_type],
02412 loop_enclosing_block);
02413 vec_unroll_preg_created[unroll_type] = TRUE;
02414 }
02415
02416
02417 SYMBOL vec_unroll_preg_symbol(vec_unroll_preg_store[unroll_type]);
02418 WN *use_vec_unroll_preg = AWN_LdidSym(&vec_unroll_preg_symbol);
02419 WN* parent = LWN_Get_Parent(copy);
02420 INT kid = 0;
02421 while(WN_kid(parent, kid) != copy && kid < WN_kid_count(parent))
02422 kid++;
02423 FmtAssert(WN_kid(parent, kid), ("Handle this"));
02424 WN_kid(parent, kid) =
02425 AWN_Add(vec_type, use_vec_unroll_preg, copy);
02426 Du_Mgr->Add_Def_Use(vec_unroll_preg_store[unroll_type],
02427 use_vec_unroll_preg);
02428 LWN_Parentize(parent);
02429 }
02430 }
02431
02432
02433 for (INT kid = 0; kid < WN_kid_count(copy); kid ++)
02434 Create_Unroll_Copy(WN_kid(copy, kid), add_to_base,
02435 WN_kid(orig, kid), index_type, vec_preg_incr, loop);
02436
02437 return;
02438 }
02439
02440
02441
02442 static void Rename_Reductions_Rec (WN_OFFSET orig_offset, ST *orig_st,
02443 WN_OFFSET offset, ST *st,
02444 WN *node, TYPE_ID vmtype)
02445 {
02446 if (WN_operator(node) == OPR_LDID && WN_st(node) == orig_st &&
02447 WN_load_offset(node) == orig_offset) {
02448 WN_set_desc(node, vmtype);
02449 WN_set_rtype(node, vmtype);
02450 WN_load_offset(node) = offset;
02451 WN_st_idx(node) = ST_st_idx(st);
02452 }
02453 else if (WN_operator(node) == OPR_STID && WN_st(node) == orig_st &&
02454 WN_store_offset(node) == orig_offset) {
02455 WN_set_desc(node, vmtype);
02456 WN_store_offset(node) = offset;
02457 WN_st_idx(node) = ST_st_idx(st);
02458 }
02459 for (INT kid = 0; kid < WN_kid_count(node); kid ++)
02460 Rename_Reductions_Rec(orig_offset, orig_st,
02461 offset, st, WN_kid(node, kid), vmtype);
02462 }
02463
02464 static void Rename_Other_Reductions (WN_OFFSET orig_offset, ST *orig_st,
02465 WN_OFFSET offset,
02466 WN *start, WN *last,
02467 TYPE_ID vmtype)
02468 {
02469 WN *stmt = start;
02470 ST *st = WN_st(last);
02471
02472 while (stmt != last) {
02473 Rename_Reductions_Rec(orig_offset, orig_st, offset, st, stmt, vmtype);
02474 stmt = WN_next(stmt);
02475 }
02476 }
02477
02478
02479
02480 static BOOL Is_Last_Red_Stmt ( WN *stmt )
02481 {
02482 ST* st = WN_st(stmt);
02483 WN_OFFSET offset = WN_offset(stmt);
02484 WN* curr_stmt = WN_next(stmt);
02485
02486 while(curr_stmt) {
02487 if (WN_operator(curr_stmt) == OPR_STID && WN_st(curr_stmt) == st &&
02488 WN_store_offset(curr_stmt) == offset)
02489 return FALSE;
02490 curr_stmt = WN_next(curr_stmt);
02491 }
02492 return TRUE;
02493 }
02494
02495 static void Create_Stride1_Condition_If_Required (WN *array_base,
02496 WN **if_noncontig)
02497 {
02498 TYPE_ID mtype = Is_Target_32bit() ? MTYPE_I4 : MTYPE_I8;
02499 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,mtype, MTYPE_V);
02500 WN *value = WN_CreateIntconst(intconst_opc, 1);
02501 WN *stride;
02502 TY_IDX ty_dope = ST_type(WN_st(array_base));
02503
02504
02505
02506 if (TY_kind(ty_dope) != KIND_STRUCT ||
02507 strncmp(TY_name(ty_dope), ".dope.", 6) != 0){
02508 WN *array = LWN_Get_Parent(array_base);
02509 WN *inner_str_m = WN_kid(array, WN_num_dim(array));
02510 stride = LWN_Copy_Tree(inner_str_m, TRUE, LNO_Info_Map);
02511 LWN_Copy_Def_Use(inner_str_m, stride, Du_Mgr);
02512 WN_set_rtype(stride, mtype);
02513 if (*if_noncontig)
02514 *if_noncontig = WN_CAND( *if_noncontig, WN_EQ( mtype, stride, value ) );
02515 else
02516 *if_noncontig = WN_EQ( mtype, stride, value );
02517 return;
02518 }
02519 FLD_ITER fld_iter_dope = Make_fld_iter(TY_fld(ty_dope));
02520 while(!FLD_last_field(fld_iter_dope)) fld_iter_dope++;
02521 FLD_HANDLE fld_dims(fld_iter_dope);
02522 UINT64 dims_offset = FLD_ofst(fld_dims);
02523 TY_IDX ty_dims = FLD_type(fld_dims);
02524 if (TY_kind(ty_dims) != KIND_ARRAY) return;
02525 TY_IDX ty_dope_bnd = TY_etype(ty_dims);
02526 if (TY_kind(ty_dope_bnd) != KIND_STRUCT) return;
02527 FLD_ITER fld_iter_dope_bnd = Make_fld_iter(TY_fld(ty_dope_bnd));
02528 while(!FLD_last_field(fld_iter_dope_bnd)) fld_iter_dope_bnd++;
02529 FLD_HANDLE fld_dope_bnd(fld_iter_dope_bnd);
02530 UINT64 str_offset = FLD_ofst(fld_dope_bnd);
02531 TY_IDX ty_dope_bnd_str = FLD_type(fld_dope_bnd);
02532 if (TY_kind(ty_dope_bnd_str) != KIND_SCALAR) return;
02533 stride = LWN_Copy_Tree(array_base, TRUE, LNO_Info_Map);
02534 LWN_Copy_Def_Use(array_base, stride, Du_Mgr);
02535 WN_set_rtype(stride, mtype);
02536 WN_set_desc(stride, mtype);
02537 WN_offset(stride) = dims_offset + str_offset;
02538 if (*if_noncontig)
02539 *if_noncontig = WN_CAND( *if_noncontig, WN_EQ( mtype, stride, value ) );
02540 else
02541 *if_noncontig = WN_EQ( mtype, stride, value );
02542 return;
02543 }
02544
02545
02546
02547
02548
02549
02550
02551 static WN* Version_Loop(WN* wn_loop)
02552 {
02553 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02554 REDUCTION_MANAGER* rm = red_manager;
02555 WN_MAP version_map = WN_MAP_Create(&LNO_local_pool);
02556 WN* wn_copy = LWN_Copy_Tree(wn_loop, TRUE, LNO_Info_Map, TRUE, version_map);
02557 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02558 STACK<WN*> st_old(&LNO_local_pool);
02559 STACK<WN*> st_new(&LNO_local_pool);
02560 Prompf_Assign_Ids(wn_loop, wn_copy, &st_old, &st_new, TRUE);
02561 }
02562 BOOL all_internal = WN_Rename_Duplicate_Labels(wn_loop, wn_copy,
02563 Current_Func_Node, &LNO_local_pool);
02564 Is_True(all_internal, ("external labels renamed"));
02565
02566
02567 WN* wn_array[2];
02568 wn_array[0] = wn_loop;
02569 wn_array[1] = wn_copy;
02570 Unrolled_DU_Update(wn_array, 2, Do_Loop_Depth(wn_loop) - 1, TRUE, FALSE);
02571 dg->Versioned_Dependences_Update(wn_loop, wn_copy, Do_Loop_Depth(wn_loop),
02572 version_map);
02573 WN_MAP_Delete(version_map);
02574 if (rm != NULL)
02575 rm->Unroll_Update(wn_array, 2);
02576
02577
02578 WN* wn_total_cond = LWN_Make_Icon(Boolean_type, 1);
02579 LWN_Extract_From_Block(wn_loop);
02580 WN* wn_if = LWN_CreateIf(wn_total_cond, WN_CreateBlock(), WN_CreateBlock());
02581 LWN_Insert_Block_After(WN_then(wn_if), NULL, wn_loop);
02582 LWN_Insert_Block_After(WN_else(wn_if), NULL, wn_copy);
02583 WN_Set_Linenum(wn_if, WN_Get_Linenum(wn_loop));
02584 IF_INFO *ii =
02585 CXX_NEW(IF_INFO(&LNO_default_pool, TRUE, FALSE), &LNO_default_pool);
02586 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii);
02587 DOLOOP_STACK *stack = CXX_NEW(DOLOOP_STACK(&LNO_default_pool),
02588 &LNO_default_pool);
02589 Build_Doloop_Stack(wn_if, stack);
02590 LNO_Build_If_Access(wn_if, stack);
02591 return wn_if;
02592 }
02593
02594
02595
02596
02597
02598
02599
02600 static WN* Version_Region(WN* region, WN *wn_loop)
02601 {
02602 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
02603 REDUCTION_MANAGER* rm = red_manager;
02604 WN_MAP version_map = WN_MAP_Create(&LNO_local_pool);
02605 WN* region_copy = LWN_Copy_Tree(region, TRUE, LNO_Info_Map, TRUE, version_map);
02606 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
02607 STACK<WN*> st_old(&LNO_local_pool);
02608 STACK<WN*> st_new(&LNO_local_pool);
02609 Prompf_Assign_Ids(region, region_copy, &st_old, &st_new, TRUE);
02610 }
02611 BOOL all_internal = WN_Rename_Duplicate_Labels(region, region_copy,
02612 Current_Func_Node, &LNO_local_pool);
02613 Is_True(all_internal, ("external labels renamed"));
02614
02615
02616 WN* wn_array[2];
02617 wn_array[0] = region;
02618 wn_array[1] = region_copy;
02619 Unrolled_DU_Update(wn_array, 2, Do_Loop_Depth(wn_loop) - 1, TRUE, FALSE);
02620 dg->Versioned_Dependences_Update(region, region_copy, Do_Loop_Depth(wn_loop),
02621 version_map);
02622 WN_MAP_Delete(version_map);
02623 if (rm != NULL)
02624 rm->Unroll_Update(wn_array, 2);
02625
02626
02627 WN* wn_total_cond = LWN_Make_Icon(Boolean_type, 1);
02628 LWN_Extract_From_Block(region);
02629 WN* wn_if = LWN_CreateIf(wn_total_cond, WN_CreateBlock(), WN_CreateBlock());
02630 LWN_Insert_Block_After(WN_then(wn_if), NULL, region);
02631 LWN_Insert_Block_After(WN_else(wn_if), NULL, region_copy);
02632 WN_Set_Linenum(wn_if, WN_Get_Linenum(region));
02633 IF_INFO *ii =
02634 CXX_NEW(IF_INFO(&LNO_default_pool, TRUE, TRUE), &LNO_default_pool);
02635 WN_MAP_Set(LNO_Info_Map, wn_if, (void *) ii);
02636 DOLOOP_STACK *stack = CXX_NEW(DOLOOP_STACK(&LNO_default_pool),
02637 &LNO_default_pool);
02638 Build_Doloop_Stack(wn_if, stack);
02639 LNO_Build_If_Access(wn_if, stack);
02640 return wn_if;
02641 }
02642
02643 static BOOL Simd_Pre_Analysis(WN *innerloop, char *verbose_msg)
02644 {
02645
02646 DO_LOOP_INFO *dli = Get_Do_Loop_Info(innerloop);
02647 if(!dli){
02648 sprintf(verbose_msg, "Loop info was not set.");
02649 return FALSE;
02650 }
02651
02652 if(dli->Has_Bad_Mem || dli->Has_Calls || dli->Has_Gotos || dli->Has_Exits){
02653 sprintf(verbose_msg, "Loop has calls or Gotos");
02654 return FALSE;
02655 }
02656
02657 if(!dli->Is_Inner){
02658 sprintf(verbose_msg, "Loop is not innermost");
02659 return FALSE;
02660 }
02661
02662
02663 if(Too_Few_Iterations(dli->Est_Num_Iterations, WN_do_body(innerloop))){
02664 sprintf(verbose_msg, "Loop has too few iterations.");
02665 return FALSE;
02666 }
02667
02668
02669
02670
02671
02672
02673
02674 for (WN *stmt=WN_first(WN_do_body(innerloop)); stmt; stmt=WN_next(stmt)) {
02675 if (WN_operator(stmt) == OPR_STID) {
02676 USE_LIST* use_list=Du_Mgr->Du_Get_Use(stmt);
02677 if (!use_list) {
02678 sprintf(verbose_msg, "Loop has missing use_list.");
02679 return FALSE;
02680 }
02681 }
02682 }
02683
02684
02685
02686 if (Index_Variable_Live_At_Exit(innerloop)) {
02687
02688
02689
02690 if (Do_Loop_Is_Mp(innerloop) && !Early_MP_Processing){
02691 sprintf(verbose_msg, "Index variale lives at the exit of OpenMP loop.");
02692 return FALSE;
02693 }
02694
02695 if (!Upper_Bound_Standardize(WN_end(innerloop),TRUE)){
02696 sprintf(verbose_msg, "Loop upper bound can not be std.");
02697 return FALSE;
02698 }
02699
02700 Finalize_Index_Variable(innerloop,FALSE);
02701 scalar_rename(WN_start(innerloop));
02702 }
02703
02704 if (Loop_Has_Asm(innerloop)) {
02705 sprintf(verbose_msg, "Loop has inline assembly.");
02706 return FALSE;
02707 }
02708
02709
02710 if (find_loop_var_in_simple_ub(innerloop)==NULL){
02711 sprintf(verbose_msg, "Loop upper bound too complicated.");
02712 return FALSE;
02713 }
02714
02715
02716
02717 if (Do_Loop_Is_Mp(innerloop) && !Early_MP_Processing) {
02718 WN *enclosing_parallel_region, *region_pragma;
02719 BOOL reduction = FALSE, pdo = FALSE;
02720 enclosing_parallel_region = LWN_Get_Parent(innerloop);
02721 while(enclosing_parallel_region &&
02722 WN_operator(enclosing_parallel_region) != OPR_REGION)
02723 enclosing_parallel_region =
02724 LWN_Get_Parent(enclosing_parallel_region);
02725 #ifdef KEY
02726 if (PU_cxx_lang(Get_Current_PU()) &&
02727 Is_Eh_Or_Try_Region(enclosing_parallel_region))
02728 enclosing_parallel_region =
02729 LWN_Get_Parent(LWN_Get_Parent(enclosing_parallel_region));
02730 #endif
02731 FmtAssert(enclosing_parallel_region, ("NYI"));
02732 region_pragma = WN_first(WN_region_pragmas(enclosing_parallel_region));
02733 while(region_pragma && (!reduction || !pdo)) {
02734 if (WN_pragma(region_pragma) == WN_PRAGMA_REDUCTION)
02735 reduction = TRUE;
02736 else if (WN_pragma(region_pragma) == WN_PRAGMA_PDO_BEGIN)
02737 pdo = TRUE;
02738 region_pragma = WN_next(region_pragma);
02739 }
02740 if (pdo && reduction) {
02741 sprintf(verbose_msg, "Reduction loop in a DO region.");
02742 return FALSE;
02743 }
02744 }
02745
02746
02747 non_unit_stride = "unknown";
02748 if (!Unit_Stride_Reference(WN_do_body(innerloop), innerloop, TRUE)) {
02749 sprintf(verbose_msg, "Non-contiguous array \"%s\" reference exists.",
02750 non_unit_stride);
02751 return FALSE;
02752 }
02753
02754 if(!Simd_Benefit(WN_do_body(innerloop))){
02755 sprintf(verbose_msg, "Vectorization is not likely to be beneficial (try -LNO:simd=2 to vectorize it).");
02756 return FALSE;
02757 }
02758 return TRUE;
02759 }
02760
02761
02762
// State shared by Simd_Analysis and its SA_* helpers.
// Ops accepted by SA_Set_SimdOps_Info1 (allocated in Simd_Analysis).
02763 static STACK_OF_WN *vec_simd_ops;
// simd_operand_invariant[kid][n] is 1 when operand 'kid' of the n-th op
// pushed on vec_simd_ops is invariant or a constant, 0 otherwise.
02764 static INT *simd_operand_invariant[3];
// Per-op tables allocated and filled by SA_Set_SimdOps_Info2, indexed like
// vec_simd_ops->Top_nth().
02765 static BOOL *simd_op_last_in_loop;
02766 static SIMD_KIND *simd_op_kind;
// Result type of the loop's end test, set at the start of Simd_Analysis.
02767 static TYPE_ID index_type;
// Set by Simd_Analysis when a non-reduction scalar store has no
// equivalence class and LNO_Run_Simd is 2.
02768 static BOOL needs_scalar_expansion;
02769
02770 static BOOL SA_Set_SimdOps_Info1(WN* body,
02771 SCALAR_REF_STACK *simd_ops,
02772 char *verbose_msg)
02773 {
02774 STACK_OF_WN *invariant_ops =
02775 CXX_NEW(STACK_OF_WN(&SIMD_default_pool),&SIMD_default_pool);
02776 INT invariant_operands = 0;
02777 INT curr_num_simd = 0;
02778 WN* simd_op;
02779
02780 for (INT i=0; i<simd_ops->Elements(); i++){
02781 simd_op = simd_ops->Top_nth(i).Wn;
02782 WN* stmt=simd_op;
02783 WN* stmt1;
02784 BOOL under_scf=FALSE;
02785 while((stmt1=LWN_Get_Parent(stmt)) != body){
02786 stmt = stmt1;
02787 if (WN_opcode(stmt)==OPC_BLOCK){
02788 under_scf=TRUE;
02789 break;
02790 }
02791 }
02792 if (under_scf)
02793 continue;
02794 TYPE_ID rtype = WN_rtype(simd_op);
02795 TYPE_ID desc = WN_desc(simd_op);
02796 #if 1
02797
02798 FmtAssert(is_vectorizable_op(WN_operator(simd_op), rtype, desc),
02799 ("Handle this piece"));
02800 #endif
02801 if (!is_vectorizable_op(WN_operator(simd_op), rtype, desc))
02802 continue;
02803
02804 for (INT kid_no=0; kid_no<WN_kid_count(simd_op); kid_no++){
02805 WN* tmp=WN_kid(simd_op,kid_no);
02806 SIMD_OPERAND_KIND kind=simd_operand_kind(tmp,LWN_Get_Parent(body));
02807
02808
02809 if(kind==Invariant && WN_operator(tmp) == OPR_LDID)
02810 Count_Invariant(invariant_ops, tmp);
02811
02812 if (kind == Invariant ||
02813 (kind == Simple &&
02814 (WN_operator(tmp) == OPR_CONST ||
02815 WN_operator(tmp) == OPR_INTCONST)))
02816 simd_operand_invariant[kid_no][curr_num_simd] = 1;
02817 else
02818 simd_operand_invariant[kid_no][curr_num_simd] = 0;
02819 }
02820 curr_num_simd ++;
02821 vec_simd_ops->Push(simd_op);
02822
02823 if (WN_rtype(simd_op) != MTYPE_V &&
02824 WN_rtype(simd_op) != MTYPE_F8 && WN_rtype(simd_op) != MTYPE_I8) {
02825 invariant_operands = -1;
02826 }
02827 else if (WN_desc(simd_op) != MTYPE_V &&
02828 WN_desc(simd_op) != MTYPE_F8 && WN_desc(simd_op) != MTYPE_I8) {
02829 invariant_operands = -1;
02830 }
02831 }
02832
02833 if (vec_simd_ops->Elements()==0){
02834 sprintf(verbose_msg, "Loop has 0 vectorizable ops.\n");
02835 return FALSE;
02836 }
02837 if(invariant_operands != -1)
02838 invariant_operands = invariant_ops->Elements();
02839
02840 CXX_DELETE(invariant_ops, &SIMD_default_pool);
02841
02842 if ((Is_Target_64bit() && invariant_operands >= 16) ||
02843 (Is_Target_32bit() && invariant_operands >= 8)) {
02844 sprintf(verbose_msg, "Loop has too many loop invariants.");
02845 return FALSE;
02846 }
02847 return TRUE;
02848 }
02849
02850 static void SA_Set_SimdOps_Info2()
02851 {
02852
02853
02854 typedef HASH_TABLE<WN*, WN**> HTABLE_TYPE;
02855 HTABLE_TYPE *hash_table =
02856 CXX_NEW(HTABLE_TYPE(vec_simd_ops->Elements(),
02857 &LNO_local_pool), &LNO_local_pool);
02858 INT max_size = 0;
02859 simd_op_last_in_loop =
02860 CXX_NEW_ARRAY(BOOL, vec_simd_ops->Elements(),&LNO_local_pool);
02861 simd_op_kind =
02862 CXX_NEW_ARRAY(SIMD_KIND, vec_simd_ops->Elements(),&LNO_local_pool);
02863
02864 WN *istore, *simd_op;
02865 for (INT i=0; i < vec_simd_ops->Elements(); i++){
02866 simd_op=vec_simd_ops->Top_nth(i);
02867
02868 istore=LWN_Get_Parent(simd_op);
02869 WN* new_body=Find_Do_Body(istore);
02870 WN* new_loop=LWN_Get_Parent(new_body);
02871 WN **newwn = CXX_NEW_ARRAY(WN*,vec_simd_ops->Elements(),&LNO_local_pool);
02872 INT num = 1, j;
02873 BOOL found = FALSE;
02874 WN* found_eq = NULL;
02875 INT found_eq_loc;
02876
02877
02878 newwn[0] = simd_op;
02879 simd_op_kind[i] = INVALID;
02880 simd_op_last_in_loop[i] = TRUE;
02881
02882
02883
02884 for (j = i - 1; j >= 0 && !found; j --) {
02885 WN **checkwn = hash_table->Find(vec_simd_ops->Top_nth(j));
02886 for (INT k = 0; k < max_size && !found; k++) {
02887 if (checkwn[k] == simd_op) {
02888 found = TRUE;
02889 found_eq = vec_simd_ops->Top_nth(j);
02890 found_eq_loc = j;
02891 }
02892 }
02893 }
02894 if (found_eq) {
02895 hash_table->Enter(simd_op, hash_table->Find(found_eq));
02896 simd_op_kind[i] = simd_op_kind[found_eq_loc];
02897 simd_op_last_in_loop[found_eq_loc] = FALSE;
02898 continue;
02899 }
02900
02901
02902 for (j=i+1; j<vec_simd_ops->Elements(); j++) {
02903 WN *simd_op_next=vec_simd_ops->Top_nth(j);
02904
02905 WN* istore_next=LWN_Get_Parent(simd_op_next);
02906 WN* new_body_next = Find_Do_Body(istore_next);
02907 WN* new_loop_next=LWN_Get_Parent(new_body_next);
02908
02909 if (new_loop == new_loop_next)
02910 newwn[num++] = simd_op_next;
02911 }
02912 hash_table->Enter(simd_op, newwn);
02913 if (max_size < num)
02914 max_size = num;
02915
02916
02917
02918
02919 STACK_OF_WN *vec_simd_ops_tmp=
02920 CXX_NEW(STACK_OF_WN(&SIMD_default_pool),&SIMD_default_pool);
02921 for (j = 0; j < num; j++)
02922 vec_simd_ops_tmp->Push(newwn[j]);
02923 simd_op_kind[i] = Find_Simd_Kind(vec_simd_ops_tmp);
02924 }
02925 }
02926
02927
02928 static BOOL SA_Loop_Has_Dependence_Cycles(WN *innerloop, char *verbose_msg)
02929 {
02930 DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
02931 WN* loop_copy = LWN_Copy_Tree(innerloop, TRUE, LNO_Info_Map);
02932
02933 DO_LOOP_INFO* new_loop_info =
02934 CXX_NEW(DO_LOOP_INFO(dli,&LNO_default_pool), &LNO_default_pool);
02935 Set_Do_Loop_Info(loop_copy, new_loop_info);
02936 if (!adg->Add_Deps_To_Copy_Block(innerloop, loop_copy, TRUE)) {
02937 LNO_Erase_Dg_From_Here_In(loop_copy, adg);
02938 sprintf(verbose_msg, "Too many edges in Dependence graph.");
02939 return TRUE;
02940 }
02941 if (Analyse_Dependencies(loop_copy)) {
02942 LNO_Erase_Dg_From_Here_In(loop_copy, adg);
02943 sprintf(verbose_msg, "Loop has dependencies.");
02944 return TRUE;
02945 }
02946 LNO_Erase_Dg_From_Here_In(loop_copy, adg);
02947 return FALSE;
02948 }
02949
02950 static void SA_Version_F90_Loops_For_Contiguous(WN *innerloop)
02951 {
02952 WN *array_base, *simd_op;
02953 WN *if_noncontig = NULL;
02954 STACK_OF_WN *noncontig_array_dope=
02955 CXX_NEW(STACK_OF_WN(&SIMD_default_pool),&SIMD_default_pool);
02956 for (INT i=vec_simd_ops->Elements()-1; i >= 0; i--){
02957 simd_op=vec_simd_ops->Top_nth(i);
02958 for (INT kid = 0; kid < WN_kid_count(simd_op); kid ++){
02959 WN* opnd = WN_kid(simd_op, kid);
02960 if (WN_operator(opnd) == OPR_ILOAD &&
02961 WN_operator(WN_kid0(opnd)) == OPR_ARRAY &&
02962 WN_element_size(WN_kid0(opnd)) < 0 &&
02963 WN_operator(WN_array_base(WN_kid0(opnd))) == OPR_LDID) {
02964 array_base = WN_array_base(WN_kid0(opnd));
02965 BOOL entered = FALSE;
02966 for (INT id = 0; id < noncontig_array_dope->Elements(); id ++){
02967 if (SYMBOL(noncontig_array_dope->Bottom_nth(id)) ==
02968 SYMBOL(array_base)) {
02969 entered = TRUE;
02970 break;
02971 }
02972 }
02973 if (entered) continue;
02974 Create_Stride1_Condition_If_Required(array_base, &if_noncontig);
02975 noncontig_array_dope->Push(array_base);
02976 }
02977 }
02978 WN *parent = LWN_Get_Parent(simd_op);
02979 if (WN_operator(parent) == OPR_ISTORE &&
02980 WN_operator(WN_kid1(parent)) == OPR_ARRAY &&
02981 WN_element_size(WN_kid1(parent)) < 0 &&
02982 WN_operator(WN_array_base(WN_kid1(parent))) == OPR_LDID) {
02983 array_base = WN_array_base(WN_kid1(parent));
02984 BOOL entered = FALSE;
02985 for (INT id = 0; id < noncontig_array_dope->Elements(); id ++) {
02986 if (SYMBOL(noncontig_array_dope->Bottom_nth(id)) ==
02987 SYMBOL(array_base)) {
02988 entered = TRUE;
02989 break;
02990 }
02991 }
02992 if (entered) continue;
02993 Create_Stride1_Condition_If_Required(array_base, &if_noncontig);
02994 noncontig_array_dope->Push(array_base);
02995 }
02996 }
02997 if (if_noncontig) {
02998 if (Do_Loop_Is_Mp(innerloop)){
02999
03000 WN* enclosing_parallel_region;
03001 enclosing_parallel_region = LWN_Get_Parent(innerloop);
03002 while(enclosing_parallel_region &&
03003 WN_operator(enclosing_parallel_region) != OPR_REGION)
03004 enclosing_parallel_region =
03005 LWN_Get_Parent(enclosing_parallel_region);
03006 #ifdef KEY
03007 if (PU_cxx_lang(Get_Current_PU()) &&
03008 Is_Eh_Or_Try_Region(enclosing_parallel_region))
03009 enclosing_parallel_region =
03010 LWN_Get_Parent(LWN_Get_Parent(enclosing_parallel_region));
03011 #endif
03012 WN *stmt_before_region = WN_prev(enclosing_parallel_region);
03013 FmtAssert(stmt_before_region, ("NYI"));
03014 WN *parent_block = LWN_Get_Parent(enclosing_parallel_region);
03015 WN *wn_if = Version_Region(enclosing_parallel_region, innerloop);
03016 WN_if_test(wn_if) = if_noncontig;
03017 LWN_Insert_Block_After(parent_block, stmt_before_region, wn_if);
03018 LWN_Parentize(wn_if);
03019 } else {
03020 WN *stmt_before_loop = WN_prev(innerloop);
03021 WN *parent_block = LWN_Get_Parent(innerloop);
03022 WN *wn_if = Version_Loop(innerloop);
03023 WN_if_test(wn_if) = if_noncontig;
03024 LWN_Insert_Block_After(parent_block, stmt_before_loop, wn_if);
03025 LWN_Parentize(wn_if);
03026 }
03027 }
03028 }
03029
03030
// Full vectorization analysis of 'innerloop'.
// Returns TRUE when every check below passes; in that case the file-level
// tables (vec_simd_ops, simd_operand_invariant, simd_op_kind,
// simd_op_last_in_loop, index_type, needs_scalar_expansion) have been set.
// Returns FALSE with a reason written to 'verbose_msg' otherwise.
// Side effects visible here: the loop may be versioned (Fortran 90 only)
// and, when scalar expansion is needed, split and scalar-expanded.
03031 static BOOL Simd_Analysis(WN *innerloop, char *verbose_msg)
03032 {
03033
03034 WN *stmt;
03035 WN *body = WN_do_body(innerloop);
03036 needs_scalar_expansion = FALSE;
03037
03038 index_type=WN_rtype(WN_end(innerloop));
03039
03040 // Collect the candidate ops of every top-level statement of the body.
03041 non_vect_op = NULL;
03042 Induction_Seen = FALSE;
03043 Inconsistent_Induction = FALSE;
03044 SCALAR_REF_STACK *simd_ops =
03045 CXX_NEW(SCALAR_REF_STACK(&SIMD_default_pool),&SIMD_default_pool);
03046 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)){
03047 if (!Gather_Vectorizable_Ops(stmt, simd_ops, &SIMD_default_pool, innerloop)){
03048 if (!Inconsistent_Induction){
03049 if(non_vect_op==NULL)
03050 Report_Non_Vectorizable_Op(stmt);
03051 sprintf(verbose_msg, "Expression rooted at op \"%s\"(line %d) is not vectorizable.",
03052 non_vect_op, Srcpos_To_Line(WN_Get_Linenum(stmt)));
03053 }
03054 else sprintf(verbose_msg, "Induction loop has to be split");
03055 return FALSE;
03056 }
03057 }
03058
03059 if (simd_ops->Elements()==0) {
03060 sprintf(verbose_msg, "Loop has 0 vectorizable ops.");
03061 return FALSE;
03062 }
03063
03064 // A scalar store that is not a reduction and has no equivalence class
03065 for(stmt=WN_first(body); stmt && curr_simd_red_manager; stmt=WN_next(stmt)){
03066 if (WN_operator(stmt) == OPR_STID &&
03067 curr_simd_red_manager->Which_Reduction(stmt) == RED_NONE){
03068 STACK<WN*>* equivalence_class=
03069 Scalar_Equivalence_Class(stmt, Du_Mgr,&LNO_local_pool);
03070 if (!equivalence_class){
03071 if(LNO_Run_Simd == 2){
03072 needs_scalar_expansion = TRUE;
03073 break;
03074 }
03075 sprintf(verbose_msg, "Vectorization requires scalar expansion(implemented under -LNO:simd=2).");
03076 return FALSE;
03077 }
03078 }
03079 }
03080
03081 // Allocate the per-op tables (one array per operand position) and fill them.
03082 vec_simd_ops = CXX_NEW(STACK_OF_WN(&SIMD_default_pool),&SIMD_default_pool);
03083 for(INT ii=0; ii<3; ii++){
03084 simd_operand_invariant[ii] =
03085 CXX_NEW_ARRAY(INT,simd_ops->Elements(),&SIMD_default_pool);
03086 }
03087 if(!SA_Set_SimdOps_Info1(body, simd_ops, verbose_msg))
03088 return FALSE;
03089
03090
03091
03092
03093
03094
03095 // Reject the loop when an op with C8 rtype or desc has an operand that
03096 // was marked invariant.
03097 for (INT i=vec_simd_ops->Elements()-1; i >= 0; i--) {
03098 WN *simd_op=vec_simd_ops->Top_nth(i);
03099 if (WN_rtype(simd_op) != MTYPE_C8 && WN_desc(simd_op) != MTYPE_C8)
03100 continue;
03101 if (simd_operand_invariant[0][vec_simd_ops->Elements()-i-1] == 1 ||
03102 (WN_kid_count(simd_op) >= 2 &&
03103 simd_operand_invariant[1][vec_simd_ops->Elements()-i-1] == 1) ||
03104 (WN_kid_count(simd_op) >= 3 &&
03105 simd_operand_invariant[2][vec_simd_ops->Elements()-i-1] == 1)) {
03106 sprintf(verbose_msg, "Loop has C8 type invariant.\n");
03107 return FALSE;
03108 }
03109 }
03110
03111 // Dependence test, performed on a scratch copy of the loop.
03112 if(SA_Loop_Has_Dependence_Cycles(innerloop, verbose_msg))
03113 return FALSE;
03114
03115 // Fortran 90 only: version the loop on stride-1 tests where required.
03116 if (PU_src_lang(Get_Current_PU()) == PU_F90_LANG)
03117 SA_Version_F90_Loops_For_Contiguous(innerloop);
03118
03119 REF_LIST_STACK* writes = CXX_NEW(REF_LIST_STACK(&SIMD_default_pool),
03120 &SIMD_default_pool);
03121 REF_LIST_STACK* reads = CXX_NEW(REF_LIST_STACK(&SIMD_default_pool),
03122 &SIMD_default_pool);
03123
03124 SCALAR_STACK* scalar_writes = CXX_NEW(SCALAR_STACK(&SIMD_default_pool),
03125 &SIMD_default_pool);
03126 SCALAR_STACK* scalar_reads = CXX_NEW(SCALAR_STACK(&SIMD_default_pool),
03127 &SIMD_default_pool);
03128 SCALAR_REF_STACK* params = CXX_NEW(SCALAR_REF_STACK(&SIMD_default_pool),
03129 &SIMD_default_pool);
03130
03131 DOLOOP_STACK *stack1=CXX_NEW(DOLOOP_STACK(&SIMD_default_pool),
03132 &SIMD_default_pool);
03133 Build_Doloop_Stack(innerloop, stack1);
03134
03135 // Gather array and scalar references; -1 from the gatherer is an error.
03136 Init_Ref_Stmt_Counter();
03137 INT32 gather_status = 0;
03138 for (stmt=WN_first(body); stmt && gather_status != -1; stmt=WN_next(stmt)){
03139 gather_status=New_Gather_References(stmt,writes,reads,stack1,
03140 scalar_writes,scalar_reads,
03141 params,&SIMD_default_pool) ;
03142 }
03143 if (gather_status == -1) {
03144 sprintf(verbose_msg, "Error in gathering phase.");
03145 return FALSE;
03146 }
03147
03148 // Statement dependence graph: one vertex per top-level statement.
03149 SCC_DIRECTED_GRAPH16 *dep_g_p =
03150 CXX_NEW(SCC_DIRECTED_GRAPH16(ESTIMATED_SIZE,ESTIMATED_SIZE),&SIMD_default_pool);
03151
03152 // Maps each statement to its vertex in dep_g_p.
03153 WN2VINDEX *stmt_to_vertex=
03154 CXX_NEW(WN2VINDEX(ESTIMATED_SIZE, &SIMD_default_pool), &SIMD_default_pool);
03155
03156 for (stmt=WN_first(body); stmt; stmt=WN_next(stmt)){
03157 VINDEX16 v=dep_g_p->Add_Vertex();
03158 if (v==0){
03159 sprintf(verbose_msg, "Statement dependence graph problem.");
03160 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03161 return FALSE;
03162 }
03163 stmt_to_vertex->Enter(stmt, v);
03164 }
03165
03166 BINARY_TREE<NAME2BIT> *mapping_dictionary =
03167 CXX_NEW(BINARY_TREE<NAME2BIT>(&SIMD_default_pool), &SIMD_default_pool);
03168
03169
03170 FF_STMT_LIST expandable_ref_list;
03171
03172
03173
03174
03175 // simd_2 returns the count used to size Expandable_Scalar_Set below.
03176 UINT sym_count=simd_2(innerloop, scalar_reads, scalar_writes,
03177 mapping_dictionary, expandable_ref_list);
03178
03179 BIT_VECTOR Expandable_Scalar_Set(sym_count, &SIMD_default_pool);
03180
03181
03182 // Set the bit of every reference on expandable_ref_list.
03183 FF_STMT_ITER e_iter(&expandable_ref_list);
03184 for (FF_STMT_NODE* ref_node=e_iter.First(); !e_iter.Is_Empty();
03185 ref_node=e_iter.Next()) {
03186 NAME2BIT temp_map;
03187 temp_map.Set_Symbol(ref_node->Get_Stmt());
03188 Expandable_Scalar_Set.Set(mapping_dictionary->Find(temp_map)->
03189 Get_Data()->Get_Bit_Position());
03190 }
03191
03192 if (LNO_Test_Dump) {
03193 printf("Expandable_Scalar_Set=\n");
03194 Expandable_Scalar_Set.Print(stdout);
03195 }
03196
03197 WN_MAP sdm=WN_MAP_Create(&SIMD_default_pool);
03198 ARRAY_DIRECTED_GRAPH16 *sdg =
03199 CXX_NEW(ARRAY_DIRECTED_GRAPH16(100,500,sdm,LEVEL_ARRAY_GRAPH),
03200 &SIMD_default_pool);
03201
03202 for (stmt = WN_first(body); stmt; stmt = WN_next(stmt)){
03203 if (!Map_Stmt_To_Level_Graph(stmt,sdg)){
03204 FmtAssert(0, ("Error in mapping stmt to level graph\n"));
03205 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03206 CXX_DELETE(sdg, &SIMD_default_pool);
03207 WN_MAP_Delete(sdm);
03208 sprintf(verbose_msg, "Error in mapping stmt to level graph.");
03209 return FALSE;
03210 }
03211 }
03212
03213 BOOL status=Generate_Scalar_Dependence_For_Statement_Dependence_Graph(
03214 innerloop, scalar_reads, scalar_writes, params, sdg, red_manager,
03215 &Expandable_Scalar_Set, mapping_dictionary);
03216 if (status==FALSE) {
03217 sprintf(verbose_msg, "Statement dependence graph problem.");
03218 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03219 CXX_DELETE(sdg, &SIMD_default_pool);
03220 WN_MAP_Delete(sdm);
03221 return FALSE;
03222 }
03223
03224 status=Generate_Array_Dependence_For_Statement_Dependence_Graph(
03225 innerloop, reads, writes, sdg, red_manager, adg);
03226 if (status==FALSE) {
03227 sprintf(verbose_msg, "Statement dependence graph problem.");
03228 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03229 CXX_DELETE(sdg, &SIMD_default_pool);
03230 WN_MAP_Delete(sdm);
03231 return FALSE;
03232 }
03233
03234 // Copy the edges of sdg into the statement graph dep_g_p.
03235 EINDEX16 e=sdg->Get_Edge();
03236 while (e) {
03237 WN* source=sdg->Get_Wn(sdg->Get_Source(e));
03238 WN* sink=sdg->Get_Wn(sdg->Get_Sink(e));
03239 if (LWN_Get_Parent(source) == body || LWN_Get_Parent(sink) == body)
03240 // only the edge insertion is conditional; the advance below is not
03241 dep_g_p->Add_Unique_Edge(
03242 stmt_to_vertex->Find(source),
03243 stmt_to_vertex->Find(sink));
03244 e=sdg->Get_Next_Edge(e);
03245
03246 }
03247
03248
03249 // Condense the statement graph into its strongly connected components.
03250 SCC_DIRECTED_GRAPH16 *ac_g;
03251 ac_g = dep_g_p->Acyclic_Condensation(&SIMD_default_pool);
03252
03253 VINDEX16 total_scc = dep_g_p->Get_Scc_Count();
03254
03255 // Statement list and size per SCC; ids 1..total_scc are used as indices.
03256 FF_STMT_LIST *scc;
03257 scc = CXX_NEW_ARRAY(FF_STMT_LIST, total_scc+1, &SIMD_default_pool);
03258
03259 UINT *scc_size=CXX_NEW_ARRAY(UINT, total_scc+1, &SIMD_default_pool);
03260
03261 for (INT i=1; i<=total_scc; i++)
03262 scc_size[i]=0;
03263
03264 // Append each statement to the list of its SCC.
03265 for (stmt = WN_first(WN_do_body(innerloop)); stmt; stmt = WN_next(stmt)) {
03266 VINDEX16 scc_id;
03267 scc_id = dep_g_p->Get_Scc_Id(stmt_to_vertex->Find(stmt));
03268 scc[scc_id].Append(stmt, &SIMD_default_pool);
03269 scc_size[scc_id]++;
03270 }
03271 if (LNO_Test_Dump)
03272 for (INT i=1; i<=total_scc; i++) {
03273
03274 printf("Simd:scc %d:", i);
03275 FF_STMT_ITER s_iter(&scc[i]);
03276 INT j=0;
03277 for (FF_STMT_NODE *stmt_node=s_iter.First(); !s_iter.Is_Empty();
03278 stmt_node=s_iter.Next()) {
03279 stmt=stmt_node->Get_Stmt();
03280 Dump_WN(stmt,stdout,TRUE,4,4);
03281 j++;
03282 }
03283 printf(" has %d stmts\n", j);
03284 }
03285
03286 if (total_scc==1 && scc_size[1]>1) {
03287 CXX_DELETE(ac_g, &SIMD_default_pool);
03288 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03289 CXX_DELETE(sdg, &SIMD_default_pool);
03290 WN_MAP_Delete(sdm);
03291 sprintf(verbose_msg, "Loop has to be split.");
03292 return FALSE;
03293 }
03294
03295 UINT_DYN_ARRAY* new_loops;
03296 WN *simd_op;
03297
03298 new_loops=simd_fis_merge_scc_to_form_new_loop(total_scc,scc,scc_size,
03299 innerloop,ac_g);
03300
03301
03302 if (LNO_Run_Simd != 2 && new_loops->Lastidx() != 0) {
03303
03304
03305
03306
03307 // Outside -LNO:simd=2, a loop for which Lastidx() != 0 is accepted only
03308 // if some non-skipped op has an rtype or desc other than V/F8/I8.
03309 BOOL super_vector = FALSE;
03310 for (INT i=0; i<vec_simd_ops->Elements() && !super_vector; i++) {
03311 simd_op=vec_simd_ops->Top_nth(i);
03312 if (OPCODE_is_compare(WN_opcode(simd_op)) &&
03313 MTYPE_is_size_double(WN_desc(simd_op)))
03314 continue;
03315 if (WN_rtype(simd_op) != MTYPE_V &&
03316 WN_rtype(simd_op) != MTYPE_F8 && WN_rtype(simd_op) != MTYPE_I8)
03317 super_vector = TRUE;
03318 else if (WN_desc(simd_op) != MTYPE_V &&
03319 WN_desc(simd_op) != MTYPE_F8 && WN_desc(simd_op) != MTYPE_I8)
03320 super_vector = TRUE;
03321 }
03322
03323 if (!super_vector) {
03324 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03325 CXX_DELETE(ac_g, &SIMD_default_pool);
03326 CXX_DELETE(sdg, &SIMD_default_pool);
03327 WN_MAP_Delete(sdm);
03328 sprintf(verbose_msg,"Loop has to be scalar-expanded.");
03329 return FALSE;
03330 }
03331 }
03332
03333 // Split the loop and expand scalars when the earlier scan asked for it.
03334 if (needs_scalar_expansion)
03335 simd_fis_separate_loop_and_scalar_expand(new_loops,scc, innerloop,
03336 expandable_ref_list);
03337
03338
03339 // Fill simd_op_kind[] and simd_op_last_in_loop[] for the accepted ops.
03340 SA_Set_SimdOps_Info2();
03341
03342 CXX_DELETE(dep_g_p, &SIMD_default_pool);
03343 CXX_DELETE(ac_g, &SIMD_default_pool);
03344 CXX_DELETE(sdg, &SIMD_default_pool);
03345 WN_MAP_Delete(sdm);
03346
03347 return TRUE;
03348 }
03349
03350
03351 static INT Simd_Align_Best_Peel(STACK_OF_WN *vec_simd_ops, SIMD_KIND *simd_op_kind,
03352 INT **simd_op_best_align, WN *innerloop)
03353 {
03354 INT peel_benefit[16], peel;
03355 INT best_peel = 0, best_benefit = 0;
03356 for (peel = 0; peel < 16; peel ++) {
03357 peel_benefit[peel] = -1;
03358 for (INT j=vec_simd_ops->Elements()-1; j >= 0; j--) {
03359 WN* simd_op=vec_simd_ops->Top_nth(j);
03360
03361 if (simd_op_kind[j] == INVALID ||
03362 innerloop != LWN_Get_Parent(Find_Do_Body(simd_op)))
03363 continue;
03364
03365 for(INT k=0; k<4; k++)
03366 if (simd_op_best_align[k][j] == peel)
03367 peel_benefit[peel] ++;
03368 }
03369 }
03370
03371 for (peel = 0; peel < 16; peel ++) {
03372 if (peel_benefit[peel] > best_benefit) {
03373 best_benefit = peel_benefit[peel];
03374 best_peel = peel;
03375 }
03376 }
03377 return best_peel;
03378 }
03379
03380
03381 static BOOL Simd_Align_UB_Variable(WN *innerloop)
03382 {
03383 WN* end = WN_end(innerloop);
03384 SYMBOL loop_index(WN_index(innerloop));
03385 if (WN_kid_count(end) != 2)
03386 return TRUE;
03387 else if (WN_operator(WN_kid0(end)) == OPR_LDID &&
03388 loop_index == SYMBOL(WN_kid0(end))) {
03389 if (WN_operator(WN_kid1(end)) != OPR_INTCONST)
03390 return TRUE;
03391 } else if (WN_operator(WN_kid1(end)) == OPR_LDID &&
03392 loop_index == SYMBOL(WN_kid1(end))) {
03393 if (WN_operator(WN_kid0(end)) != OPR_INTCONST)
03394 return TRUE;
03395 }
03396 return FALSE;
03397 }
03398
03399
03400
03401 static INT Simd_Align_Analysis(INT init_align, WN *load_store,
03402 WN *simd_op, INT size,
03403 SIMD_KIND simd_kind,
03404 WN *innerloop, BOOL is_store)
03405 {
03406 INT alignment = init_align;
03407 WN *istore = is_store?load_store:LWN_Get_Parent(simd_op);
03408 TYPE_ID index_type=WN_rtype(WN_end(innerloop));
03409 WN *array0 = is_store?WN_kid1(load_store):WN_kid0(load_store);
03410 ACCESS_ARRAY* aa0=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,array0);
03411 WN *copy = LWN_Copy_Tree(WN_kid(array0, WN_kid_count(array0) - 1),
03412 TRUE, LNO_Info_Map);
03413 WN *start = LWN_Copy_Tree(WN_kid0(WN_start(innerloop)),
03414 TRUE, LNO_Info_Map);
03415 SYMBOL symbol(WN_index(innerloop));
03416 BOOL const_lb =
03417 WN_operator(WN_kid0(WN_start(innerloop))) == OPR_INTCONST;
03418 if (!const_lb) {
03419 if (WN_operator(WN_kid0(WN_start(innerloop))) == OPR_LDID) {
03420 SYMBOL symnew(WN_kid0(WN_start(innerloop)));
03421
03422 Replace_Symbol(copy, symbol, symnew, WN_kid0(WN_start(innerloop)));
03423
03424 }
03425 } else {
03426 if (WN_operator(copy) == OPR_LDID) {
03427 SYMBOL sym(copy);
03428 if (sym == symbol) {
03429 OPCODE intconst_opc=
03430 OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
03431 copy = WN_CreateIntconst(intconst_opc,
03432 WN_const_val(WN_kid0(WN_start(innerloop))));
03433 }
03434 } else
03435 Simd_Replace_With_Constant(copy, symbol,
03436 WN_kid0(WN_start(innerloop)),
03437 index_type);
03438 }
03439 copy = WN_Simplify_Tree(copy);
03440 if(WN_operator(WN_array_base(array0))==OPR_LDID)
03441
03442 alignment = -2;
03443 else if (WN_operator(copy) != OPR_INTCONST)
03444 alignment = -2;
03445 else if (!WN_has_sym(WN_array_base(array0)))
03446
03447 alignment = -2;
03448 else if (!is_store && ((WN_operator(simd_op) == OPR_CVT &&
03449 MTYPE_byte_size(WN_rtype(simd_op)) !=
03450 MTYPE_byte_size(WN_desc(simd_op))) ||
03451 (WN_operator(simd_op) == OPR_TRUNC &&
03452 MTYPE_byte_size(WN_rtype(simd_op)) !=
03453 MTYPE_byte_size(WN_desc(simd_op)))))
03454
03455 alignment = -2;
03456 else if (!is_store && Vec_Unit_Size[simd_kind] != MTYPE_byte_size(WN_desc(load_store)))
03457 alignment = -2;
03458 else {
03459 if (aa0->Dim(aa0->Num_Vec()-1)->Loop_Coeff(Do_Loop_Depth(innerloop))==
03460 -1)
03461 copy = LWN_CreateExp2(OPCODE_make_op(OPR_SUB,Mtype_TransferSign(MTYPE_I4, index_type), MTYPE_V),
03462 copy,
03463 WN_CreateIntconst(OPCODE_make_op(OPR_INTCONST,
03464 index_type,
03465 MTYPE_V),
03466 (16/ABS(WN_element_size(array0)))-1));
03467 INT fn = WN_const_val(copy);
03468
03469 WN *array_base = WN_array_base(array0);
03470 ST *st = WN_st(array_base);
03471
03472 TY_IDX ty_iload0;
03473 ST *base_st; INT64 offset;
03474 Base_Symbol_And_Offset(WN_st(array_base),
03475 &base_st, &offset);
03476
03477 BOOL var_base = WN_operator(array_base) != OPR_LDA;
03478 if (!var_base)
03479 offset += WN_lda_offset(array_base);
03480 offset += WN_offset(load_store);
03481
03482 if(!is_store || !var_base){
03483 ty_iload0 = ST_type(base_st);
03484 alignment = Simd_Compute_Best_Align(offset, fn, size);
03485 Set_TY_align_exp (ty_iload0, 4);
03486
03487 Base_Symbol_And_Offset(WN_st(array_base),
03488 &base_st, &offset);
03489 if (ST_sclass(base_st) == SCLASS_COMMON && offset%16 != 0)
03490 alignment = -2;
03491
03492
03493 if (ST_is_equivalenced(st))
03494 alignment = -2;
03495 }else{
03496 ty_iload0 = WN_ty(istore);
03497 if (TY_kind(ty_iload0) == KIND_POINTER)
03498 ty_iload0 = TY_pointed(ty_iload0);
03499 else ty_iload0 = MTYPE_To_TY(MTYPE_I1);
03500 }
03501
03502
03503 if (ST_sclass(base_st) == SCLASS_REG)
03504 alignment = -2;
03505
03506
03507
03508
03509 if (strncmp(ST_name(base_st),
03510 Temp_Symbol_Prefix "_misym",
03511 sizeof(Temp_Symbol_Prefix "_misym") - 1) == 0)
03512 alignment = -2;
03513
03514 if (WN_num_dim(array0) >= 2 &&
03515 (!WN_kid(array0, WN_num_dim(array0)) ||
03516 WN_operator(WN_kid(array0, WN_num_dim(array0))) != OPR_INTCONST ||
03517 (WN_const_val(WN_kid(array0, WN_num_dim(array0)))*
03518
03519
03520
03521 MTYPE_byte_size(WN_desc(istore) == MTYPE_V ?
03522 WN_rtype(istore) : WN_desc(istore)))%16 != 0))
03523 alignment = -2;
03524 if (alignment == -2 ||
03525 (TY_kind(ST_type(st)) == KIND_STRUCT &&
03526 strncmp(TY_name(ST_type(st)),".dope.",6) == 0) ||
03527 (TY_kind(ST_type(st)) == KIND_POINTER && !Align_Unsafe &&
03528 (!ST_pt_to_unique_mem(st) || ST_is_temp_var(st) ||
03529 ST_pt_to_compiler_generated_mem(st))))
03530 ;
03531 else if (TY_kind(ST_type(st)) == KIND_POINTER) {
03532 TY_IDX ty = TY_pointed(ST_type(st));
03533 Set_TY_align_exp(ty, 4);
03534 Set_TY_pointed(ST_type(st), ty);
03535 }
03536 else if (base_st->sym_class != CLASS_BLOCK &&
03537 ST_sclass(st) != SCLASS_FORMAL &&
03538 ST_sclass(st) != SCLASS_FORMAL_REF)
03539 Set_ST_type(base_st, ty_iload0);
03540 else if (ST_sclass(st) != SCLASS_AUTO &&
03541 ST_sclass(st) != SCLASS_EXTERN &&
03542 ST_sclass(st) != SCLASS_FORMAL &&
03543 ST_sclass(st) != SCLASS_FORMAL_REF) {
03544 TY_IDX st_ty_idx = ST_type(st);
03545 Set_TY_align_exp(st_ty_idx, 4);
03546 Set_ST_type(st, st_ty_idx);
03547 Set_STB_align(base_st, 16);
03548 Simd_Reallocate_Objects = TRUE;
03549 } else if (ST_sclass(st) == SCLASS_AUTO &&
03550 Stack_Alignment() == 16 &&
03551 ST_level(st) == Current_scope) {
03552 TY_IDX st_ty_idx = ST_type(st);
03553 Set_TY_align_exp(st_ty_idx, 4);
03554 Set_ST_type(st, st_ty_idx);
03555 }
03556 if (alignment == -2 ||
03557 (TY_kind(ST_type(st)) == KIND_STRUCT &&
03558 strncmp(TY_name(ST_type(st)),".dope.",6) == 0) ||
03559 (TY_kind(ST_type(st)) == KIND_POINTER && !Align_Unsafe &&
03560 (!ST_pt_to_unique_mem(st) || ST_is_temp_var(st) ||
03561 ST_pt_to_compiler_generated_mem(st))))
03562 alignment = -2;
03563 else if (ST_sclass(st) == SCLASS_AUTO &&
03564 (ST_level(st) != Current_scope ||
03565 Stack_Alignment() != 16))
03566 alignment = -2;
03567 else if (ST_sclass(st) == SCLASS_FORMAL ||
03568 ST_sclass(st) == SCLASS_FORMAL_REF)
03569 alignment = -2;
03570 else if (base_st->sym_class == CLASS_BLOCK &&
03571 alignment < 0)
03572 alignment = 0;
03573 }
03574 return alignment;
03575 }
03576
03577
03578 static void Simd_Align_Load_Store(WN *load_store, BOOL is_load)
03579 {
03580 TY_IDX ty_load_store = TY_pointed(is_load?
03581 WN_load_addr_ty(load_store):WN_ty(load_store));
03582 TY_IDX ty_idx = 0;
03583 TY &ty = New_TY (ty_idx);
03584 Set_TY_align (ty_load_store, 16);
03585
03586 TY_Init (ty, Pointer_Size, KIND_POINTER, Pointer_Mtype,
03587 Save_Str ("anon_ptr."));
03588 Set_TY_pointed (ty, ty_load_store);
03589 if(is_load)
03590 WN_set_load_addr_ty (load_store, ty_idx);
03591 else WN_set_ty (load_store, ty_idx);
03592 }
03593
03594
03595 static void Simd_Align_Array_References( STACK_OF_WN *vec_simd_ops,
03596 SIMD_KIND *simd_op_kind,
03597 INT **simd_op_best_align,
03598 INT best_peel,
03599 WN *innerloop)
03600 {
03601 for (INT j=vec_simd_ops->Elements()-1; j >= 0; j--) {
03602 WN* simd_op=vec_simd_ops->Top_nth(j);
03603
03604 if (innerloop != LWN_Get_Parent(Find_Do_Body(simd_op)) ||
03605 simd_op_kind[j] == INVALID)
03606 continue;
03607
03608 WN *load_store[4];
03609 load_store[0]= WN_kid0(simd_op);
03610 load_store[1]= WN_kid_count(simd_op)>1 ? WN_kid1(simd_op):NULL;
03611 load_store[2]= WN_kid_count(simd_op)>2 ? WN_kid2(simd_op):NULL;
03612 load_store[3]= LWN_Get_Parent(simd_op);
03613 if (WN_operator(simd_op) == OPR_INTRINSIC_OP)
03614 for(INT k=0; k< 3; k++)
03615 if(load_store[k]){
03616 FmtAssert(WN_operator(load_store[k]) == OPR_PARM, ("NYI"));
03617 load_store[k] = WN_kid0(load_store[k]);
03618 }
03619
03620 for(INT k=0; k<4; k++)
03621 if(simd_op_best_align[k][j] == best_peel)
03622 Simd_Align_Load_Store(load_store[k], k!=3);
03623 }
03624 }
03625
03626 static void Simd_Update_Index_Def_Use(WN *loop,WN *depend_loop, WN *what, SYMBOL sym)
03627 {
03628 DOLOOP_STACK sym_stack(&LNO_local_pool);
03629 Find_Nodes(OPR_LDID, sym, what, &sym_stack);
03630 for (int k = 0; k < sym_stack.Elements(); k++) {
03631 WN* wn_use = sym_stack.Bottom_nth(k);
03632
03633 Du_Mgr->Add_Def_Use(WN_start(depend_loop), wn_use);
03634 Du_Mgr->Add_Def_Use(WN_step(depend_loop), wn_use);
03635 }
03636 for (int k = 0; k < sym_stack.Elements(); k++) {
03637 WN* wn_use = sym_stack.Bottom_nth(k);
03638 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_use);
03639 def_list->Set_loop_stmt(loop);
03640 }
03641 }
03642
03643 static void Simd_Update_Loop_Info(WN *loop, WN *orig_loop,DO_LOOP_INFO *dli, BOOL set_unimportant)
03644 {
03645 DO_LOOP_INFO* new_dli =
03646 CXX_NEW(DO_LOOP_INFO(dli,&LNO_default_pool), &LNO_default_pool);
03647 Set_Do_Loop_Info(loop, new_dli);
03648
03649
03650 SYMBOL symbol(WN_index(loop));
03651 Simd_Update_Index_Def_Use(loop, loop,WN_end(loop),symbol);
03652 Simd_Update_Index_Def_Use(loop, loop,WN_do_body(loop),symbol);
03653 Simd_Update_Index_Def_Use(loop, loop,WN_step(loop),symbol);
03654 Simd_Update_Index_Def_Use(loop, orig_loop,WN_start(loop),symbol);
03655
03656 if (WN_kid_count(loop) == 6 && set_unimportant) {
03657 WN *loop_info = WN_do_loop_info(loop);
03658 WN_Set_Loop_Unimportant_Misc(loop_info);
03659 DO_LOOP_INFO *dli_p = Get_Do_Loop_Info(loop);
03660 dli_p->Set_Generally_Unimportant();
03661 }
03662 }
03663
03664 static void Simd_Copy_Def_Use_For_Loop_Stmt(WN* vloop, WN *ploop)
03665 {
03666
03667 SYMBOL index(WN_index(vloop));
03668
03669 WN *vbody = WN_do_body(vloop);
03670 WN *pbody = WN_do_body(ploop);
03671 WN *vstmt, *pstmt;
03672 for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
03673 vstmt != NULL && pstmt != NULL;
03674 vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
03675 Copy_Def_Use(vstmt, pstmt, index, FALSE);
03676
03677 for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
03678 vstmt != NULL && pstmt != NULL;
03679 vstmt=WN_next(vstmt), pstmt=WN_next(pstmt))
03680 LWN_Copy_Def_Use(WN_kid0(vstmt),WN_kid0(pstmt), Du_Mgr);
03681
03682 for (vstmt=WN_first(vbody), pstmt=WN_first(pbody);
03683 vstmt != NULL && pstmt != NULL;
03684 vstmt=WN_next(vstmt), pstmt=WN_next(pstmt)){
03685
03686 if (WN_operator(vstmt) == OPR_STID) {
03687 USE_LIST* use_list=Du_Mgr->Du_Get_Use(vstmt);
03688 USE_LIST_ITER uiter(use_list);
03689 DOLOOP_STACK sym_stack(&LNO_local_pool);
03690 SYMBOL symbol(vstmt);
03691 Find_Nodes(OPR_LDID, symbol, WN_do_body(ploop),&sym_stack);
03692 for (INT j = 0; j < sym_stack.Elements(); j++) {
03693 WN* wn_use = sym_stack.Bottom_nth(j);
03694 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_use);
03695 def_list->Set_loop_stmt(ploop);
03696 }
03697 if (use_list->Incomplete()) {
03698 Du_Mgr->Create_Use_List(pstmt);
03699 Du_Mgr->Du_Get_Use(pstmt)->Set_Incomplete();
03700 continue;
03701 }
03702 for (DU_NODE* u=uiter.First(); !uiter.Is_Empty(); u=uiter.Next()) {
03703 WN* use = u->Wn();
03704 Du_Mgr->Add_Def_Use(pstmt, use);
03705 }
03706 }
03707 }
03708 }
03709
03710
03711 static void Simd_Align_Generate_Peel_Loop(WN *vloop, INT best_peel, DO_LOOP_INFO *dli)
03712 {
03713 TYPE_ID index_type=WN_rtype(WN_end(vloop));
03714 OPCODE add_opc = OPCODE_make_op(OPR_ADD,index_type, MTYPE_V);
03715 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
03716 WN *start1 = LWN_Copy_Tree(WN_kid0(WN_start(vloop)),TRUE, LNO_Info_Map);
03717 WN *start2 = LWN_Copy_Tree(WN_kid0(WN_start(vloop)),TRUE, LNO_Info_Map);
03718 WN *pub = LWN_CreateExp2(add_opc,
03719 WN_CreateIntconst(intconst_opc, best_peel-1),start1);
03720 WN *vlb = LWN_CreateExp2(add_opc,
03721 WN_CreateIntconst(intconst_opc, best_peel),start2);
03722
03723
03724 Update_Symbol_Use_Def(WN_kid0(WN_start(vloop)), pub,
03725 WN_index(vloop), FALSE);
03726 Update_Symbol_Use_Def(WN_kid0(WN_start(vloop)), vlb,
03727 WN_index(vloop), FALSE);
03728
03729 WN *ploop = LWN_Copy_Tree(vloop, TRUE, LNO_Info_Map);
03730
03731 Copy_Def_Use(WN_start(vloop), WN_start(ploop),
03732 WN_index(vloop), FALSE );
03733 Simd_Copy_Def_Use_For_Loop_Stmt(vloop, ploop);
03734
03735
03736 WN *loop_end = WN_end(ploop);
03737
03738 LWN_Update_Def_Use_Delete_Tree(loop_end, Du_Mgr);
03739 WN_kid1(loop_end) = pub;
03740 LWN_Set_Parent(WN_end(ploop),ploop);
03741 LWN_Parentize(WN_end(ploop));
03742
03743
03744 WN *start_vloop = WN_start(vloop);
03745 WN_kid0(start_vloop) = vlb;
03746
03747 LWN_Set_Parent(WN_kid0(start_vloop),start_vloop);
03748 LWN_Set_Parent(start_vloop,vloop);
03749
03750
03751 Simd_Update_Loop_Info(ploop,vloop, dli,TRUE);
03752
03753
03754 LWN_Insert_Block_Before(LWN_Get_Parent(vloop),vloop,ploop);
03755
03756
03757 LWN_Parentize(vloop);
03758 LWN_Parentize(ploop);
03759 LWN_Set_Parent(ploop, LWN_Get_Parent(vloop));
03760
03761 Add_Vertices(WN_do_body(ploop));
03762 adg->Fission_Dep_Update(ploop, 1);
03763 adg->Fission_Dep_Update(vloop, 1);
03764 }
03765
03766 static INT Simd_Count_Good_Vector(STACK_OF_WN *vec_simd_ops, SIMD_KIND *simd_op_kind)
03767 {
03768 INT good_vector=0;
03769 for (INT i=0; i<vec_simd_ops->Elements(); i++) {
03770 WN *simd_op=vec_simd_ops->Top_nth(i);
03771 if (simd_op_kind[i] == INVALID)
03772 continue;
03773 if (OPCODE_is_compare(WN_opcode(simd_op)) &&
03774 MTYPE_is_size_double(WN_desc(simd_op)))
03775 continue;
03776 if (WN_rtype(simd_op) != MTYPE_V &&
03777 WN_rtype(simd_op) != MTYPE_C8 &&
03778 WN_rtype(simd_op) != MTYPE_F8 && WN_rtype(simd_op) != MTYPE_I8)
03779 good_vector ++;
03780 else if (WN_desc(simd_op) != MTYPE_V &&
03781 WN_rtype(simd_op) != MTYPE_C8 &&
03782 WN_desc(simd_op) != MTYPE_F8 && WN_desc(simd_op) != MTYPE_I8)
03783 good_vector ++;
03784 }
03785 return good_vector;
03786 }
03787
03788
03789 static WN *Simd_Create_Remainder_Loop(WN *innerloop)
03790 {
03791 WN *remainderloop = LWN_Copy_Tree(innerloop, TRUE, LNO_Info_Map);
03792 if (!adg->Add_Deps_To_Copy_Block(innerloop, remainderloop, TRUE))
03793 FmtAssert(FALSE, ("Probably too many edges in dependence graph."));
03794
03795
03796
03797 Copy_Def_Use(WN_end(innerloop), WN_end(remainderloop),
03798 WN_index(innerloop), FALSE );
03799 Simd_Copy_Def_Use_For_Loop_Stmt(innerloop, remainderloop);
03800
03801 return remainderloop;
03802 }
03803
03804
03805 static void Simd_Handle_Negative_Coefficient(
03806 WN *parent,
03807 INT which_kid,
03808 WN *array,
03809 WN *loop,
03810 BOOL no_shuffle)
03811 {
03812 FmtAssert(WN_element_size(array), ("NYI"));
03813 INT incr = 16/ABS(WN_element_size(array));
03814 ACCESS_ARRAY* aa = (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,array);
03815 if (aa->Dim(aa->Num_Vec()-1)->Loop_Coeff(Do_Loop_Depth(loop))==-1){
03816 TYPE_ID vector_type;
03817 WN *opnd = LWN_Get_Parent(array);
03818 switch(ABS(WN_element_size(array))) {
03819 case 1: vector_type = MTYPE_V16I1; break;
03820 case 2: vector_type = MTYPE_V16I2; break;
03821 case 4:
03822 if (MTYPE_is_float(WN_desc(opnd)))
03823 vector_type = MTYPE_V16F4;
03824 else
03825 vector_type = MTYPE_V16I4;
03826 break;
03827 case 8:
03828 if (MTYPE_is_float(WN_desc(opnd)))
03829 vector_type = MTYPE_V16F8;
03830 else
03831 vector_type = MTYPE_V16I8;
03832 break;
03833 default: FmtAssert(FALSE, ("NYI"));
03834 }
03835 TYPE_ID index_type=WN_rtype(WN_end(loop));
03836 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
03837 OPCODE sub_opc= OPCODE_make_op(OPR_SUB,Mtype_TransferSign(MTYPE_I4, index_type), MTYPE_V);
03838 WN* index = WN_kid(array, WN_num_dim(array)<<1);
03839 WN_kid(array, WN_num_dim(array)<<1) =
03840 LWN_CreateExp2(sub_opc, index,
03841 WN_CreateIntconst(intconst_opc, incr-1));
03842 LWN_Parentize(array);
03843 if(no_shuffle == FALSE){
03844 WN_kid(parent, which_kid) =
03845 LWN_CreateExp1(OPCODE_make_op(OPR_SHUFFLE, vector_type,
03846 vector_type),
03847 WN_kid(parent, which_kid));
03848 WN_offset(WN_kid(parent, which_kid)) = 0 ;
03849 }
03850 LWN_Parentize(parent);
03851 }
03852 }
03853
03854 static void Simd_Add_Shuffle_For_Negative_Coefficient(WN* simd_op, WN *loop)
03855 {
03856
03857 for (INT kid = 0; kid < WN_kid_count(simd_op); kid ++){
03858 WN *opnd = WN_kid(simd_op, kid);
03859 if (WN_operator(opnd) == OPR_ILOAD &&
03860 WN_operator(WN_kid0(opnd)) == OPR_ARRAY)
03861 Simd_Handle_Negative_Coefficient(simd_op, kid, WN_kid0(opnd), loop, FALSE);
03862 }
03863
03864 WN *parent = LWN_Get_Parent(simd_op);
03865 if(WN_operator(parent) == OPR_ISTORE &&
03866 WN_operator(WN_kid1(parent)) == OPR_ARRAY){
03867
03868 BOOL no_shuffle = (WN_operator(WN_kid0(parent)) == OPR_PAREN &&
03869 ((WN_operator(WN_kid0(WN_kid0(parent))) == OPR_LDID &&
03870 SYMBOL(WN_kid0(WN_kid0(parent))) != SYMBOL(WN_index(loop))) ||
03871 WN_operator(WN_kid0(WN_kid0(parent))) == OPR_INTCONST ||
03872 WN_operator(WN_kid0(WN_kid0(parent))) == OPR_CONST));
03873 Simd_Handle_Negative_Coefficient(parent,0,WN_kid1(parent), loop, no_shuffle);
03874 }
03875 }
03876
03877 static TYPE_ID Simd_Get_Vector_Type(WN *istore)
03878 {
03879 TYPE_ID vmtype, type;
03880 if (!OPCODE_is_store(WN_opcode(istore))){
03881
03882 WN* stmt = istore;
03883 while(stmt && !OPCODE_is_store(WN_opcode(stmt)) &&
03884 WN_operator(stmt) != OPR_DO_LOOP &&
03885
03886 WN_operator(stmt) != OPR_CVT &&
03887 WN_operator(stmt) != OPR_TRUNC) {
03888 stmt = LWN_Get_Parent(stmt);
03889 }
03890 if (!stmt || WN_operator(stmt) == OPR_DO_LOOP)
03891 type = WN_rtype(istore);
03892 else type = WN_desc(stmt);
03893 } else type = WN_desc(istore);
03894 switch(type) {
03895 case MTYPE_V16C8: case MTYPE_C8:
03896 vmtype = MTYPE_V16C8;
03897 break;
03898 case MTYPE_V16C4: case MTYPE_C4:
03899 vmtype = MTYPE_V16C4;
03900 break;
03901 case MTYPE_V16F4: case MTYPE_F4:
03902 vmtype = MTYPE_V16F4;
03903 break;
03904 case MTYPE_V16F8: case MTYPE_F8:
03905 vmtype = MTYPE_V16F8;
03906 break;
03907 case MTYPE_V16I1: case MTYPE_I1:
03908 case MTYPE_U1:
03909 vmtype = MTYPE_V16I1;
03910 break;
03911 case MTYPE_V16I2: case MTYPE_I2:
03912 case MTYPE_U2:
03913 vmtype = MTYPE_V16I2;
03914 break;
03915 case MTYPE_V16I4: case MTYPE_I4:
03916 case MTYPE_U4:
03917 vmtype = MTYPE_V16I4;
03918 break;
03919 case MTYPE_V16I8: case MTYPE_I8:
03920 case MTYPE_U8:
03921 vmtype = MTYPE_V16I8;
03922 break;
03923 }
03924 return vmtype;
03925 }
03926
03927
03928 static WN *Simd_Vectorize_Constants(WN *const_wn,
03929 WN *istore,
03930 WN *simd_op)
03931 {
03932 FmtAssert(const_wn && (WN_operator(const_wn)==OPR_INTCONST ||
03933 WN_operator(const_wn)==OPR_CONST),("not a constant operand"));
03934
03935 TYPE_ID desc = WN_desc(const_wn);
03936 TYPE_ID type;
03937 TCON tcon;
03938 ST *sym;
03939 if (WN_desc(istore) == MTYPE_V)
03940 type = WN_rtype(istore);
03941 else
03942 type = WN_desc(istore);
03943 if (WN_operator(simd_op) == OPR_PARM &&
03944 WN_operator(istore) == OPR_INTRINSIC_OP &&
03945 WN_intrinsic(istore) == INTRN_SUBSU2) {
03946 type = WN_desc(LWN_Get_Parent(istore));
03947 }
03948 if (!MTYPE_is_float(type)){
03949 if (MTYPE_is_size_double(type)){
03950 INT64 value = (INT64)WN_const_val(const_wn);
03951 tcon = Host_To_Targ(MTYPE_I8, value);
03952 } else {
03953 INT value = (INT)WN_const_val(const_wn);
03954 tcon = Host_To_Targ(MTYPE_I4, value);
03955 }
03956 sym = New_Const_Sym (Enter_tcon (tcon),
03957 Be_Type_Tbl(type));
03958 }
03959 switch (type) {
03960 case MTYPE_F4: case MTYPE_V16F4:
03961 WN_set_rtype(const_wn, MTYPE_V16F4);
03962 break;
03963 case MTYPE_F8: case MTYPE_V16F8:
03964 WN_set_rtype(const_wn, MTYPE_V16F8);
03965 break;
03966 case MTYPE_C4: case MTYPE_V16C4:
03967 WN_set_rtype(const_wn, MTYPE_V16C4);
03968 break;
03969 case MTYPE_U1: case MTYPE_I1: case MTYPE_V16I1:
03970 const_wn = WN_CreateConst (OPR_CONST, MTYPE_V16I1, MTYPE_V, sym);
03971 break;
03972 case MTYPE_U2: case MTYPE_I2: case MTYPE_V16I2:
03973 const_wn = WN_CreateConst (OPR_CONST, MTYPE_V16I2, MTYPE_V, sym);
03974 break;
03975 case MTYPE_U4: case MTYPE_I4: case MTYPE_V16I4:
03976 const_wn = WN_CreateConst (OPR_CONST, MTYPE_V16I4, MTYPE_V, sym);
03977 break;
03978 case MTYPE_U8: case MTYPE_I8: case MTYPE_V16I8:
03979 const_wn = WN_CreateConst (OPR_CONST, MTYPE_V16I8, MTYPE_V, sym);
03980 break;
03981 }
03982
03983 return const_wn;
03984 }
03985
03986 static WN *Simd_Vectorize_Invariants(WN *inv_wn,
03987 WN *istore,
03988 WN *simd_op)
03989 {
03990 TYPE_ID desc = WN_desc(inv_wn);
03991 TYPE_ID type;
03992 if (WN_desc(istore) == MTYPE_V)
03993 type = WN_rtype(istore);
03994 else
03995 type = WN_desc(istore);
03996
03997 if (WN_operator(simd_op) == OPR_CVT || WN_operator(simd_op) == OPR_TRUNC)
03998 type = desc;
03999
04000 switch (type) {
04001 case MTYPE_V16C8: case MTYPE_C8:
04002
04003
04004 WN_set_rtype(inv_wn, MTYPE_V16C8);
04005 WN_set_desc(inv_wn, MTYPE_V16C8);
04006 break;
04007 case MTYPE_V16C4: case MTYPE_C4:
04008 WN_set_rtype(inv_wn, MTYPE_F8);
04009 WN_set_desc(inv_wn, MTYPE_F8);
04010 inv_wn =
04011 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16C4, MTYPE_F8),
04012 inv_wn);
04013 break;
04014 case MTYPE_V16F4: case MTYPE_F4:
04015 inv_wn =
04016 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F4, desc),
04017 inv_wn);
04018 break;
04019 case MTYPE_V16F8: case MTYPE_F8:
04020 inv_wn =
04021 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16F8, desc),
04022 inv_wn);
04023 break;
04024 case MTYPE_V16I1: case MTYPE_U1: case MTYPE_I1:
04025 inv_wn =
04026 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16I1, MTYPE_I1),
04027 inv_wn);
04028 break;
04029 case MTYPE_V16I2: case MTYPE_U2: case MTYPE_I2:
04030 inv_wn =
04031 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16I2, MTYPE_I2),
04032 inv_wn);
04033 break;
04034 case MTYPE_V16I4: case MTYPE_U4: case MTYPE_I4:
04035 inv_wn =
04036 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16I4, MTYPE_I4),
04037 inv_wn);
04038 break;
04039 case MTYPE_V16I8: case MTYPE_U8: case MTYPE_I8:
04040 inv_wn =
04041 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, MTYPE_V16I8, MTYPE_I8),
04042 inv_wn);
04043 break;
04044 }
04045 return inv_wn;
04046 }
04047
04048 static void Simd_Vectorize_Intrinsics(WN *simd_op)
04049 {
04050 if (WN_intrinsic(simd_op) == INTRN_SUBSU2) {
04051 WN_intrinsic(simd_op) = INTRN_SUBSV16I2;
04052 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16I2);
04053 WN_set_rtype(WN_kid1(simd_op), MTYPE_V16I2);
04054 } else {
04055 INTRINSIC intrn = WN_intrinsic(simd_op);
04056 switch(intrn) {
04057
04058 case INTRN_F8SIGN:
04059 WN_intrinsic(simd_op) = INTRN_SIGNV16F8;
04060 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04061 WN_set_rtype(WN_kid1(simd_op), MTYPE_V16F8);
04062 break;
04063
04064 case INTRN_F4SIGN:
04065 WN_intrinsic(simd_op) = INTRN_SIGNV16F4;
04066 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04067 WN_set_rtype(WN_kid1(simd_op), MTYPE_V16F4);
04068 break;
04069
04070 case INTRN_F8EXPEXPR:
04071 WN_intrinsic(simd_op) = INTRN_V16F8EXPEXPR;
04072 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04073 break;
04074 case INTRN_F4EXPEXPR:
04075 WN_intrinsic(simd_op) = INTRN_V16F4EXPEXPR;
04076 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04077 break;
04078
04079 case INTRN_F8SINH:
04080 WN_intrinsic(simd_op) = INTRN_V16F8SINH;
04081 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04082 break;
04083 #if 0 //for bug 8931 release this when single precision vec ready
04084 case INTRN_F4SINH:
04085 WN_intrinsic(simd_op) = INTRN_V16F4SINH;
04086 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04087 break;
04088 #endif
04089 case INTRN_F8COSH:
04090 WN_intrinsic(simd_op) = INTRN_V16F8COSH;
04091 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04092 break;
04093 #if 0 //for bug 8931 release this when single precision vec ready
04094 case INTRN_F4COSH:
04095 WN_intrinsic(simd_op) = INTRN_V16F4COSH;
04096 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04097 break;
04098 #endif
04099 case INTRN_F4EXP:
04100 WN_intrinsic(simd_op) = INTRN_V16F4EXP;
04101 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04102 break;
04103 case INTRN_F8EXP:
04104 WN_intrinsic(simd_op) = INTRN_V16F8EXP;
04105 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04106 break;
04107 case INTRN_F4LOG:
04108 WN_intrinsic(simd_op) = INTRN_V16F4LOG;
04109 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04110 break;
04111 case INTRN_F8LOG:
04112 WN_intrinsic(simd_op) = INTRN_V16F8LOG;
04113 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04114 break;
04115 case INTRN_F4LOG10:
04116 WN_intrinsic(simd_op) = INTRN_V16F4LOG10;
04117 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04118 break;
04119 case INTRN_F8LOG10:
04120 WN_intrinsic(simd_op) = INTRN_V16F8LOG10;
04121 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04122 break;
04123 case INTRN_F4SIN:
04124 WN_intrinsic(simd_op) = INTRN_V16F4SIN;
04125 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04126 break;
04127 case INTRN_F8SIN:
04128 WN_intrinsic(simd_op) = INTRN_V16F8SIN;
04129 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04130 break;
04131 case INTRN_F4COS:
04132 WN_intrinsic(simd_op) = INTRN_V16F4COS;
04133 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F4);
04134 break;
04135 case INTRN_F8COS:
04136 WN_intrinsic(simd_op) = INTRN_V16F8COS;
04137 WN_set_rtype(WN_kid0(simd_op), MTYPE_V16F8);
04138 break;
04139 }
04140 }
04141 }
04142
04143 static INT Simd_Unroll_Times_By_SimdKind(SIMD_KIND simd_kind)
04144 {
04145 INT vect= 1;
04146 switch(simd_kind) {
04147 case V16I1: vect = 16; break;
04148 case V16I2: vect = 8; break;
04149 case V16F4: vect = 4; break;
04150 case V16F8: vect = 2; break;
04151 default: vect = 1; break;
04152 }
04153 return vect;
04154 }
04155
04156 static INT Simd_Unroll_Times_By_VectorType(TYPE_ID vmtype)
04157 {
04158 INT vect = 1;
04159 switch (vmtype){
04160 case MTYPE_V16C4: case MTYPE_V16I8: case MTYPE_V16F8: vect = 2; break;
04161 case MTYPE_V16I4: case MTYPE_V16F4: vect = 4; break;
04162 case MTYPE_V16I2: vect = 8; break;
04163 case MTYPE_V16I1: vect = 16;break;
04164 default: vect=1;break;
04165 }
04166 return vect;
04167 }
04168
04169 static void Simd_Update_Copy_Array_Index(WN *copy, WN *orig,
04170 INT add_to_base, TYPE_ID index_type)
04171 {
04172 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
04173 OPCODE add_opc= OPCODE_make_op(OPR_ADD,index_type, MTYPE_V);
04174 INT kid_for0 = WN_num_dim(copy)<<1;
04175 WN_kid(copy, kid_for0) =
04176 LWN_CreateExp2(add_opc, WN_kid(copy, kid_for0),
04177 WN_CreateIntconst(intconst_opc, add_to_base));
04178
04179
04180 for (INT aa_num = 0; aa_num < WN_num_dim(copy) - 1; aa_num ++){
04181 INT dim = aa_num + WN_num_dim(copy) + 1;
04182 LWN_Copy_Def_Use(WN_kid(orig,dim), WN_kid(copy,dim),Du_Mgr);
04183 }
04184 }
04185
04186 static void Simd_Unroll_Statement( INT unroll_times, INT add_to_base,
04187 WN *istore, WN *vec_index_preg_store,
04188 WN *innerloop, TYPE_ID index_type)
04189 {
04190 WN *copy_simd_op, *copy, *iload_copy=NULL, *origA=NULL;
04191
04192 for (INT k = 1, sum = add_to_base; k < unroll_times; k ++){
04193 copy = LWN_Copy_Tree(istore, TRUE, LNO_Info_Map);
04194 LWN_Copy_Def_Use(WN_kid0(istore),WN_kid0(copy), Du_Mgr);
04195 LWN_Copy_Def_Use(WN_kid1(istore),WN_kid1(copy), Du_Mgr);
04196 LWN_Set_Parent(copy, LWN_Get_Parent(istore));
04197 copy_simd_op = WN_kid0(copy);
04198
04199 for(INT k=0; k < WN_kid_count(copy_simd_op); k++){
04200 iload_copy = WN_kid0(WN_kid(WN_kid0(copy),k));
04201 origA = WN_kid0(WN_kid(WN_kid0(istore),k));
04202 if(iload_copy && WN_operator(iload_copy) == OPR_ARRAY){
04203 Simd_Update_Copy_Array_Index(iload_copy, origA, add_to_base, index_type);
04204 }
04205 else if(iload_copy && WN_operator(iload_copy) == OPR_ILOAD &&
04206 WN_operator(WN_kid0(iload_copy)) == OPR_ARRAY &&
04207 WN_operator(LWN_Get_Parent(iload_copy)) == OPR_SHUFFLE){
04208 iload_copy = WN_kid0(iload_copy);
04209 origA = WN_kid0(origA);
04210 Simd_Update_Copy_Array_Index(iload_copy, origA, -add_to_base, index_type);
04211 }
04212 else if(iload_copy && WN_operator(iload_copy) == OPR_SHUFFLE &&
04213 WN_operator(WN_kid0(iload_copy)) == OPR_ILOAD &&
04214 WN_operator(WN_kid0(WN_kid0(iload_copy))) == OPR_ARRAY) {
04215 iload_copy = WN_kid0(WN_kid0(iload_copy));
04216 origA = WN_kid0(WN_kid0(origA));
04217 Simd_Update_Copy_Array_Index(iload_copy, origA, -add_to_base, index_type);
04218 }
04219 else if(iload_copy)
04220 Create_Unroll_Copy(WN_kid(WN_kid0(copy), k), add_to_base,
04221 WN_kid(WN_kid0(istore), k), index_type,
04222 vec_index_preg_store, innerloop);
04223 }
04224
04225
04226 ACCESS_ARRAY* aa = (ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,
04227 WN_kid1(copy));
04228 if (aa->Dim(aa->Num_Vec()-1)->Loop_Coeff(Do_Loop_Depth(innerloop))==-1)
04229 add_to_base = -add_to_base;
04230 Simd_Update_Copy_Array_Index(WN_kid1(copy), WN_kid1(istore), add_to_base, index_type);
04231 add_to_base = ABS(add_to_base);
04232
04233 LWN_Parentize(copy);
04234
04235
04236 LWN_Insert_Block_After(LWN_Get_Parent(istore),istore,copy);
04237
04238
04239 Add_Vertices(copy);
04240
04241
04242 add_to_base += sum;
04243 }
04244
04245 SYMBOL symbol(WN_index(innerloop));
04246 DOLOOP_STACK sym_stack(&LNO_local_pool);
04247 INT k;
04248 Find_Nodes(OPR_LDID, symbol, WN_do_body(innerloop), &sym_stack);
04249 for (k = 0; k < sym_stack.Elements(); k++) {
04250 WN* wn_use = sym_stack.Bottom_nth(k);
04251 Du_Mgr->Add_Def_Use(WN_start(innerloop), wn_use);
04252 Du_Mgr->Add_Def_Use(WN_step(innerloop), wn_use);
04253 }
04254 for (k = 0; k < sym_stack.Elements(); k++) {
04255 WN* wn_use = sym_stack.Bottom_nth(k);
04256 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(wn_use);
04257 def_list->Set_loop_stmt(innerloop);
04258 }
04259 }
04260
04261
04262 static BOOL Simd_Good_Reduction_Load(WN *innerloop, WN *load)
04263 {
04264 WN* stmt = Find_Stmt_Under(load,WN_do_body(innerloop));
04265 FmtAssert(stmt && (curr_simd_red_manager || WN_operator(stmt) != OPR_STID),("NYI"));
04266 if (WN_operator(stmt) == OPR_STID &&
04267 curr_simd_red_manager->Which_Reduction(stmt) != RED_NONE &&
04268 WN_st(load) == WN_st(stmt) && WN_offset(load) == WN_offset(stmt))
04269 return TRUE;
04270 else return FALSE;
04271 }
04272
04273 static WN* Simd_Vectorize_Scalar_Reduction(WN *red_load, WN *red_store,
04274 WN *simd_op, WN *innerloop,
04275 TYPE_ID vmtype, INT kid)
04276 {
04277 WN_OFFSET orig_offset = WN_load_offset(red_load);
04278 ST *orig_st = WN_st(red_load);
04279
04280 WN* tmp = red_load;
04281 WN* copy = LWN_Copy_Tree(tmp, TRUE, LNO_Info_Map);
04282 DEF_LIST *def_list = Du_Mgr->Ud_Get_Def(tmp);
04283 WN* loop = def_list->Loop_stmt();
04284 if(loop == innerloop)
04285 def_list->Set_loop_stmt(NULL);
04286 Du_Mgr->Delete_Def_Use(innerloop,tmp);
04287 Du_Mgr->Delete_Def_Use(red_store, tmp);
04288 LWN_Copy_Def_Use(tmp, copy, Du_Mgr);
04289 if (WN_operator(simd_op) != OPR_MAX && WN_operator(simd_op) != OPR_MIN)
04290 Delete_Def_Use(tmp);
04291
04292 WN* last_tmp = tmp;
04293 TYPE_ID desc = WN_desc(tmp);
04294 TYPE_ID rtype = WN_rtype(tmp);
04295 WN_set_desc(tmp, vmtype);
04296 WN_set_rtype(tmp, vmtype);
04297
04298 WN* last_op = simd_op;
04299 WN *parent = LWN_Get_Parent(last_op);
04300 while(parent && parent != red_store) {
04301 parent = LWN_Get_Parent(parent);
04302 last_op = LWN_Get_Parent(last_op);
04303 }
04304 FmtAssert(parent==red_store && WN_operator(red_store) == OPR_STID, ("NYI"));
04305 tmp = Split_Using_Preg(red_store, tmp, adg, FALSE);
04306 WN_OFFSET offset = WN_store_offset(tmp);
04307 if (WN_operator(simd_op) != OPR_MAX && WN_operator(simd_op) != OPR_MIN)
04308 LWN_Delete_Tree(last_tmp);
04309 else{
04310 WN_set_desc(last_tmp, desc);
04311 WN_set_rtype(last_tmp, rtype);
04312 }
04313
04314
04315 TCON tcon; INT value; float valuefp; double valuedp;
04316 ST* st; WN *inv_node;
04317 if (WN_operator(simd_op) == OPR_ADD ||
04318 WN_operator(simd_op) == OPR_SUB){
04319 value = 0; valuefp = 0.0F; valuedp = 0.0;
04320 }else if (WN_operator(simd_op) == OPR_MPY ||
04321 WN_operator(simd_op) == OPR_DIV) {
04322 value = 1; valuefp = 1.0F; valuedp = 1.0;
04323 }
04324 if (WN_operator(simd_op) == OPR_ADD ||
04325 WN_operator(simd_op) == OPR_SUB ||
04326 WN_operator(simd_op) == OPR_DIV ||
04327 WN_operator(simd_op) == OPR_MPY) {
04328 if (!MTYPE_is_integral(desc)) {
04329 if (desc == MTYPE_F4)
04330 tcon = Host_To_Targ_Float_4 (MTYPE_F4, valuefp);
04331 else
04332 tcon = Host_To_Targ_Float (MTYPE_F8, valuedp);
04333 }
04334 else
04335 tcon = Host_To_Targ(MTYPE_I4, value);
04336 st = New_Const_Sym (Enter_tcon (tcon), Be_Type_Tbl(desc));
04337 inv_node = WN_CreateConst (OPR_CONST, vmtype, MTYPE_V, st);
04338 } else{
04339 inv_node = LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE, vmtype,
04340 Mtype_TransferSign(MTYPE_I4, desc)),last_tmp);
04341 }
04342 WN_kid0(tmp) = inv_node;
04343
04344 LWN_Extract_From_Block(tmp);
04345
04346 if(!Do_Loop_Is_Mp(innerloop))
04347 LWN_Insert_Block_Before(LWN_Get_Parent(innerloop), innerloop, tmp);
04348 else {
04349
04350
04351
04352 WN* enclosing_parallel_region = LWN_Get_Parent(innerloop);
04353 while(enclosing_parallel_region &&
04354 WN_operator(enclosing_parallel_region) != OPR_REGION)
04355 enclosing_parallel_region = LWN_Get_Parent(enclosing_parallel_region);
04356 FmtAssert(enclosing_parallel_region, ("NYI"));
04357 if (WN_pragma(WN_first(WN_region_pragmas(
04358 enclosing_parallel_region))) !=
04359 WN_PRAGMA_PARALLEL_DO)
04360 LWN_Insert_Block_Before(LWN_Get_Parent(
04361 enclosing_parallel_region),
04362 enclosing_parallel_region, tmp);
04363 else
04364 LWN_Insert_Block_Before(LWN_Get_Parent(innerloop),
04365 innerloop, tmp);
04366 }
04367 LWN_Parentize(tmp);
04368 last_tmp = tmp;
04369
04370 WN_set_rtype(simd_op, vmtype);
04371 WN_set_rtype(last_op, vmtype);
04372 tmp = Split_Using_Preg(red_store, last_op, adg, FALSE);
04373 WN_store_offset(tmp) = offset;
04374 WN_load_offset(WN_kid0(red_store)) = offset;
04375 WN_set_rtype(simd_op, rtype);
04376 WN_set_rtype(last_op, rtype);
04377
04378 LWN_Extract_From_Block(red_store);
04379 if (!Do_Loop_Is_Mp(innerloop))
04380 LWN_Insert_Block_After(LWN_Get_Parent(innerloop),
04381 innerloop, red_store);
04382 else{
04383
04384
04385
04386 WN* enclosing_parallel_region = LWN_Get_Parent(innerloop);
04387 while(enclosing_parallel_region &&
04388 WN_operator(enclosing_parallel_region) != OPR_REGION)
04389 enclosing_parallel_region = LWN_Get_Parent(enclosing_parallel_region);
04390 FmtAssert(enclosing_parallel_region, ("NYI"));
04391
04392
04393 WN *region_pragma =
04394 WN_first(WN_region_pragmas(enclosing_parallel_region));
04395 while(region_pragma &&
04396 WN_pragma(region_pragma) != WN_PRAGMA_REDUCTION)
04397 region_pragma = WN_next(region_pragma);
04398 if (!region_pragma)
04399 LWN_Insert_Block_After(LWN_Get_Parent(
04400 enclosing_parallel_region),
04401 enclosing_parallel_region, red_store);
04402 else
04403 LWN_Insert_Block_After(LWN_Get_Parent(innerloop),
04404 innerloop, red_store);
04405 }
04406
04407
04408 OPERATOR opr;
04409 OPERATOR s_opr = WN_operator(simd_op);
04410 desc = WN_desc(red_store);
04411 if (MTYPE_is_unsigned(desc)) desc = MTYPE_complement(desc);
04412 switch(WN_operator(simd_op)) {
04413 case OPR_ADD: opr = OPR_REDUCE_ADD; break;
04414 case OPR_SUB: opr = OPR_REDUCE_ADD; s_opr = OPR_ADD; break;
04415 case OPR_MPY: opr = OPR_REDUCE_MPY; break;
04416 case OPR_DIV: opr = OPR_REDUCE_MPY; s_opr = OPR_MPY; break;
04417 case OPR_MAX: opr = OPR_REDUCE_MAX; break;
04418 case OPR_MIN: opr = OPR_REDUCE_MIN; break;
04419 default: FmtAssert(FALSE, ("NYI"));
04420 }
04421 if (MTYPE_is_integral(desc) && MTYPE_byte_size(desc) < 4)
04422 desc = MTYPE_I4;
04423 WN* reduce = LWN_CreateExp2(OPCODE_make_op(s_opr, WN_rtype(simd_op),WN_desc(simd_op)),
04424 LWN_CreateExp1(OPCODE_make_op(opr, desc, vmtype), WN_kid0(red_store)),
04425 copy);
04426 LWN_Parentize(reduce);
04427 LWN_Set_Parent(reduce, red_store);
04428 WN_kid0(red_store) = reduce;
04429
04430
04431 Du_Mgr->Add_Def_Use(last_tmp, WN_kid0(WN_kid1((reduce))));
04432 Du_Mgr->Add_Def_Use(last_tmp, WN_kid(simd_op, kid));
04433 Du_Mgr->Add_Def_Use(tmp, WN_kid(simd_op, kid));
04434 Du_Mgr->Add_Def_Use(tmp, WN_kid0(WN_kid1(reduce)));
04435 def_list = Du_Mgr->Ud_Get_Def(WN_kid(simd_op, kid));
04436 def_list->Set_loop_stmt(innerloop);
04437
04438
04439 Rename_Other_Reductions(orig_offset, orig_st, offset,
04440 WN_first(WN_do_body(innerloop)), tmp, vmtype);
04441
04442 return red_store;
04443 }
04444
04445 static WN *Simd_Vectorize_Induction_Variables(WN *operand, WN *simd_op, WN *innerloop,
04446 TYPE_ID vmtype, STACK_OF_WN *vec_simd_ops,
04447 SIMD_KIND *simd_op_kind, SIMD_KIND simd_kind)
04448
04449 {
04450 SYMBOL vec_index_symbol;
04451 SYMBOL vec_loop_incr_symbol;
04452 WN *vec_index_preg_store = NULL;
04453 WN *vec_loop_incr_preg_store;
04454 WN *incr_vec_index_symbol;
04455 TYPE_ID prog_const_type;
04456 SYMBOL symbol(operand);
04457
04458 BOOL const_lb = FALSE;
04459 INT const_val = 0;
04460 if(WN_operator(WN_kid0(WN_start(innerloop))) == OPR_INTCONST){
04461 const_lb = TRUE;
04462 const_val = WN_const_val(WN_kid0(WN_start(innerloop)));
04463 }
04464
04465 INT vec_unit;
04466 BOOL shorter_type = FALSE;
04467 TYPE_ID scalar_type;
04468
04469 if (WN_desc(simd_op) == MTYPE_V)
04470 scalar_type = WN_rtype(simd_op);
04471 else
04472 scalar_type = WN_desc(simd_op);
04473 switch(vmtype){
04474 case MTYPE_V16I1:
04475 prog_const_type = vmtype; break;
04476 case MTYPE_V16I2:
04477 prog_const_type = vmtype;
04478 if (MTYPE_byte_size(scalar_type) < 2)
04479 shorter_type = TRUE;
04480 break;
04481 case MTYPE_V16I4:
04482 prog_const_type = vmtype;
04483 if (MTYPE_byte_size(scalar_type) < 4)
04484 shorter_type = TRUE;
04485 break;
04486 case MTYPE_V16I8:
04487 prog_const_type = vmtype;
04488 if (MTYPE_byte_size(scalar_type) < 8)
04489 shorter_type = TRUE;
04490 break;
04491 case MTYPE_V16F4:
04492 prog_const_type = MTYPE_V16I4;
04493 if (MTYPE_byte_size(scalar_type) < 4)
04494 shorter_type = TRUE;
04495 break;
04496 case MTYPE_V16F8:
04497 prog_const_type = MTYPE_V16I8;
04498 if (MTYPE_byte_size(scalar_type) < 8)
04499 shorter_type = TRUE;
04500 break;
04501 default: FmtAssert(FALSE, ("NYI"));
04502 }
04503
04504 if (shorter_type) {
04505 switch(scalar_type) {
04506 case MTYPE_U1:
04507 case MTYPE_I1: prog_const_type = MTYPE_V16I1; break;
04508 case MTYPE_U2:
04509 case MTYPE_I2: prog_const_type = MTYPE_V16I2; break;
04510 case MTYPE_U4:
04511 case MTYPE_I4: prog_const_type = MTYPE_V16I4; break;
04512 case MTYPE_U8:
04513 case MTYPE_I8: prog_const_type = MTYPE_V16I8; break;
04514 case MTYPE_F4: prog_const_type = MTYPE_V16I4; break;
04515 default: FmtAssert(FALSE, ("NYI"));
04516 }
04517 }
04518
04519 switch (simd_kind) {
04520 case V16I1: vec_unit = 16; break;
04521 case V16I2: vec_unit = 8; break;
04522 case V16I4: vec_unit = 4; break;
04523 case V16I8: vec_unit = 2; break;
04524 case V16C8: vec_unit = 1; break;
04525 default: FmtAssert(FALSE, ("NYI"));
04526 }
04527 TCON prog_const_tcon = Create_Simd_Prog_Const(prog_const_type,
04528 const_val);
04529 ST* prog_const_symbol =
04530 New_Const_Sym (Enter_tcon(prog_const_tcon),
04531 Be_Type_Tbl(prog_const_type));
04532 WN* prog_const =
04533 WN_CreateConst (OPR_CONST, prog_const_type, MTYPE_V,
04534 prog_const_symbol);
04535
04536
04537 TCON loop_incr_const_tcon = Host_To_Targ(MTYPE_I4, vec_unit);
04538 ST* loop_incr_const_symbol =
04539 New_Const_Sym (Enter_tcon(loop_incr_const_tcon),
04540 Be_Type_Tbl(MTYPE_I4));
04541 WN* loop_incr_const =
04542 WN_CreateConst (OPR_CONST, prog_const_type, MTYPE_V,
04543 loop_incr_const_symbol);
04544 vec_index_symbol=
04545 Create_Preg_Symbol(symbol.Name(), prog_const_type);
04546 vec_loop_incr_symbol =
04547 Create_Preg_Symbol(symbol.Name(), prog_const_type);
04548
04549 if (const_lb)
04550 vec_index_preg_store =
04551 AWN_StidIntoSym(&vec_index_symbol, prog_const);
04552 else {
04553 WN* lb = LWN_Copy_Tree(WN_kid0(WN_start(innerloop)));
04554 LWN_Copy_Def_Use(WN_kid0(WN_start(innerloop)), lb, Du_Mgr);
04555 WN* lb_replicate =
04556 LWN_CreateExp1(OPCODE_make_op(OPR_REPLICATE,
04557 prog_const_type,
04558 WN_desc(lb)==MTYPE_V?WN_rtype(lb):WN_desc(lb)), lb);
04559 vec_index_preg_store =
04560 AWN_StidIntoSym(&vec_index_symbol,
04561 AWN_Add(prog_const_type, prog_const,
04562 lb_replicate));
04563 }
04564 LWN_Parentize(vec_index_preg_store);
04565 vec_loop_incr_preg_store =
04566 AWN_StidIntoSym(&vec_loop_incr_symbol, loop_incr_const);
04567 WN* loop_enclosing_block = innerloop;
04568 while (WN_operator(loop_enclosing_block) != OPR_BLOCK)
04569 loop_enclosing_block =
04570 LWN_Get_Parent(loop_enclosing_block);
04571 LWN_Insert_Block_Before(loop_enclosing_block, innerloop,
04572 vec_index_preg_store);
04573 WN_Set_Linenum ( vec_index_preg_store,
04574 WN_Get_Linenum(innerloop) );
04575 LWN_Insert_Block_Before(loop_enclosing_block, innerloop,
04576 vec_loop_incr_preg_store);
04577 WN_Set_Linenum ( vec_loop_incr_preg_store,
04578 WN_Get_Linenum(innerloop) );
04579
04580 WN* use_vec_incr_loop = AWN_LdidSym(&vec_loop_incr_symbol);
04581 WN* use_vec_index_symbol = AWN_LdidSym(&vec_index_symbol);
04582 incr_vec_index_symbol =
04583 AWN_StidIntoSym(&vec_index_symbol,
04584 AWN_Add(prog_const_type,
04585 use_vec_index_symbol,
04586 use_vec_incr_loop));
04587 LWN_Insert_Block_After(WN_do_body(innerloop),
04588 WN_last(WN_do_body(innerloop)),
04589 incr_vec_index_symbol);
04590 WN_Set_Linenum ( incr_vec_index_symbol,
04591 WN_Get_Linenum(innerloop) );
04592 Du_Mgr->Add_Def_Use(vec_loop_incr_preg_store,
04593 use_vec_incr_loop);
04594 Du_Mgr->Add_Def_Use(vec_index_preg_store,
04595 use_vec_index_symbol);
04596 Du_Mgr->Add_Def_Use(incr_vec_index_symbol,
04597 use_vec_index_symbol);
04598
04599 for (INT i=0; i<vec_simd_ops->Elements(); i++){
04600 WN *cur_simd_op=vec_simd_ops->Top_nth(i);
04601 if (simd_op_kind[i] == INVALID ||
04602 Enclosing_Do_Loop(cur_simd_op) != innerloop)
04603 continue;
04604 for (INT kid = 0; kid < WN_kid_count(cur_simd_op); kid ++){
04605 WN *operand1 = WN_kid(cur_simd_op, kid);
04606 if (WN_operator(operand1)==OPR_LDID &&
04607 SYMBOL(operand1) == SYMBOL(WN_index(innerloop))){
04608 Delete_Def_Use(operand1);
04609 WN_st_idx(operand1)=ST_st_idx(vec_index_symbol.St());
04610 WN_offset(operand1)=vec_index_symbol.WN_Offset();
04611 WN_set_desc(operand1, prog_const_type);
04612 WN_set_rtype(operand1, prog_const_type);
04613 Du_Mgr->Add_Def_Use(vec_index_preg_store, operand1);
04614 Du_Mgr->Add_Def_Use(incr_vec_index_symbol, operand1);
04615 }
04616 }
04617 }
04618 return vec_index_preg_store;
04619 }
04620
04621 static void Simd_Vectorize_Load_And_Equilvalent(WN *load, WN *innerloop, TYPE_ID vmtype)
04622 {
04623 STACK<WN*>* equivalence_class=
04624 Scalar_Equivalence_Class(load, Du_Mgr,&LNO_local_pool);
04625 if (!equivalence_class){
04626 equivalence_class = CXX_NEW(STACK<WN*>(&LNO_local_pool), &LNO_local_pool);
04627 equivalence_class->Push(load);
04628 }
04629
04630 SYMBOL symbol(load);
04631 SYMBOL new_symbol=
04632 Create_Preg_Symbol(symbol.Name(), vmtype);
04633 for (INT i=0; i<equivalence_class->Elements(); i++) {
04634 WN* scalar_ref=equivalence_class->Top_nth(i);
04635
04636
04637
04638
04639
04640
04641
04642 if (!Wn_Is_Inside(scalar_ref, innerloop))
04643 continue;
04644
04645 WN_st_idx(scalar_ref)=ST_st_idx(new_symbol.St());
04646 WN_offset(scalar_ref)=new_symbol.WN_Offset();
04647 WN_set_desc(scalar_ref, vmtype);
04648 if (WN_operator(scalar_ref) != OPR_STID)
04649 WN_set_rtype(scalar_ref, vmtype);
04650 }
04651 CXX_DELETE(equivalence_class, &LNO_local_pool);
04652 }
04653
04654 static void Simd_Vectorize_SimdOp_And_Kids(WN *simd_op, TYPE_ID vmtype, BOOL *invarkid)
04655 {
04656
04657 if (WN_operator(simd_op) != OPR_CVT && WN_operator(simd_op) != OPR_TRUNC){
04658 for(INT kid=0; kid<WN_kid_count(simd_op); kid++){
04659 if(invarkid[kid]) continue;
04660 WN *operand = WN_kid(simd_op, kid);
04661 if (WN_operator(simd_op) == OPR_INTRINSIC_OP) {
04662 FmtAssert(WN_operator(operand) == OPR_PARM, ("NYI"));
04663 operand = WN_kid0(operand);
04664 }
04665 if(WN_operator(operand) == OPR_SHUFFLE)
04666 operand = WN_kid0(operand);
04667
04668 if(WN_desc(operand) != MTYPE_V && !MTYPE_is_vector(WN_desc(operand)))
04669 WN_set_desc(operand, vmtype);
04670 if(WN_rtype(operand) != MTYPE_V && !MTYPE_is_vector(WN_rtype(operand)))
04671 WN_set_rtype(operand, vmtype);
04672 }
04673
04674 if(OPCODE_is_compare(WN_opcode(simd_op))){
04675 if (vmtype == MTYPE_V16F4)
04676 WN_set_rtype (simd_op, MTYPE_V16I4);
04677 else WN_set_rtype (simd_op, MTYPE_V16I8);
04678 WN_set_desc(simd_op, vmtype);
04679 }else if (!MTYPE_is_vector(WN_rtype(simd_op)))
04680 WN_set_rtype (simd_op, vmtype);
04681 }else{
04682 TYPE_ID vec_rtype, vec_desc;
04683 switch(WN_desc(simd_op)) {
04684 case MTYPE_I4: vec_desc = MTYPE_V16I4; break;
04685 case MTYPE_F4: vec_desc = MTYPE_V16F4; break;
04686 default: FmtAssert(FALSE, ("NYI"));
04687 }
04688 switch(WN_rtype(simd_op)) {
04689 case MTYPE_F8:
04690 vec_rtype = MTYPE_V16F8;
04691 if (vec_desc == MTYPE_V16I4)
04692 vec_desc = MTYPE_V8I4;
04693 else vec_desc = MTYPE_V8F4;
04694 break;
04695 case MTYPE_F4: vec_rtype = MTYPE_V16F4; break;
04696 case MTYPE_I4: vec_rtype = MTYPE_V16I4; break;
04697 default: FmtAssert(FALSE, ("NYI"));
04698 }
04699 WN_set_rtype(simd_op, vec_rtype);
04700 WN_set_desc(simd_op, vec_desc);
04701 if(!invarkid[0]){
04702 WN *operand0 = WN_kid0(simd_op);
04703 if(WN_operator(operand0)==OPR_SHUFFLE)
04704 operand0 = WN_kid0(operand0);
04705 if (!MTYPE_is_vector(WN_rtype(operand0)))
04706 WN_set_rtype(operand0, vec_desc);
04707 if(!MTYPE_is_vector(WN_desc(operand0)) && WN_desc(operand0) != MTYPE_V)
04708 WN_set_desc(operand0, vec_desc);
04709 }
04710 }
04711
04712 WN *istore = LWN_Get_Parent(simd_op);
04713 if(WN_operator(istore) == OPR_SHUFFLE)
04714 istore = LWN_Get_Parent(istore);
04715 if (WN_operator(istore) != OPR_STID && WN_operator(istore) != OPR_CVT &&
04716 WN_operator(istore) != OPR_TRUNC &&
04717 !OPCODE_is_compare(WN_opcode(istore))) {
04718 if (WN_desc(istore) != MTYPE_V)
04719 WN_set_desc(istore, vmtype);
04720 if (WN_rtype(istore) != MTYPE_V)
04721 WN_set_rtype(istore, vmtype);
04722 }
04723
04724
04725 if (WN_operator(simd_op) == OPR_INTRINSIC_OP)
04726 Simd_Vectorize_Intrinsics(simd_op);
04727 }
04728
04729 static void Simd_Finalize_Loops(WN *innerloop, WN *remainderloop, INT vect, WN *reduction_node)
04730 {
04731
04732 SYMBOL symbol(WN_index(innerloop));
04733 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,index_type, MTYPE_V);
04734 OPCODE add_opc= OPCODE_make_op(OPR_ADD,index_type, MTYPE_V);
04735 OPCODE sub_opc= OPCODE_make_op(OPR_SUB,Mtype_TransferSign(MTYPE_I4, index_type), MTYPE_V);
04736 OPCODE div_opc = OPCODE_make_op(OPR_DIV,index_type, MTYPE_V);
04737 OPCODE cmp_opc = WN_opcode(WN_end(innerloop));
04738 OPERATOR opr = OPCODE_operator(cmp_opc);
04739 OPCODE new_cmp_opc = OPCODE_make_op(OPR_LE,
04740 WN_rtype(WN_end(innerloop)),
04741 WN_desc(WN_end(innerloop)));
04742 FmtAssert((opr == OPR_LE || opr == OPR_LT ||
04743 opr == OPR_GE || opr == OPR_GT), ("NYI"));
04744 if (opr == OPR_GE || opr == OPR_GT) {
04745 opr = (opr == OPR_GE) ? OPR_LE: OPR_LT;
04746
04747
04748 BOOL save_simp_state = WN_Simplifier_Enable(FALSE);
04749 WN_end(innerloop) =
04750 LWN_CreateExp2(new_cmp_opc,
04751 WN_kid1(WN_end(innerloop)),
04752 WN_kid0(WN_end(innerloop)));
04753 WN_Simplifier_Enable(save_simp_state);
04754 }
04755
04756 WN *step;
04757 WN *add = WN_kid0(WN_step(innerloop));
04758 WN *loop_end = WN_end(innerloop);
04759 WN *loop_end_tmp, *loop_end_tmp_rloop;
04760 OPCODE mpy_opc = OPCODE_make_op(OPR_MPY,index_type, MTYPE_V);
04761 WN *loop_start = LWN_Copy_Tree(WN_start(innerloop), TRUE, LNO_Info_Map);
04762 WN *loop_start_tmp =
04763 LWN_Copy_Tree(WN_start(innerloop), TRUE, LNO_Info_Map);
04764 LWN_Copy_Def_Use(WN_kid0(WN_start(innerloop)),
04765 WN_kid0(loop_start), Du_Mgr);
04766 LWN_Copy_Def_Use(WN_kid0(WN_start(innerloop)),
04767 WN_kid0(loop_start_tmp), Du_Mgr);
04768 WN *loop_start_rloop =
04769 LWN_Copy_Tree(WN_start(innerloop), TRUE, LNO_Info_Map);
04770 WN *loop_start_rloop_tmp =
04771 LWN_Copy_Tree(WN_start(innerloop), TRUE, LNO_Info_Map);
04772 LWN_Copy_Def_Use(WN_kid0(WN_start(innerloop)),
04773 WN_kid0(loop_start_rloop), Du_Mgr);
04774 LWN_Copy_Def_Use(WN_kid0(WN_start(innerloop)),
04775 WN_kid0(loop_start_rloop_tmp), Du_Mgr);
04776
04777
04778 WN *loop_index = find_loop_var_in_simple_ub(innerloop);
04779 WN *tmp = LWN_Get_Parent(loop_index);
04780 if (tmp == loop_end)
04781 tmp = loop_index;
04782 else{
04783 while (LWN_Get_Parent(tmp)!=loop_end)
04784 tmp=LWN_Get_Parent(tmp);
04785 }
04786 BOOL rloop_needed = TRUE;
04787 if (WN_kid0(loop_end)==tmp) {
04788 if (opr == OPR_LT) {
04789
04790
04791 WN_kid1(loop_end) =
04792 LWN_CreateExp2(add_opc,
04793 WN_kid1(loop_end),
04794 WN_CreateIntconst(intconst_opc, -1));
04795
04796
04797
04798
04799
04800 WN_set_opcode(loop_end,new_cmp_opc);
04801 }
04802 loop_end_tmp = LWN_Copy_Tree(WN_end(innerloop), TRUE, LNO_Info_Map);
04803 LWN_Copy_Def_Use(WN_kid1(WN_end(innerloop)),
04804 WN_kid1(loop_end_tmp), Du_Mgr);
04805 loop_end_tmp_rloop =
04806 LWN_Copy_Tree(WN_end(innerloop), TRUE, LNO_Info_Map);
04807 LWN_Copy_Def_Use(WN_kid1(WN_end(innerloop)),
04808 WN_kid1(loop_end_tmp_rloop), Du_Mgr);
04809
04810 LWN_Update_Def_Use_Delete_Tree(loop_end, Du_Mgr);
04811 if (WN_operator(WN_kid1(loop_end)) == OPR_INTCONST &&
04812 WN_operator(WN_kid0(loop_start)) == OPR_INTCONST) {
04813
04814 if ((WN_const_val(WN_kid1(loop_end)) -
04815 WN_const_val(WN_kid0(loop_start)) + 1)%vect == 0)
04816 rloop_needed = FALSE;
04817 }
04818 else if (WN_operator(WN_kid1(loop_end)) != OPR_INTCONST &&
04819 WN_operator(WN_kid0(loop_start)) != OPR_INTCONST) {
04820 WN* tmp1 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid1(loop_end)));
04821 WN* tmp2 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_start)));
04822 if (WN_operator(tmp1) == OPR_INTCONST &&
04823 WN_operator(tmp2) == OPR_INTCONST) {
04824 if ((WN_const_val(tmp1) - WN_const_val(tmp2) + 1)%vect == 0)
04825 rloop_needed = FALSE;
04826 }
04827 }
04828 else if (WN_operator(WN_kid1(loop_end)) != OPR_INTCONST &&
04829 WN_operator(WN_kid0(loop_start)) == OPR_INTCONST) {
04830 WN* tmp1 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid1(loop_end)));
04831 if (WN_operator(tmp1) == OPR_INTCONST) {
04832 if ((WN_const_val(tmp1) - WN_const_val(WN_kid0(loop_start)) + 1)%vect == 0)
04833 rloop_needed = FALSE;
04834 }
04835 }
04836 else if (WN_operator(WN_kid1(loop_end)) == OPR_INTCONST &&
04837 WN_operator(WN_kid0(loop_start)) != OPR_INTCONST) {
04838 WN* tmp2 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_start)));
04839 if (WN_operator(tmp2) == OPR_INTCONST) {
04840 if ((WN_const_val(WN_kid1(loop_end)) -
04841 WN_const_val(tmp2) + 1)%vect == 0)
04842 rloop_needed = FALSE;
04843 }
04844 }
04845 WN_kid1(loop_end) =
04846 LWN_CreateExp2(add_opc,
04847 LWN_CreateExp2(add_opc,
04848 LWN_CreateExp2(mpy_opc,
04849 LWN_CreateExp2(div_opc,
04850 LWN_CreateExp2(add_opc,
04851 LWN_CreateExp2(sub_opc,
04852 WN_kid1(loop_end_tmp),
04853 WN_kid0(loop_start)),
04854 WN_CreateIntconst(intconst_opc, 1)),
04855 WN_CreateIntconst(intconst_opc, vect)),
04856 WN_CreateIntconst(intconst_opc, vect)),
04857 WN_CreateIntconst(intconst_opc, -1)),
04858 WN_kid0(loop_start_tmp));
04859
04860
04861 WN *start = WN_start(remainderloop);
04862 WN_kid0(start) =
04863 LWN_CreateExp2(add_opc,
04864 LWN_CreateExp2(mpy_opc,
04865 LWN_CreateExp2(div_opc,
04866 LWN_CreateExp2(add_opc,
04867 LWN_CreateExp2(sub_opc,
04868 WN_kid1(loop_end_tmp_rloop),
04869 WN_kid0(loop_start_rloop)),
04870 WN_CreateIntconst(intconst_opc, 1)),
04871 WN_CreateIntconst(intconst_opc, vect)),
04872 WN_CreateIntconst(intconst_opc, vect)),
04873 WN_kid0(loop_start_rloop_tmp));
04874 }
04875 else {
04876 if (opr == OPR_LT) {
04877
04878
04879 WN_kid0(loop_end) =
04880 LWN_CreateExp2(add_opc,
04881 WN_kid0(loop_end),
04882 WN_CreateIntconst(intconst_opc, -1));
04883
04884
04885
04886
04887
04888 WN_set_opcode(loop_end,new_cmp_opc);
04889 }
04890 loop_end_tmp = LWN_Copy_Tree(WN_end(innerloop), TRUE, LNO_Info_Map);
04891 LWN_Copy_Def_Use(WN_kid0(WN_end(innerloop)),
04892 WN_kid0(loop_end_tmp), Du_Mgr);
04893 loop_end_tmp_rloop =
04894 LWN_Copy_Tree(WN_end(innerloop), TRUE, LNO_Info_Map);
04895 LWN_Copy_Def_Use(WN_kid0(WN_end(innerloop)),
04896 WN_kid0(loop_end_tmp_rloop), Du_Mgr);
04897
04898 LWN_Update_Def_Use_Delete_Tree(loop_end, Du_Mgr);
04899 if (WN_operator(WN_kid0(loop_end)) == OPR_INTCONST &&
04900 WN_operator(WN_kid0(loop_start)) == OPR_INTCONST) {
04901
04902 if ((WN_const_val(WN_kid0(loop_end)) -
04903 WN_const_val(WN_kid0(loop_start)) + 1)%vect == 0)
04904 rloop_needed = FALSE;
04905 }
04906 else if (WN_operator(WN_kid0(loop_end)) != OPR_INTCONST &&
04907 WN_operator(WN_kid0(loop_start)) != OPR_INTCONST) {
04908 WN* tmp1 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_end)));
04909 WN* tmp2 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_start)));
04910 if (WN_operator(tmp1) == OPR_INTCONST &&
04911 WN_operator(tmp2) == OPR_INTCONST) {
04912 if ((WN_const_val(tmp1) - WN_const_val(tmp2) + 1)%vect == 0)
04913 rloop_needed = FALSE;
04914 }
04915 }
04916 else if (WN_operator(WN_kid0(loop_end)) != OPR_INTCONST &&
04917 WN_operator(WN_kid0(loop_start)) == OPR_INTCONST) {
04918 WN* tmp1 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_end)));
04919 if (WN_operator(tmp1) == OPR_INTCONST) {
04920 if ((WN_const_val(tmp1) - WN_const_val(WN_kid0(loop_start)) + 1)%vect == 0)
04921 rloop_needed = FALSE;
04922 }
04923 }
04924 else if (WN_operator(WN_kid0(loop_end)) == OPR_INTCONST &&
04925 WN_operator(WN_kid0(loop_start)) != OPR_INTCONST) {
04926 WN* tmp2 = WN_Simplify_Tree(LWN_Copy_Tree(WN_kid0(loop_start)));
04927 if (WN_operator(tmp2) == OPR_INTCONST) {
04928 if ((WN_const_val(WN_kid0(loop_end)) -
04929 WN_const_val(tmp2) + 1)%vect == 0)
04930 rloop_needed = FALSE;
04931 }
04932 }
04933 WN_kid1(loop_end) =
04934 LWN_CreateExp2(add_opc,
04935 LWN_CreateExp2(add_opc,
04936 LWN_CreateExp2(mpy_opc,
04937 LWN_CreateExp2(div_opc,
04938 LWN_CreateExp2(add_opc,
04939 LWN_CreateExp2(sub_opc,
04940 WN_kid0(loop_end_tmp),
04941 WN_kid0(loop_start)),
04942 WN_CreateIntconst(intconst_opc, 1)),
04943 WN_CreateIntconst(intconst_opc, vect)),
04944 WN_CreateIntconst(intconst_opc, vect)),
04945 WN_CreateIntconst(intconst_opc, -1)),
04946 WN_kid0(loop_start_tmp));
04947
04948 WN *start = WN_start(remainderloop);
04949 WN_kid0(start) =
04950 LWN_CreateExp2(add_opc,
04951 LWN_CreateExp2(mpy_opc,
04952 LWN_CreateExp2(div_opc,
04953 LWN_CreateExp2(add_opc,
04954 LWN_CreateExp2(sub_opc,
04955 WN_kid0(loop_end_tmp_rloop),
04956 WN_kid0(loop_start_rloop)),
04957 WN_CreateIntconst(intconst_opc, 1)),
04958 WN_CreateIntconst(intconst_opc, vect)),
04959 WN_CreateIntconst(intconst_opc, vect)),
04960 WN_kid0(loop_start_rloop_tmp));
04961 }
04962
04963
04964
04965
04966
04967
04968
04969
04970 {
04971 WN* rloop_start = LWN_Copy_Tree(WN_start(remainderloop));
04972 WN* rloop_end = LWN_Copy_Tree(WN_end(remainderloop));
04973 WN* diff = LWN_CreateExp2(sub_opc,
04974 WN_kid1(rloop_end),
04975 WN_kid0(rloop_start));
04976 WN* simpdiff = WN_Simplify_Tree(diff);
04977 if (WN_operator(simpdiff) == OPR_INTCONST &&
04978 WN_const_val(simpdiff) < 0)
04979 rloop_needed = FALSE;
04980 }
04981
04982 Simd_Update_Index_Def_Use(innerloop,innerloop,WN_end(innerloop), symbol);
04983 LWN_Set_Parent(WN_end(innerloop),innerloop);
04984 LWN_Parentize(WN_end(innerloop));
04985
04986
04987 if (WN_operator(WN_kid0(add)) == OPR_INTCONST)
04988 step = WN_kid0(add);
04989 else if (WN_operator(WN_kid1(add)) == OPR_INTCONST)
04990 step = WN_kid1(add);
04991 else
04992 FmtAssert(FALSE, ("Handle this"));
04993 if (WN_const_val(step)!= 1)
04994 FmtAssert(FALSE, ("Handle this"));
04995 WN_const_val(step) = vect;
04996 Simd_Update_Index_Def_Use(innerloop, innerloop,WN_step(innerloop), symbol);
04997
04998 WN *start = WN_start(remainderloop);
04999 LWN_Set_Parent(WN_kid0(start),start);
05000 LWN_Set_Parent(start,remainderloop);
05001 LWN_Parentize(start);
05002 LWN_Parentize(innerloop);
05003
05004 DO_LOOP_INFO *dli = Get_Do_Loop_Info(innerloop);
05005 Simd_Update_Loop_Info(remainderloop, innerloop,dli,FALSE);
05006
05007
05008 if (rloop_needed){
05009 if(!Do_Loop_Is_Mp(innerloop)){
05010 if(reduction_node)
05011 LWN_Insert_Block_After(LWN_Get_Parent(innerloop),
05012 reduction_node,remainderloop);
05013 else
05014 LWN_Insert_Block_After(LWN_Get_Parent(innerloop),
05015 innerloop,remainderloop);
05016
05017 LWN_Parentize(remainderloop);
05018 LWN_Set_Parent(remainderloop, LWN_Get_Parent(innerloop));
05019 }else {
05020 WN* enclosing_parallel_region = LWN_Get_Parent(innerloop);
05021 while(enclosing_parallel_region &&
05022 WN_operator(enclosing_parallel_region) != OPR_REGION)
05023 enclosing_parallel_region =
05024 LWN_Get_Parent(enclosing_parallel_region);
05025 FmtAssert(enclosing_parallel_region, ("Simd: MP loop has no enclosing region!"));
05026
05027
05028 if(WN_pragma(WN_first(WN_region_pragmas(enclosing_parallel_region))) ==
05029 WN_PRAGMA_PARALLEL_DO)
05030 LWN_Insert_Block_After(LWN_Get_Parent(enclosing_parallel_region),
05031 enclosing_parallel_region,remainderloop);
05032 else{
05033
05034
05035 WN *body,*pragmas,*exits,*region;
05036
05037
05038 body = WN_CreateBlock ();
05039 pragmas = WN_CreateBlock ();
05040 exits = WN_CreateBlock ();
05041 region = WN_CreateRegion (REGION_KIND_MP,
05042 body,
05043 pragmas,
05044 exits,
05045 -1,
05046 0);
05047 WN* pragma =
05048 WN_CreatePragma(WN_PRAGMA_SINGLE_PROCESS_BEGIN,
05049 (ST_IDX)NULL, 0, 0);
05050 WN_set_pragma_omp(pragma);
05051 LWN_Insert_Block_After(pragmas,NULL,pragma);
05052
05053
05054
05055
05056
05057 WN* region_pragma =
05058 WN_first(WN_region_pragmas(enclosing_parallel_region));
05059 while(region_pragma && WN_pragma(region_pragma) != WN_PRAGMA_NOWAIT)
05060 region_pragma = WN_next(region_pragma);
05061 if (region_pragma && WN_pragma(region_pragma) == WN_PRAGMA_NOWAIT) {
05062 WN* nowait_pragma =
05063 WN_CreatePragma (WN_PRAGMA_NOWAIT,
05064 (ST_IDX) NULL, 0, 0);
05065 WN_set_pragma_omp(nowait_pragma);
05066 LWN_Insert_Block_After(pragmas,pragma,nowait_pragma);
05067 pragma = nowait_pragma;
05068 }
05069 WN* pragma_end =
05070 WN_CreatePragma(WN_PRAGMA_END_MARKER,
05071 (ST_IDX)NULL, 0, 0);
05072 WN_set_pragma_omp(pragma_end);
05073 LWN_Insert_Block_After(pragmas,pragma,pragma_end);
05074 LWN_Insert_Block_After(LWN_Get_Parent(enclosing_parallel_region),
05075 enclosing_parallel_region, region);
05076 LWN_Insert_Block_After(body,NULL,remainderloop);
05077 LWN_Set_Parent(remainderloop, body);
05078 LWN_Parentize(region);
05079 LWN_Set_Parent(region, LWN_Get_Parent(enclosing_parallel_region));
05080
05081
05082 RID *o_rid, *n_rid, *p_rid;
05083 o_rid = REGION_get_rid(enclosing_parallel_region);
05084 n_rid = RID_Create(WN_region_id(region), RID_level(o_rid), region);
05085
05086 RID_level(n_rid) = RID_level(o_rid);
05087 RID_type(n_rid) = RID_type(o_rid);
05088 RID_depth(n_rid) = RID_depth(o_rid);
05089 RID_srcpos(n_rid) = WN_Get_Linenum(region);
05090 RID_bounds_exist(n_rid) = REGION_BOUND_UNKNOWN;
05091 RID_has_return(n_rid) = REGION_NO_RETURN;
05092 RID_num_exits(n_rid) = RID_num_exits(o_rid);
05093 RID_eh_range_ptr(n_rid) = RID_eh_range_ptr(o_rid);
05094
05095 WN_MAP_Set(RID_map, region, (void *)n_rid);
05096 RID_rwn(n_rid) = region;
05097 p_rid = RID_parent(o_rid);
05098 RID_Add_kid(n_rid, p_rid);
05099 }
05100 }
05101
05102
05103 DO_LOOP_INFO *dli_m = Get_Do_Loop_Info(innerloop);
05104 DO_LOOP_INFO *dli_r = Get_Do_Loop_Info(remainderloop);
05105 INT num_bounds_m = Num_Lower_Bounds(innerloop, dli_m->Step);
05106 INT num_bounds_r = Num_Lower_Bounds(remainderloop, dli_r->Step);
05107 DOLOOP_STACK stack_m(&SIMD_default_pool);
05108 DOLOOP_STACK stack_r(&SIMD_default_pool);
05109 CXX_DELETE(dli_m->UB, dli_m->UB->Pool());
05110 CXX_DELETE(dli_r->LB, dli_r->LB->Pool());
05111 Build_Doloop_Stack(innerloop, &stack_m);
05112 Build_Doloop_Stack(remainderloop, &stack_r);
05113 dli_r->LB =
05114 CXX_NEW(ACCESS_ARRAY(num_bounds_r,stack_r.Elements(),
05115 &LNO_default_pool),
05116 &LNO_default_pool);
05117 dli_r->LB->Set_LB(WN_kid0(WN_start(remainderloop)), &stack_r,
05118 dli_r->Step->Const_Offset);
05119 dli_m->UB = CXX_NEW(ACCESS_ARRAY(num_bounds_m,stack_m.Elements(),
05120 &LNO_default_pool),
05121 &LNO_default_pool);
05122 dli_m->UB->Set_UB(WN_end(innerloop), &stack_m);
05123
05124
05125 if (WN_kid_count(remainderloop) == 6) {
05126 WN *loop_info = WN_do_loop_info(remainderloop);
05127 WN_Set_Loop_Unimportant_Misc(loop_info);
05128 dli_r->Set_Generally_Unimportant();
05129 }
05130
05131 }else {
05132 Delete_Def_Use(WN_end(remainderloop));
05133 WN *r_body = WN_do_body(remainderloop);
05134 WN *r_stmt;
05135 for (r_stmt=WN_first(r_body); r_stmt != NULL; r_stmt=WN_next(r_stmt))
05136 Delete_Def_Use(r_stmt);
05137 LNO_Erase_Dg_From_Here_In(remainderloop,adg);
05138 }
05139 adg->Fission_Dep_Update(innerloop, 1);
05140 }
05141
05142
// Simd: try to vectorize a single DO loop.
//
// innerloop - the DO_LOOP node to transform.
// Returns 1 if the loop went through the whole transformation below, 0 if it
// was rejected by either analysis step or by the skip controls.  When
// TARG_X8664 is not defined the function body reduces to `return 0`.
//
// Order of work (the WHIRL tree is rewritten in place, so later steps depend
// on earlier ones):
//   1. Simd_Pre_Analysis gate.
//   2. Loop-id skip controls (LNO_Simd_Loop_Skip_*).
//   3. Simd_Mark_Code, then the Simd_Analysis gate.
//   4. Alignment analysis for each candidate op, then optional peeling.
//   5. Per-op rewrite: operands, the op itself, statement unrolling, and
//      Simd_Finalize_Loops for ops flagged in simd_op_last_in_loop.
// vec_simd_ops, simd_op_kind, simd_op_last_in_loop, simd_operand_invariant,
// vec_unroll_preg_created/vec_unroll_preg_store and index_type are defined
// outside this function.
05143 static INT Simd(WN* innerloop)
05144 {
05145
05146 #ifdef TARG_X8664
// Only assigned in Is_True_On builds when debug/verbose is on; otherwise it
// stays 0.
05147 INT good_vector = 0;
05148
05149
// Buffer handed to both analysis routines and printed as the rejection
// reason.
// NOTE(review): not initialized here -- presumably the analysis routines
// always write it before returning FALSE; confirm.
05150 char verbose_msg[128];
05151 if(!Simd_Pre_Analysis(innerloop, verbose_msg)){
05152 if (debug || LNO_Simd_Verbose){
05153 printf("(%s:%d) %s Loop was not vectorized.\n", Src_File_Name,
05154 Srcpos_To_Line(WN_Get_Linenum(innerloop)), verbose_msg);
05155 }
05156 return 0;
05157 }
05158
// Number this loop and apply the skip controls: the loop is left alone when
// its id is below Skip_Before, above Skip_After, or equal to Skip_Equal.
// This return happens before the pool push, so no pop is needed.
05159 {
05160 Last_Vectorizable_Loop_Id ++;
05161 if (Last_Vectorizable_Loop_Id < LNO_Simd_Loop_Skip_Before ||
05162 Last_Vectorizable_Loop_Id > LNO_Simd_Loop_Skip_After ||
05163 Last_Vectorizable_Loop_Id == LNO_Simd_Loop_Skip_Equal)
05164 return 0;
05165 }
05166
// Every exit path from here on pops SIMD_default_pool again.
05167 MEM_POOL_Push(&SIMD_default_pool);
05168 {
05169 DO_LOOP_INFO* dli=Get_Do_Loop_Info(innerloop);
// NOTE(review): `stmt` and `body` are not referenced again in this function.
05170 WN *stmt, *simd_op, *istore;
05171 WN *body = WN_do_body(innerloop);
// Simd_Mark_Code runs with the simplifier switched off; the value returned by
// the first WN_Simplifier_Enable call is passed back afterwards (presumably
// restoring the previous setting).
05172 BOOL save_simp_state = WN_Simplifier_Enable(FALSE);
05173 Simd_Mark_Code(WN_do_body(innerloop));
05174 WN_Simplifier_Enable(save_simp_state);
05175
05176 if(!Simd_Analysis(innerloop,verbose_msg)){
05177 MEM_POOL_Pop(&SIMD_default_pool);
05178 if (debug || LNO_Simd_Verbose){
05179 printf("(%s:%d) %s Loop was not vectorized.\n", Src_File_Name,
05180 Srcpos_To_Line(WN_Get_Linenum(innerloop)), verbose_msg);
05181 }
05182 return 0;
05183 }
05184
05185
// ---- Pass 1: alignment analysis ------------------------------------------
// simd_op_best_align[k][i] holds the result for slot k of candidate op i:
//   k = 0..2 : kid0..kid2 of the op, k = 3 : the op's parent.
// Values written below: -1 = initial, -2 = kid absent or marked invariant,
// otherwise whatever Simd_Align_Analysis returns.
05186 INT *simd_op_best_align[4];
05187 for(INT k=0; k<4; k++)
05188 simd_op_best_align[k] =
05189 CXX_NEW_ARRAY(INT,vec_simd_ops->Elements(),&SIMD_default_pool);
05190 BOOL ubound_variable = Simd_Align_UB_Variable(innerloop);
05191
05192 for (INT i=vec_simd_ops->Elements()-1; i >= 0; i--) {
05193 simd_op=vec_simd_ops->Top_nth(i);
05194
05195 SIMD_KIND simd_kind = simd_op_kind[i];
05196 for(INT k=0; k<4; k++)
05197 simd_op_best_align[k][i]=-1;
05198
05199 if (simd_kind == INVALID)
05200 continue;
05201
05202 WN *load_store[4];
05203 load_store[0]=WN_kid0(simd_op);
05204 load_store[1] =WN_kid_count(simd_op)>1 ? WN_kid1(simd_op):NULL;
05205 load_store[2]=WN_kid_count(simd_op)>2 ? WN_kid2(simd_op):NULL;
05206 load_store[3]=LWN_Get_Parent(simd_op);
// Shadows the function parameter: the loop is re-derived from the op itself.
05207 WN* innerloop=LWN_Get_Parent(Find_Do_Body(simd_op));
05208 INT size = Vec_Unit_Size[simd_kind];
// For intrinsic ops each present kid must be an OPR_PARM wrapper; look
// through it to the actual operand.
05209 if (WN_operator(simd_op) == OPR_INTRINSIC_OP)
05210 for(INT k=0; k<3; k++)
05211 if(load_store[k]){
05212 FmtAssert(WN_operator(load_store[k]) == OPR_PARM, ("NYI"));
05213 load_store[k] = WN_kid0(load_store[k]);
05214 }
// simd_op_kind / simd_op_best_align are indexed by i, while
// simd_operand_invariant uses the mirrored index Elements()-i-1.
05215 INT second_indx = vec_simd_ops->Elements()-i-1;
05216 for(INT k=0; k<3; k++){
05217 if (load_store[k]==NULL || simd_operand_invariant[k][second_indx] == 1)
05218 simd_op_best_align[k][i] = -2;
05219
// The -2 test comes first, so load_store[k] is non-NULL when WN_operator is
// evaluated.  Only ILOAD kids are analyzed (last argument FALSE).
05220 if (simd_op_best_align[k][i] != -2 &&
05221 WN_operator(load_store[k]) == OPR_ILOAD)
05222 simd_op_best_align[k][i] =
05223 Simd_Align_Analysis(simd_op_best_align[k][i],
05224 load_store[k], simd_op, size, simd_kind, innerloop,FALSE);
05225 }
// Slot 3 is analyzed only when the parent is an ISTORE (last argument TRUE);
// otherwise it keeps the initial -1.
05226 if (WN_operator(load_store[3]) != OPR_ISTORE)
05227 continue;
05228 simd_op_best_align[3][i] =
05229 Simd_Align_Analysis(simd_op_best_align[3][i],
05230 load_store[3], simd_op, size, simd_kind, innerloop,TRUE);
05231 }
05232
// ---- Pass 2: peeling decision, once per op flagged simd_op_last_in_loop ---
05233 for (INT i=vec_simd_ops->Elements()-1; i >= 0; i--) {
05234 simd_op=vec_simd_ops->Top_nth(i);
05235
05236 if (simd_op_kind[i] == INVALID || !simd_op_last_in_loop[i])
05237 continue;
05238
05239 WN* innerloop=LWN_Get_Parent(Find_Do_Body(simd_op));
05240 INT best_peel = Simd_Align_Best_Peel(vec_simd_ops, simd_op_kind,
05241 simd_op_best_align, innerloop);
05242
// best_peel == 0: update the array references without generating a peel loop.
05243 if(best_peel==0)
05244 Simd_Align_Array_References(vec_simd_ops,simd_op_kind,
05245 simd_op_best_align,best_peel,innerloop);
// No peel loop for a non-positive best_peel or when ubound_variable is set.
05246 if (best_peel <= 0 || ubound_variable)
05247 continue;
05248
// `dli` was fetched from the function parameter, not from the shadowing
// innerloop declared just above.
05249 Simd_Align_Generate_Peel_Loop(innerloop, best_peel, dli);
05250 Simd_Align_Array_References(vec_simd_ops,simd_op_kind,
05251 simd_op_best_align,best_peel,innerloop);
05252 }
05253
05254
05255 #ifdef Is_True_On //internal debug purpose
05256 if (debug || LNO_Simd_Verbose)
05257 good_vector = Simd_Count_Good_Vector(vec_simd_ops, simd_op_kind);
05258 #endif
05259
05260
// ---- Pass 3: rewrite each valid candidate op ------------------------------
// reduction_node and vec_index_preg_store are declared outside the loop, so
// values set while handling one op stay visible for later ops.
05261 WN* reduction_node= NULL;
05262 WN *vec_index_preg_store = NULL;
05263 for(INT ii=0; ii<4; ii++){
05264 vec_unroll_preg_created[ii] = FALSE;
05265 vec_unroll_preg_store[ii] = NULL;
05266 }
05267
05268 for (INT i=vec_simd_ops->Elements()-1; i >= 0; i--){
05269 simd_op=vec_simd_ops->Top_nth(i);
05270
05271
05272
05273 SIMD_KIND simd_kind = simd_op_kind[i];
05274 if (simd_kind == INVALID)
05275 continue;
05276
// `istore` is simply the op's parent; it is checked for OPR_ISTORE only
// before statement unrolling further down.
05277 istore=LWN_Get_Parent(simd_op);
05278
05279 WN *innerloop = Enclosing_Do_Loop(simd_op);
05280 WN *remainderloop = NULL;
05281
// The remainder loop is created before any operand of this op is rewritten.
05282 if(simd_op_last_in_loop[i])
05283 remainderloop = Simd_Create_Remainder_Loop(innerloop);
05284
05285
05286 TYPE_ID vmtype = Simd_Get_Vector_Type(istore);
05287
05288 Simd_Add_Shuffle_For_Negative_Coefficient(simd_op,innerloop);
05289
// invarkid[k] records which kids were treated as invariant; it is passed to
// Simd_Vectorize_SimdOp_And_Kids below.
// NOTE(review): the array has 3 entries while the loop runs to
// WN_kid_count(simd_op) -- assumes candidate ops never have more than 3 kids;
// confirm.
05290 BOOL invarkid[3];
05291 invarkid[0]=invarkid[1]=invarkid[2]=FALSE;
05292 for (INT kid = 0; kid < WN_kid_count(simd_op); kid ++) {
05293 WN* inv_node;
05294
// A kid takes this branch when it is marked in simd_operand_invariant or is
// an LDID.
05295 if (simd_operand_invariant[kid][vec_simd_ops->Elements()-i-1] == 1 ||
05296 WN_operator(WN_kid(simd_op, kid)) == OPR_LDID) {
05297 invarkid[kid] = TRUE;
05298 inv_node = WN_kid(simd_op,kid);
// Look through a PARM wrapper; an operand that is already a REPLICATE is
// left as it is.
05299 if (WN_operator(inv_node) == OPR_PARM) {
05300 inv_node = WN_kid0(inv_node);
05301 if (WN_operator(inv_node) == OPR_REPLICATE) continue;
05302 }
05303
05304
// Operands marked invariant are rewritten here; an LDID that is not marked
// falls through to the reduction / induction / load handling.
05305 if(simd_operand_invariant[kid][vec_simd_ops->Elements()-i-1] == 1){
05306
05307 if (WN_operator(inv_node) == OPR_CONST ||
05308 WN_operator(inv_node) == OPR_INTCONST){
05309
// Special case: an I4MPY / V16I4MPY by the integer constant 2 becomes an ADD
// of the other kid with a copy of itself.  The copy replaces the constant and
// the kid is no longer flagged invariant.
05310 if(WN_operator(inv_node) == OPR_INTCONST && WN_const_val(inv_node)==2 &&
05311 (WN_opcode(simd_op)==OPC_I4MPY || WN_opcode(simd_op)==OPC_V16I4MPY)){
05312 inv_node = LWN_Copy_Tree(WN_kid(simd_op,1-kid), TRUE, LNO_Info_Map);
05313 LWN_Copy_Def_Use(WN_kid(simd_op,1-kid), inv_node, Du_Mgr);
05314 LWN_Copy_Frequency_Tree(inv_node, WN_kid(simd_op,1-kid));
05315 WN_set_opcode(simd_op, OPCODE_make_op(OPR_ADD, WN_rtype(simd_op), MTYPE_V));
05316 LWN_Set_Parent(inv_node, simd_op);
05317 invarkid[kid] = FALSE;
05318 }else{
// A constant whose result type is already a vector type is skipped.
05319 if(MTYPE_is_vector(WN_rtype(inv_node))) continue;
05320 inv_node = Simd_Vectorize_Constants(inv_node, istore, simd_op);
05321 }
05322 }
05323 else
05324 inv_node = Simd_Vectorize_Invariants(inv_node, istore, simd_op);
05325
// Install the new operand (under the PARM wrapper when there is one) and
// repair the parent pointers below it.
05326 if (WN_operator(WN_kid(simd_op, kid)) == OPR_PARM){
05327 WN_kid0(WN_kid(simd_op, kid)) = inv_node;
05328 LWN_Set_Parent(inv_node, WN_kid(simd_op, kid));
05329 } else
05330 WN_kid(simd_op, kid) = inv_node;
05331
05332 LWN_Set_Parent(WN_kid(simd_op, kid), simd_op);
05333 LWN_Parentize(WN_kid(simd_op, kid));
05334 }else if(Simd_Good_Reduction_Load(innerloop, WN_kid(simd_op, kid))){
05335
// Reduction loads are vectorized only at the statement for which
// Is_Last_Red_Stmt holds.  This local `stmt` shadows the outer one.
05336 WN* stmt = Find_Stmt_Under(simd_op,WN_do_body(innerloop));
05337 if (Is_Last_Red_Stmt(stmt))
05338 reduction_node =
05339 Simd_Vectorize_Scalar_Reduction(WN_kid(simd_op, kid), stmt,
05340 simd_op, innerloop, vmtype, kid);
05341 }else{
05342 WN* operand = WN_kid(simd_op, kid);
05343
// An operand whose desc type is already a vector type is skipped.
05344 if (MTYPE_is_vector(WN_desc(operand))) continue;
05345
// A load of the loop index goes to the induction-variable handler; any other
// LDID goes to Simd_Vectorize_Load_And_Equilvalent.
05346 if (SYMBOL(operand) == SYMBOL(WN_index(innerloop))){
05347 vec_index_preg_store = Simd_Vectorize_Induction_Variables(
05348 operand, simd_op, innerloop,
05349 vmtype, vec_simd_ops,
05350 simd_op_kind, simd_kind);
05351 }else
05352 Simd_Vectorize_Load_And_Equilvalent(operand, innerloop, vmtype);
05353 }
05354 }
05355 }
05356
05357
05358 Simd_Vectorize_SimdOp_And_Kids(simd_op, vmtype, invarkid);
// Unroll bookkeeping: unroll_times = vect / stmt_unroll, and add_to_base is
// vect / unroll_times when unroll_times > 1, else 0.
05359 INT vect = Simd_Unroll_Times_By_SimdKind(simd_kind);
05360
05361
05362 INT stmt_unroll = Simd_Unroll_Times_By_VectorType(vmtype);
05363 INT unroll_times = vect/stmt_unroll;
05364 INT add_to_base = unroll_times>1?vect/unroll_times:0;
05365
// Only ISTORE statements are unrolled.
05366 if(unroll_times > 1 && WN_operator(istore) == OPR_ISTORE)
05367 Simd_Unroll_Statement( unroll_times, add_to_base,
05368 istore,
05369 vec_index_preg_store,
05370 innerloop, index_type);
05371
05372
// Finalize the main and remainder loops once, at the op flagged as last in
// its loop (the same flag that triggered the remainder loop's creation).
05373 if (simd_op_last_in_loop[i])
05374 Simd_Finalize_Loops(innerloop, remainderloop, vect, reduction_node);
05375 }
05376 }
05377 MEM_POOL_Pop(&SIMD_default_pool);
05378
// `innerloop` is the function parameter again here; the inner-scope shadows
// are out of scope.
05379 if (debug || LNO_Simd_Verbose) {
05380 printf("(%s:%d) LOOP WAS VECTORIZED.\n",
05381 Src_File_Name,
05382 Srcpos_To_Line(WN_Get_Linenum(innerloop)));
05383 #ifdef Is_True_On
05384 printf("Loop has %d super vectors\n", good_vector);
05385 #endif
05386 }
05387
05388 return 1;
05389 #else
05390 return 0;
05391 #endif // TARG_X8664
05392
05393 }
05394
05395 static void Simd_Walk(WN* wn) {
05396 OPCODE opc=WN_opcode(wn);
05397
05398 if (!OPCODE_is_scf(opc))
05399 return;
05400 else if (opc==OPC_DO_LOOP) {
05401 if (Do_Loop_Is_Good(wn) && Do_Loop_Is_Inner(wn) && !Do_Loop_Has_Calls(wn)
05402 && !Do_Loop_Has_Gotos(wn)) {
05403 if (Simd(wn))
05404 Simd_Align = TRUE;
05405 } else
05406 Simd_Walk(WN_do_body(wn));
05407 } else if (opc==OPC_BLOCK)
05408 for (WN* stmt=WN_first(wn); stmt;) {
05409 WN* next_stmt=WN_next(stmt);
05410 Simd_Walk(stmt);
05411 stmt=next_stmt;
05412 }
05413 else
05414 for (UINT kidno=0; kidno<WN_kid_count(wn); kidno++) {
05415 Simd_Walk(WN_kid(wn,kidno));
05416 }
05417 }
05418
05419 void Simd_Phase(WN* func_nd) {
05420
05421 MEM_POOL_Initialize(&SIMD_default_pool,"SIMD_default_pool",FALSE);
05422 MEM_POOL_Push(&SIMD_default_pool);
05423
05424 adg=Array_Dependence_Graph;
05425
05426 debug = Get_Trace(TP_LNOPT, TT_LNO_DEBUG_SIMD);
05427 if (debug) {
05428 fprintf(TFile, "=======================================================================\n");
05429 fprintf(TFile, "LNO: \"WHIRL tree before simd phase\"\n");
05430 fdump_tree (TFile, func_nd);
05431 }
05432 Simd_Reallocate_Objects = FALSE;
05433 Last_Vectorizable_Loop_Id = 0;
05434 if (LNO_Simd_Reduction) {
05435 simd_red_manager = CXX_NEW
05436 (REDUCTION_MANAGER(&SIMD_default_pool), &SIMD_default_pool);
05437 simd_red_manager->Build(func_nd,TRUE,FALSE);
05438 curr_simd_red_manager = simd_red_manager;
05439 }
05440
05441
05442
05443
05444
05445 Minvariant_Removal_For_Simd = TRUE;
05446 if (!Get_Trace(TP_LNOPT, TT_LNO_GUARD) && LNO_Minvar) {
05447
05448
05449 Guard_Dos(func_nd);
05450 Minvariant_Removal(func_nd, Array_Dependence_Graph);
05451
05452 if (curr_simd_red_manager)
05453 curr_simd_red_manager->Build(func_nd, TRUE, TRUE, adg);
05454 }
05455 Minvariant_Removal_For_Simd = FALSE;
05456 Simd_Walk(func_nd);
05457 if (debug) {
05458 fprintf(TFile, "=======================================================================\n");
05459 fprintf(TFile, "LNO: \"WHIRL tree after simd phase\"\n");
05460 fdump_tree (TFile, func_nd);
05461 }
05462 if (LNO_Simd_Reduction && simd_red_manager)
05463 CXX_DELETE(simd_red_manager,&SIMD_default_pool);
05464 MEM_POOL_Pop(&SIMD_default_pool);
05465 MEM_POOL_Delete(&SIMD_default_pool);
05466 }
05467
05468
05469
05470
05471
05472
05473
05474
05475
05476
05477
05478
05479
05480
05481
05482