00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133 #define __STDC_LIMIT_MACROS
00134 #include <stdint.h>
00135 #ifdef USE_PCH
00136 #include "lno_pch.h"
00137 #endif // USE_PCH
00138 #pragma hdrstop
00139
00140 #include <sys/types.h>
00141 #include <limits.h>
00142 #include "defs.h"
00143 #include "pf_loop.h"
00144 #include "pf_ref.h"
00145 #include "pf_manual.h"
00146 #include "lwn_util.h"
00147 #include "lnopt_main.h"
00148 #include "pf_cg.h"
00149 #include "tlog.h"
00150
00151 #include "targ_sim.h"
00152 #include "opt_du.h"
00153 #include "lnoutils.h"
00154 #include "prompf.h"
00155
00156 extern WN* Find_SCF_Inside(WN* parent_wn, OPCODE opc);
00157 #define minof(x, y) (((x)<(y)) ? (x) : (y))
00158
00159 extern INT64 Get_Good_Num_Iters (DO_LOOP_INFO *dli);
00160
00161 PF_LOOPNODE::~PF_LOOPNODE () {
00162 while (_child.Elements()) CXX_DELETE (_child.Pop(), PF_mpool);
00163 while (_bases.Elements()) CXX_DELETE (_bases.Pop(), PF_mpool);
00164 }
00165
00166 static BOOL Contains_Array(WN *exp)
00167 {
00168 if (WN_operator(exp) == OPR_ARRAY) {
00169 return TRUE;
00170 }
00171 for (INT kid=0; kid < WN_kid_count(exp); kid++) {
00172 if (Contains_Array(WN_kid(exp,kid))) {
00173 return TRUE;
00174 }
00175 }
00176 return FALSE;
00177 }
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187 static BOOL Loop_Before_MP_Region (WN* wn) {
00188 while (wn) {
00189 if (WN_operator(wn) == OPR_DO_LOOP) return TRUE;
00190 if (Is_Mp_Region(wn)) return FALSE;
00191 wn = LWN_Get_Parent(wn);
00192 }
00193 DevWarn ("Reference not contained within a DO-LOOP");
00194 return FALSE;
00195 }
00196
00197 #ifdef KEY
00198 static BOOL Has_Indirect_Ref (WN* wn)
00199 {
00200 if (WN_operator(wn) == OPR_ILOAD) return TRUE;
00201 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
00202 if (Has_Indirect_Ref(WN_kid(wn, kid)))
00203 return TRUE;
00204 return FALSE;
00205 }
00206 #endif
00207 #if defined(TARG_X8664) || defined(TARG_IA64)
00208
00209 static WN *Obtain_Stride_From_Loop_Step(WN *loop)
00210 {
00211 WN *add = WN_kid0(WN_step(loop));
00212 if(WN_operator(add) != OPR_ADD)
00213 return NULL;
00214
00215 if(WN_operator(WN_kid0(add))==OPR_LDID &&
00216 SYMBOL(WN_kid0(add)) == SYMBOL(WN_index(loop)) &&
00217 Is_Loop_Invariant_Exp(WN_kid1(add),loop) &&
00218 WN_operator(WN_kid1(add)) != OPR_INTCONST)
00219 return WN_kid1(add);
00220
00221 if(WN_operator(WN_kid1(add))==OPR_LDID &&
00222 SYMBOL(WN_kid1(add)) == SYMBOL(WN_index(loop)) &&
00223 Is_Loop_Invariant_Exp(WN_kid0(add),loop) &&
00224 WN_operator(WN_kid0(add)) != OPR_INTCONST)
00225 return WN_kid0(add);
00226
00227 return NULL;
00228 }
00229
00230 static BOOL Is_Step_One_Loop(WN *loop)
00231 {
00232 WN *add = WN_kid0(WN_step(loop));
00233 if(WN_operator(add) != OPR_ADD)
00234 return FALSE;
00235
00236 if(WN_operator(WN_kid0(add))==OPR_LDID &&
00237 SYMBOL(WN_kid0(add)) == SYMBOL(WN_index(loop)) &&
00238 WN_operator(WN_kid1(add))== OPR_INTCONST &&
00239 WN_const_val(WN_kid1(add))==1)
00240 return TRUE;
00241
00242 if(WN_operator(WN_kid1(add))==OPR_LDID &&
00243 SYMBOL(WN_kid1(add)) == SYMBOL(WN_index(loop)) &&
00244 WN_operator(WN_kid0(add))== OPR_INTCONST &&
00245 WN_const_val(WN_kid0(add))==1)
00246 return TRUE;
00247
00248 return FALSE;
00249
00250 }
00251
00252
00253 static WN *Stride_From_MPY(WN *mpy, WN *loop)
00254 {
00255 if(!mpy || WN_operator(mpy) != OPR_MPY)
00256 return NULL;
00257
00258 if(WN_operator(WN_kid0(mpy))==OPR_LDID &&
00259 SYMBOL(WN_kid0(mpy)) == SYMBOL(WN_index(loop)) &&
00260 Is_Loop_Invariant_Exp(WN_kid1(mpy),loop) &&
00261 WN_operator(WN_kid1(mpy)) != OPR_INTCONST)
00262 return WN_kid1(mpy);
00263
00264 if (WN_operator(WN_kid1(mpy))==OPR_LDID &&
00265 SYMBOL(WN_kid1(mpy)) == SYMBOL(WN_index(loop)) &&
00266 Is_Loop_Invariant_Exp(WN_kid0(mpy),loop) &&
00267 WN_operator(WN_kid0(mpy)) != OPR_INTCONST)
00268 return WN_kid0(mpy);
00269
00270 return NULL;
00271 }
00272
00273
00274
00275 static WN *Obtain_Stride_From_Array_Index(WN *array_index, WN *loop)
00276 {
00277 if(WN_operator(array_index)==OPR_MPY)
00278 return Stride_From_MPY(array_index, loop);
00279
00280 if(WN_operator(array_index)==OPR_ADD){
00281 WN *stride = Stride_From_MPY(WN_kid0(array_index), loop);
00282 if(stride && Is_Loop_Invariant_Exp(WN_kid1(array_index),loop))
00283 return stride;
00284 stride = Stride_From_MPY(WN_kid1(array_index), loop);
00285 if(stride && Is_Loop_Invariant_Exp(WN_kid0(array_index),loop))
00286 return stride;
00287 }
00288 return NULL;
00289 }
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304 WN *Simple_Invariant_Stride_Access(WN *array, WN *loop)
00305 {
00306
00307 INT kid;
00308 if(!LNO_Prefetch_Invariant_Stride)
00309 return NULL;
00310
00311 ACCESS_ARRAY *aa = (ACCESS_ARRAY *) WN_MAP_Get(LNO_Info_Map,array);
00312 if (!aa || aa->Too_Messy || WN_element_size(array)<0)
00313 return NULL;
00314
00315
00316 if(WN_num_dim(array) != 1) return NULL;
00317
00318
00319 DO_LOOP_INFO *dli = (DO_LOOP_INFO *)WN_MAP_Get(LNO_Info_Map,loop);
00320 if(!dli || !dli->Is_Inner)
00321 return NULL;
00322 WN *up_nest = LWN_Get_Parent(loop);
00323 while(up_nest != NULL){
00324 if(WN_opcode(up_nest) == OPC_DO_LOOP)
00325 return NULL;
00326 up_nest = LWN_Get_Parent(up_nest);
00327 }
00328
00329 if(!Is_Loop_Invariant_Exp(WN_kid0(array), loop)){
00330
00331 for(kid = WN_kid_count(array)-1; kid>0; kid--){
00332 if(!Is_Loop_Invariant_Exp(WN_kid(array, kid),loop))
00333 return NULL;
00334 }
00335
00336 if(WN_operator(WN_kid0(array)) != OPR_LDID ||
00337 SYMBOL(WN_kid0(array)) != SYMBOL(WN_index(loop)))
00338 return NULL;
00339
00340 return Obtain_Stride_From_Loop_Step(loop);
00341 }
00342
00343
00344 if(!Is_Step_One_Loop(loop))
00345 return NULL;
00346
00347
00348
00349
00350
00351
00352 for(kid = 0; kid < WN_kid_count(array)-1; kid++)
00353 if(!Is_Loop_Invariant_Exp(WN_kid(array, kid),loop))
00354 return NULL;
00355
00356 return Obtain_Stride_From_Array_Index(WN_kid(array,WN_num_dim(array)<<1), loop);
00357 }
00358
00359 #endif
00360
00361
00362
00363
00364
00365
00366
00367
00368 void PF_LOOPNODE::Add_Ref (WN* wn_array) {
00369 BOOL messy = FALSE;
00370 ACCESS_ARRAY *array = (ACCESS_ARRAY *) WN_MAP_Get(LNO_Info_Map,wn_array);
00371
00372 if (!array || array->Too_Messy) {
00373 messy = TRUE;
00374 }
00375
00376
00377
00378
00379
00380
00381
00382 if (array && array->Non_Const_Loops() > 0) {
00383
00384 if (array->Non_Const_Loops() == (_depth+1)) {
00385 messy = TRUE;
00386 }
00387 }
00388
00389 if (array) {
00390 #ifdef KEY
00391
00392
00393
00394 for (INT i=0; i<array->Num_Vec(); i++) {
00395 ACCESS_VECTOR *av = array->Dim(i);
00396 #if defined(TARG_IA64)
00397
00398 if (av->Contains_Non_Lin_Symb() || av->Too_Messy) {
00399 if (Has_Indirect_Ref(wn_array))
00400 messy = TRUE;
00401 }
00402 #else
00403 if (av->Contains_Non_Lin_Symb()) {
00404 messy = TRUE;
00405 } else if (av->Too_Messy) {
00406 if (LNO_Run_Prefetch == AGGRESSIVE_PREFETCH) {
00407 if (Has_Indirect_Ref(wn_array))
00408 messy = TRUE;
00409 } else
00410 messy = TRUE;
00411 }
00412 #endif
00413 }
00414 #else
00415 for (INT i=0; i<array->Num_Vec(); i++) {
00416 ACCESS_VECTOR *av = array->Dim(i);
00417 if (av->Too_Messy || av->Contains_Non_Lin_Symb()) {
00418 messy = TRUE;
00419 }
00420 }
00421 #endif
00422 }
00423
00424 if (WN_element_size(wn_array) < 0) {
00425 #ifdef TARG_X8664
00426
00427
00428 WN *parent = LWN_Get_Parent(wn_array);
00429 if((WN_desc(parent) != MTYPE_V && MTYPE_is_vector(WN_desc(parent)))||
00430 (WN_rtype(parent) != MTYPE_V && MTYPE_is_vector(WN_rtype(parent))));
00431 else
00432 #endif
00433
00434 messy = TRUE;
00435 }
00436
00437
00438
00439
00440 #ifdef KEY
00441 if(LNO_Run_Prefetch < AGGRESSIVE_PREFETCH &&
00442 WN_element_size(wn_array) > 16 &&
00443 WN_element_size(wn_array)%64 != 0){
00444 messy = TRUE;
00445 }
00446 #endif
00447
00448 if (LNO_Prefetch_Indirect && messy) {
00449 BOOL is_indirect = FALSE;
00450 for (INT kid=0; !is_indirect && kid<WN_kid_count(wn_array); kid++) {
00451 if (Contains_Array(WN_kid(wn_array,kid))) {
00452 is_indirect = TRUE;
00453 }
00454 }
00455 if (is_indirect) {
00456 UINT32 flag=0;
00457 WN *parent = LWN_Get_Parent(wn_array);
00458 WN_OFFSET offset = WN_offset(parent);
00459 WN *wn_block = parent;
00460 while (wn_block && (WN_opcode(wn_block)!=OPC_DO_LOOP)) {
00461 wn_block = LWN_Get_Parent(wn_block);
00462 }
00463 if (!wn_block) {
00464 _num_bad++;
00465 return;
00466 } else {
00467 wn_block = WN_do_body(wn_block);
00468 }
00469
00470 if (OPCODE_is_load(WN_opcode(parent))) {
00471 PF_SET_READ(flag);
00472 } else {
00473 PF_SET_WRITE(flag);
00474 }
00475 PF_SET_CONFIDENCE(flag,0);
00476 UINT32 save_flag = flag;
00477
00478 PF_SET_STRIDE_1L(flag,1);
00479 WN *arraynode = LWN_Copy_Tree(wn_array,TRUE,LNO_Info_Map);
00480 LWN_Copy_Def_Use(wn_array, arraynode, Du_Mgr);
00481 WN* pfnode = LWN_CreatePrefetch (offset, flag, arraynode);
00482 LWN_Insert_Block_Before (wn_block, WN_first(wn_block), pfnode);
00483 LWN_Copy_Frequency_Tree (pfnode, WN_first(wn_block));
00484 extern MEM_POOL PF_CG_mpool;
00485 PF_POINTER *tmp = CXX_NEW (PF_POINTER, &PF_CG_mpool);
00486 WN_MAP_Set (WN_MAP_PREFETCH, parent, tmp);
00487 PF_PTR_flag(tmp) = 0;
00488 SET_AUTO(tmp);
00489 PF_PTR_wn_pref_1L(tmp) = pfnode;
00490 PF_PTR_lrnum_1L(tmp) = 0;
00491 PF_PTR_distance_1L(tmp) = offset;
00492 PF_PTR_set_conf_1L(tmp, 0);
00493
00494 flag = save_flag;
00495 PF_SET_STRIDE_2L(flag,1);
00496 arraynode = LWN_Copy_Tree(wn_array,TRUE,LNO_Info_Map);
00497 LWN_Copy_Def_Use(wn_array, arraynode, Du_Mgr);
00498 pfnode = LWN_CreatePrefetch (offset, flag, arraynode);
00499 LWN_Insert_Block_Before (wn_block, WN_first(wn_block), pfnode);
00500 LWN_Copy_Frequency_Tree (pfnode, WN_first(wn_block));
00501 PF_PTR_wn_pref_2L(tmp) = pfnode;
00502 PF_PTR_lrnum_2L(tmp) = 0;
00503 PF_PTR_distance_2L(tmp) = offset;
00504 PF_PTR_set_conf_2L(tmp, 0);
00505 }
00506 return;
00507 } else if (messy) {
00508 #if defined(TARG_X8664) || defined(TARG_IA64) //bug 10953: for cases may not be "messy"
00509 if(!Simple_Invariant_Stride_Access(wn_array, _code)){
00510 #endif
00511 _num_bad++;
00512 return;
00513 #if defined(TARG_X8664) || defined(TARG_IA64) //bug 10953
00514 }
00515 #endif
00516 }
00517
00518
00519 WN *base = WN_array_base(wn_array);
00520 if ((WN_operator(base) != OPR_LDA) &&
00521 (WN_operator(base) != OPR_LDID)) {
00522 _num_bad++;
00523 return;
00524 }
00525 SYMBOL symb (base);
00526 if (mpf_syms->In_Manual (&symb)) {
00527 VB_PRINT (printf ("symbol prefetched manually: ");
00528 symb.Print(stdout);
00529 printf ("\n"));
00530 return;
00531 }
00532 WN* parent_ref = LWN_Get_Parent (wn_array);
00533 if (WN_MAP_Get(WN_MAP_PREFETCH, parent_ref)) {
00534 VB_PRINT (printf ("reference prefetched manually, ignoring\n"));
00535 return;
00536 }
00537
00538 if (!Steady_Base(wn_array)) {
00539
00540 _num_bad++;
00541 return;
00542 }
00543
00544 if (!Loop_Before_MP_Region(wn_array)) {
00545 _num_bad++;
00546 return;
00547 }
00548
00549 for (INT i=0; i<_bases.Elements(); i++) {
00550 if (_bases.Bottom_nth(i)->Add_Ref (wn_array)) return;
00551 }
00552
00553 SYMBOL* tmp_symb = CXX_NEW (SYMBOL(&symb), PF_mpool);
00554 _bases.Push (CXX_NEW (PF_BASE_ARRAY (tmp_symb, wn_array,
00555 array->Num_Vec(), this),
00556 PF_mpool));
00557
00558 BOOL tmp = _bases.Bottom_nth(_bases.Elements()-1)->Add_Ref (wn_array, FALSE);
00559 Is_True (tmp, ("Strange -- ref doesn't match sample ref"));
00560 }
00561
00562
00563
00564
00565
00566
00567
00568 static BOOL Store_Is_Useless (const WN* istore_wn) {
00569 Is_True (istore_wn && WN_operator(istore_wn) == OPR_ISTORE &&
00570 WN_operator(WN_kid1(istore_wn)) == OPR_ARRAY,
00571 ("Store_Is_Useless called incorrectly\n"));
00572 WN* array_wn = WN_kid1(istore_wn);
00573 WN* base_wn = WN_array_base(array_wn);
00574 if (WN_operator(base_wn) == OPR_LDA &&
00575 ST_is_not_used(WN_st(base_wn))) return TRUE;
00576 return FALSE;
00577 }
00578
00579 #ifdef KEY
00580
00581 static BOOL Is_Multi_BB (const WN* loop)
00582 {
00583 FmtAssert (LNO_Run_Prefetch == SOME_PREFETCH, ("Should not have reached here"));
00584 OPCODE opcode = WN_opcode(loop);
00585
00586 if (opcode == OPC_BLOCK) {
00587 WN *kid = WN_first (loop);
00588 while (kid) {
00589 if (WN_opcode(kid) == OPC_IF)
00590 return TRUE;
00591 kid = WN_next(kid);
00592 }
00593 return FALSE;
00594 }
00595
00596 for (INT kidno=0; kidno<WN_kid_count(loop); kidno++) {
00597 WN *kid = WN_kid(loop,kidno);
00598 if (WN_opcode(kid) == OPC_IF)
00599 return TRUE;
00600 }
00601 return FALSE;
00602 }
00603 #endif
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615 void PF_LOOPNODE::Process_Refs (const WN* wn) {
00616 if (!wn) return;
00617
00618 OPCODE opcode = WN_opcode(wn);
00619
00620 #ifdef KEY
00621 if (LNO_Run_Prefetch == SOME_PREFETCH && OPCODE_operator(opcode) == OPR_IF) {
00622
00623 Process_Refs (WN_if_test (wn));
00624 return;
00625 }
00626 #endif
00627
00628 if (OPCODE_operator(opcode) == OPR_PREFETCH) {
00629
00630
00631
00632 return;
00633 }
00634
00635 if ((OPCODE_operator(opcode) == OPR_PRAGMA) &&
00636 (WN_pragma(wn) == WN_PRAGMA_PREFETCH_REF)) {
00637
00638 if (LNO_Run_Prefetch_Manual) _manual_volume += WN_pragma_arg2(wn);
00639 return;
00640 }
00641
00642 if (opcode == OPC_BLOCK) {
00643 WN *kid = WN_first (wn);
00644 while (kid) {
00645 if (WN_opcode(kid) == OPC_DO_LOOP) {
00646 PF_LOOPNODE* childnode =
00647 CXX_NEW (PF_LOOPNODE (this, kid, _depth+1), PF_mpool);
00648 _child.Push (childnode);
00649 Process_Refs (WN_start(kid));
00650 Process_Refs (WN_end(kid));
00651 Process_Refs (WN_step(kid));
00652 }
00653 else Process_Refs (kid);
00654 kid = WN_next(kid);
00655 }
00656 return;
00657 }
00658
00659 if (OPCODE_operator(opcode) == OPR_ILOAD) {
00660 if (WN_operator(WN_kid0(wn)) == OPR_ARRAY) {
00661 Add_Ref (WN_kid0(wn));
00662 } else {
00663 _num_bad++;
00664 }
00665 } else if (OPCODE_operator(opcode) == OPR_ISTORE) {
00666 #ifdef KEY
00667 if (LNO_Prefetch_Stores)
00668 {
00669 #endif
00670 if (WN_operator(WN_kid1(wn)) == OPR_ARRAY &&
00671 !Store_Is_Useless(wn)) {
00672 Add_Ref (WN_kid1(wn));
00673 } else {
00674 _num_bad++;
00675 }
00676 #ifdef KEY
00677 } else _num_bad++;
00678 #endif
00679 }
00680
00681 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
00682 WN *kid = WN_kid(wn,kidno);
00683 if (WN_opcode(kid) == OPC_DO_LOOP) {
00684 PF_LOOPNODE* childnode =
00685 CXX_NEW (PF_LOOPNODE (this, kid, _depth+1), PF_mpool);
00686 _child.Push (childnode);
00687 Process_Refs (WN_start(kid));
00688 Process_Refs (WN_end(kid));
00689 Process_Refs (WN_step(kid));
00690 }
00691 else Process_Refs (kid);
00692 }
00693 }
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704 void PF_LOOPNODE::Process_Loop () {
00705
00706
00707
00708
00709 const WN * w = WN_do_body(_code);
00710 #ifdef KEY
00711 DO_LOOP_INFO *dli = (DO_LOOP_INFO *) WN_MAP_Get(LNO_Info_Map, _code);
00712 BOOL single_small_trip_loop = FALSE;
00713
00714
00715 if (LNO_Run_Prefetch != AGGRESSIVE_PREFETCH && dli->Is_Inner) {
00716
00717
00718 WN* parent = LWN_Get_Parent(_code);
00719 while (parent && WN_opcode(parent) != OPC_DO_LOOP)
00720 parent = LWN_Get_Parent(parent);
00721 if (!parent &&
00722 ((!dli->Num_Iterations_Symbolic &&
00723 dli->Est_Num_Iterations < 100) ||
00724 (dli->Num_Iterations_Symbolic &&
00725 LNO_Num_Iters < 100)))
00726 single_small_trip_loop = TRUE;
00727 #if 0 // bug 8560 : performance loss due to avoiding prefetch single - copy -loop
00728 #ifdef TARG_X8664
00729
00730
00731 {
00732 WN* stmt = WN_first(w );
00733 if ( stmt && !WN_next(stmt) &&
00734 OPCODE_is_store(WN_opcode(stmt)) &&
00735 ( OPCODE_is_load(WN_opcode(WN_kid0(stmt))) ||
00736 ( WN_operator(WN_kid0(stmt)) == OPR_PAREN &&
00737 OPCODE_is_load(WN_opcode(WN_kid0(WN_kid0(stmt)))) ) ) )
00738 simple_copy_loop = TRUE;
00739 if ( simple_copy_loop ) {
00740
00741
00742 if ( !dli->Num_Iterations_Symbolic ) {
00743
00744
00745
00746 INT trip_count = dli->Est_Num_Iterations;
00747 INT bytes = 2 * MTYPE_byte_size(WN_desc(stmt)) ;
00748 INT size = trip_count * bytes;
00749 if ( size > 1000*1024 )
00750
00751 simple_copy_loop = FALSE;
00752 }
00753 }
00754 }
00755 #endif
00756 #endif
00757 }
00758 if ((LNO_Run_Prefetch > SOME_PREFETCH ||
00759 (LNO_Run_Prefetch == SOME_PREFETCH && !Is_Multi_BB (w))) &&
00760
00761 !single_small_trip_loop)
00762 #endif
00763 Process_Refs (w);
00764
00765
00766 for (INT i=0; i<_child.Elements(); i++) {
00767 _child.Bottom_nth(i)->Process_Loop ();
00768 }
00769 }
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781 void PF_LOOPNODE::Build_Base_LGs () {
00782
00783
00784 INT i;
00785 for (i=0; i<_child.Elements(); i++) {
00786 _child.Bottom_nth(i)->Build_Base_LGs ();
00787 }
00788
00789 for (i=0; i<_bases.Elements(); i++) {
00790 _bases.Bottom_nth (i)->Build_Base_LGs ();
00791 }
00792 }
00793
00794
00795
00796
00797
00798
00799
00800
00801 PF_VOLUME PF_LOOPNODE::Volume_For_Outer (mINT16 depth) {
00802
00803 PF_VOLUME myvol (_manual_volume, _manual_volume);
00804 INT i;
00805
00806 Is_True ((depth>=0), ("Volume_For_Outer: depth is negative (%d)\n", depth));
00807 PF_PRINT(fprintf (TFile, "Vol-FO[0x%p] depth (%d)\n",
00808 this, depth));
00809
00810 for (i=0; i<_child.Elements(); i++) {
00811 myvol += _child.Bottom_nth(i)->Volume_For_Outer(depth);
00812 PF_PRINT(fprintf (TFile, " [0x%p]: nest (%d)", this, i);
00813 myvol.Print (TFile));
00814 if (!myvol.Localized()) {
00815
00816 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this));
00817 return myvol;
00818 }
00819 }
00820
00821 for (i=0; i<_bases.Elements(); i++) {
00822 myvol += _bases.Bottom_nth(i)->Volume (depth);
00823 PF_PRINT(
00824 fprintf (TFile, " [0x%p]: nest (%d)", this, i);
00825 myvol.Print (TFile);
00826 );
00827 if (!myvol.Localized()) {
00828
00829 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this);)
00830 return myvol;
00831 }
00832 }
00833 return myvol;
00834 }
00835
00836
00837
00838
00839
00840
00841 extern mINT16 Loop_Confidence (DO_LOOP_INFO* dli) {
00842 mINT16 conf;
00843 if (!dli->Num_Iterations_Symbolic)
00844 conf = 3;
00845 else
00846 if (dli->Est_Max_Iterations_Index >= 0
00847
00848
00849 )
00850 conf = 2;
00851 else
00852 conf = 1;
00853 return conf;
00854 }
00855
00856
00857
00858
00859
00860
00861
00862
00863 PF_VOLUME PF_LOOPNODE::Volume () {
00864 INT i;
00865
00866 _single_iter *= 0;
00867
00868 _volume_confidence = Loop_Confidence(Get_LoopInfo());
00869
00870 PF_PRINT(fprintf (TFile, "Volume[0x%p]\n", this);)
00871
00872
00873
00874 for (i=0; i<_child.Elements(); i++) {
00875 _single_iter += _child.Bottom_nth (i)->Volume ();
00876
00877
00878 DO_LOOP_INFO* dli = _child.Bottom_nth(i)->Get_LoopInfo ();
00879 _volume_confidence = minof (_volume_confidence, Loop_Confidence(dli));
00880
00881 PF_PRINT(
00882 fprintf (TFile, " [0x%p]: nest (%d)", this, i);
00883 _single_iter.Print (TFile);
00884 );
00885 }
00886
00887
00888
00889
00890 if (!_single_iter.Localized()) {
00891
00892
00893
00894
00895
00896 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this));
00897 return _single_iter;
00898 }
00899
00900 PF_PRINT(fprintf (TFile, " [0x%p]: now count local refs\n", this));
00901
00902
00903
00904
00905
00906
00907
00908
00909 for (i=0; i<_bases.Elements(); i++) {
00910 _single_iter += _bases.Bottom_nth (i)->Volume (_depth+1);
00911 PF_PRINT(fprintf (TFile, " [0x%p]: ", this);
00912 _single_iter.Print (TFile));
00913
00914
00915 if (!_single_iter.Localized()) {
00916
00917 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this));
00918 return (_single_iter);
00919 }
00920 }
00921
00922 PF_PRINT(fprintf (TFile, " [0x%p]: now compute total iter\n", this));
00923
00924
00925 _total_iter *= 0;
00926
00927
00928
00929 _total_iter.vol_1L += _manual_volume;
00930 _total_iter.vol_2L += _manual_volume;
00931
00932
00933 for (i=0; i<_child.Elements(); i++) {
00934 _total_iter += _child.Bottom_nth (i)->Volume_For_Outer (_depth);
00935 PF_PRINT(fprintf (TFile, " [0x%p]: ", this);
00936 _total_iter.Print (TFile));
00937 if (!_total_iter.Localized()) {
00938
00939
00940
00941 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this));
00942 return _total_iter;
00943 }
00944 }
00945
00946 PF_PRINT(fprintf (TFile, " [0x%p]: now count local refs\n", this));
00947
00948
00949 for (i=0; i<_bases.Elements(); i++) {
00950 _total_iter += _bases.Bottom_nth (i)->Volume (_depth);
00951 PF_PRINT(fprintf (TFile, " [0x%p]: ", this);
00952 _total_iter.Print (TFile));
00953 if (!_total_iter.Localized()) {
00954
00955 PF_PRINT(fprintf (TFile, " [0x%p]: not localized, return\n", this));
00956 return _total_iter;
00957 }
00958 }
00959 return _total_iter;
00960 }
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970 void PF_LOOPNODE::Gen_Prefetch (PF_SPLIT_VECTOR* split_vec) {
00971 INT i;
00972
00973
00974
00975
00976
00977 if (LNO_Analysis) {
00978 WN* index_wn = WN_index(_code);
00979 const char* var_name = ((ST_class(WN_st(index_wn)) != CLASS_PREG) ?
00980 ST_name(WN_st(index_wn)) :
00981 (WN_offset(index_wn) > Last_Dedicated_Preg_Offset ?
00982 Preg_Name(WN_offset(index_wn)) : "DEDICATED PREG"));
00983
00984 ls_print_indent; fprintf (LNO_Analysis, "(PREFETCH-LOOP \"%s\"\n",
00985 var_name);
00986
00987 ls_print_indent; fprintf (LNO_Analysis,
00988 " (PREF-VOLUME (SINGLE %d %d) (TOTAL %d %d))\n",
00989 _single_iter.vol_1L, _single_iter.vol_2L,
00990 _total_iter.vol_1L, _total_iter.vol_2L);
00991 ls_print_indent; fprintf (LNO_Analysis, " (PREF-SPLIT ");
00992 }
00993 VB_PRINT (vb_print_indent;
00994 printf ("Loop: \"%s\" depth (%d) (single %d%s %d%s) (total %d %d)\n",
00995 ST_name(WN_st(WN_kid0(_code))),
00996 _depth,
00997 _single_iter.vol_1L, (_single_iter.Localized_1L()?"*":""),
00998 _single_iter.vol_2L, (_single_iter.Localized_2L()?"*":""),
00999 _total_iter.vol_1L, _total_iter.vol_2L));
01000
01001 if (split_vec == NULL) split_vec = _split_vec;
01002 BOOL versions = FALSE;
01003 INT i_tmp=0;
01004 if (split_vec) {
01005
01006 PF_LOOPNODE* loopnode = split_vec->Get_Loop ();
01007 while (loopnode->Get_Depth() > _depth) loopnode = loopnode->Get_Parent ();
01008 if (loopnode == this) {
01009
01010
01011 mINT16* vec = split_vec->Get_Vector ();
01012 for (i=0; i<= _depth; i++)
01013 if (vec[i] != 0) {
01014 versions = TRUE;
01015 break;
01016 }
01017 if (LNO_Analysis) fprintf (LNO_Analysis, "%d)\n", vec[_depth]);
01018 if (LNO_Tlog) i_tmp=vec[_depth];
01019 }
01020 else if (LNO_Analysis) fprintf (LNO_Analysis, "0)\n");
01021 }
01022 else if (LNO_Analysis) fprintf (LNO_Analysis, "0)\n");
01023
01024 if (LNO_Tlog) {
01025 WN *loop=_code;
01026 char out_string[8];
01027 sprintf(out_string,"%d", i_tmp);
01028 Generate_Tlog("LNO","prefetching", Srcpos_To_Line(WN_Get_Linenum(loop)),
01029 ST_name(WN_st(WN_index(loop))), "", out_string, "PREF-SPLIT");
01030 }
01031
01032
01033 if (LNO_Analysis) {
01034 ls_print_indent; fprintf (LNO_Analysis, " (PREFETCHES\n");
01035 ls_num_indent += 4;
01036 }
01037
01038 for (i=0; i<_bases.Elements(); i++) {
01039 if (versions) _bases.Bottom_nth(i)->Gen_Prefetch (split_vec);
01040 else _bases.Bottom_nth(i)->Gen_Prefetch (NULL);
01041 }
01042 if (LNO_Analysis) {
01043 ls_num_indent -= 4;
01044 ls_print_indent; fprintf (LNO_Analysis, " )\n");
01045 }
01046
01047
01048 VB_PRINT (if (_child.Elements()) {
01049 vb_print_indent;
01050 printf ("Inner loops (%d):\n", _child.Elements());
01051 vb_num_indent += 2;
01052 });
01053 if (LNO_Analysis) ls_num_indent += 2;
01054 for (i=0; i<_child.Elements(); i++)
01055 _child.Bottom_nth(i)->Gen_Prefetch (split_vec);
01056 VB_PRINT (if (_child.Elements()) vb_num_indent -= 2;);
01057 if (LNO_Analysis) {
01058 ls_num_indent -= 2;
01059 ls_print_indent; fprintf (LNO_Analysis, ")\n");
01060 }
01061 }
01062
01063
01064
01065
01066
01067
01068
01069
01070 BOOL Check_Version_Map (WN* body_orig, WN* body_new) {
01071 extern WN_MAP version_map;
01072 if (body_orig == NULL) {
01073 Is_True (body_new == NULL,
01074 ("Check version map: things didn't get copied right\n"));
01075 return TRUE;
01076 }
01077
01078 OPCODE opcode = WN_opcode(body_orig);
01079 if ((OPCODE_operator(opcode) == OPR_ILOAD) &&
01080 (WN_operator(WN_kid0(body_orig)) == OPR_ARRAY))
01081 Is_True (WN_MAP_Get(version_map, WN_kid0(body_orig)) == WN_kid0(body_new),
01082 ("Check version map: error in array load\n"));
01083 if ((OPCODE_operator(opcode) == OPR_ISTORE) &&
01084 (WN_operator(WN_kid1(body_orig)) == OPR_ARRAY))
01085 Is_True (WN_MAP_Get(version_map, WN_kid1(body_orig)) == WN_kid1(body_new),
01086 ("Check version map: error in array store\n"));
01087 #if 0
01088
01089 if (OPCODE_is_load(opcode) || OPCODE_is_store(opcode))
01090 Is_True (WN_MAP_Get(version_map, body_orig) == body_new,
01091 ("Check version map: error in load/store\n"));
01092 #endif
01093 extern ARRAY_DIRECTED_GRAPH16 *pf_array_dep_graph;
01094 if (pf_array_dep_graph->Get_Vertex(body_orig))
01095 Is_True (WN_MAP_Get(version_map, body_orig) == body_new,
01096 ("Check version map: error in node-with-vertex\n"));
01097
01098 if (opcode == OPC_BLOCK) {
01099 WN* kid_orig = WN_first (body_orig);
01100 WN* kid_new = WN_first (body_new);
01101 while (kid_orig) {
01102 Is_True (kid_new, ("check version map: kid_new is missing\n"));
01103 Check_Version_Map (kid_orig, kid_new);
01104 kid_orig = WN_next (kid_orig);
01105 kid_new = WN_next (kid_new);
01106 }
01107 }
01108 else {
01109 INT kidno;
01110 for (kidno=0; kidno<WN_kid_count(body_orig); kidno++)
01111 Check_Version_Map (WN_kid(body_orig, kidno), WN_kid(body_new, kidno));
01112 }
01113 return TRUE;
01114 }
01115
01116
01117
01118
01119
01120
01121
01122 static void Collect_Labels_Gotos (WN* newbody, WN_DA* gotos, WN_DA* labels) {
01123 if (newbody == NULL) return;
01124 OPCODE op = WN_opcode(newbody);
01125 if (OPCODE_has_label(op)) {
01126 if (op == OPC_LABEL) labels->Push (newbody);
01127 else gotos->Push (newbody);
01128 }
01129 else {
01130 if (WN_opcode(newbody) == OPC_BLOCK) {
01131 WN* kid = WN_first (newbody);
01132 while (kid) {
01133 Collect_Labels_Gotos (kid, gotos, labels);
01134 kid = WN_next (kid);
01135 }
01136 return;
01137 }
01138 else if ((OPCODE_is_stmt(WN_opcode(newbody))) ||
01139 (OPCODE_is_scf (WN_opcode(newbody)))) {
01140 for (INT kidno=0; kidno<WN_kid_count(newbody); kidno++)
01141 Collect_Labels_Gotos (WN_kid (newbody,kidno), gotos, labels);
01142 return;
01143 }
01144 else return;
01145 }
01146 }
01147
01148
01149
01150
01151
01152
01153
01154
01155 static void Rename_Labels_Gotos (WN* newbody) {
01156 WN_DA gotos(PF_mpool);
01157 WN_DA labels(PF_mpool);
01158 STACK<INT32> old_label(PF_mpool);
01159
01160 Collect_Labels_Gotos (newbody, &gotos, &labels);
01161
01162 INT i;
01163 for (i=0; i<labels.Elements (); i++) {
01164
01165
01166
01167 WN* label_wn = labels.Bottom_nth(i);
01168 #ifdef _NEW_SYMTAB
01169 LABEL_IDX new_label;
01170 (void) New_LABEL (CURRENT_SYMTAB, new_label);
01171 old_label.Push (WN_label_number(label_wn));
01172 WN_label_number(label_wn) = new_label;
01173 #else
01174 INT32 new_label = ++SYMTAB_last_label(Current_Symtab);
01175 old_label.Push (WN_label_number(label_wn));
01176 WN_label_number(label_wn) = new_label;
01177 WN_st(label_wn) = NULL;
01178 #endif
01179 }
01180 Is_True (old_label.Elements() == labels.Elements(),
01181 ("Mismatch while walking labels"));
01182 for (i=0; i<gotos.Elements(); i++) {
01183 WN* goto_wn = gotos.Bottom_nth(i);
01184 INT32 cur_label = WN_label_number(goto_wn);
01185 INT j;
01186 for (j=0; j<old_label.Elements(); j++)
01187 if (old_label.Bottom_nth(j) == cur_label) break;
01188
01189
01190 if (j == old_label.Elements()) continue;
01191
01192 WN_label_number(goto_wn) = WN_label_number(labels.Bottom_nth(j));
01193 #ifndef _NEW_SYMTAB
01194 WN_st(goto_wn) = NULL;
01195 #endif
01196 }
01197 }
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207
01208 void PF_LOOPNODE::Split_Loops (PF_SPLIT_VECTOR *split_vec) {
01209 INT i;
01210
01211 Is_True (!split_vec->Empty(), ("Split_Loops: Empty split vector\n"));
01212
01213 PF_LOOPNODE* loopnode = split_vec->Get_Loop ();
01214 Is_True (loopnode->Get_Depth() >= _depth,
01215 ("Split_Loops: splitting an inner loop, split_vec is outside\n"));
01216 while (loopnode->Get_Depth() > _depth) loopnode = loopnode->Get_Parent ();
01217 if (loopnode == this) {
01218
01219 mINT16* vec = split_vec->Get_Vector ();
01220 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo();
01221
01222
01223
01224
01225
01226 if ((vec[_depth] > 1) &&
01227
01228
01229
01230
01231
01232 (dli->Num_Iterations_Symbolic ||
01233 (vec[_depth] < Get_Good_Num_Iters(dli)))) {
01234
01235 _split_num = vec[_depth];
01236 extern WN_MAP version_map;
01237 PF_PRINT(fprintf (TFile, "Split loop [0x%p] with stride %d\n",
01238 this, vec[_depth]));
01239
01240 WN* body = WN_do_body(_code);
01241 WN* newbody = LWN_Copy_Tree (body,TRUE,LNO_Info_Map,TRUE,version_map);
01242 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
01243 STACK<WN*> st_old(&PROMPF_pool);
01244 STACK<WN*> st_new(&PROMPF_pool);
01245 Prompf_Assign_Ids(body, newbody, &st_old, &st_new, FALSE);
01246 INT nloops = st_old.Elements();
01247 if (nloops > 0) {
01248 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
01249 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
01250 for (INT i = 0; i < nloops; i++) {
01251 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
01252 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
01253 }
01254 Prompf_Info->Prefetch_Version(old_ids, new_ids, nloops);
01255 }
01256 }
01257 LWN_Copy_Frequency_Tree(newbody, WN_step(_code));
01258 Is_True (Check_Version_Map (body, newbody),
01259 ("Check_Version_Map failed"));
01260 if (Debug_Prefetch) Check_Version_Map (body, newbody);
01261 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
01262 if (dli->Has_Gotos) Rename_Labels_Gotos (newbody);
01263
01264
01265 WN* wn_array[2];
01266 wn_array[0] = body;
01267 wn_array[1] = newbody;
01268 Unrolled_DU_Update (wn_array, 2, dli->Depth);
01269
01270
01271 WN* expr;
01272 TYPE_ID index_type = WN_desc(WN_step(_code));
01273 WN* index_var = LWN_CreateLdid
01274 (OPCODE_make_op(OPR_LDID, Promote_Type(index_type), index_type),
01275 WN_step(_code));
01276
01277 switch (vec[_depth]) {
01278 case 2:
01279 case 4:
01280 case 8:
01281 case 16:
01282 case 32: {
01283
01284 WN* intconst = WN_CreateIntconst (
01285 OPCODE_make_op(OPR_INTCONST, Promote_Type(index_type), MTYPE_V),
01286 (INT64) (vec[_depth]+vec[_depth]-1));
01287 expr = LWN_CreateExp2 (
01288 OPCODE_make_op(OPR_BAND, Promote_Type(index_type), MTYPE_V),
01289 index_var,
01290 intconst);
01291 WN* intconst2 = WN_CreateIntconst (
01292 OPCODE_make_op(OPR_INTCONST, Promote_Type(index_type), MTYPE_V),
01293 (INT64) vec[_depth]);
01294
01295 expr = LWN_CreateExp2 (
01296 OPCODE_make_op(OPR_EQ, Boolean_type, Promote_Type(index_type)),
01297 expr,
01298 intconst2);
01299 }
01300 break;
01301 default: {
01302
01303 WN* intconst = WN_CreateIntconst (
01304 OPCODE_make_op(OPR_INTCONST, Promote_Type(index_type), MTYPE_V),
01305 (INT64) vec[_depth]);
01306 expr = LWN_CreateExp2 (
01307 OPCODE_make_op(OPR_MOD, Promote_Type(index_type), MTYPE_V),
01308 index_var,
01309 intconst);
01310 WN* intconst0 = WN_CreateIntconst (
01311 OPCODE_make_op(OPR_INTCONST, Promote_Type(index_type), MTYPE_V),
01312 (INT64) 0);
01313 expr = LWN_CreateExp2 (
01314 OPCODE_make_op (OPR_EQ, Boolean_type, Promote_Type(index_type)),
01315 expr,
01316 intconst0);
01317 }
01318 break;
01319 }
01320
01321 LWN_Copy_Frequency_Tree(expr, WN_step(_code));
01322 WN* ifnode = LWN_CreateIf (expr, body, newbody);
01323 LWN_Copy_Frequency(ifnode, expr);
01324 LWN_Scale_Frequency_Tree(body, 1.0/vec[_depth]);
01325 LWN_Scale_Frequency_Tree(body, 1.0-1.0/vec[_depth]);
01326
01327 WN* block = WN_CreateBlock ();
01328 LWN_Insert_Block_Before (block, NULL, ifnode);
01329 WN_do_body(_code) = block;
01330 LWN_Set_Parent(block, _code);
01331
01332
01333 Du_Mgr->Du_Add_Use (WN_step(_code), index_var);
01334 Du_Mgr->Du_Add_Use (WN_start(_code), index_var);
01335 Du_Mgr->Ud_Add_Def (index_var, WN_step(_code));
01336 Du_Mgr->Ud_Add_Def (index_var, WN_start(_code));
01337
01338
01339
01340 DEF_LIST *def_list_copy = Du_Mgr->Ud_Get_Def(index_var);
01341 def_list_copy->Set_loop_stmt(_code);
01342
01343
01344 IF_INFO *ii=CXX_NEW (IF_INFO(&LNO_default_pool,TRUE,
01345 Find_SCF_Inside(ifnode,OPC_REGION)!=NULL),
01346 &LNO_default_pool);
01347 WN_MAP_Set(LNO_Info_Map,ifnode,(void *)ii);
01348 DOLOOP_STACK* stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01349 &LNO_local_pool);
01350 Build_Doloop_Stack(ifnode, stack);
01351 LNO_Build_If_Access(ifnode, stack);
01352 CXX_DELETE(stack, &LNO_local_pool);
01353
01354
01355 extern ARRAY_DIRECTED_GRAPH16 *pf_array_dep_graph;
01356 PF_PRINT(fprintf (TFile, "dep-graph: Before updating\n");
01357 pf_array_dep_graph->Print (TFile));
01358
01359
01360 pf_array_dep_graph->Versioned_Dependences_Update (body, newbody,
01361 dli->Depth+1,
01362 version_map);
01363
01364
01365
01366
01367 PF_PRINT(fprintf (TFile, "dep-graph: After updating\n");
01368 pf_array_dep_graph->Print (TFile));
01369
01370
01371 Is_True (LWN_Check_Parentize(_code),
01372 ("CheckParentize failed after loop split\n"));
01373 }
01374
01375
01376 if (split_vec->Get_Loop()->Get_Depth() > _depth) {
01377 for (i=0; i<_child.Elements(); i++)
01378 _child.Bottom_nth(i)->Split_Loops (split_vec);
01379 }
01380
01381 }
01382 }
01383
01384
01385
01386
01387
01388
01389
01390
01391 PF_SPLIT_VECTOR* PF_LOOPNODE::Find_Split_Vector () {
01392 PF_SPLIT_VECTOR* split_vec = NULL;
01393 INT i;
01394 for (i=0; i<_child.Elements(); i++) {
01395 PF_SPLIT_VECTOR* tmp = _child.Bottom_nth(i)->Find_Split_Vector ();
01396 if (tmp) {
01397 if (split_vec) split_vec->Update (tmp);
01398 else split_vec = tmp;
01399 }
01400 }
01401 if (!split_vec) {
01402
01403 for (i=0; i<_bases.Elements(); i++) {
01404 PF_SPLIT_VECTOR* tmp = _bases.Bottom_nth(i)->Find_Split_Vector();
01405 if (tmp) {
01406 if (split_vec) split_vec->Update (tmp);
01407 else split_vec = tmp;
01408 }
01409 }
01410 }
01411 return split_vec;
01412 }
01413
01414
01415
01416
01417
01418
01419
01420
01421
01422 WN* While_Before_Do (WN* do_wn) {
01423 FmtAssert (do_wn && WN_opcode(do_wn) == OPC_DO_LOOP,
01424 ("While_Before_Do: Expected a DO loop"));
01425 do_wn = LWN_Get_Parent(do_wn);
01426 while (do_wn) {
01427 OPERATOR opr = WN_operator(do_wn);
01428
01429 switch (opr) {
01430 case OPR_WHILE_DO:
01431 case OPR_DO_WHILE:
01432 return do_wn;
01433
01434 case OPR_DO_LOOP:
01435 return NULL;
01436
01437 default:
01438 do_wn = LWN_Get_Parent(do_wn);
01439 break;
01440 }
01441 }
01442 return NULL;
01443 }
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453
01454
01455 PF_VOLUME PF_LOOPNODE::Volume_Within_While (WN* while_wn) {
01456
01457 Is_True (while_wn && (WN_opcode(while_wn) == OPC_WHILE_DO ||
01458 WN_opcode(while_wn) == OPC_DO_WHILE),
01459 ("While_Is_Localized: expected a WHILE loop"));
01460
01461
01462
01463
01464
01465
01466
01467
01468 PF_VOLUME while_volume;
01469 PF_LOOPNODE* parent_loop = Get_Parent();
01470 INT i;
01471 INT myidx = INT_MAX;
01472
01473
01474 for (i=0; i<parent_loop->_child.Elements(); i++) {
01475
01476 PF_LOOPNODE* sibling_loop = parent_loop->_child.Bottom_nth(i);
01477 WN* sibling_wn = sibling_loop->Get_Code();
01478
01479 if (sibling_loop == this) myidx = i;
01480
01481 if (Is_Descendent(sibling_wn, while_wn)) {
01482 while_volume += sibling_loop->_total_iter;
01483 if (i > myidx) {
01484
01485
01486
01487 PF_VOLUME tmp (2*Cache.EffSize(1), (Cache.Levels() > 1 ?
01488 2*Cache.EffSize(2) :
01489 0));
01490 while_volume += tmp;
01491 break;
01492 }
01493 }
01494 }
01495
01496 return while_volume;
01497 }
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510 void PF_LOOPNODE::Find_Loc_Loops (PF_LOCLOOP locloop) {
01511 INT i;
01512
01513
01514
01515
01516 BOOL idosplit = locloop.Update (_depth, _single_iter, _volume_confidence);
01517
01518 if (idosplit) {
01519
01520 WN* while_wn = While_Before_Do(Get_Code());
01521
01522 if (while_wn) {
01523
01524 PF_VOLUME while_volume = Volume_Within_While (while_wn);
01525
01526 if (locloop.Loop_1L() == _depth) {
01527
01528
01529
01530 if (while_volume.Localized_1L()) {
01531 locloop.Set_While_Temporal_1L();
01532 }
01533 }
01534 if ((Cache.Levels() > 1) && (locloop.Loop_2L() == _depth)) {
01535
01536 if (while_volume.Localized_2L()) {
01537 locloop.Set_While_Temporal_2L();
01538 }
01539 }
01540 }
01541 }
01542
01543 _locloop = locloop;
01544
01545
01546
01547 for (i=0; i<_child.Elements(); i++)
01548 _child.Bottom_nth(i)->Find_Loc_Loops (locloop);
01549
01550
01551 if (locloop.Localized())
01552 for (i=0; i<_bases.Elements(); i++)
01553 _bases.Bottom_nth(i)->Find_Loc_Space (locloop);
01554
01555
01556
01557 if (idosplit) {
01558
01559 PF_SPLIT_VECTOR* split_vec = Find_Split_Vector ();
01560 PF_PRINT(fprintf (TFile, "After Find_Split_Vector: ");
01561 if (split_vec) split_vec->Print (TFile);
01562 else fprintf (TFile, "split_vec is NULL\n"));
01563 if ((split_vec) && (!split_vec->Empty())) {
01564 _split_vec = split_vec;
01565 Split_Loops (split_vec);
01566 }
01567 }
01568 }
01569
01570 void PF_LOOPNODE::Print (FILE *fp) {
01571 INT i;
01572 fprintf (fp, "Do loop: node 0x%p\n depth %d\n parent 0x%p\n code 0x%p\n refs 0x%p\n",
01573 this, _depth, _parent, _code, &_bases);
01574 fprintf (fp, " single iter: "); _single_iter.Print (fp);
01575 fprintf (fp, " total iter: "); _total_iter.Print (fp);
01576 if (_bases.Elements()) {
01577 fprintf (fp, " Printing the references in each base array (of %d)\n",
01578 _bases.Elements());
01579 for (i=0; i<_bases.Elements(); i++) {
01580 fprintf (fp, " Base array %d -> ", i);
01581 _bases.Bottom_nth(i)->Print (fp);
01582 }
01583 }
01584 else fprintf (fp, " No references, no base arrays\n");
01585 fprintf (fp, " %d children: ", _child.Elements());
01586 for (i=0; i<_child.Elements(); i++) {
01587 fprintf (fp, " 0x%p ", _child.Bottom_nth (i));
01588 }
01589 if (_child.Elements()) {
01590 fprintf (fp, "\n Now printing the children\n\n");
01591 for (i=0; i<_child.Elements(); i++) {
01592 fprintf (fp, "[");
01593 for (INT j=0; j<(_depth+1); j++) fprintf (fp, " *");
01594 fprintf (fp, " %3d ] ", i);
01595 _child.Bottom_nth (i)->Print (fp);
01596 }
01597 }
01598 else fprintf (fp, "\n");
01599 }
01600
01601 void PF_LOOPNODE::Print_Structure () {
01602 INT i;
01603
01604 vb_print_indent;
01605 printf ("Loop: \"%s\" depth (%d)\n",
01606 ST_name(WN_st(WN_kid0(_code))), _depth);
01607 if (_bases.Elements ()) {
01608 vb_print_indent;
01609 printf ("Base arrays (%d): ", _bases.Elements ());
01610 for (i=0; i<_bases.Elements(); i++) {
01611 _bases.Bottom_nth(i)->Get_Symbol()->Print (stdout);
01612 if (i == (_bases.Elements()-1)) printf (".\n");
01613 else printf (", ");
01614 }
01615 }
01616 if (_child.Elements()) {
01617 vb_print_indent;
01618 printf ("Inner loops (%d):\n", _child.Elements());
01619 vb_num_indent += 2;
01620 for (i=0; i<_child.Elements(); i++) {
01621
01622
01623
01624
01625
01626 _child.Bottom_nth (i)->Print_Structure ();
01627 }
01628 vb_num_indent -= 2;
01629 }
01630 }
01631
01632 void PF_LOOPNODE::Print_Volume () {
01633 INT i;
01634 vb_print_indent;
01635 printf ("Loop: \"%s\" depth (%d)\n",
01636 ST_name(WN_st(WN_kid0(_code))), _depth);
01637 vb_print_indent;
01638 printf (" single_iter: "); _single_iter.Print (stdout);
01639 vb_print_indent;
01640 printf (" total_iter: "); _total_iter.Print (stdout);
01641 if (_child.Elements()) {
01642 vb_print_indent;
01643 printf ("Inner loops (%d):\n", _child.Elements());
01644 vb_num_indent += 2;
01645 for (i=0; i<_child.Elements(); i++) {
01646 _child.Bottom_nth (i)->Print_Volume ();
01647 }
01648 vb_num_indent -= 2;
01649 }
01650 }
01651
01652 void PF_LOOPNODE::Print_Splits () {
01653 INT i;
01654 vb_print_indent;
01655 printf ("Loop: depth (%d), index ", _depth); dump_wn (WN_index(_code));
01656 if ((_split_vec) && (!_split_vec->Empty()) && vb_print_split) {
01657 vb_print_indent;
01658 _split_vec->Print (stdout);
01659 }
01660 if ((_split_num > 1) && (vb_print_split)) {
01661 vb_print_indent;
01662 printf (">> split: %d\n", _split_num);
01663 }
01664 if (_child.Elements()) {
01665 if ((_split_num > 1) && (vb_print_split)) {
01666 vb_print_indent;
01667 printf ("Inner loops (%d), prefetch version (stride = %d)\n",
01668 _child.Elements(), _split_num);
01669 vb_num_indent += 2;
01670 for (i=0; i<_child.Elements(); i++) {
01671 _child.Bottom_nth (i)->Print_Splits ();
01672 }
01673 vb_num_indent -= 2;
01674
01675 vb_print_indent;
01676 printf ("Inner loops (%d), non-pref version\n", _child.Elements());
01677 vb_print_split = FALSE;
01678 vb_num_indent += 2;
01679 for (i=0; i<_child.Elements(); i++) {
01680 _child.Bottom_nth (i)->Print_Splits ();
01681 }
01682 vb_num_indent -= 2;
01683 vb_print_split = TRUE;
01684 }
01685 else {
01686 vb_print_indent;
01687 printf ("Inner loops (%d):\n", _child.Elements());
01688 vb_num_indent += 2;
01689 for (i=0; i<_child.Elements(); i++) {
01690 _child.Bottom_nth (i)->Print_Splits ();
01691 }
01692 vb_num_indent -= 2;
01693 }
01694 }
01695 }