00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127 #define __STDC_LIMIT_MACROS
00128 #include <stdint.h>
00129 #ifdef USE_PCH
00130 #include "lno_pch.h"
00131 #endif // USE_PCH
00132 #pragma hdrstop
00133
00134 #define CACHE_LINE_SIZE 128
00135
00136 #include "defs.h"
00137 #include "config_asm.h"
00138 #include "prefetch.h"
00139 #include "access_vector.h"
00140 #include "pf_ref.h"
00141 #include "pf_loop.h"
00142 #include "wn_map.h"
00143 #include "stab.h"
00144 #include "lnopt_main.h"
00145 #include "pf_cache.h"
00146 #include "lwn_util.h"
00147 #include "lnoutils.h"
00148 #include "pf_cg.h"
00149 #include "whirl2src.h"
00150 #include "dep.h"
00151 #include "tlog.h"
00152 #include "alloca.h"
00153 #include "targ_sim.h"
00154
00155 #include "w2c_weak.h"
00156 #include "w2f_weak.h"
00157
00158 #define INT_INFINITY 9999
00159 #define absof(x) (((x)>0) ? (x) : (0-(x)))
00160 #define maxof(x, y) (((x)>(y)) ? (x) : (y))
00161 #define minof(x, y) (((x)<(y)) ? (x) : (y))
00162 #define minmaxof(mn, mx, item) (((item) < (min)) ? ((min) = (item)) : (((item) > (max)) ? ((max) = (item)) : 0))
00163
00164 #ifdef KEY
00165 #define ABS(a) ((a<0)?-(a):(a))
00166 #endif
00167
00168 extern WN_MAP LNO_Info_Map;
00169
00170 inline mINT16 PF_LG::Get_Dim () {
00171 return _myugs->Get_BA()->Get_Dim ();
00172 }
00173 inline mINT16 PF_LG::Get_Depth () {
00174 return _myugs->Get_BA()->Get_Loop()->Get_Depth();
00175 }
00176 inline LU_FMAT* PF_LG::Get_Hslu () {
00177 return _myugs->Get_Hslu ();
00178 }
00179 inline VECTOR_SPACE<FRAC>* PF_LG::Get_KerHs () {
00180 return _myugs->Get_KerHs ();
00181 }
00182 inline VECTOR_SPACE<FRAC>* PF_LG::Get_KerH () {
00183 return _myugs->Get_KerH ();
00184 }
00185 inline PF_LOOPNODE* PF_LG::Get_Loop () {
00186 return _myugs->Get_BA()->Get_Loop ();
00187 }
00188 inline WN* PF_LG::Get_Ref (INT num) {
00189 return _myugs->Get_Ref (num);
00190 }
00191
00192 inline mINT16 PF_LG::Get_Stride_One_Loop () {
00193 return _myugs->Get_Stride_One_Loop ();
00194 }
00195
00196 inline mINT16 PF_LG::Stride_Forward () {
00197 return _myugs->Stride_Forward ();
00198 }
00199
00200 inline mINT16 PF_LG::Get_Stride_One_Size () {
00201 return _myugs->Get_Stride_One_Size ();
00202 }
00203
00204 inline mINT32 PF_LG::Get_Stride_In_Enclosing_Loop () {
00205 return _myugs->Get_Stride_In_Enclosing_Loop ();
00206 }
00207
00208 #if defined(TARG_IA64)
00209 inline BOOL PF_LG::Get_Stride_Accurate() {
00210 return _myugs->Get_Stride_Accurate();
00211 }
00212 #endif
00213
00214 inline PF_LOOPNODE* PF_UGS::Get_Loop () {
00215 return _myba->Get_Loop ();
00216 }
00217 inline mINT16 PF_UGS::Get_Depth () {
00218 return _myba->Get_Loop()->Get_Depth ();
00219 }
00220 inline mINT16 PF_BASE_ARRAY::Get_Depth () {
00221 return _myloopnode->Get_Depth();
00222 }
00223
00224 extern INT64 Get_Good_Num_Iters (DO_LOOP_INFO *dli) {
00225 INT64 real_numiters = dli->Est_Num_Iterations;
00226 if (dli->Num_Iterations_Symbolic &&
00227 dli->Est_Max_Iterations_Index != -1) {
00228 real_numiters = dli->Est_Max_Iterations_Index;
00229 }
00230 return real_numiters;
00231 }
00232
00233 static VECTOR_SPACE<FRAC>* global_lvs[LNO_MAX_DO_LOOP_DEPTH+1][LNO_MAX_DO_LOOP_DEPTH+1];
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 void Initialize_Lvs () {
00247 INT i, j;
00248
00249
00250
00251
00252
00253 for (i=0; i<LNO_MAX_DO_LOOP_DEPTH+1; i++)
00254 for (j=0; j<LNO_MAX_DO_LOOP_DEPTH+1; j++)
00255 global_lvs[i][j] = NULL;
00256
00257 #if 0
00258 FRAC tmp[LNO_MAX_DO_LOOP_DEPTH];
00259 for (i=1; i<=LNO_MAX_DO_LOOP_DEPTH; i++) {
00260
00261
00262 for (j=1; j<=i; j++) {
00263 global_lvs[i][j] = CXX_NEW (VECTOR_SPACE<FRAC>(i,PF_mpool,FALSE),
00264 PF_mpool);
00265
00266 for (INT k=j; k<=i; k++) {
00267
00268 for (INT m=0; m<i; m++)
00269 if (m == (k-1)) tmp[m] = 1; else tmp[m] = 0;
00270 global_lvs[i][j]->Insert (tmp);
00271 }
00272 }
00273 }
00274 #endif
00275 }
00276
00277
00278
00279
00280
00281
00282
00283 void Allocate_Lvs (INT i, INT j) {
00284 static FRAC tmp[LNO_MAX_DO_LOOP_DEPTH];
00285 Is_True (global_lvs[i][j] == NULL,
00286 ("Allocate_Lvs called twice on the same vector space (%d, %d)\n",
00287 i, j));
00288 global_lvs[i][j] = CXX_NEW (VECTOR_SPACE<FRAC>(i,PF_mpool,FALSE),
00289 PF_mpool);
00290
00291 for (INT k=j; k<=i; k++) {
00292
00293 for (INT m=0; m<i; m++)
00294 if (m == (k-1)) tmp[m] = 1; else tmp[m] = 0;
00295 global_lvs[i][j]->Insert (tmp);
00296 }
00297 }
00298
00299
00300
00301
00302
00303
00304
00305 void Cleanup_Lvs () {
00306 for (INT i=1; i<=LNO_MAX_DO_LOOP_DEPTH; i++)
00307 for (INT j=1; j<=i; j++) {
00308 if (global_lvs[i][j])
00309 CXX_DELETE (global_lvs[i][j], PF_mpool);
00310 }
00311 }
00312
00313 PF_LG::PF_LG (PF_LG* lg) : _refvecs(PF_mpool) {
00314 INT dim = lg->Get_Dim();
00315 INT i;
00316
00317 for (i=0; i<lg->_refvecs.Elements(); i++) {
00318 _refvecs.Push (CXX_NEW (PF_REFVEC(lg->_refvecs.Bottom_nth(i)), PF_mpool));
00319 }
00320 _depth = lg->_depth - 1;
00321 _myugs = lg->_myugs;
00322 _leading_ref = lg->_leading_ref;
00323 _c = CXX_NEW_ARRAY (INT64, dim, PF_mpool);
00324 for (i=0; i<dim; i++)
00325 _c[i] = lg->_c[i];
00326 for (i=0; i<LNO_MAX_DO_LOOP_DEPTH; i++) {
00327 _min_iter[i] = lg->_min_iter[i];
00328 _max_iter[i] = lg->_max_iter[i];
00329 }
00330 _min_dist = lg->_min_dist;
00331 _max_dist = lg->_max_dist;
00332
00333 _numlines_1L = lg->_numlines_1L;
00334 _numlines_2L = lg->_numlines_2L;
00335 }
00336
00337
00338
00339
00340
00341
00342
00343 extern void Listing_Emit_WN (FILE* fp, WN* wn) {
00344 fprintf (fp, " \"");
00345 extern WN* pf_func_nd;
00346
00347 if (WN_operator(wn) != OPR_ISTORE) {
00348 Whirl2Src_Emit (fp, wn);
00349 fprintf (fp, "\" ");
00350 return;
00351 }
00352
00353
00354 switch (PU_src_lang(Get_Current_PU())) {
00355 case PU_C_LANG:
00356 case PU_CXX_LANG:
00357 {
00358 W2C_Push_PU (pf_func_nd, wn);
00359
00360 char buf[100];
00361 W2C_Translate_Istore_Lhs (buf, 100, WN_kid1(wn), WN_offset(wn),
00362 WN_ty(wn), WN_desc(wn));
00363 fprintf (fp, "%s", buf);
00364 W2C_Pop_PU();
00365 break;
00366 }
00367 case PU_F90_LANG:
00368 case PU_F77_LANG:
00369 {
00370 W2F_Push_PU(pf_func_nd, wn);
00371
00372 char buf[100];
00373 W2F_Translate_Istore_Lhs (buf, 100, WN_kid1(wn), WN_offset(wn),
00374 WN_ty(wn), WN_desc(wn));
00375 fprintf (fp, "%s", buf);
00376 W2F_Pop_PU();
00377 break;
00378 }
00379 default:
00380 fprintf (fp, "var-unknown-src-lang");
00381 break;
00382 }
00383 fprintf (fp, "\" ");
00384 return;
00385 }
00386
00387
00388
00389
00390
00391
00392
00393 PF_LG::PF_LG (WN* ref,
00394 mINT16 bitpos,
00395 mINT16 depth,
00396 PF_UGS* myugs) : _refvecs(PF_mpool) {
00397
00398
00399 _depth = depth;
00400 _myugs = myugs;
00401 _leading_ref = bitpos;
00402 INT i;
00403 for (i=0; i<LNO_MAX_DO_LOOP_DEPTH; i++) {
00404 _max_iter[i] = _min_iter[i] = 0;
00405 }
00406 _min_dist = _max_dist = 0;
00407
00408 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
00409 _c = CXX_NEW_ARRAY (INT64, aa->Num_Vec (), PF_mpool);
00410 for (i=0; i<aa->Num_Vec(); i++) _c[i] = aa->Dim(i)->Const_Offset;
00411 _numlines_1L = _numlines_2L = 0;
00412 }
00413
00414 PF_LG::~PF_LG () {
00415 while (_refvecs.Elements()) CXX_DELETE (_refvecs.Pop (), PF_mpool);
00416 CXX_DELETE_ARRAY (_c, PF_mpool);
00417 }
00418
00419
00420
00421
00422
00423
00424
00425 BOOL PF_LG::Check () {
00426
00427 INT num = _refvecs.Elements();
00428 INT i;
00429 for (i=0; i<num; i++)
00430 Is_True (_leading_ref != _refvecs.Bottom_nth(i)->Refnum(),
00431 ("oops -- duplicate in LG, with leading ref\n"));
00432 for (i=0; i<num; i++) {
00433 INT cur = _refvecs.Bottom_nth(i)->Refnum();
00434 for (INT j=i+1; j<num; j++)
00435 Is_True (cur != _refvecs.Bottom_nth(j)->Refnum(),
00436 ("oops -- duplicate in LG, between refs\n"));
00437 }
00438 return TRUE;
00439 }
00440
00441
00442
00443
00444
00445
00446
00447 BOOL PF_LG::Check_Ref (mINT16 refnum) {
00448 INT num = _refvecs.Elements();
00449 Is_True (refnum != _leading_ref, ("Check_Ref: ref same as leading ref\n"));
00450 for (INT i=0; i<num; i++)
00451 Is_True (refnum != _refvecs.Bottom_nth(i)->Refnum(),
00452 ("Check_Ref: ref (%d) is a duplicate\n", refnum));
00453 return TRUE;
00454 }
00455
00456
00457
00458
00459
00460
00461
00462
00463 FRAC* PF_LG::Ref_In_LG (WN* ref) {
00464 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
00465 INT indxs = aa->Num_Vec ();
00466 mINT16 maxdepth = Get_Depth()+1;
00467 FRAC* dvec = CXX_NEW_ARRAY (FRAC, maxdepth, PF_mpool);
00468 LU_FMAT* Hslu = Get_Hslu();
00469
00470 INT i;
00471 for (i=0; i<indxs; i++)
00472 if (aa->Dim(i)->Const_Offset != _c[i])
00473 break;
00474
00475 if (i == indxs) {
00476
00477
00478 return dvec;
00479 }
00480
00481
00482 FRAC *deltac = CXX_NEW_ARRAY (FRAC, indxs, PF_mpool);
00483 for (i=0; i<indxs; i++)
00484 deltac[i] = FRAC (_c[i] - aa->Dim(i)->Const_Offset);
00485
00486 BOOL soln = Hslu->Particular_Solution (deltac, dvec);
00487 CXX_DELETE_ARRAY (deltac, PF_mpool);
00488
00489 if (!soln) {
00490
00491 CXX_DELETE_ARRAY (dvec, PF_mpool);
00492 return NULL;
00493 }
00494
00495
00496
00497 PF_PRINT (
00498 fprintf (TFile, "Found a particular solution: ");
00499 dvec->Print (TFile);
00500 fprintf (TFile, "\n");
00501 );
00502
00503
00504
00505 for (i=0; i<_depth; i++) {
00506 if (dvec[i].N()) {
00507
00508 PF_PRINT(fprintf (TFile, "solution not within desired loop-nest\n"););
00509 CXX_DELETE_ARRAY (dvec, PF_mpool);
00510 return NULL;
00511 }
00512 }
00513
00514
00515 for (i=_depth; i<maxdepth; i++) {
00516 if (dvec[i].D() != 1) {
00517
00518
00519 PF_PRINT(fprintf (TFile,
00520 "solution is not integral\n"););
00521 CXX_DELETE_ARRAY (dvec, PF_mpool);
00522 return NULL;
00523 }
00524
00525 if (dvec[i].N() > 20) {
00526
00527 PF_PRINT(fprintf (TFile, "reuse across more than 20 iters\n"););
00528 CXX_DELETE_ARRAY (dvec, PF_mpool);
00529 return NULL;
00530 }
00531 }
00532
00533
00534
00535
00536
00537 PF_LOOPNODE* loopnode = Get_Loop();
00538
00539 for (i=(maxdepth-1); i>=_depth; i--) {
00540 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
00541
00542 if ((dvec[i].N() == 0) ||
00543 ((dli->Step->Is_Const()) &&
00544 ((dli->Step->Const_Offset == 1) ||
00545 (dli->Step->Const_Offset == -1) ||
00546 ((dvec[i].N() % (dli->Step)->Const_Offset) == 0)))) {
00547 loopnode = loopnode->Get_Parent ();
00548 }
00549 else {
00550
00551
00552 CXX_DELETE_ARRAY (dvec, PF_mpool);
00553 return NULL;
00554 }
00555 }
00556
00557
00558
00559
00560 return dvec;
00561 }
00562
00563
00564
00565
00566
00567
00568
00569 INT64 PF_LG::Distance_LR (WN* ref, FRAC* dvec) {
00570
00571 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
00572 INT dims = aa->Num_Vec();
00573 ACCESS_VECTOR* av = aa->Dim(dims-1);
00574 INT64 step = _c[dims-1]-av->Const_Offset;
00575 mINT16 maxdepth = Get_Depth()+1;
00576
00577 for (INT i=_depth; i<maxdepth; i++)
00578 step += dvec[i].N() * av->Loop_Coeff(i);
00579
00580
00581
00582 WN* my_iload_wn = LWN_Get_Parent(ref);
00583 WN* lr_iload_wn = LWN_Get_Parent(Get_Ref(Get_LeadingRef()));
00584
00585 return ((INT64) ((ABS(WN_element_size (ref))*step)+
00586 (WN_offset(my_iload_wn)-WN_offset(lr_iload_wn))));
00587 }
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597 INT PF_LG::LR_Compare (mINT16 refvecnum1, mINT16 refvecnum2) {
00598 INT i;
00599 mINT16 maxdepth = Get_Depth()+1;
00600 WN* ref1 = ((refvecnum1 == -1) ?
00601 Get_Ref (_leading_ref) :
00602 Get_Ref (_refvecs.Bottom_nth(refvecnum1)->Refnum ()));
00603 WN* ref2 = ((refvecnum2 == -1) ?
00604 Get_Ref (_leading_ref) :
00605 Get_Ref (_refvecs.Bottom_nth(refvecnum2)->Refnum ()));
00606
00607 Is_True ((refvecnum1 != -1) || (refvecnum2 != -1),
00608 ("LR_Compare: both refs are the same (leading ref)"));
00609
00610 FRAC* dvec = CXX_NEW_ARRAY (FRAC, maxdepth+1, PF_mpool);
00611
00612 if (refvecnum1 == -1) {
00613
00614 FRAC* orig_dvec = _refvecs.Bottom_nth(refvecnum2)->Dvec ();
00615 for (i=0; i<maxdepth; i++) dvec[i] = orig_dvec[i];
00616 }
00617 else if (refvecnum2 == -1) {
00618
00619 FRAC* orig_dvec = _refvecs.Bottom_nth(refvecnum1)->Dvec ();
00620 for (i=0; i<maxdepth; i++) dvec[i] = ((FRAC) 0) - orig_dvec[i];
00621 }
00622 else {
00623
00624 FRAC* orig_dvec1 = _refvecs.Bottom_nth(refvecnum1)->Dvec ();
00625 FRAC* orig_dvec2 = _refvecs.Bottom_nth(refvecnum2)->Dvec ();
00626 for (i=0; i<maxdepth; i++) dvec[i] = orig_dvec2[i]-orig_dvec1[i];
00627 }
00628
00629
00630
00631 ACCESS_ARRAY* aa1 = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref1);
00632 ACCESS_ARRAY* aa2 = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref2);
00633 INT dims = aa1->Num_Vec ();
00634 ACCESS_VECTOR* av1 = aa1->Dim(dims-1);
00635 ACCESS_VECTOR* av2 = aa2->Dim(dims-1);
00636
00637 for (i=_depth; i<maxdepth; i++)
00638 if (dvec[i].N() != 0) break;
00639
00640 if (i == maxdepth) {
00641
00642
00643
00644 CXX_DELETE_ARRAY (dvec, PF_mpool);
00645
00646 if (av1->Const_Offset == av2->Const_Offset) return 0;
00647
00648
00649 INT stride_one_loop = Get_Stride_One_Loop ();
00650
00651 if (stride_one_loop < 0) return 0;
00652
00653 mINT16 stride_forward = Stride_Forward ();
00654 Is_True (stride_forward, ("stride one loop exists, but no direction\n"));
00655 if (((stride_forward > 0) && (av1->Const_Offset > av2->Const_Offset)) ||
00656 ((stride_forward < 0) && (av1->Const_Offset < av2->Const_Offset)))
00657
00658 return 1;
00659 else return -1;
00660 }
00661
00662
00663 INT nz = i;
00664 PF_LOOPNODE* loopnode = Get_Loop();
00665 for (i=maxdepth-1; i!=nz; i--)
00666 loopnode = loopnode->Get_Parent ();
00667 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
00668
00669
00670
00671 BOOL steppos = TRUE;
00672 if ((dli->Step->Is_Const()) &&
00673 (dli->Step->Const_Offset < 0)) steppos = FALSE;
00674
00675 if (((dvec[nz].N() < 0) && steppos) || ((dvec[nz].N() > 0) && !steppos)) {
00676
00677 CXX_DELETE_ARRAY (dvec, PF_mpool);
00678 PF_PRINT (fprintf (TFile, "LR_Compare: ");
00679 aa2->Print (TFile);
00680 fprintf (TFile, " leads ");
00681 aa1->Print (TFile));
00682 return -1;
00683 }
00684
00685 CXX_DELETE_ARRAY (dvec, PF_mpool);
00686 PF_PRINT (fprintf (TFile, "LR_Compare: ");
00687 aa1->Print (TFile);
00688 fprintf (TFile, " leads ");
00689 aa2->Print (TFile));
00690 return 1;
00691 }
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703 BOOL PF_LG::Add_Ref (WN* ref, mINT16 bitpos) {
00704 Is_True (Check_Ref (bitpos), ("oops - error\n"));
00705 FRAC* dvec = Ref_In_LG (ref);
00706
00707 if (!dvec) return FALSE;
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 mINT16 maxdepth = Get_Depth()+1;
00718 INT i;
00719 INT64 distance = Distance_LR (ref, dvec);
00720 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
00721 INT dims = aa->Num_Vec();
00722 ACCESS_VECTOR* av = aa->Dim(dims-1);
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733 for (i=_depth; i<maxdepth; i++)
00734 if (dvec[i].N() != 0) break;
00735
00736 if (i == maxdepth) {
00737
00738 if (aa->Dim(dims-1)->Const_Offset == _c[dims-1]) {
00739
00740
00741
00742
00743 if (distance >= 0) {
00744
00745 _refvecs.Push (CXX_NEW(PF_REFVEC(bitpos, maxdepth, dvec, distance),
00746 PF_mpool));
00747 }
00748 else {
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758 for (i=0; i<_refvecs.Elements(); i++) {
00759 _refvecs.Bottom_nth(i)->Update_Distance(distance);
00760 }
00761
00762
00763 if (distance > _max_dist) _max_dist = distance;
00764 else if (distance < _min_dist) _min_dist = distance;
00765
00766 _max_dist -= distance;
00767 _min_dist -= distance;
00768
00769
00770
00771 _refvecs.Push (CXX_NEW
00772 (PF_REFVEC(_leading_ref, maxdepth, dvec, -distance),
00773 PF_mpool));
00774 _leading_ref = bitpos;
00775 }
00776
00777 CXX_DELETE_ARRAY (dvec, PF_mpool);
00778 return TRUE;
00779 }
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791 INT stride_one_loop = Get_Stride_One_Loop ();
00792 if (stride_one_loop < 0) {
00793
00794
00795
00796 if (distance > _max_dist) _max_dist = distance;
00797 else if (distance < _min_dist) _min_dist = distance;
00798 _refvecs.Push (CXX_NEW(PF_REFVEC(bitpos, maxdepth, dvec, distance),
00799 PF_mpool));
00800 CXX_DELETE_ARRAY (dvec, PF_mpool);
00801 return TRUE;
00802 }
00803
00804
00805 mINT16 stride_forward = Stride_Forward ();
00806 Is_True (stride_forward, ("Stride-one loop but no direction!"));
00807
00808
00809 if (((stride_forward > 0) &&
00810 (aa->Dim(dims-1)->Const_Offset > _c[dims-1])) ||
00811 ((stride_forward < 0) &&
00812 (aa->Dim(dims-1)->Const_Offset < _c[dims-1]))) {
00813
00814 INT64 const_diff = aa->Dim(dims-1)->Const_Offset-_c[dims-1];
00815
00816
00817 for (i=0; i<dims; i++) _c[i] = aa->Dim(i)->Const_Offset;
00818
00819
00820
00821
00822 _min_iter[stride_one_loop] =
00823 minof(_min_iter[stride_one_loop], const_diff);
00824 _max_iter[stride_one_loop] =
00825 maxof(_max_iter[stride_one_loop], const_diff);
00826
00827
00828 for (i=0; i<_refvecs.Elements(); i++)
00829 _refvecs.Bottom_nth(i)->Update_Distance(distance);
00830
00831
00832 if (distance > _max_dist) _max_dist = distance;
00833 else if (distance < _min_dist) _min_dist = distance;
00834
00835 _max_dist -= distance;
00836 _min_dist -= distance;
00837
00838
00839
00840 _refvecs.Push (CXX_NEW
00841 (PF_REFVEC(_leading_ref, maxdepth, dvec, -distance),
00842 PF_mpool));
00843 _leading_ref = bitpos;
00844 CXX_DELETE_ARRAY (dvec, PF_mpool);
00845 return TRUE;
00846 }
00847
00848
00849
00850 INT64 const_diff = _c[dims-1]-aa->Dim(dims-1)->Const_Offset;
00851 _min_iter[stride_one_loop] = minof(_min_iter[stride_one_loop], const_diff);
00852 _max_iter[stride_one_loop] = maxof(_max_iter[stride_one_loop], const_diff);
00853
00854 if (distance > _max_dist) _max_dist = distance;
00855 else if (distance < _min_dist) _min_dist = distance;
00856
00857 _refvecs.Push (CXX_NEW (PF_REFVEC(bitpos, maxdepth, dvec, distance),
00858 PF_mpool));
00859 CXX_DELETE_ARRAY (dvec, PF_mpool);
00860 return TRUE;
00861 }
00862
00863 INT nz = i;
00864
00865
00866
00867
00868
00869 PF_LOOPNODE* loopnode = Get_Loop();
00870 for (i=maxdepth-1; i!=nz; i--)
00871 loopnode = loopnode->Get_Parent ();
00872 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
00873
00874
00875
00876 BOOL steppos = TRUE;
00877 if ((dli->Step->Is_Const()) &&
00878 (dli->Step->Const_Offset < 0)) steppos = FALSE;
00879
00880 if (((dvec[nz].N() < 0) && steppos) ||
00881 ((dvec[nz].N() > 0) && !steppos)) {
00882
00883
00884
00885
00886
00887
00888 for (i=0; i<aa->Num_Vec(); i++) _c[i] = aa->Dim(i)->Const_Offset;
00889
00890
00891
00892
00893 for (i=0; i<_refvecs.Elements(); i++) {
00894 _refvecs.Bottom_nth(i)->Update_dvec (dvec);
00895 _refvecs.Bottom_nth(i)->Update_Distance (distance);
00896 }
00897
00898
00899 for (i=0; i<maxdepth; i++) {
00900 mINT32 val = dvec[i].N();
00901 _max_iter[i] = _max_iter[i] - val;
00902 _min_iter[i] = _min_iter[i] - val;
00903
00904
00905 dvec[i] = -dvec[i];
00906 val = dvec[i].N();
00907 if (val > _max_iter[i]) _max_iter[i] = val;
00908 else if (val < _min_iter[i]) _min_iter[i] = val;
00909 }
00910
00911
00912 if (distance > _max_dist) _max_dist = distance;
00913 else if (distance < _min_dist) _min_dist = distance;
00914
00915 _max_dist -= distance;
00916 _min_dist -= distance;
00917
00918
00919 _refvecs.Push (CXX_NEW
00920 (PF_REFVEC(_leading_ref, maxdepth, dvec, -distance),
00921 PF_mpool));
00922 _leading_ref = bitpos;
00923 }
00924 else {
00925
00926
00927 _refvecs.Push (CXX_NEW (PF_REFVEC(bitpos, maxdepth, dvec, distance),
00928 PF_mpool));
00929
00930
00931 for (INT i=_depth; i<maxdepth; i++) {
00932 mINT32 val = dvec[i].N();
00933 if (val > _max_iter[i]) _max_iter[i] = val;
00934 else if (val < _min_iter[i]) _min_iter[i] = val;
00935 }
00936 if (distance > _max_dist) _max_dist = distance;
00937 else if (distance < _min_dist) _min_dist = distance;
00938 }
00939 CXX_DELETE_ARRAY (dvec, PF_mpool);
00940 return TRUE;
00941 }
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955 BOOL PF_LG::Add_Group (PF_LG* lg, WN* lref) {
00956 #ifdef Is_True_On
00957 if (Debug_Prefetch) {
00958 Check ();
00959 lg->Check ();
00960 Check_Ref (lg->_leading_ref);
00961 for (INT i=0; i<lg->_refvecs.Elements(); i++)
00962 Check_Ref (lg->_refvecs.Bottom_nth(i)->Refnum());
00963 }
00964 #endif
00965 FRAC* dvec = Ref_In_LG (lref);
00966
00967 if (!dvec) return FALSE;
00968
00969 mINT16 maxdepth = Get_Depth ()+1;
00970 INT i;
00971
00972 _numlines_1L = _numlines_2L = 0;
00973
00974
00975 INT64 distance = Distance_LR (lref, dvec);
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989 for (i=_depth; i<maxdepth; i++)
00990 if (dvec[i].N() != 0) break;
00991
00992 if (i == maxdepth) {
00993
00994 CXX_DELETE_ARRAY (dvec, PF_mpool);
00995 FmtAssert (FALSE,
00996 ("Two identical refs went into different locality groups\n"));
00997 return FALSE;
00998 }
00999
01000 INT nz = i;
01001
01002
01003
01004
01005
01006 PF_LOOPNODE* loopnode = Get_Loop();
01007 for (i=maxdepth-1; i!=nz; i--)
01008 loopnode = loopnode->Get_Parent ();
01009 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
01010
01011
01012
01013 BOOL steppos = TRUE;
01014 if ((dli->Step->Is_Const()) &&
01015 (dli->Step->Const_Offset < 0)) steppos = FALSE;
01016
01017 if (((dvec[nz].N() < 0) && steppos) ||
01018 ((dvec[nz].N() > 0) && !steppos)) {
01019
01020
01021
01022
01023
01024 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, lref);
01025 for (i=0; i<aa->Num_Vec(); i++) _c[i] = aa->Dim(i)->Const_Offset;
01026
01027
01028
01029 for (i=0; i<_refvecs.Elements(); i++) {
01030 _refvecs.Bottom_nth(i)->Update_dvec (dvec);
01031 _refvecs.Bottom_nth(i)->Update_Distance (distance);
01032 }
01033
01034
01035 for (i=0; i<maxdepth; i++) {
01036 mINT32 val = dvec[i].N();
01037 INT64 oldmin = _min_iter[i];
01038 INT64 oldmax = _max_iter[i];
01039 _min_iter[i] = lg->_min_iter[i];
01040 _max_iter[i] = lg->_max_iter[i];
01041
01042
01043
01044 _max_iter[i] = maxof(_max_iter[i], (oldmax-val));
01045 _min_iter[i] = minof(_min_iter[i], (oldmax-val));
01046
01047
01048
01049 dvec[i] = -dvec[i];
01050
01051
01052 val = dvec[i].N();
01053 if (val > _max_iter[i]) _max_iter[i] = val;
01054 else if (val < _min_iter[i]) _min_iter[i] = val;
01055 }
01056
01057
01058 INT64 oldmin = _min_dist;
01059 INT64 oldmax = _max_dist;
01060 _min_dist = lg->_min_dist;
01061 _max_dist = lg->_max_dist;
01062 if ((oldmin-distance) < _min_dist) _min_dist = (oldmin-distance);
01063 if ((oldmax-distance) > _max_dist) _max_dist = (oldmax-distance);
01064
01065
01066 Is_True (Check (), ("oops - error\n"));
01067 _refvecs.Push (CXX_NEW
01068 (PF_REFVEC(_leading_ref, maxdepth, dvec, -distance),
01069 PF_mpool));
01070
01071 for (i=0; i<lg->_refvecs.Elements(); i++) {
01072 _refvecs.Push (CXX_NEW (PF_REFVEC(lg->_refvecs.Bottom_nth(i)),
01073 PF_mpool));
01074 }
01075 _leading_ref = lg->_leading_ref;
01076 Is_True (Check (), ("oops - error\n"));
01077 }
01078 else {
01079
01080
01081 Is_True (Check (), ("oops - error\n"));
01082 _refvecs.Push (CXX_NEW
01083 (PF_REFVEC(lg->_leading_ref, maxdepth, dvec, distance),
01084 PF_mpool));
01085 Is_True (Check (), ("oops - error\n"));
01086
01087 INT oldmax = _refvecs.Elements ();
01088
01089 for (i=0; i<lg->_refvecs.Elements(); i++) {
01090 _refvecs.Push (CXX_NEW (PF_REFVEC(lg->_refvecs.Bottom_nth(i)),
01091 PF_mpool));
01092 }
01093 Is_True (Check (), ("oops - error\n"));
01094
01095
01096
01097
01098
01099 for (i=0; i<maxdepth; i++) dvec[i] = -dvec[i];
01100 for (i=oldmax; i<_refvecs.Elements(); i++) {
01101 _refvecs.Bottom_nth(i)->Update_dvec (dvec);
01102 _refvecs.Bottom_nth(i)->Update_Distance (-distance);
01103 }
01104
01105
01106 for (i=0; i<maxdepth; i++) {
01107 INT inc_min_iter = lg->_min_iter[i] - dvec[i].N();
01108 INT inc_max_iter = lg->_max_iter[i] - dvec[i].N();
01109 _max_iter[i] = maxof (_max_iter[i], inc_max_iter);
01110 _min_iter[i] = minof (_min_iter[i], inc_min_iter);
01111
01112
01113
01114 mINT32 val = -dvec[i].N();
01115 if (val > _max_iter[i]) _max_iter[i] = val;
01116 else if (val < _min_iter[i]) _min_iter[i] = val;
01117 }
01118 if ((lg->_min_dist+distance) < _min_dist)
01119 _min_dist = lg->_min_dist+distance;
01120 if ((lg->_max_dist+distance) > _max_dist)
01121 _max_dist = lg->_max_dist+distance;
01122 }
01123 CXX_DELETE_ARRAY (dvec, PF_mpool);
01124 return TRUE;
01125 }
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135 static void Insert (INT64* dist, INT num, INT64 elem) {
01136 INT i, j;
01137 for (i=0; i<num; i++) if (dist[i] >= elem) break;
01138 if (i == num)
01139
01140 dist[num] = elem;
01141 else {
01142 for (j=num; j>i; j--) dist[j] = dist[j-1];
01143 dist[i] = elem;
01144 }
01145 }
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155 void PF_LG::Split_LG () {
01156
01157 if (_numlines_1L) return;
01158
01159 INT i;
01160 mINT16 stride_one_loop = Get_Stride_One_Loop ();
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183 INT64* dist = CXX_NEW_ARRAY (INT64, _refvecs.Elements()+1, PF_mpool);
01184 Insert (dist, 0, 0);
01185 for (i=0; i<_refvecs.Elements(); i++)
01186 Insert (dist, i+1, _refvecs.Bottom_nth(i)->Distance());
01187
01188
01189
01190
01191 _numlines_1L = 1;
01192 _numlines_2L = 1;
01193
01194 if (stride_one_loop == -1) {
01195
01196
01197
01198
01199 INT64 gap;
01200 INT64 curdist_1L = 0;
01201 INT64 curdist_2L = 0;
01202 for (i=1; i<(_refvecs.Elements()+1); i++) {
01203 gap = dist[i] - dist[i-1];
01204 Is_True ((gap >= 0), ("Split_LG: some error in sorting distances\n"));
01205 curdist_1L += gap;
01206 if (curdist_1L > Cache.LineSize(1)) {
01207 _numlines_1L++;
01208 curdist_1L = 0;
01209 }
01210 if (Cache.Levels() > 1) {
01211 curdist_2L += gap;
01212 if (curdist_2L > Cache.LineSize(2)) {
01213 _numlines_2L++;
01214 curdist_2L = 0;
01215 }
01216 }
01217 }
01218 }
01219 else {
01220 INT64 curdist_1L = 0;
01221 INT64 curdist_2L = 0;
01222 INT64 maxdist_1L = 0;
01223 INT64 maxdist_2L = 0;
01224 INT64 gap = 0;
01225
01226 for (i=1; i<(_refvecs.Elements()+1); i++) {
01227 gap = dist[i] - dist[i-1];
01228 Is_True ((gap >= 0), ("Split_LG: some error in sorting distances\n"));
01229 if (gap > Cache.LineSize(1)) {
01230
01231 _numlines_1L++;
01232 maxdist_1L = maxof (maxdist_1L, curdist_1L);
01233 curdist_1L = 0;
01234 if (Cache.Levels() > 1) {
01235 if (gap > Cache.LineSize(2)) {
01236 _numlines_2L++;
01237 maxdist_2L = maxof (maxdist_2L, curdist_2L);
01238 curdist_2L = 0;
01239 }
01240 else curdist_2L += gap;
01241 }
01242 }
01243 else {
01244 curdist_1L += gap;
01245 curdist_2L += gap;
01246 }
01247 }
01248 maxdist_1L = maxof (maxdist_1L, curdist_1L);
01249 maxdist_2L = maxof (maxdist_2L, curdist_2L);
01250
01251
01252
01253
01254
01255
01256 INT64 maxdist;
01257 if (Cache.Levels() == 1) maxdist = maxdist_1L;
01258 else maxdist = maxdist_2L;
01259
01260
01261 INT64 sz = (INT) ABS(WN_element_size(Get_Ref(_leading_ref)));
01262 maxdist = (maxdist+sz-1)/sz;
01263 if ((_max_iter[stride_one_loop]-_min_iter[stride_one_loop]) < maxdist) {
01264
01265 _min_iter[stride_one_loop] = 0;
01266 _max_iter[stride_one_loop] = maxdist;
01267 }
01268 }
01269
01270 Is_True ((_numlines_1L>0),
01271 ("Split_LG returned 0 (or less) lines in lev-1 cache\n"));
01272 Is_True ((_numlines_2L>0),
01273 ("Split_LG returned 0 (or less) lines in lev-2 cache\n"));
01274 CXX_DELETE_ARRAY (dist, PF_mpool);
01275 }
01276 #if 0
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289 BOOL PF_LG::Is_Outer_Tile () {
01290 INT i, j;
01291 PF_LOOPNODE* ln;
01292 WN* outer_wn;
01293 INT ref_depth;
01294
01295
01296
01297 ln = Get_Loop ();
01298 ref_depth = ln->Get_Depth();
01299 DO_LOOP_INFO* dli;
01300 for (i=ref_depth; i!=_depth; i--) {
01301 dli = ln->Get_LoopInfo ();
01302
01303
01304 ln = ln->Get_Parent ();
01305 }
01306
01307
01308 outer_wn = ln->Get_Code();
01309
01310 {
01311
01312 WN* index_wn = WN_index(outer_wn);
01313 char* name = ((ST_class(WN_st(index_wn)) != CLASS_PREG) ?
01314 ST_name(WN_st(index_wn)) :
01315 (WN_offset(index_wn) > Last_Dedicated_Preg_Offset ?
01316 Preg_Name(WN_offset(index_wn)) : "DEDICATED PREG"));
01317
01318 }
01319
01320 ACCESS_ARRAY* aa = _myugs->Get_AA();
01321 ACCESS_VECTOR* av;
01322 for (i=0; i<aa->Num_Vec(); i++) {
01323 av = aa->Dim(i);
01324 ln = Get_Loop ();
01325 for (j=ref_depth; j>_depth; j--) {
01326 if (av->Loop_Coeff(j)) {
01327
01328 WN* cur_wn = ln->Get_Code();
01329 Is_True (cur_wn != outer_wn,
01330 ("Temporal reuse, but loop var used in index expr"));
01331
01332 while (1) {
01333 cur_wn = Outer_Tile (cur_wn, Du_Mgr);
01334 if (cur_wn == NULL) break;
01335
01336 if (cur_wn == outer_wn) {
01337
01338 return TRUE;
01339 }
01340 }
01341 }
01342 ln=ln->Get_Parent();
01343 }
01344 }
01345
01346 return FALSE;
01347 }
01348 #endif
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362 PF_LOOPNODE* Is_Outer_Tile (PF_LOOPNODE* inner_ln, PF_LOOPNODE* outer_ln,
01363 ACCESS_ARRAY* aa) {
01364 INT i, j;
01365 PF_LOOPNODE* ln;
01366 WN* outer_wn;
01367 INT ref_depth = inner_ln->Get_Depth();
01368 INT loop_depth = outer_ln->Get_Depth();
01369
01370
01371
01372 ln = inner_ln;
01373 ref_depth = ln->Get_Depth();
01374 for (i=ref_depth; i!=loop_depth; i--) {
01375 ln = ln->Get_Parent ();
01376 }
01377 outer_wn = ln->Get_Code();
01378
01379 {
01380
01381 WN* index_wn = WN_index(outer_wn);
01382 const char* name = ((ST_class(WN_st(index_wn)) != CLASS_PREG) ?
01383 ST_name(WN_st(index_wn)) :
01384 (WN_offset(index_wn) > Last_Dedicated_Preg_Offset ?
01385 Preg_Name(WN_offset(index_wn)) : "DEDICATED PREG"));
01386
01387 }
01388
01389
01390 ACCESS_VECTOR* av;
01391 for (i=0; i<aa->Num_Vec(); i++) {
01392 av = aa->Dim(i);
01393 ln = inner_ln;
01394 for (j=ref_depth; j>loop_depth; j--) {
01395 if (av->Loop_Coeff(j)) {
01396
01397 WN* cur_wn = ln->Get_Code();
01398 Is_True (cur_wn != outer_wn,
01399 ("Temporal reuse, but loop var used in index expr"));
01400
01401 while (1) {
01402 cur_wn = Outer_Tile (cur_wn, Du_Mgr);
01403 if (cur_wn == NULL) break;
01404
01405 if (cur_wn == outer_wn) {
01406
01407 return ln;
01408 }
01409 }
01410 }
01411 ln=ln->Get_Parent();
01412 }
01413 }
01414
01415 return NULL;
01416 }
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432 PF_VOLUME PF_LG::Volume () {
01433 PF_VOLUME myvol (1, 1);
01434 INT i, j;
01435 VECTOR_SPACE<FRAC>* KerH = Get_KerH ();
01436 VECTOR_SPACE<FRAC>* KerHs = Get_KerHs ();
01437
01438 INT num_loops = KerHs->N();
01439 INT num_bvs = KerHs->D();
01440 BOOL* done_loop = CXX_NEW_ARRAY (BOOL, num_loops, PF_mpool);
01441
01442 Is_True (Check (), ("oops - error\n"));
01443
01444 Split_LG ();
01445
01446 for (i=0; i<num_loops; i++) done_loop[i] = FALSE;
01447
01448
01449 VECTOR_SPACE<FRAC> sKerH (KerH, PF_mpool);
01450 VECTOR_SPACE<FRAC> sKerHs (KerHs, PF_mpool);
01451
01452
01453 VECTOR_SPACE<FRAC> thisnest (num_loops, PF_mpool, FALSE);
01454 FRAC ubv[LNO_MAX_DO_LOOP_DEPTH];
01455 for (i=_depth; i<num_loops; i++) {
01456
01457 for (j=0; j<num_loops; j++) {
01458 if (j == i) ubv[j] = 1;
01459 else ubv[j] = 0;
01460 }
01461 thisnest.Insert (ubv);
01462 }
01463
01464 BOOL kerh_overflow = FALSE, kerhs_overflow = FALSE;
01465
01466 FRAC::Exception = FALSE;
01467 sKerH *= thisnest; kerh_overflow |= (FRAC::Exception);
01468 FRAC::Exception = FALSE;
01469 sKerHs *= thisnest; kerhs_overflow |= (FRAC::Exception);
01470 FRAC::Exception = FALSE;
01471
01472 if (kerh_overflow) {
01473
01474
01475 DevWarn ("Vector contains large numbers -- assuming no reuse");
01476 }
01477 else {
01478 const MAT<FRAC>& Hbasis = sKerH.Basis ();
01479 num_bvs = sKerH.D();
01480
01481
01482
01483 for (i=0; i<num_bvs; i++) {
01484
01485 # ifdef Is_True_On
01486
01487 for (j=0; j<_depth; j++)
01488 if (Hbasis(i,j).N() != 0)
01489 Is_True (0, ("Intersection of KerH and thisnest is wrong\n"));
01490 # endif
01491
01492 PF_LOOPNODE* tmp_loopnode = Get_Loop ();
01493 for (j=(num_loops-1); j>=_depth; j--) {
01494
01495 if (abs(Hbasis(i,j).N()) >= 20) break;
01496
01497 DO_LOOP_INFO* dli = tmp_loopnode->Get_LoopInfo ();
01498 if (abs(Hbasis(i,j).N()) > Get_Good_Num_Iters(dli)) break;
01499 tmp_loopnode = tmp_loopnode->Get_Parent();
01500 }
01501 if (j != (_depth-1)) continue;
01502
01503
01504 INT product_coeff = 1;
01505
01506
01507 INT num_nz = 0;
01508
01509 for (j=_depth; j<num_loops; j++) {
01510 if (Hbasis(i,j).N() != 0) {
01511 num_nz++;
01512 product_coeff = product_coeff * Hbasis(i,j).N();
01513 done_loop[j] = TRUE;
01514
01515
01516
01517
01518 }
01519 }
01520 product_coeff = abs (product_coeff);
01521
01522 INT num_vectors = 0;
01523
01524
01525 PF_LOOPNODE* loopnode = Get_Loop ();
01526
01527 for (j=(num_loops-1); j>=_depth; j--) {
01528 if (Hbasis(i,j).N() != 0) {
01529
01530 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
01531 UINT64 num_iters = Get_Good_Num_Iters(dli);
01532 INT64 spread = (_max_iter[j]-_min_iter[j]);
01533 spread = abs(spread);
01534 num_iters += spread;
01535 num_vectors += (product_coeff/(abs(Hbasis(i,j).N()))) * num_iters;
01536 }
01537 loopnode = loopnode->Get_Parent ();
01538 }
01539 if (num_nz == 1) {
01540
01541 PF_LOOPNODE* loopnode = Get_Loop ();
01542 INT dp = loopnode->Get_Depth ();
01543
01544
01545
01546 while (dp != _depth) {
01547 dp--;
01548 loopnode = loopnode->Get_Parent();
01549 }
01550 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
01551 if ((!dli->Is_Inner_Tile) &&
01552 (Is_Outer_Tile(Get_Loop(), loopnode, _myugs->Get_AA()))) {
01553
01554 UINT64 num_iters = Get_Good_Num_Iters(dli);
01555 INT64 spread = (_max_iter[dp]-_min_iter[dp]);
01556 spread = abs(spread);
01557 num_iters += spread;
01558 myvol *= num_iters;
01559 }
01560
01561 continue;
01562 }
01563 num_vectors -= product_coeff-1;
01564 FmtAssert (num_vectors >= 0,
01565 ("Computing volume: Num_vectors should be positive, is %d\n",
01566 num_vectors));
01567 myvol *= num_vectors;
01568 }
01569 }
01570
01571
01572
01573 PF_LOOPNODE* loopnode = Get_Loop ();
01574
01575 for (j=(num_loops-1); j>=_depth; j--) {
01576 if (done_loop[j]) continue;
01577 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
01578 UINT64 num_iters = Get_Good_Num_Iters(dli);
01579 INT64 spread = (_max_iter[j]-_min_iter[j]);
01580 spread = abs(spread);
01581 num_iters += spread;
01582 myvol *= num_iters;
01583 loopnode = loopnode->Get_Parent ();
01584 }
01585
01586
01587
01588
01589
01590 if (!(kerhs_overflow || kerh_overflow) &&
01591 (sKerHs.D() > sKerH.D())) {
01592 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map,
01593 Get_Ref(_leading_ref));
01594 ACCESS_VECTOR* av = aa->Dim(aa->Num_Vec()-1);
01595 INT stride = INT_INFINITY;
01596
01597 for (j=_depth; j<num_loops; j++) {
01598 INT64 cur = av->Loop_Coeff(j);
01599 if (cur == 0) continue;
01600 cur = abs(cur);
01601 stride = minof (stride, cur);
01602 }
01603 if (stride < INT_INFINITY) {
01604
01605 stride = stride * (INT) ABS(WN_element_size(Get_Ref(_leading_ref)));
01606
01607 if (stride < Cache.LineSize(1)) {
01608 INT vec = Cache.LineSize(1)/stride;
01609 myvol.vol_1L = (myvol.vol_1L+vec-1)/vec;
01610 if (Cache.Levels()>1) {
01611 vec = Cache.LineSize(2)/stride;
01612 myvol.vol_2L = (myvol.vol_2L+vec-1)/vec;
01613 }
01614 }
01615 else if (stride < Cache.LineSize(2)) {
01616 INT vec = Cache.LineSize(2)/stride;
01617 myvol.vol_2L = (myvol.vol_2L+vec-1)/vec;
01618 }
01619 else {
01620
01621 }
01622 }
01623 }
01624
01625
01626 myvol.vol_1L = myvol.vol_1L * Cache.LineSize(1);
01627 myvol.vol_2L = myvol.vol_2L * Cache.LineSize(2);
01628
01629
01630 myvol.vol_1L = myvol.vol_1L * _numlines_1L;
01631 myvol.vol_2L = myvol.vol_2L * _numlines_2L;
01632
01633 PF_PRINT (
01634 fprintf (TFile, "vol lg[0x%lx]: symbol: ", this);
01635 _myugs->Get_BA()->Get_Symbol()->Print (TFile);
01636 myvol.Print (TFile);
01637 );
01638
01639 return myvol;
01640 }
01641
01642 struct PF_SORTED_REFS {
01643 INT64 dist;
01644 mINT16 refnum;
01645 mINT16 refvecnum;
01646 mINT16 lrnum;
01647 };
01648
01649
01650
01651
01652
01653
01654
01655
01656 static PF_SORTED_REFS* Sort_Refvecs (PF_REFVEC_DA* refvecs, mINT16 leadingref){
01657
01658 PF_SORTED_REFS* srefs =
01659 CXX_NEW_ARRAY (PF_SORTED_REFS, refvecs->Elements()+1, PF_mpool);
01660 srefs[0].dist = 0;
01661 srefs[0].refnum = leadingref;
01662 srefs[0].refvecnum = -1;
01663 INT num = 1;
01664
01665 for (INT i=0; i<refvecs->Elements(); i++) {
01666
01667 PF_REFVEC* refvec = refvecs->Bottom_nth(i);
01668 INT j;
01669 for (j=0; j<(i+1); j++)
01670 #ifdef KEY
01671 if (srefs[j].dist > refvec->Distance()) break;
01672 #else
01673 if (srefs[j].dist >= refvec->Distance()) break;
01674 #endif
01675 if (j == (i+1)) {
01676
01677 srefs[i+1].dist = refvec->Distance();
01678 srefs[i+1].refnum = refvec->Refnum ();
01679 srefs[i+1].refvecnum = i;
01680 }
01681 else {
01682 for (INT k=(i+1); k>j; k--) srefs[k] = srefs[k-1];
01683 srefs[j].dist = refvec->Distance();
01684 srefs[j].refnum = refvec->Refnum ();
01685 srefs[j].refvecnum = i;
01686 }
01687 }
01688
01689 #ifdef Is_True_On
01690 {
01691 for (INT i=0; i<(refvecs->Elements()-1); i++) {
01692 Is_True (srefs[i].dist <= srefs[i+1].dist,
01693 ("Sort_Refvecs: sorting error during prefetching"));
01694 }
01695 }
01696 #endif
01697
01698 return srefs;
01699 }
01700
01701
01702
01703
01704
01705
01706
01707 void PF_LG::LR_Ordering (PF_SORTED_REFS* srefs, INT start, INT stop) {
01708 INT i, j;
01709
01710
01711 struct SORT_STR {
01712 mINT16 num;
01713 mINT16 eq;
01714 };
01715
01716 SORT_STR* lr_num = CXX_NEW_ARRAY (SORT_STR, stop-start, PF_mpool);
01717 lr_num[0].num = start;
01718 lr_num[0].eq = 0;
01719 for (INT cur=start+1; cur<stop; cur++) {
01720
01721
01722 INT order;
01723 for (i=cur-start-1; i>=0; i--) {
01724
01725 order = LR_Compare (srefs[cur].refvecnum,
01726 srefs[lr_num[i].num].refvecnum);
01727
01728 if (order <= 0) break;
01729 }
01730
01731 for (j=cur-start; j>(i+1); j--)
01732 lr_num[j] = lr_num[j-1];
01733 lr_num[j].num = cur;
01734 lr_num[j].eq = (order == 0 ? 1 : 0);
01735 }
01736
01737
01738 j = 1;
01739 for (i=start; i<stop; i++) {
01740 srefs[lr_num[i-start].num].lrnum = j;
01741 if (i<(stop-1)) {
01742 if (lr_num[i-start+1].eq == 0) j++;
01743 }
01744 }
01745
01746 #ifdef Is_True_On
01747 for (i=start; i<stop; i++) {
01748 if ((srefs[i].lrnum <= 0) || (srefs[i].lrnum > (stop-start)))
01749 FmtAssert (FALSE, ("sorting error\n"));
01750 }
01751 #endif
01752
01753 CXX_DELETE_ARRAY (lr_num, PF_mpool);
01754 }
01755
01756
01757
01758
01759
01760
01761
01762
01763
01764
01765
01766
01767
01768
01769 INT PF_LG::Get_Bit_Vec (PF_DESC* pfdesc,
01770 PF_LEVEL level,
01771 PF_SPLIT_VECTOR *split_vec) {
01772 INT bitvec = 0x1;
01773 INT i;
01774
01775 if (split_vec == NULL) {
01776
01777 return bitvec;
01778 }
01779 mINT16 split_depth = split_vec->Get_Depth();
01780 mINT16* orig_split_vector = split_vec->Get_Vector ();
01781
01782
01783
01784
01785 mINT16* split_vector = (mINT16*) alloca(split_depth*sizeof(mINT16));
01786 PF_LOOPNODE* loopnode = split_vec->Get_Loop();
01787 for (i=split_depth-1; i>=0; i--) {
01788 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo();
01789 split_vector[i] = orig_split_vector[i];
01790 if (split_vector[i] > 1) {
01791 if (!dli->Num_Iterations_Symbolic &&
01792 (split_vector[i] >= Get_Good_Num_Iters(dli)))
01793 split_vector[i] = 1;
01794 }
01795 loopnode = loopnode->Get_Parent();
01796 }
01797
01798
01799
01800
01801
01802
01803 #ifdef Is_True_On
01804 {
01805 PF_LOOPNODE* leaf = split_vec->Get_Loop ();
01806 PF_LOOPNODE* myloop = Get_Loop ();
01807 while (leaf) {
01808 if (leaf == myloop) break;
01809 leaf = leaf->Get_Parent ();
01810 }
01811 if (leaf == NULL)
01812 FmtAssert (FALSE, ("Get_Bit_Vec - this loop hasn't been versioned\n"));
01813 }
01814 #endif
01815
01816
01817
01818 mINT16 myloopdepth = Get_Depth ()+1;
01819 mINT16 count = 1;
01820 for (i=0; i<myloopdepth; i++) {
01821
01822
01823
01824 if (split_vector[i] > 1) count++;
01825 }
01826
01827
01828 Is_True (pfdesc->Kind(level) != none,
01829 ("Get_Bit_Vec: prefetch kind is none\n"));
01830
01831 if (pfdesc->Kind(level) == all) {
01832 for (i=0; i<count-1; i++)
01833 bitvec = ((bitvec << 1) | 0x1);
01834 return bitvec;
01835 }
01836
01837
01838 mINT16 depth = Get_Depth ()+1;
01839 mINT16* vec = pfdesc->Vec (level);
01840 mINT16 version_number = 0;
01841 for (i=0; i<depth && i<split_depth; i++) {
01842 if (split_vector[i] < 2) continue;
01843
01844
01845 version_number++;
01846
01847 if ((vec[i] == 0) || ((vec[i] % split_vector[i]) == 0)) {
01848
01849
01850 }
01851 else {
01852
01853 bitvec = bitvec | (1 << version_number);
01854
01855 }
01856 }
01857 return bitvec;
01858 }
01859
01860
01861
01862
01863
01864
01865
01866
01867 WN* PF_LG::Get_Ref_Version (WN* ref, INT bitpos) {
01868 extern WN_MAP version_map;
01869 while (bitpos > 0) {
01870 ref = (WN*) WN_MAP_Get (version_map, ref);
01871 Is_True (ref, ("Get_Ref_Version: couldn't find a map\n"));
01872 bitpos--;
01873 }
01874 return ref;
01875 }
01876
01877 #if defined(TARG_IA64)
01878 BOOL Contain_Induction_Variable (WN* wn, ST_IDX idx)
01879 {
01880 if (WN_st_idx(wn) == idx) return TRUE;
01881 for (INT kid = 0; kid < WN_kid_count(wn); kid ++)
01882 if (Contain_Induction_Variable(WN_kid(wn, kid), idx))
01883 return TRUE;
01884 return FALSE;
01885 }
01886
01887 void Update_Array_Index (WN* wn, WN* wn_incr, WN* wn_induc)
01888 {
01889 for (INT kid = 0; kid < WN_kid_count(wn); kid ++) {
01890 WN* wn_kid = WN_kid(wn, kid);
01891 if (WN_st_idx(wn_kid) == WN_st_idx(wn_induc)
01892 && SYMBOL(wn_kid) == SYMBOL(wn_induc)) {
01893 TYPE_ID desc = Promote_Type(WN_rtype(wn_kid));
01894 WN* wn_ahead = LWN_Make_Icon(desc, LNO_Prefetch_Iters_Ahead);
01895 wn_ahead = LWN_CreateExp2(OPCODE_make_op(OPR_MPY, desc, MTYPE_V),
01896 WN_CopyNode(wn_incr), wn_ahead);
01897 WN_kid(wn, kid) = LWN_CreateExp2(OPCODE_make_op(OPR_ADD, desc, MTYPE_V), wn_kid, wn_ahead);
01898 LWN_Set_Parent(WN_kid(wn, kid), wn);
01899 }
01900
01901
01902 else if ((WN_operator(wn_kid) == OPR_CVT || WN_operator(wn_kid) == OPR_CVTL || WN_operator(wn_kid) == OPR_TRUNC)
01903 && (WN_st_idx(WN_kid(wn_kid, 0)) == WN_st_idx(wn_induc) && SYMBOL(WN_kid(wn_kid, 0)) == SYMBOL(wn_induc)))
01904 {
01905 TYPE_ID desc = Promote_Type(WN_rtype(wn_kid));
01906 WN* wn_ahead = LWN_Make_Icon(desc, LNO_Prefetch_Iters_Ahead);
01907 wn_ahead = LWN_CreateExp2(OPCODE_make_op(OPR_MPY, desc, MTYPE_V),
01908 WN_CopyNode(wn_incr), wn_ahead);
01909 WN_kid(wn, kid) = LWN_CreateExp2(OPCODE_make_op(OPR_ADD, desc, MTYPE_V), wn_kid, wn_ahead);
01910 LWN_Set_Parent(WN_kid(wn, kid), wn);
01911 }
01912 else {
01913 Update_Array_Index(wn_kid, wn_incr, wn_induc);
01914 }
01915 }
01916 }
01917 #endif
01918
01919 #ifdef KEY //bug 10953 -- to generate prefetch address
01920 static WN *Gen_Pf_Addr_Node(WN *invariant_stride, WN *array, WN *loop)
01921 {
01922 OPCODE mpy_opc = OPCODE_make_op(OPR_MPY,WN_rtype(array), MTYPE_V);
01923 OPCODE add_opc= OPCODE_make_op(OPR_ADD,WN_rtype(array), MTYPE_V);
01924 OPCODE intconst_opc= OPCODE_make_op(OPR_INTCONST,WN_rtype(array), MTYPE_V);
01925
01926 WN *stridenon = LWN_Copy_Tree(invariant_stride, TRUE, LNO_Info_Map);
01927 LWN_Copy_Def_Use(invariant_stride, stridenon, Du_Mgr);
01928
01929 INT const_val;
01930 WN *array_index = WN_kid(array, WN_num_dim(array)<<1);
01931 if(Is_Loop_Invariant_Exp(array_index, loop))
01932 const_val = LNO_Prefetch_Stride_Ahead;
01933 else const_val = LNO_Prefetch_Stride_Ahead*ABS(WN_element_size(array));
01934
01935 WN *stride_node = LWN_CreateExp2(add_opc, array,
01936 LWN_CreateExp2(mpy_opc,stridenon,
01937 WN_CreateIntconst(intconst_opc, const_val)));
01938
01939 return stride_node;
01940 }
01941
01942 static BOOL Is_Other_Array_Bad(WN *wn, INT dim)
01943 {
01944 if (WN_operator(wn) == OPR_BLOCK){
01945 for (WN* kid=WN_first(wn); kid; kid=WN_next(kid)){
01946 if(Is_Other_Array_Bad(kid, dim))
01947 return TRUE;
01948 }
01949 return FALSE;
01950 }else if(WN_operator(wn) == OPR_ARRAY){
01951 if(WN_element_size(wn) < 0){
01952 WN *array_parent = LWN_Get_Parent(wn);
01953 if(WN_operator(array_parent)!=OPR_ILOAD &&
01954 WN_operator(array_parent)!=OPR_ISTORE)
01955 return TRUE;
01956 if(!MTYPE_is_vector(WN_desc(array_parent)))
01957 return TRUE;
01958 }
01959 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,wn);
01960 if(aa==NULL || aa->Num_Vec() > dim)
01961 return TRUE;
01962 }
01963
01964 for (UINT kidno = 0; kidno < WN_kid_count(wn); kidno ++){
01965 if (Is_Other_Array_Bad(WN_kid(wn, kidno), dim))
01966 return TRUE;
01967 }
01968 return FALSE;
01969 }
01970
01971
01972 static BOOL Well_Formed_Expr(WN *expr, WN *array)
01973 {
01974 if(WN_operator(expr) == OPR_BLOCK)
01975 return FALSE;
01976 else if(WN_operator(expr)==OPR_ARRAY){
01977 if(!Tree_Equiv(expr, array))
01978 return FALSE;
01979 }
01980 for(INT ii = 0; ii< WN_kid_count(expr); ii++)
01981 if(!Well_Formed_Expr(WN_kid(expr, ii), array))
01982 return FALSE;
01983 return TRUE;
01984 }
01985
01986 static BOOL Good_Stmt_To_Adjust_Offset(WN *stmt, WN *array)
01987 {
01988 if(!stmt) return FALSE;
01989 OPERATOR opr= WN_operator(stmt);
01990 switch(opr){
01991 case OPR_BLOCK:
01992 if(WN_first(stmt)==WN_last(stmt))
01993 return Good_Stmt_To_Adjust_Offset(WN_first(stmt), array);
01994 else return FALSE;
01995 break;
01996 case OPR_IF:{
01997 WN *then_part = WN_then(stmt);
01998 WN *else_part = WN_else(stmt);
01999 if(else_part && WN_first(else_part) != NULL)
02000 return FALSE;
02001 if(!then_part || !WN_first(then_part))
02002 return FALSE;
02003 if(WN_first(then_part) != WN_last(then_part))
02004 return FALSE;
02005 return Good_Stmt_To_Adjust_Offset(WN_first(then_part), array);
02006 }
02007 break;
02008 case OPR_ISTORE:{
02009 WN *kid1 = WN_kid1(stmt);
02010 WN *kid0 = WN_kid0(stmt);
02011 if(WN_operator(kid1) != OPR_ARRAY || !Tree_Equiv(kid1, array))
02012 return FALSE;
02013 if(!Well_Formed_Expr(kid0, array) || WN_operator(kid0) != OPR_BXOR)
02014 return FALSE;
02015 return TRUE;
02016 }
02017 break;
02018 default:
02019 return FALSE;
02020 break;
02021 }
02022 }
02023
02024
02025 static BOOL Good_Loop_To_Adjust_Offset(WN *loop, WN *array)
02026 {
02027 WN *body = WN_do_body(loop);
02028 if(!body || WN_first(body)==NULL )
02029 return FALSE;
02030 if(WN_first(body)==WN_last(body))
02031 return Good_Stmt_To_Adjust_Offset(WN_first(body), array);
02032 return FALSE;
02033 }
02034
02035
02036 static BOOL Larger_Dimension_Arrays_In(WN *wn, INT dim)
02037 {
02038 if (WN_operator(wn) == OPR_BLOCK){
02039 for (WN* kid=WN_first(wn); kid; kid=WN_next(kid)){
02040 if(Larger_Dimension_Arrays_In(kid, dim))
02041 return TRUE;
02042 }
02043 return FALSE;
02044 }else if(WN_operator(wn) == OPR_ARRAY){
02045 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,wn);
02046 if(aa==NULL || aa->Num_Vec() > dim)
02047 return TRUE;
02048 }
02049
02050 for (UINT kidno = 0; kidno < WN_kid_count(wn); kidno ++){
02051 if (Larger_Dimension_Arrays_In(WN_kid(wn, kidno), dim))
02052 return TRUE;
02053 }
02054 return FALSE;
02055 }
02056 #endif
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066 void PF_LG::Gen_Pref_Node (PF_SORTED_REFS* srefs, mINT16 start, mINT16 stop,
02067 PF_LEVEL level, PF_DESC* pfdesc,
02068 PF_SPLIT_VECTOR* split_vec) {
02069
02070
02071
02072
02073
02074 INT bitvec = Get_Bit_Vec (pfdesc, level, split_vec);
02075 INT curbitpos = 0;
02076
02077
02078
02079
02080
02081 PF_LEVEL level_for_cg = level;
02082 if ((level == level_1) &&
02083 (Cache.Levels() == 1) && Cache.Level1_Really_Level2()) {
02084
02085 level_for_cg = level_2;
02086 }
02087
02088
02089 while (bitvec) {
02090
02091
02092 BOOL spatial_in_loop = FALSE;
02093
02094
02095
02096 UINT32 flag = 0;
02097 BOOL readpf = TRUE;
02098 INT j;
02099 for (j=start; j<stop; j++) {
02100 WN* ref = Get_Ref (srefs[j].refnum);
02101 Is_True (WN_operator(LWN_Get_Parent(ref)) == OPR_ILOAD ||
02102 WN_operator(LWN_Get_Parent(ref)) == OPR_ISTORE,
02103 ("Gen_Pref_Node: Parent of array ref is not a iload/istore\n"));
02104 if (WN_operator(LWN_Get_Parent(ref)) == OPR_ISTORE)
02105 readpf = FALSE;
02106 }
02107 if (readpf) PF_SET_READ(flag); else PF_SET_WRITE(flag);
02108
02109
02110 PF_LOOPNODE* loopnode = Get_Loop ();
02111 PF_LOCLOOP locloop = loopnode->Get_locloop ();
02112 mINT16 confidence;
02113
02114 switch (level) {
02115 case level_1:
02116 confidence = locloop.Confidence_1L ();
02117 if (confidence == 3) {
02118 if (locloop.Loop_1L() == 0) {
02119
02120 INT d=Get_Depth();
02121 PF_LOOPNODE* oloop = loopnode;
02122 while (d>0) {
02123 oloop = oloop->Get_Parent ();
02124 d--;
02125 }
02126 switch (Loop_Confidence(oloop->Get_LoopInfo())) {
02127 case 3:
02128 {
02129
02130 PF_VOLUME vol = oloop->Get_Total ();
02131 if (vol.Localized_1L()) {
02132 #ifdef KEY
02133 confidence = (LNO_Run_Prefetch > SOME_PREFETCH) ? (LNO_Run_Prefetch - 1) : LNO_Run_Prefetch;
02134 #else
02135 confidence = LNO_Run_Prefetch;
02136 #endif
02137 }
02138 break;
02139 }
02140 case 2:
02141 case 1:
02142 confidence = 2;
02143 break;
02144 }
02145 }
02146 }
02147 else {
02148
02149
02150
02151 confidence = 2;
02152
02153 INT d = Get_Depth();
02154 if ((locloop.Confidence_1L() == 2) && (locloop.Loop_1L() <= d)) {
02155 PF_LOOPNODE* oloop = loopnode;
02156 while (d > locloop.Loop_1L()) {
02157 oloop = oloop->Get_Parent();
02158 d--;
02159 }
02160 PF_VOLUME vol = oloop->Get_Total();
02161 if (vol.Localized_1L()) confidence = 1;
02162 }
02163
02164 if (confidence == 1) {
02165 #ifdef KEY
02166 confidence = (LNO_Run_Prefetch > SOME_PREFETCH) ? (LNO_Run_Prefetch - 1) : LNO_Run_Prefetch;
02167 #else
02168 confidence = LNO_Run_Prefetch;
02169 #endif
02170 }
02171 }
02172 break;
02173 case level_2:
02174 confidence = locloop.Confidence_2L ();
02175 if (confidence == 3) {
02176 if (locloop.Loop_2L() == 0) {
02177
02178 INT d=Get_Depth();
02179 PF_LOOPNODE* oloop = loopnode;
02180 while (d>0) {
02181 oloop = oloop->Get_Parent ();
02182 d--;
02183 }
02184 switch (Loop_Confidence(oloop->Get_LoopInfo())) {
02185 case 3:
02186 {
02187
02188 PF_VOLUME vol = oloop->Get_Total ();
02189 if (vol.Localized_2L()) {
02190 #ifdef KEY
02191 confidence = (LNO_Run_Prefetch > SOME_PREFETCH) ? (LNO_Run_Prefetch - 1) : LNO_Run_Prefetch;
02192 #else
02193 confidence = LNO_Run_Prefetch;
02194 #endif
02195 }
02196 break;
02197 }
02198 case 2:
02199 case 1:
02200 confidence = 2;
02201 break;
02202 }
02203 }
02204 }
02205 else {
02206
02207
02208
02209 confidence = 2;
02210
02211 INT d = Get_Depth();
02212 if ((locloop.Confidence_2L() == 2) && (locloop.Loop_2L() <= d)) {
02213 PF_LOOPNODE* oloop = loopnode;
02214 while (d > locloop.Loop_2L()) {
02215 oloop = oloop->Get_Parent();
02216 d--;
02217 }
02218 PF_VOLUME vol = oloop->Get_Total();
02219 if (vol.Localized_2L()) confidence = 1;
02220 }
02221
02222 if (confidence == 1) {
02223 #ifdef KEY
02224 confidence = (LNO_Run_Prefetch > SOME_PREFETCH) ? (LNO_Run_Prefetch - 1) : LNO_Run_Prefetch;
02225 #else
02226 confidence = LNO_Run_Prefetch;
02227 #endif
02228 }
02229 }
02230 break;
02231 case level_1and2:
02232 confidence = minof(locloop.Confidence_1L(), locloop.Confidence_2L());
02233 break;
02234 }
02235 PF_SET_CONFIDENCE (flag, confidence);
02236
02237
02238
02239
02240 if ((level == level_1) || (level == level_1and2)) {
02241 if (pfdesc->Kind(level_1) == all){
02242 if (level_for_cg == level_2) PF_SET_STRIDE_2L (flag, 1);
02243 else PF_SET_STRIDE_1L (flag, 1);
02244 }
02245 else {
02246 Is_True (pfdesc->Kind(level_1) == vec,
02247 ("Gen_Pref_Node: prefetch when kind is none\n"));
02248 Is_True (pfdesc->Vec(level_1),
02249 ("Gen_Pref_Node: kind is vec, but vector is NULL\n"));
02250 mINT16* prefetch_vec = pfdesc->Vec(level_1);
02251 mINT16 depth = Get_Depth();
02252 if (prefetch_vec[depth]) {
02253 INT dp = depth-1;
02254 while (dp >= 0) {
02255 if (prefetch_vec[dp]) break;
02256 dp--;
02257 }
02258 if(dp<0) spatial_in_loop = TRUE;
02259
02260 if (level_for_cg == level_2)
02261 PF_SET_STRIDE_2L (flag, prefetch_vec[depth]);
02262 else PF_SET_STRIDE_1L (flag, prefetch_vec[depth]);
02263 }
02264 else {
02265
02266
02267
02268 #ifdef Is_True_On
02269 INT dp = depth-1;
02270 while (dp >= 0) {
02271 if (prefetch_vec[dp]) break;
02272 dp--;
02273 }
02274 Is_True (dp >= 0, ("Error in computing spatial locality"));
02275 #endif
02276 PF_SET_STRIDE_1L (flag, 1);
02277 }
02278 }
02279 }
02280 if ((level == level_2) || (level == level_1and2)) {
02281 if (pfdesc->Kind(level_2) == all)
02282 PF_SET_STRIDE_2L (flag, 1);
02283 else {
02284 Is_True (pfdesc->Kind(level_2) == vec,
02285 ("Gen_Pref_Node: prefetch when kind is none\n"));
02286 Is_True (pfdesc->Vec(level_2),
02287 ("Gen_Pref_Node: kind is vec, but vector is NULL\n"));
02288 mINT16* prefetch_vec = pfdesc->Vec(level_2);
02289 mINT16 depth = Get_Depth();
02290 if (prefetch_vec[depth]) {
02291 INT dp = depth-1;
02292 while (dp >= 0) {
02293 if (prefetch_vec[dp]) break;
02294 dp--;
02295 }
02296 if(dp<0) spatial_in_loop = TRUE;
02297
02298 PF_SET_STRIDE_2L (flag, prefetch_vec[depth]);
02299 }
02300 else {
02301
02302
02303
02304 #ifdef Is_True_On
02305 INT dp = depth-1;
02306 while (dp >= 0) {
02307 if (prefetch_vec[dp]) break;
02308 dp--;
02309 }
02310 Is_True (dp >= 0, ("Error in computing spatial locality"));
02311 #endif
02312 PF_SET_STRIDE_2L (flag, 1);
02313 }
02314 }
02315 }
02316
02317 WN* ref = Get_Ref (srefs[start].refnum);
02318 ref = Get_Ref_Version (ref, curbitpos);
02319 WN* parent_ref = LWN_Get_Parent (ref);
02320
02321
02322 WN* arraynode = LWN_Copy_Tree (ref, TRUE, LNO_Info_Map);
02323 LWN_Copy_Def_Use(ref, arraynode, Du_Mgr);
02324
02325 OPCODE opcode = WN_opcode (parent_ref);
02326 WN_OFFSET offset;
02327 switch (OPCODE_operator(opcode)) {
02328 case OPR_ILOAD:
02329 offset = WN_load_offset (parent_ref);
02330 break;
02331 case OPR_ISTORE:
02332 offset = WN_store_offset (parent_ref);
02333 break;
02334 default:
02335 FmtAssert (false, ("Parent of array ref not a load/store\n"));
02336 break;
02337 }
02338
02339 #if defined(TARG_IA64)
02340 {
02341
02342
02343 if ( LNO_Prefetch_Ahead || LNO_Prefetch_Iters_Ahead) {
02344 INT increment;
02345 if ((level == level_1) || (level == level_1and2))
02346 increment = LNO_Prefetch_Ahead *Cache.LineSize(1);
02347 else increment = LNO_Prefetch_Ahead *Cache.LineSize(2);
02348
02349 if(Stride_Forward())
02350 increment = Stride_Forward()*increment;
02351 else
02352 increment = (Get_Stride_In_Enclosing_Loop()>0) ? increment : (-increment) ;
02353
02354
02355 INT stride_size = ABS(Get_Stride_In_Enclosing_Loop());
02356
02357 if ((((level == level_1) || (level == level_1and2)) &&
02358 (stride_size >= Cache.LineSize(1))) ||
02359 ((level == level_2) && stride_size >= Cache.LineSize(2))) {
02360 if( Stride_Forward() && Get_Stride_Accurate() ) {
02361
02362
02363 increment = Get_Stride_In_Enclosing_Loop() * LNO_Prefetch_Iters_Ahead;
02364 }
02365 else {
02366
02367
02368
02369
02370
02371
02372
02373
02374
02375
02376
02377
02378 increment = 0;
02379
02380 ACCESS_ARRAY *aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
02381 ACCESS_VECTOR *av;
02382
02383
02384
02385
02386 INT i, curr_depth=Get_Depth();
02387 for (i=aa->Num_Vec()-1; i>=0; i--) {
02388 av = aa->Dim(i);
02389 if (av->Loop_Coeff(curr_depth)) break;
02390 }
02391
02392 if(i<0) {
02393 WN* wn_loop = Get_Loop()->Get_Code();
02394 WN* wn_induction = WN_index(wn_loop);
02395 WN* wn_step = WN_step(wn_loop);
02396 WN* wn_incr = WN_kid(wn_step, 0);
02397 Is_True(WN_operator(wn_step) == OPR_STID &&
02398 WN_operator(wn_incr) == OPR_ADD, ("Incorrect loop index"));
02399
02400 for (i = 0; i < WN_kid_count(wn_incr); ++i) {
02401 if( WN_st_idx(WN_kid(wn_incr, i)) != WN_st_idx(wn_induction) ) {
02402 wn_incr = WN_kid(wn_incr, i);
02403 break;
02404 }
02405 }
02406
02407 for(i=0; i<WN_num_dim(arraynode); ++i) {
02408 Update_Array_Index(WN_array_index(arraynode, i), wn_incr, wn_induction);
02409 }
02410
02411 } else {
02412
02413
02414
02415 if(i >= WN_num_dim(arraynode))
02416 i = WN_num_dim(arraynode) - 1;
02417
02418 DO_LOOP_INFO* dli = Get_Loop()->Get_LoopInfo();
02419 INT step;
02420 if (dli->Step->Is_Const()) {
02421 step = LNO_Prefetch_Iters_Ahead * av->Loop_Coeff(curr_depth) * dli->Step->Const_Offset;
02422 }
02423 else {
02424
02425 step = LNO_Prefetch_Iters_Ahead * av->Loop_Coeff(curr_depth);
02426 }
02427
02428
02429 WN* wn_index = WN_array_index(arraynode, i);
02430 TYPE_ID desc = Promote_Type(WN_rtype(wn_index));
02431 OPCODE addop = OPCODE_make_op(OPR_ADD, desc, MTYPE_V);
02432 WN* wn_ahead = LWN_Make_Icon(desc, step);
02433 WN_array_index(arraynode, i) = LWN_CreateExp2(addop, wn_index, wn_ahead);
02434 LWN_Set_Parent(WN_array_index(arraynode, i), arraynode);
02435 }
02436 }
02437
02438 }
02439
02440 offset += increment;
02441 }
02442 }
02443
02444 #else
02445 {
02446
02447
02448 if ( LNO_Prefetch_Ahead || LNO_Prefetch_Iters_Ahead) {
02449 INT increment;
02450 if ((level == level_1) || (level == level_1and2))
02451 increment = LNO_Prefetch_Ahead *Cache.LineSize(1);
02452 else increment = LNO_Prefetch_Ahead*Cache.LineSize(2);
02453
02454 increment = Stride_Forward()*increment;
02455
02456
02457 INT stride_size = Get_Stride_In_Enclosing_Loop();
02458 if ((((level == level_1) || (level == level_1and2)) &&
02459 (stride_size >= Cache.LineSize(1))) ||
02460 ((level == level_2) && stride_size >= Cache.LineSize(2))) {
02461
02462
02463
02464 stride_size = stride_size * LNO_Prefetch_Iters_Ahead;
02465 increment = stride_size;
02466 }
02467
02468 offset += increment;
02469 }
02470 }
02471 #endif
02472 #if defined(TARG_X8664) || defined(TARG_IA64)
02473
02474
02475
02476
02477
02478
02479 if(LNO_Run_Prefetch > SOME_PREFETCH &&
02480 (WN_element_size(arraynode) < 0 ||
02481 (Get_Dim()==1 &&( confidence ==3 || WN_element_size(arraynode) > 8))||
02482 MTYPE_is_vector(WN_desc(parent_ref)) ||
02483 MTYPE_is_vector(WN_rtype(parent_ref)))){
02484 PF_SET_KEEP_ANYWAY(flag);
02485 }
02486
02487
02488
02489 if(LNO_Run_Prefetch > SOME_PREFETCH && offset != 0 &&
02490 Get_Dim() == 1 && confidence >=2 &&
02491 WN_operator(WN_array_base(arraynode))==OPR_LDID &&
02492 strncmp(SYMBOL(WN_array_base(arraynode)).Name(),
02493 Temp_Symbol_Prefix "_misym",
02494 sizeof(Temp_Symbol_Prefix "_misym") - 1)==0){
02495 PF_SET_KEEP_ANYWAY(flag);
02496 WN *loop = Enclosing_Do_Loop(parent_ref);
02497 if(loop && Good_Loop_To_Adjust_Offset(loop,ref)){
02498 INT fancy_offset_incr=0;
02499 #ifdef TARG_X8664
02500 if(Is_Target_Core() || Is_Target_EM64T())
02501 fancy_offset_incr=8;
02502 else if(Is_Target_Barcelona())
02503 fancy_offset_incr=28;
02504 else
02505 #endif
02506 {
02507 #if defined(TARG_IA64)
02508 fancy_offset_incr=28;
02509 #else
02510 fancy_offset_incr=8;
02511 #endif
02512 if(LNO_Run_Stream_Prefetch && spatial_in_loop)
02513 PF_SET_NON_TEMPORAL(flag);
02514 }
02515 if(LNO_Prefetch_Ahead==2){
02516 if((level == level_1) || (level == level_1and2))
02517 offset += fancy_offset_incr*Cache.LineSize(1);
02518 else
02519 offset += fancy_offset_incr*Cache.LineSize(2);
02520 }
02521 }
02522 }
02523
02524
02525
02526
02527
02528
02529
02530
02531
02532
02533 if(LNO_Run_Stream_Prefetch &&
02534 _refvecs.Elements() == 0 &&
02535 spatial_in_loop &&
02536 WN_operator(parent_ref)==OPR_ILOAD){
02537
02538 BOOL stream_pf = TRUE;
02539 ACCESS_ARRAY* aa=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,arraynode);
02540 INT loopdepth = Get_Depth();
02541 ACCESS_VECTOR* av1 = aa->Dim(aa->Num_Vec()-1);
02542
02543 if(av1->Non_Const_Loops() != loopdepth)
02544 stream_pf = FALSE;
02545
02546 if(av1->Loop_Coeff(loopdepth) != 1)
02547 stream_pf = FALSE;
02548 for(INT ii=0; ii<loopdepth; ii++)
02549 if(av1->Loop_Coeff(ii)!=0){
02550 stream_pf = FALSE;
02551 break;
02552 }
02553 for(INT ii=0; ii < aa->Num_Vec(); ii++){
02554 ACCESS_VECTOR* av = aa->Dim(ii);
02555 if (av->Contains_Lin_Symb() || av->Contains_Non_Lin_Symb()){
02556 stream_pf = FALSE;
02557 break;
02558 }
02559 }
02560
02561 if(stream_pf){
02562 WN *doloop = Enclosing_Do_Loop(parent_ref);
02563 #if defined(TARG_IA64)
02564 if(Larger_Dimension_Arrays_In(doloop, Get_Dim()))
02565 #elif defined(TARG_X8664)
02566 if(Is_Other_Array_Bad(doloop, Get_Dim()))
02567 #else
02568
02569 if(Larger_Dimension_Arrays_In(doloop, Get_Dim()))
02570 #endif
02571 stream_pf = FALSE;
02572 }
02573
02574 if(stream_pf)
02575 PF_SET_NON_TEMPORAL(flag);
02576 }
02577 #endif
02578
02579
02580 #if !(defined(TARG_X8664) || defined(TARG_IA64)) //bug 10953
02581 WN* pfnode = LWN_CreatePrefetch (offset, flag, arraynode);
02582 #else //bug 10953
02583 WN* pfnode=NULL;
02584 WN *do_loop = ref;
02585 while(do_loop && WN_operator(do_loop) != OPR_DO_LOOP)
02586 do_loop = LWN_Get_Parent(do_loop);
02587
02588 WN *invariant_stride=Simple_Invariant_Stride_Access(ref, do_loop);
02589 if(NULL == invariant_stride)
02590 pfnode = LWN_CreatePrefetch (offset, flag, arraynode);
02591 else{
02592 WN *pf_addr_node = Gen_Pf_Addr_Node(invariant_stride, arraynode, do_loop);
02593 PF_SET_STRIDE_2L(flag, 0);
02594 PF_SET_STRIDE_1L(flag, 0);
02595 if(level_for_cg==level_2)
02596 PF_SET_STRIDE_2L (flag, 1);
02597 else{
02598 PF_SET_STRIDE_1L (flag, 1);
02599 PF_SET_NON_TEMPORAL(flag);
02600 PF_SET_KEEP_ANYWAY(flag);
02601 }
02602 pfnode = LWN_CreatePrefetch (offset, flag, pf_addr_node);
02603 }
02604 #endif //bug 10953
02605
02606 WN_linenum(pfnode) = LWN_Get_Linenum(ref);
02607 VB_PRINT (vb_print_indent;
02608 printf (">> pref ");
02609 Listing_Emit_WN (stdout, parent_ref);
02610 printf (" %c (L1 %3d) (L2 %3d)\n",
02611 (WN_pf_read(pfnode) ? 'R' : 'W'),
02612 WN_pf_stride_1L(pfnode),
02613 WN_pf_stride_2L(pfnode)));
02614 if (LNO_Analysis) {
02615 ls_print_indent; fprintf (LNO_Analysis,
02616 "(PREF-NODE %c (CONF %3d) (L1 %3d) (L2 %3d) ",
02617 (WN_pf_read(pfnode) ? 'R' : 'W'),
02618 WN_pf_confidence(pfnode),
02619 WN_pf_stride_1L(pfnode),
02620 WN_pf_stride_2L(pfnode));
02621 Listing_Emit_WN (LNO_Analysis, parent_ref);
02622 fprintf (LNO_Analysis, "\n");
02623 ls_num_indent += 2;
02624 ls_print_indent; fprintf (LNO_Analysis, "(PREF-REFS\n");
02625 ls_num_indent += 2;
02626 }
02627 #define MAX_LEN 1024
02628 char out_string[MAX_LEN];
02629 char aux_string[MAX_LEN];
02630 INT out_string_count=0;
02631 if (LNO_Tlog) {
02632 extern WN* pf_func_nd;
02633 Whirl2Src_Init(pf_func_nd);
02634
02635 char str_buf[64];
02636 for (INT i=0; i<MAX_LEN; i++)
02637 out_string[i]='\0';
02638 sprintf (aux_string, "PREF-NODE %c (CONF %3d) (L1 %3d) (L2 %3d)",
02639 (WN_pf_read(pfnode) ? 'R' : 'W'),
02640 WN_pf_confidence(pfnode),
02641 WN_pf_stride_1L(pfnode),
02642 WN_pf_stride_2L(pfnode));
02643
02644
02645 OPERATOR opr = WN_operator(parent_ref);
02646 char* name;
02647 switch (opr) {
02648 case OPR_ILOAD:
02649 name = ST_name(WN_st(WN_array_base(WN_kid0(parent_ref))));
02650 break;
02651 case OPR_ISTORE:
02652 name = ST_name(WN_st(WN_array_base(WN_kid1(parent_ref))));
02653 break;
02654 }
02655 if (out_string_count+strlen(name)+1 < MAX_LEN) {
02656 sprintf (&out_string[out_string_count], "%s ", name);
02657 out_string_count += (strlen(name)+1);
02658 }
02659 else out_string_count += strlen(name)+1;
02660 }
02661 Is_True(WN_opcode(pfnode) == OPC_PREFETCH,
02662 ("oops - LWN_CreatePrefetch returned a non-prefetch node\n"));
02663
02664
02665
02666
02667
02668
02669
02670
02671
02672
02673
02674
02675
02676
02677
02678
02679
02680
02681
02682
02683
02684
02685
02686 WN *wn_loop = NULL;
02687
02688
02689
02690
02691 BOOL drop_prefetch = FALSE;
02692
02693 if ((WN_operator(WN_array_base(ref)) == OPR_LDID ||
02694 WN_operator(WN_array_base(ref)) == OPR_LDA) &&
02695 (ST_sclass(WN_st(WN_array_base(ref))) == SCLASS_FORMAL_REF ||
02696 ST_sclass(WN_st(WN_array_base(ref))) == SCLASS_FORMAL) &&
02697 ST_is_optional_argument(WN_st(WN_array_base(ref)))) {
02698
02699
02700
02701
02702 WN *earliest_ref = ref;
02703 for (INT er=start; er<stop; er++) {
02704 WN *tmp_ref = Get_Ref (srefs[er].refnum);
02705 tmp_ref = Get_Ref_Version(tmp_ref, curbitpos);
02706 if (Is_Lex_Before (tmp_ref, earliest_ref)) earliest_ref = tmp_ref;
02707 }
02708 while (!OPCODE_is_stmt(WN_opcode(earliest_ref)) &&
02709 !OPCODE_is_scf(WN_opcode(earliest_ref))) {
02710 earliest_ref = LWN_Get_Parent(earliest_ref);
02711 if (earliest_ref == NULL) {
02712 DevWarn ("Where did the stmt/scf parent go?");
02713 }
02714 if (earliest_ref == NULL ||
02715 WN_operator(earliest_ref) == OPR_CAND ||
02716 WN_operator(earliest_ref) == OPR_CIOR) {
02717 drop_prefetch = TRUE;
02718 LWN_Delete_Tree (pfnode);
02719 pfnode = NULL;
02720 break;
02721 }
02722 }
02723 if (!drop_prefetch) {
02724 LWN_Insert_Block_Before (NULL, earliest_ref, pfnode);
02725 wn_loop = earliest_ref;
02726 }
02727 }
02728 else {
02729
02730 wn_loop = ref;
02731 while (WN_opcode(wn_loop) != OPC_DO_LOOP) {
02732
02733 if (WN_opcode(LWN_Get_Parent(wn_loop)) == OPC_DO_LOOP &&
02734 wn_loop != WN_do_body (LWN_Get_Parent(wn_loop))) {
02735
02736
02737
02738 break;
02739 }
02740
02741 wn_loop = LWN_Get_Parent(wn_loop);
02742 if (Is_Mp_Region(wn_loop)) break;
02743 Is_True (wn_loop, ("Error trying to find loop: parent is NULL\n"));
02744 }
02745
02746 WN* wn_block;
02747 WN* wn_first;
02748 if (WN_opcode(wn_loop) == OPC_DO_LOOP) {
02749
02750 wn_block = WN_do_body(wn_loop);
02751 wn_first = WN_first(wn_block);
02752 }
02753 else if (Is_Mp_Region(wn_loop)) {
02754
02755 wn_block = WN_region_body(wn_loop);
02756 wn_first = WN_first(wn_block);
02757 }
02758 else {
02759
02760 wn_first = LWN_Get_Parent(wn_loop);
02761 Is_True (WN_opcode(wn_first) == OPC_DO_LOOP,
02762 ("pref-ref under do-loop, but where is the do?"));
02763 wn_block = LWN_Get_Parent(wn_first);
02764 wn_first = WN_first(wn_block);
02765 }
02766
02767 Is_True (wn_block, ("Body of loop/region is NULL\n"));
02768 Is_True (WN_opcode(wn_block) == OPC_BLOCK,
02769 ("Body of do-loop/region not a block\n"));
02770
02771 LWN_Copy_Frequency_Tree(pfnode, WN_first(wn_block));
02772 LWN_Insert_Block_Before (wn_block, wn_first, pfnode);
02773 Is_True (LWN_Get_Parent(pfnode) == wn_block,
02774 ("Inserted pfnode, but parent got screwed up\n"));
02775 }
02776
02777
02778
02779 LR_Ordering (srefs, start, stop);
02780 for (j=start; j<stop && !drop_prefetch; j++) {
02781 WN* ref = Get_Ref (srefs[j].refnum);
02782 ref = Get_Ref_Version (ref, curbitpos);
02783 WN* parent_ref = LWN_Get_Parent (ref);
02784 if (LNO_Analysis) {
02785 ls_print_indent; fprintf (LNO_Analysis, "(");
02786 Listing_Emit_WN (LNO_Analysis, parent_ref);
02787 fprintf (LNO_Analysis, "%d)\n", srefs[j].lrnum);
02788 }
02789 if (LNO_Tlog) {
02790
02791
02792 OPERATOR opr = WN_operator(parent_ref);
02793 char* name;
02794 switch (opr) {
02795 case OPR_ILOAD:
02796 name = ST_name(WN_st(WN_array_base(WN_kid0(parent_ref))));
02797 break;
02798 case OPR_ISTORE:
02799 name = ST_name(WN_st(WN_array_base(WN_kid1(parent_ref))));
02800 break;
02801 }
02802 char str_buf[64];
02803 sprintf(str_buf,"%d", srefs[j].lrnum);
02804
02805 if (out_string_count+strlen(name)+3+strlen(str_buf)+3 < MAX_LEN) {
02806 sprintf (&out_string[out_string_count], "( %s ", name);
02807 out_string_count += (strlen(name)+3);
02808 sprintf(&out_string[out_string_count],"%s ) ", str_buf);
02809 out_string_count += (strlen(str_buf)+3);
02810 }
02811 else out_string_count += strlen(name)+3+strlen(str_buf)+3;
02812 }
02813 VB_PRINT (vb_print_indent;
02814 printf ("ref: ");
02815 Listing_Emit_WN (stdout, parent_ref);
02816 printf ("\n");
02817 );
02818 BOOL newnode = FALSE;
02819
02820
02821 PF_POINTER* tmp = (PF_POINTER*) WN_MAP_Get (WN_MAP_PREFETCH, parent_ref);
02822 if (tmp == NULL) {
02823 extern MEM_POOL PF_CG_mpool;
02824 tmp = CXX_NEW (PF_POINTER, &PF_CG_mpool);
02825 WN_MAP_Set (WN_MAP_PREFETCH, parent_ref, tmp);
02826 PF_PTR_flag(tmp) = 0;
02827 SET_AUTO(tmp);
02828 newnode = TRUE;
02829 }
02830 Is_True (WN_MAP_Get (WN_MAP_PREFETCH, parent_ref),
02831 ("Oops - where did the prefetch map go?\n"));
02832
02833 switch (level_for_cg) {
02834 case level_1:
02835 if (newnode) {
02836 PF_PTR_wn_pref_2L(tmp) = NULL;
02837 PF_PTR_lrnum_2L(tmp) = 0;
02838 PF_PTR_distance_2L(tmp) = 0;
02839 PF_PTR_set_conf_2L(tmp, 0);
02840 }
02841 else {
02842 #ifdef Is_True_On
02843 Is_True (PF_PTR_wn_pref_1L(tmp) == NULL,
02844 ("Error in old node, wn_pref_1L\n"));
02845 Is_True (PF_PTR_wn_pref_2L(tmp) != NULL,
02846 ("Error in old node, wn_pref_2L\n"));
02847 #endif
02848 }
02849 PF_PTR_wn_pref_1L(tmp) = pfnode;
02850 PF_PTR_lrnum_1L(tmp) = srefs[j].lrnum;
02851 PF_PTR_distance_1L(tmp) = offset;
02852 PF_PTR_set_conf_1L(tmp, confidence);
02853 break;
02854 case level_2:
02855 if (newnode) {
02856 PF_PTR_wn_pref_1L(tmp) = NULL;
02857 PF_PTR_lrnum_1L(tmp) = 0;
02858 PF_PTR_distance_1L(tmp) = 0;
02859 PF_PTR_set_conf_1L(tmp, 0);
02860 }
02861 else {
02862 #ifdef Is_True_On
02863 Is_True (PF_PTR_wn_pref_1L(tmp) != NULL,
02864 ("Error in old node, wn_pref_1L\n"));
02865 Is_True (PF_PTR_wn_pref_2L(tmp) == NULL,
02866 ("Error in old node, wn_pref_2L\n"));
02867 #endif
02868 }
02869 PF_PTR_wn_pref_2L(tmp) = pfnode;
02870 PF_PTR_lrnum_2L(tmp) = srefs[j].lrnum;
02871 PF_PTR_distance_2L(tmp) = offset;
02872 PF_PTR_set_conf_2L(tmp, confidence);
02873 break;
02874 case level_1and2:
02875 Is_True (newnode, ("Duplicate pfnode\n"));
02876 PF_PTR_wn_pref_1L(tmp) = pfnode;
02877 PF_PTR_lrnum_1L(tmp) = srefs[j].lrnum;
02878 PF_PTR_distance_1L(tmp) = offset;
02879 PF_PTR_set_conf_1L(tmp, confidence);
02880 PF_PTR_wn_pref_2L(tmp) = pfnode;
02881 PF_PTR_lrnum_2L(tmp) = srefs[j].lrnum;
02882 PF_PTR_distance_2L(tmp) = offset;
02883 PF_PTR_set_conf_2L(tmp, confidence);
02884 break;
02885 default:
02886 Is_True (FALSE, ("Error in level type\n"));
02887 break;
02888 }
02889 }
02890
02891 if (LNO_Analysis) {
02892 ls_num_indent -= 2;
02893 ls_print_indent; fprintf (LNO_Analysis, ")\n");
02894 }
02895
02896
02897 bitvec = bitvec >> 1;
02898 curbitpos++;
02899 while (bitvec && ((bitvec & 0x1) == 0)) {
02900 bitvec = bitvec >> 1;
02901 curbitpos++;
02902 }
02903 if (LNO_Analysis) {
02904 ls_num_indent -= 2;
02905 ls_print_indent; fprintf (LNO_Analysis, ")\n");
02906 }
02907 if (LNO_Tlog) {
02908 if (out_string_count>=MAX_LEN)
02909 sprintf(out_string,"ref-list too long");
02910
02911
02912
02913
02914 if (WN_opcode(wn_loop) != OPC_DO_LOOP) {
02915 if (!Is_Mp_Region(wn_loop)) {
02916 wn_loop = LWN_Get_Parent(wn_loop);
02917
02918 wn_loop = LWN_Get_Parent(wn_loop);
02919 }
02920 while (WN_opcode(wn_loop) != OPC_DO_LOOP) {
02921 wn_loop = LWN_Get_Parent(wn_loop);
02922 }
02923 }
02924 Generate_Tlog("LNO","prefetching",
02925 Srcpos_To_Line(WN_Get_Linenum(wn_loop)),
02926 ST_name(WN_st(WN_index(wn_loop))), "", out_string, aux_string);
02927 }
02928 }
02929 }
02930
02931
02932
02933
02934
02935
02936
02937
02938
02939 void PF_LG::Gen_Prefetch (PF_DESC *pfdesc,
02940 PF_SPLIT_VECTOR* split_vec,
02941 PF_LEVEL level) {
02942
02943 PF_SORTED_REFS* srefs = Sort_Refvecs (&_refvecs, _leading_ref);
02944 INT num = _refvecs.Elements()+1;
02945
02946
02947 switch (level) {
02948 case level_1:
02949 {
02950 INT64 curdist = 0;
02951 mINT16 start = 0;
02952 for (mINT16 i=1; i<num; i++) {
02953 INT64 gap = (srefs[i].dist-srefs[i-1].dist);
02954 Is_True ( gap >= 0,
02955 ("Error in Sort_Refvecs - returned non-sorted list\n"));
02956 curdist += gap;
02957
02958 if (curdist > Cache.LineSize(1)) {
02959
02960 Gen_Pref_Node (srefs, start, i, level_1, pfdesc, split_vec);
02961 curdist = 0;
02962 start = i;
02963 }
02964 }
02965
02966 Gen_Pref_Node (srefs, start, num, level_1, pfdesc, split_vec);
02967 break;
02968 }
02969
02970 case level_2:
02971 {
02972 INT64 curdist = 0;
02973 mINT16 start = 0;
02974 for (mINT16 i=1; i<num; i++) {
02975 INT64 gap = (srefs[i].dist-srefs[i-1].dist);
02976 Is_True ( gap >= 0,
02977 ("Error in Sort_Refvecs - returned non-sorted list\n"));
02978 curdist += gap;
02979
02980 if (curdist > Cache.LineSize(2)) {
02981
02982 Gen_Pref_Node (srefs, start, i, level_2, pfdesc, split_vec);
02983 curdist = 0;
02984 start = i;
02985 }
02986 }
02987
02988 Gen_Pref_Node (srefs, start, num, level_2, pfdesc, split_vec);
02989 break;
02990 }
02991
02992 case level_1and2:
02993 {
02994
02995
02996
02997 Is_True (FALSE, ("Gen_Pref_Node: one level at a time...\n"));
02998 INT64 curdist_1L = 0;
02999 INT64 curdist_2L = 0;
03000 mINT16 start_1L = 0;
03001 mINT16 start_2L = 0;
03002 for (mINT16 i=1; i<num; i++) {
03003 INT64 gap = (srefs[i].dist-srefs[i-1].dist);
03004 Is_True ( gap >= 0,
03005 ("Error in Sort_Refvecs - returned non-sorted list\n"));
03006 curdist_1L += gap;
03007 curdist_2L += gap;
03008
03009 if ((Cache.Levels () > 1) &&
03010 (curdist_1L > Cache.LineSize(1)) &&
03011 (curdist_2L > Cache.LineSize(2)) &&
03012 (start_1L == start_2L)) {
03013
03014
03015 Gen_Pref_Node (srefs, start_1L, i, level_1and2, pfdesc, split_vec);
03016 curdist_1L = curdist_2L = 0;
03017 start_1L = start_2L = i;
03018 continue;
03019 }
03020
03021
03022 if (curdist_1L > Cache.LineSize(1)) {
03023
03024 Gen_Pref_Node (srefs, start_1L, i, level_1, pfdesc, split_vec);
03025 curdist_1L = 0;
03026 start_1L = i;
03027 }
03028 if ((Cache.Levels() > 1) && curdist_2L > Cache.LineSize(2)) {
03029
03030 Gen_Pref_Node (srefs, start_2L, i, level_2, pfdesc, split_vec);
03031 curdist_2L = 0;
03032 start_2L = i;
03033 }
03034 }
03035
03036 if (Cache.Levels() > 1) {
03037 if (start_1L == start_2L)
03038 Gen_Pref_Node (srefs, start_2L, num, level_1and2, pfdesc, split_vec);
03039 else {
03040 Gen_Pref_Node (srefs, start_1L, num, level_1, pfdesc, split_vec);
03041 Gen_Pref_Node (srefs, start_2L, num, level_2, pfdesc, split_vec);
03042 }
03043 }
03044 else {
03045
03046 Gen_Pref_Node (srefs, start_1L, num, level_1, pfdesc, split_vec);
03047 }
03048 break;
03049 }
03050
03051 default:
03052 Is_True (FALSE, ("Illegal case in switch for level\n"));
03053 break;
03054 }
03055
03056 CXX_DELETE_ARRAY (srefs, PF_mpool);
03057 }
03058
03059 void PF_LG::Print (FILE *fp) {
03060 fprintf (fp, " Locality group: (0x%p)\n", this);
03061 fprintf (fp, " depth : %d\n", _depth);
03062 fprintf (fp, " leading ref : %d\n", _leading_ref);
03063 fprintf (fp, " numlines: 1L %d, 2L %d\n",
03064 _numlines_1L, _numlines_2L);
03065 fprintf (fp, " C. Min. Max.\n");
03066 INT i;
03067 for (i=0; i<Get_Dim(); i++)
03068 fprintf (fp, " %4lld %3lld %3lld\n",
03069 _c[i], _min_iter[i], _max_iter[i]);
03070 fprintf (fp, " Distance (bytes): Min %lld, Max %lld\n",
03071 _min_dist, _max_dist);
03072 fprintf (fp, " References in this LG (%d) and their vecs\n",
03073 _refvecs.Elements()+1);
03074 for (i=0; i<_refvecs.Elements(); i++)
03075 _refvecs.Bottom_nth(i)->Print (fp);
03076 fprintf (fp, " Done with Locality group (0x%p)\n", this);
03077 }
03078
03079 PF_UGS::PF_UGS (WN* wn_array, PF_BASE_ARRAY* myba) : _refs (PF_mpool) {
03080 INT num_loops = myba->Get_Depth()+1;
03081 INT i, j;
03082 FMAT *H;
03083 LU_FMAT *Hlu;
03084
03085 _aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, wn_array);
03086 _refs.Push (wn_array);
03087
03088
03089 H = CXX_NEW (FMAT(_aa->Num_Vec(), num_loops, PF_mpool), PF_mpool);
03090 _Hs = CXX_NEW (FMAT(_aa->Num_Vec()-1, num_loops, PF_mpool), PF_mpool);
03091
03092 INT indxs = _aa->Num_Vec ();
03093 for (i=0; i<indxs; i++) {
03094 for (j=0; j<num_loops; j++) {
03095 (*H)(i,j) = _aa->Dim(i)->Loop_Coeff(j);
03096 if (i != (indxs-1))
03097 (*_Hs)(i,j) = _aa->Dim(i)->Loop_Coeff(j);
03098 }
03099 }
03100
03101
03102 Hlu = CXX_NEW (LU_FMAT(*H, PF_mpool), PF_mpool);
03103 _Hslu = CXX_NEW (LU_FMAT(*_Hs, PF_mpool), PF_mpool);
03104
03105 _KerH = CXX_NEW(VECTOR_SPACE<FRAC>(*Hlu, PF_mpool), PF_mpool);
03106 _KerH->Beautify ();
03107 _KerHs = CXX_NEW(VECTOR_SPACE<FRAC>(*_Hslu, PF_mpool), PF_mpool);
03108 _KerHs->Beautify ();
03109
03110 CXX_DELETE (H, PF_mpool);
03111 CXX_DELETE (Hlu, PF_mpool);
03112
03113 #if 0
03114
03115
03116 fprintf (stderr, "TODO: Get rid of the following junk...\n");
03117
03118
03119 if (_KerHs->D() == 0) {
03120
03121 _stride = NULL;
03122 }
03123 else {
03124 const MAT<FRAC>& Hsbasis = _KerHs->Basis ();
03125
03126 # ifdef Is_True_On
03127 for (i=0; i<_KerHs->D(); i++) {
03128 for (j=0; j<_KerHs->N(); j++)
03129 Is_True ((Hsbasis(i,j).D() == 1),
03130 ("Kernel has a real frac element\n"));
03131 }
03132
03133
03134 for (j=0; j<_KerHs->N(); j++) {
03135 BOOL isPresent = FALSE;
03136 for (i=0; i<_KerHs->D(); i++)
03137 if (Hsbasis(i,j).N() != 0) {
03138 Is_True (!isPresent,
03139 ("loop index (%d) in more than one basis vector", j));
03140 isPresent = TRUE;
03141 }
03142 }
03143 # endif
03144
03145 _stride = CXX_NEW_ARRAY (mINT16, _KerHs->D(), PF_mpool);
03146
03147
03148 ACCESS_VECTOR* av = _aa->Dim(_aa->Num_Vec()-1);
03149 for (i=0; i<_KerHs->D(); i++) {
03150
03151
03152 INT dotproduct = 0;
03153 for (j=0; j<_KerHs->N(); j++) {
03154 dotproduct += Hsbasis(i,j).N() * av->Loop_Coeff(j);
03155 }
03156 if (dotproduct == 0) {
03157
03158 _stride[i] = 0;
03159 }
03160 else {
03161
03162
03163
03164 dotproduct = ((dotproduct<0) ? (-dotproduct) : dotproduct);
03165 INT sz = (INT) WN_element_size(wn_array);
03166 sz = sz * dotproduct;
03167
03168 if ((sz < Cache.LineSize(2)) || (sz < Cache.LineSize(1)))
03169
03170
03171
03172 _stride[i] = sz;
03173 else _stride[i] = -1;
03174 }
03175 }
03176
03177
03178
03179
03180
03181 }
03182 #endif // 0
03183
03184
03185 PF_LOOPNODE* loopnode = myba->Get_Loop();
03186 INT maxdepth = loopnode->Get_Depth() + 1;
03187 ACCESS_ARRAY *aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, wn_array);
03188 ACCESS_VECTOR *av = aa->Dim(aa->Num_Vec()-1);
03189
03190
03191 for (i=(maxdepth-1); i>=0; i--) {
03192 if (av->Loop_Coeff(i)) break;
03193 loopnode = loopnode->Get_Parent ();
03194 }
03195 _stride_one_loop = i;
03196
03197
03198
03199
03200
03201
03202
03203 _stride_forward = 1;
03204 _stride_one_size = (mINT16) ABS(WN_element_size(wn_array));
03205 if (i == -1) {
03206
03207 _stride_forward = 0;
03208 _stride_one_size = 0;
03209 }
03210 else {
03211 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
03212 if (!(dli->Step->Is_Const())) {
03213
03214 if (av->Loop_Coeff(i) < 0) _stride_forward = -1;
03215 _stride_one_size *= absof(av->Loop_Coeff(i));
03216 }
03217 else {
03218
03219 if (dli->Step->Const_Offset > 0) {
03220
03221 if (av->Loop_Coeff(i) < 0) _stride_forward = -1;
03222 }
03223 else {
03224
03225 if (av->Loop_Coeff(i) > 0) _stride_forward = -1;
03226 }
03227 _stride_one_size *= (absof(av->Loop_Coeff(i)*dli->Step->Const_Offset));
03228 }
03229 }
03230
03231
03232 {
03233 _stride_in_enclosing_loop = 0;
03234 #if defined(TARG_IA64)
03235 _stride_accurate = TRUE;
03236 #endif
03237
03238 ACCESS_ARRAY *aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, wn_array);
03239
03240
03241
03242
03243 for (i=aa->Num_Vec()-1; i>=0; i--) {
03244 ACCESS_VECTOR *av = aa->Dim(i);
03245 if (av->Loop_Coeff(maxdepth-1)) break;
03246 }
03247
03248 if (i >= 0) {
03249
03250 _stride_in_enclosing_loop = (mINT16) ABS(WN_element_size(wn_array));
03251 for (INT j=aa->Num_Vec()-1; j>i; j--) {
03252 WN* dim_wn = NULL;
03253 if (j < WN_num_dim(wn_array)) dim_wn = WN_array_dim(wn_array, j);
03254 if (dim_wn && WN_operator(dim_wn) == OPR_INTCONST) {
03255
03256 _stride_in_enclosing_loop *= WN_const_val(dim_wn);
03257 }
03258 else {
03259 #if defined(TARG_IA64)
03260
03261
03262
03263
03264
03265
03266
03267
03268
03269
03270
03271
03272 _stride_accurate = FALSE;
03273 #endif
03274
03275 _stride_in_enclosing_loop *= 100;
03276 }
03277 }
03278
03279 ACCESS_VECTOR *av = aa->Dim(i);
03280 PF_LOOPNODE* loopnode = myba->Get_Loop ();
03281 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
03282 if (dli->Step->Is_Const()) {
03283 _stride_in_enclosing_loop *= (av->Loop_Coeff(maxdepth-1)*dli->Step->Const_Offset);
03284 }
03285 else {
03286
03287 _stride_in_enclosing_loop *= (av->Loop_Coeff(maxdepth-1));
03288 }
03289 }
03290 else {
03291 #if defined(TARG_IA64)
03292 BOOL messy=FALSE;
03293
03294 for (i=aa->Num_Vec()-1; i>=0; i--) {
03295 ACCESS_VECTOR *av = aa->Dim(i);
03296 if (av->Too_Messy || av->Contains_Non_Lin_Symb()) {
03297 messy = TRUE;
03298 break;
03299 }
03300 }
03301
03302 WN* wn_loop = LWN_Get_Parent(wn_array);
03303 while (wn_loop && WN_opcode(wn_loop) != OPC_DO_LOOP)
03304 wn_loop = LWN_Get_Parent(wn_loop);
03305
03306 WN* wn_induction = WN_index(wn_loop);
03307 if(messy) {
03308 for(i=0; i<WN_num_dim(wn_array); ++i) {
03309 if( Contain_Induction_Variable(WN_array_index(wn_array, i), WN_st_idx(wn_induction)) ) {
03310 _stride_in_enclosing_loop = 100*ABS(WN_element_size(wn_array));
03311 _stride_accurate = FALSE;
03312 break;
03313 }
03314 }
03315 }
03316 #endif
03317
03318
03319
03320 }
03321 }
03322
03323
03324 #ifdef PF_PrintDebugging
03325 if (Debug_Prefetch) {
03326 fprintf (TFile, "Just constructed a new UGS: 0x%p. symbol = ", this);
03327 myba->Get_Symbol()->Print (TFile); fprintf (TFile, "\n");
03328 fprintf (TFile, "_Hs is: "); _Hs->Print (TFile);
03329 fprintf (TFile, "_Hslu is: "); _Hslu->Print (TFile);
03330 fprintf (TFile, "_KerHs is: "); _KerHs->Print (TFile);
03331 fprintf (TFile, "Hsbasis is:"); _KerHs->Basis().Print (TFile);
03332 fprintf (TFile, "Basis vectors: %d, with locality/stride as follows:\n",
03333 _KerHs->D());
03334
03335
03336
03337 fprintf (TFile, "\n");
03338 }
03339 #endif
03340
03341
03342
03343 _lg = CXX_NEW_ARRAY (PF_LG_DA*, num_loops+1, PF_mpool);
03344 for (i=0; i<=num_loops; i++) {
03345 _lg[i] = NULL;
03346
03347
03348 }
03349 _myba = myba;
03350 }
03351
03352 PF_UGS::~PF_UGS () {
03353 INT depth = Get_Depth();
03354
03355 for (INT i=0; i<=depth; i++) {
03356 if (_lg[i]) {
03357 while (_lg[i]->Elements()) CXX_DELETE (_lg[i]->Pop(), PF_mpool);
03358 CXX_DELETE (_lg[i], PF_mpool);
03359 }
03360 }
03361 CXX_DELETE_ARRAY (_lg, PF_mpool);
03362
03363
03364
03365
03366 CXX_DELETE (_Hs, PF_mpool);
03367 CXX_DELETE (_Hslu, PF_mpool);
03368 CXX_DELETE (_KerH, PF_mpool);
03369 CXX_DELETE (_KerHs, PF_mpool);
03370 }
03371
03372
03373
03374
03375
03376
03377 void PF_UGS::Build_Base_LGs () {
03378 WN* tref;
03379 PF_LG_DA* lglist;
03380 INT depth = Get_Depth()+1;
03381
03382 Is_True ((_lg[depth] == NULL),
03383 ("Already processed this LG at depth %d\n", depth));
03384
03385 _lg[depth] = lglist = CXX_NEW (PF_LG_DA(PF_mpool), PF_mpool);
03386 for (INT i=0; i<_refs.Elements(); i++) {
03387 tref = _refs.Bottom_nth(i);
03388 INT j;
03389 for (j=0; j<lglist->Elements(); j++) {
03390 if (lglist->Bottom_nth(j)->Add_Ref (tref, i))
03391 break;
03392 }
03393 if (j == lglist->Elements()) {
03394
03395
03396 PF_LG* tmp_lg = CXX_NEW(PF_LG(tref, i, depth, this), PF_mpool);
03397 lglist->Push (tmp_lg);
03398 }
03399 }
03400 }
03401
03402
03403
03404
03405
03406
03407
03408
03409
03410
03411
03412 void PF_UGS::BuildLG (mINT16 depth) {
03413 if (_lg[depth] == NULL) {
03414
03415 FmtAssert((_lg[depth+1] != NULL),
03416 ("Build LG: somehow previous LG missing!\n"));
03417 PF_LG_DA* cur_lglist = _lg[depth] = CXX_NEW (PF_LG_DA(PF_mpool), PF_mpool);
03418
03419
03420 for (INT i=0; i<_lg[depth+1]->Elements(); i++) {
03421 PF_LG* cur_lg = _lg[depth+1]->Bottom_nth(i);
03422 INT j;
03423 for (j=0; j<cur_lglist->Elements(); j++) {
03424 if (cur_lglist->Bottom_nth(j)->Add_Group
03425 (cur_lg, _refs.Bottom_nth(cur_lg->Get_LeadingRef()))) {
03426 Is_True (cur_lglist->Bottom_nth(j)->Check (), ("oops - error\n"));
03427 break;
03428 }
03429 }
03430 if (j == cur_lglist->Elements()) {
03431
03432 cur_lglist->Push (CXX_NEW (PF_LG(cur_lg), PF_mpool));
03433 }
03434 }
03435 }
03436 }
03437
03438
03439
03440
03441
03442
03443
03444 PF_VOLUME PF_UGS::Volume (mINT16 depth) {
03445
03446 PF_PRINT(fprintf (TFile, "Volugs[0x%p] depth (%d)\n",
03447 this, depth););
03448 if (_lg[depth] == NULL) BuildLG (depth);
03449 PF_VOLUME myvol (0,0);
03450
03451 for (INT i=0; i<_lg[depth]->Elements(); i++) {
03452 myvol += _lg[depth]->Bottom_nth(i)->Volume ();
03453 }
03454 PF_PRINT(fprintf (TFile, "Volugs[0x%p] depth (%d) vol ",
03455 this, depth);
03456 myvol.Print (TFile);
03457 );
03458 return myvol;
03459 }
03460
03461
03462
03463
03464
03465
03466
03467
03468 BOOL PF_UGS::Add_Ref (WN* ref) {
03469 INT i, j;
03470 ACCESS_ARRAY *new_aa = (ACCESS_ARRAY*) WN_MAP_Get (LNO_Info_Map, ref);
03471 ACCESS_VECTOR *new_av, *myav;
03472
03473 if (new_aa->Num_Vec() != _aa->Num_Vec()) return FALSE;
03474
03475 for (i=0; i<new_aa->Num_Vec(); i++) {
03476 myav = _aa->Dim(i);
03477 new_av = new_aa->Dim(i);
03478 if (new_av->Nest_Depth() != myav->Nest_Depth()) return FALSE;
03479 for (j=0; j<new_av->Nest_Depth(); j++) {
03480 if (new_av->Loop_Coeff(j) != myav->Loop_Coeff(j)) return FALSE;
03481
03482
03483
03484
03485
03486
03487 if (new_av->Delinearized_Symbol != myav->Delinearized_Symbol)
03488 return FALSE;
03489 }
03490 }
03491
03492
03493 _refs.Push (ref);
03494 return TRUE;
03495 }
03496
03497 #ifdef KEY //bug 10953
03498 static BOOL Pseudo_Temporal_Locality(WN *array)
03499 {
03500 WN *loop = LWN_Get_Parent(array);
03501 while(loop && WN_opcode(loop) != OPC_DO_LOOP)
03502 loop = LWN_Get_Parent(loop);
03503 if(loop == NULL)
03504 return FALSE;
03505
03506
03507
03508
03509 #if (defined(TARG_X8664) || defined(TARG_IA64)) //bug 10953
03510 if(Simple_Invariant_Stride_Access(array, loop))
03511 return TRUE;
03512 #endif
03513
03514 return FALSE;
03515 }
03516 #endif
03517
03518
03519
03520
03521
03522
03523
03524
03525
03526 void PF_UGS::ComputePFVec (PF_LEVEL level, PF_LOCLOOP locloop) {
03527 #if defined(TARG_IA64)
03528 if (!Get_Stride_Accurate())
03529 return;
03530 #endif
03531
03532 INT depth = Get_Depth ();
03533 INT linesize;
03534 mINT16 loopdepth;
03535 switch (level) {
03536 case level_1:
03537 loopdepth = locloop.Loop_1L ();
03538 linesize = Cache.LineSize(1);
03539 break;
03540 case level_2:
03541 loopdepth = locloop.Loop_2L ();
03542 linesize = Cache.LineSize(2);
03543 break;
03544 default:
03545 Is_True (FALSE, ("ComputePFVec for level one OR two\n"));
03546 break;
03547 }
03548
03549 if (global_lvs[depth+1][loopdepth+1] == NULL)
03550 Allocate_Lvs (depth+1, loopdepth+1);
03551 Is_True (global_lvs[depth+1][loopdepth+1], ("global_lvs not initialized\n"));
03552 Is_True (global_lvs[depth+1][loopdepth+1]->N() == (depth+1),
03553 ("global_lvs has size %d, should be %d\n",
03554 global_lvs[depth+1][loopdepth+1]->N(), (depth+1)));
03555 Is_True (global_lvs[depth+1][loopdepth+1]->D() == (depth-loopdepth+1),
03556 ("global_lvs has %d vecs, should have %d vecs\n",
03557 global_lvs[depth+1][loopdepth+1]->D(), (loopdepth+1)));
03558
03559 VECTOR_SPACE<FRAC>& lvs = *global_lvs[depth+1][loopdepth+1];
03560
03561
03562 VECTOR_SPACE<FRAC> sKerH (_KerH, PF_mpool);
03563 sKerH *= lvs;
03564
03565 PF_PRINT ({
03566 fprintf (TFile, "ugs [0x%p]. Ref: ", this);
03567 Get_BA()->Get_Symbol()->Print (TFile);
03568 ACCESS_ARRAY* aa = (ACCESS_ARRAY*)
03569 WN_MAP_Get (LNO_Info_Map,_refs.Bottom_nth(0));
03570 aa->Print (TFile);
03571 });
03572
03573 #ifdef KEY //bug 10953: temporal locality estimation may not be
03574
03575
03576 BOOL pseudo_temporal = TRUE;
03577 if(!LNO_Prefetch_Invariant_Stride)
03578 pseudo_temporal = FALSE;
03579 else
03580 for(INT kk=0; kk<_refs.Elements(); kk++){
03581 if(!Pseudo_Temporal_Locality(_refs.Bottom_nth(kk))){
03582 pseudo_temporal = FALSE;
03583 break;
03584 }
03585 }
03586 #endif
03587
03588 if ((level == level_1) ?
03589 locloop.While_Temporal_1L() : locloop.While_Temporal_2L()) {
03590
03591 #ifdef KEY //bug 10953
03592 if(!pseudo_temporal){
03593 #endif
03594 _pfdesc.Turn_Off (level);
03595 PF_PRINT(fprintf (TFile, " while temporal locality (no prefetch)\n"));
03596 #ifdef KEY
03597 }
03598 #endif
03599 return;
03600 }
03601
03602 if (sKerH.D()) {
03603
03604
03605 PF_LOOPNODE* loopnode = Get_Loop ();
03606 INT dp = loopnode->Get_Depth ();
03607
03608
03609
03610 INT not_loopdepth = loopdepth;
03611
03612
03613
03614
03615
03616
03617
03618
03619
03620
03621 PF_LOOPNODE* ln = NULL;
03622 while (1) {
03623
03624 loopnode = Get_Loop ();
03625 dp = loopnode->Get_Depth();
03626
03627
03628
03629
03630
03631 while (dp != not_loopdepth) {
03632 dp--;
03633 loopnode = loopnode->Get_Parent();
03634 }
03635 DO_LOOP_INFO* dli = loopnode->Get_LoopInfo ();
03636 if (!dli->Is_Inner_Tile) {
03637 ln = Is_Outer_Tile (Get_Loop(), loopnode, Get_AA());
03638 if (ln) {
03639
03640
03641 not_loopdepth++;
03642 continue;
03643 }
03644 else break;
03645 }
03646 else break;
03647 }
03648
03649
03650
03651
03652 if (not_loopdepth > loopdepth) {
03653
03654
03655
03656
03657
03658 VECTOR_SPACE<FRAC> msKerHs (_KerHs, PF_mpool);
03659 if (global_lvs[depth+1][not_loopdepth+1] == NULL)
03660 Allocate_Lvs (depth+1, not_loopdepth+1);
03661 VECTOR_SPACE<FRAC>& mlvs = *global_lvs[depth+1][not_loopdepth+1];
03662 msKerHs *= mlvs;
03663 if (msKerHs.D() == 0) {
03664 _pfdesc.Turn_Off (level);
03665 return;
03666 }
03667 INT64 stride = 0;
03668 const MAT<FRAC>& msHsbasis = msKerHs.Basis ();
03669 ACCESS_VECTOR *av = _aa->Dim(_aa->Num_Vec()-1);
03670
03671 INT i;
03672 for (i=0; i<=depth; i++)
03673 stride += msHsbasis(0, i).N() * av->Loop_Coeff(i);
03674 stride = abs(stride);
03675 if (stride == 0) {
03676 _pfdesc.Turn_Off (level);
03677 return;
03678 }
03679
03680
03681 stride = stride * ABS(WN_element_size(_refs.Bottom_nth(0)));
03682
03683
03684 if (stride > linesize) return;
03685
03686 mINT16 trips = (mINT16) linesize / stride;
03687
03688 mINT16* prefetch_vec = CXX_NEW_ARRAY (mINT16, depth+1, PF_mpool);
03689 for (i=0; i<=depth; i++) {
03690 Is_True (((i>=loopdepth) || (msHsbasis(0,i).N() == 0)),
03691 ("Spatial locality vector in subnest spilt over\n"));
03692 prefetch_vec[i] = abs(msHsbasis(0,i).N()) * trips;
03693 }
03694 PF_PRINT(fprintf (TFile, " level %d, spatial locality with vector: ",
03695 level);
03696 for (INT i=0; i<depth+1; i++)
03697 fprintf (TFile, " %4d ", prefetch_vec[i]);
03698 fprintf (TFile, "\n"));
03699 _pfdesc.Turn_On (level, prefetch_vec, depth+1);
03700 return;
03701 }
03702 #ifdef KEY //bug 10953
03703 if(!pseudo_temporal){
03704 #endif
03705 _pfdesc.Turn_Off (level);
03706 PF_PRINT(fprintf (TFile, " temporal locality with basis (no prefetch)\n");
03707 sKerH.Print (TFile));
03708 #ifdef KEY
03709 }
03710 #endif
03711 return;
03712 }
03713
03714 VECTOR_SPACE<FRAC> sKerHs (_KerHs, PF_mpool);
03715 sKerHs *= lvs;
03716 if (sKerHs.D() == 0) {
03717
03718
03719 PF_PRINT(fprintf (TFile, " no spatial locality (prefetch all)\n"));
03720 return;
03721 }
03722
03723 PF_PRINT(fprintf (TFile, " spatial locality, basis is 0x%p\n", this);
03724 sKerHs.Print (TFile));
03725
03726 Is_True (sKerHs.D() == 1,
03727 ("Error in dimension of spatial locality subspace\n"));
03728
03729
03730 INT64 stride = 0;
03731 VB_PRINT(printf ("** compute spatial locality vector and stride correctly! **\n"));
03732 const MAT<FRAC>& sHsbasis = sKerHs.Basis ();
03733
03734 ACCESS_VECTOR *av = _aa->Dim(_aa->Num_Vec()-1);
03735
03736 INT i;
03737 for (i=0; i<=depth; i++)
03738 stride += sHsbasis(0, i).N() * av->Loop_Coeff(i);
03739 stride = abs(stride);
03740
03741
03742 stride = stride * ABS(WN_element_size(_refs.Bottom_nth(0)));
03743
03744
03745 if (stride > linesize) return;
03746
03747 mINT16 trips = (mINT16) (linesize / stride);
03748
03749 mINT16* prefetch_vec = CXX_NEW_ARRAY (mINT16, depth+1, PF_mpool);
03750 for (i=0; i<=depth; i++) {
03751 Is_True (((i>=loopdepth) || (sHsbasis(0,i).N() == 0)),
03752 ("Spatial locality vector in subnest spilt over\n"));
03753 prefetch_vec[i] = abs(sHsbasis(0,i).N()) * trips;
03754 }
03755 PF_PRINT(fprintf (TFile, " level %d, spatial locality with vector: ",
03756 level);
03757 for (INT i=0; i<depth+1; i++)
03758 fprintf (TFile, " %4d ", prefetch_vec[i]);
03759 fprintf (TFile, "\n"));
03760 _pfdesc.Turn_On (level, prefetch_vec, depth+1);
03761 PF_PRINT (_pfdesc.Print (TFile));
03762 }
03763
03764
03765
03766
03767
03768
03769
03770
03771
03772
03773 PF_SPLIT_VECTOR* PF_UGS::Find_Split_Vector () {
03774 mINT16* prefetch_vec;
03775 if (((Cache.Levels() > 1) && (prefetch_vec = _pfdesc.Vec(level_2))) ||
03776 ((Cache.Levels() == 1) && (prefetch_vec = _pfdesc.Vec(level_1)))) {
03777
03778
03779 INT i;
03780 for (i=0; i<Get_Depth(); i++)
03781 if (prefetch_vec[i] > 1) break;
03782 if (i == Get_Depth()) {
03783
03784 return NULL;
03785 }
03786 PF_SPLIT_VECTOR* split_vec = CXX_NEW (
03787 PF_SPLIT_VECTOR(Get_Depth()+1,
03788 _pfdesc.Num_Lines (),
03789 prefetch_vec,
03790 Get_Loop()),
03791 PF_mpool);
03792 Is_True (!split_vec->Empty (), ("Just created an empty split_vec\n"));
03793 return split_vec;
03794 }
03795 return NULL;
03796 }
03797
03798
03799
03800
03801
03802
03803
03804
03805
03806
03807
03808 void PF_UGS::Find_Loc_Space (PF_LOCLOOP locloop) {
03809
03810 if (Cache.Levels() > 1) {
03811 if (locloop.Localized_1L()) ComputePFVec (level_1, locloop);
03812 if (locloop.Localized_2L()) ComputePFVec (level_2, locloop);
03813 }
03814 else {
03815 Is_True (locloop.Localized_1L(), ("Find_Loc_Space, but not localized\n"));
03816 ComputePFVec (level_1, locloop);
03817 }
03818
03819
03820
03821
03822 mINT16 loopdepth;
03823 mINT16* prefetch_vec = NULL;
03824 mINT16 count = 0;
03825
03826
03827 if ((Cache.Levels() > 1) && (prefetch_vec = _pfdesc.Vec(level_2))) {
03828 loopdepth = locloop.Loop_2L();
03829
03830
03831 if (_lg[loopdepth] == NULL) BuildLG (loopdepth);
03832 for (INT i=0; i<_lg[loopdepth]->Elements(); i++)
03833 count += _lg[loopdepth]->Bottom_nth(i)->Lines (level_2);
03834 }
03835 else if ((Cache.Levels() == 1) && (prefetch_vec = _pfdesc.Vec(level_1))) {
03836 loopdepth = locloop.Loop_1L();
03837
03838
03839 if (_lg[loopdepth] == NULL) BuildLG (loopdepth);
03840 for (INT i=0; i<_lg[loopdepth]->Elements(); i++)
03841 count += _lg[loopdepth]->Bottom_nth(i)->Lines (level_1);
03842 }
03843
03844 if (prefetch_vec) _pfdesc.Set_Num_Lines (count);
03845 }
03846
03847
03848
03849
03850
03851
03852 void PF_UGS::Gen_Prefetch (PF_SPLIT_VECTOR* split_vec) {
03853 if (!_pfdesc.Is_On()) return;
03854
03855 INT i;
03856 PF_LG_DA* curlg;
03857 PF_LOCLOOP locloop = Get_Loop()->Get_locloop ();
03858 mINT16 loopdepth = Get_Depth() + 1;
03859
03860
03861
03862
03863
03864
03865
03866
03867
03868
03869
03870
03871
03872
03873
03874
03875
03876
03877
03878
03879
03880
03881 if (_pfdesc.Kind(level_1) != none) {
03882 if (locloop.Localized_1L ())
03883 loopdepth = locloop.Loop_1L();
03884 if (_lg[loopdepth] == NULL) BuildLG (loopdepth);
03885 curlg = _lg[loopdepth];
03886 for (i=0; i<curlg->Elements(); i++)
03887 curlg->Bottom_nth(i)->Gen_Prefetch (&_pfdesc, split_vec, level_1);
03888 }
03889
03890 if (Cache.Levels() == 1) return;
03891
03892 if (_pfdesc.Kind(level_2) != none) {
03893 loopdepth = Get_Depth() + 1;
03894 if (locloop.Localized_2L())
03895 loopdepth = locloop.Loop_2L ();
03896 if (_lg[loopdepth] == NULL) BuildLG (loopdepth);
03897 curlg = _lg[loopdepth];
03898 for (i=0; i<curlg->Elements(); i++)
03899 curlg->Bottom_nth(i)->Gen_Prefetch (&_pfdesc, split_vec, level_2);
03900 }
03901 }
03902
03903 void PF_UGS::Print (FILE* fp) {
03904 fprintf (fp, " UGS/Access array: ");
03905 _aa->Print (fp);
03906 fprintf (fp, " FMAT is: \n"); _Hs->Print (fp);
03907 fprintf (fp, " LU_FMAT is: \n"); _Hslu->Print (fp);
03908 fprintf (fp, " KerHsis: \n"); _KerHs->Print (fp);
03909
03910 fprintf (fp, " The references are (%d):\n", _refs.Elements());
03911 INT i;
03912 for (i=0; i<_refs.Elements(); i++) {
03913 ACCESS_ARRAY* aa = (ACCESS_ARRAY*)
03914 WN_MAP_Get (LNO_Info_Map,_refs.Bottom_nth(i));
03915 fprintf (fp, " [%d] 0x%p ", i, _refs.Bottom_nth (i));
03916 aa->Print (fp);
03917 fprintf (fp, "\n");
03918 }
03919 fprintf (fp, " The locality groups are:\n");
03920 for (i=0; i<=(Get_Depth()+1); i++) {
03921 if (_lg[i] == NULL) continue;
03922 fprintf (fp, " Group# %d\n", i);
03923 for (INT j=0; j<_lg[i]->Elements(); j++) {
03924 _lg[i]->Bottom_nth(j)->Print (fp);
03925 }
03926 }
03927 }
03928
03929 PF_BASE_ARRAY::~PF_BASE_ARRAY () {
03930 CXX_DELETE (_array_base, PF_mpool);
03931 while (_ugs.Elements())
03932 CXX_DELETE (_ugs.Pop(), PF_mpool);
03933 }
03934
03935 void PF_BASE_ARRAY::Build_Base_LGs () {
03936 for (INT i=0; i<_ugs.Elements (); i++) {
03937 _ugs.Bottom_nth(i)->Build_Base_LGs ();
03938 }
03939 }
03940
03941
03942
03943
03944
03945
03946
03947 extern BOOL Steady_Base (WN* wn_array) {
03948 Is_True (WN_operator(wn_array) == OPR_ARRAY,
03949 ("Steady_base must be called with an array node"));
03950 WN* load_wn = LWN_Get_Parent(wn_array);
03951 return (DEPV_COMPUTE::Base_Test(load_wn,NULL, load_wn,NULL) == DEP_CONTINUE);
03952 }
03953
03954
03955
03956
03957
03958
03959
03960
03961
03962
03963 BOOL PF_BASE_ARRAY::Add_Ref (WN* wn_array, BOOL do_check) {
03964 if (do_check) {
03965 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get(LNO_Info_Map, wn_array);
03966
03967 if (aa->Num_Vec() != _dim) return FALSE;
03968
03969 ACCESS_ARRAY* sample_aa =
03970 (ACCESS_ARRAY*) WN_MAP_Get(LNO_Info_Map, _sample_wn_array);
03971 for (INT i=0; i<_dim ; i++) {
03972 ACCESS_VECTOR *av = aa->Dim(i);
03973 ACCESS_VECTOR *sample_av = sample_aa->Dim(i);
03974 if (av->Delinearized_Symbol && sample_av->Delinearized_Symbol) {
03975 if (!(*av->Delinearized_Symbol == *sample_av->Delinearized_Symbol)) {
03976 return FALSE;
03977 }
03978 } else if (av->Delinearized_Symbol || sample_av->Delinearized_Symbol) {
03979 return FALSE;
03980 }
03981 }
03982
03983 switch (DEPV_COMPUTE::Base_Test(LWN_Get_Parent(wn_array),NULL,
03984 LWN_Get_Parent(_sample_wn_array),NULL)) {
03985 case DEP_CONTINUE:
03986
03987 break;
03988 case DEP_INDEPENDENT:
03989
03990 {
03991 if (Tree_Equiv(wn_array, _sample_wn_array)) {
03992
03993 break;
03994 }
03995 else return FALSE;
03996 }
03997 default:
03998 return FALSE;
03999 }
04000 }
04001
04002 #ifdef Is_True_On
04003 ACCESS_ARRAY* aa = (ACCESS_ARRAY*) WN_MAP_Get(LNO_Info_Map, wn_array);
04004 FmtAssert (aa->Num_Vec() == _dim,
04005 ("mismatch -- %d != %d\n", aa->Num_Vec(), _dim));
04006 #endif
04007 INT i;
04008 for (i=0; i<_ugs.Elements (); i++) {
04009 if (_ugs.Bottom_nth(i)->Add_Ref (wn_array)) break;
04010 }
04011 if (i == _ugs.Elements ()) {
04012
04013 _ugs.Push (CXX_NEW (PF_UGS(wn_array, this), PF_mpool));
04014 }
04015 return TRUE;
04016 }
04017
04018 PF_VOLUME PF_BASE_ARRAY::Volume (mINT16 depth) {
04019 PF_VOLUME myvol (0, 0);
04020 for (INT i=0; i<_ugs.Elements(); i++) {
04021 myvol += _ugs.Bottom_nth(i)->Volume(depth);
04022 }
04023 return myvol;
04024 }
04025
04026
04027
04028
04029
04030
04031
04032 void PF_BASE_ARRAY::Find_Loc_Space (PF_LOCLOOP locloop) {
04033 for (INT i=0; i<_ugs.Elements(); i++)
04034 _ugs.Bottom_nth(i)->Find_Loc_Space (locloop);
04035 }
04036
04037 PF_SPLIT_VECTOR* PF_BASE_ARRAY::Find_Split_Vector () {
04038 PF_SPLIT_VECTOR* split_vec = NULL;
04039 for (INT i=0; i<_ugs.Elements(); i++) {
04040 PF_SPLIT_VECTOR* tmp = _ugs.Bottom_nth(i)->Find_Split_Vector ();
04041 if (tmp) {
04042 if (split_vec) split_vec->Update(tmp);
04043 else split_vec = tmp;
04044 }
04045 }
04046 return split_vec;
04047 }
04048
04049 void PF_BASE_ARRAY::Gen_Prefetch (PF_SPLIT_VECTOR* split_vec) {
04050 for (INT i=0; i<_ugs.Elements(); i++)
04051 _ugs.Bottom_nth(i)->Gen_Prefetch (split_vec);
04052 }
04053
04054 void PF_BASE_ARRAY::Print (FILE* fp) {
04055 fprintf (fp, "Symbol : "); _array_base->Print (fp);
04056 if (_ugs.Elements() == 0)
04057 fprintf (fp, " No uniformly generated sets\n");
04058 else {
04059 fprintf (fp, " %d uniformly generated sets\n", _ugs.Elements());
04060 for (INT i=0; i<_ugs.Elements(); i++) {
04061 _ugs.Bottom_nth(i)->Print (fp);
04062 }
04063 }
04064 }