00001 /* 00002 00003 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00004 00005 This program is free software; you can redistribute it and/or modify it 00006 under the terms of version 2 of the GNU General Public License as 00007 published by the Free Software Foundation. 00008 00009 This program is distributed in the hope that it would be useful, but 00010 WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 Further, this software is distributed without any warranty that it is 00014 free of the rightful claim of any third person regarding infringement 00015 or the like. Any license provided herein, whether implied or 00016 otherwise, applies only to this software file. Patent licenses, if 00017 any, provided herein do not apply to combinations of this program with 00018 other software, or any other product whatsoever. 00019 00020 You should have received a copy of the GNU General Public License along 00021 with this program; if not, write the Free Software Foundation, Inc., 59 00022 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00023 00024 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00025 Mountain View, CA 94043, or: 00026 00027 http://www.sgi.com 00028 00029 For further information regarding this notice, see: 00030 00031 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00032 00033 */ 00034 00035 00036 // c-or-c++ 00037 00038 /* This file includes the data structures corresponding to array references 00039 * within a loop. These data structures are organized hierarchically -- 00040 * - references in a loop are partitioned across different base-arrays. 00041 * - Multiple references in a base-array are partitioned into their individual 00042 * uniformly generated sets. 00043 * - references within a UGS are partitioned into locality groups, 00044 * with a set of locality groups for each loop in the loop-nest. 00045 * 00046 * The data structures are documented here, the main algorithms are described 00047 * at the beginning of pf_ref.cxx and within the code. 00048 * 00049 * Exported Types: 00050 * 00051 * PF_REFVEC 00052 * 00053 * Multiple references within a locality group are stored in a stack of 00054 * PF_REFVECs. Each refvec stores the information required to relate a 00055 * reference to the other references (specifically to the leading 00056 * reference) of the locality group. 00057 * The fields that are stored within a PF_REFVEC are documented in the 00058 * class description. 00059 * 00060 * Exported Functions 00061 * 00062 * PF_REFVEC (mINT16 refnum, mINT16 depth, FRAC* dvec, INT distance) 00063 * 00064 * Constructor for a PF_REFVEC that takes a value for each of the 00065 * four fields. 00066 * 00067 * PF_REFVEC (PF_REFVEC* refvec) 00068 * 00069 * Constructor that takes a refvec, and makes a copy into the new refvec. 00070 * 00071 * ~PF_REFVEC () 00072 * 00073 * Destructor for a refvec. Deallocated the _dvec. 00074 * 00075 * void Update_dvec (FRAC* lr_dvec) 00076 * 00077 * Basically subtracts the given dvec from stored dvec. 00078 * Update the stored dvec relative to the supplied dvec of the new 00079 * leading reference relative to the old leading reference. 00080 * Called when the leading reference changes. 00081 * 00082 * INT64 Distance () 00083 * 00084 * Return the distance of this reference from the leading reference. 00085 * 00086 * mINT16 Refnum () 00087 * 00088 * Return the reference number of this reference in the reflist in UGS. 00089 * 00090 * mINT16 Trips () 00091 * 00092 * Returns the trip separation from the leading reference in the 00093 * inner-most loop. 00094 * 00095 * void Update_Distance (INT64 dist) 00096 * 00097 * Subtract the supplied distance from the stored distance. 00098 * Again useful when the leading reference changes, and the distance 00099 * is now relative to the new leading reference. 00100 * 00101 * void Print (FILE* fp) 00102 * 00103 * Print the contents of a refvec. 00104 * 00105 * 00106 * Exported Type 00107 * 00108 * enum PF_KIND { all, none, vec} 00109 * 00110 * Useful to store the kind of prefetching required for a reference. 00111 * All means all reference should be prefetched, 00112 * none means there is temporal locality and no reference should be 00113 * prefetched, vec means that there is spatial locality, and prefetches 00114 * should be done based on the vector in PF_DESC. 00115 * 00116 * enum PF_LEVEL { level_1, level_2, level_1and2 } 00117 * 00118 * Describes the cache level. 00119 * 00120 * PF_DESC 00121 * 00122 * This object describes the prefetch desired for each level of the cache. 00123 * For each cache level, it stores the kind of prefetch and the prefetch 00124 * vector, if any. It also stores the number of cache lines that are 00125 * contained within this locality group (for the outer-level cache). 00126 * 00127 * Exported Functions 00128 * 00129 * PF_DESC () 00130 * 00131 * Constructor. Initializes fields to NULL, initialize kind to "all". 00132 * 00133 * ~PF_DESC () 00134 * 00135 * Destructor - deletes the two prefetch vectors, if any. 00136 * 00137 * void Turn_Off (PF_LEVEL level) 00138 * 00139 * Disable prefetching in the specified level of the cache. 00140 * 00141 * void Turn_On (PF_LEVEL level, mINT16* pf_vec) 00142 * 00143 * Enable prefetching with stride along supplied prefetch vector 00144 * 00145 * void Set_Num_Lines (mINT16 numlines) 00146 * 00147 * Set the number of lines in this prefetchi descriptor. 00148 * 00149 * mINT16 Num_Lines () 00150 * 00151 * Read the number of lines in this descriptor. 00152 * 00153 * PF_KIND Kind (PF_LEVEL level) 00154 * 00155 * Return the kind of prefetching for the specified cache level. 00156 * 00157 * mINT16* Vec (PF_LEVEL level) 00158 * 00159 * Return the prefetch vector for the specified cache level. 00160 * 00161 * BOOL Is_On () 00162 * 00163 * Return true if any prefetching for any level of the cache is turned on, 00164 * either all or vec. 00165 * 00166 * 00167 * Exported Types 00168 * 00169 * PF_LG 00170 * 00171 * Locality group: bunch of references that could get spatical locality 00172 * i.e. there is a solution to Ax = c1-c2, and the solution is contained 00173 * within the appropriate depth. Separation between references of greater 00174 * than a cache line doesn't matter --- that separation is made 00175 * individually for the two cache levels (with their difference 00176 * line-sizes) later. 00177 * 00178 * Private Functions 00179 * 00180 * void Split_LG () 00181 * 00182 * Take the references in the locality group, sort them based on their 00183 * distance from the leading reference, and try to get a count of the 00184 * number of distinct lines in each level of the cache. 00185 * 00186 * void Gen_Pref_Node (PF_SORTED_REFS* srefs, mINT16 start, mINT16 stop, 00187 * PF_LEVEL level, PF_DESC*, PF_SPLIT_VECTOR*) 00188 * 00189 * Actually generate the prefetch node for the references start through 00190 * stop-1 in the array srefs of sorted refs. level gives the level of 00191 * the cache to prefetch for. pfdesc gives the desired prefetch vector, 00192 * and split-vec gives the loop versioning that has happened. 00193 * 00194 * INT Get_Bit_Vec (PF_DESC* pfdesc, 00195 * PF_LEVEL level, 00196 * PF_SPLIT_VECTOR* split_vec) 00197 * 00198 * Given a desired prefetch vector in pfdesc, and the versioning vector in 00199 * split_vec, this returns a bitvector that specifies which of the 00200 * strung-together references (through version_map) need be prefetched. 00201 * This is done based on a (currently) dumb comparison of pfdesc and 00202 * split_vec, and can be improved. 00203 * 00204 * WN* Get_Ref_Version (WN* ref, INT bitpos) 00205 * 00206 * Given the bitvector returned by Get_Bit_Vec, and given the first 00207 * reference, this routing returns the version of the reference that 00208 * should be prefetched based on bitpos. 00209 * 00210 * INT64 Distance_LR (WN* ref, FRAC* dvec) 00211 * 00212 * Given a ref and its dvec relative to leading reference, compute its 00213 * distance in bytes from the leading reference. 00214 * 00215 * Exported Functions 00216 * 00217 * PF_LG (PF_LG* lg) 00218 * 00219 * Construct a locality group initialized to the supplied locality group. 00220 * 00221 * PF_LG (WN* ref, mINT16 bitpos, mINT16 depth, PF_UGS* myugs) 00222 * 00223 * Construct a locality group from scratch. Initialize it to the one 00224 * single reference, that is in the given bitposition in the UGS, the 00225 * locality group is for a loop at the given depth, and it belongs to the 00226 * given UGS. 00227 * 00228 * ~PF_LG () 00229 * 00230 * Destructor for a locality group. Deallocates all the pf_refvecs 00231 * allocated within this locality group, as well as the _c (constant 00232 * vector for leading ref) vector. 00233 * 00234 * FRAC* Ref_In_LG (WN* ref) 00235 * 00236 * Given a ref, determine if this reference belongs to this locality 00237 * group or not. This basically involves finding a solution to Ax = c1-c2, 00238 * seeing if a solution exists, and whether the solution vector is 00239 * contained within the desired loop depth. 00240 * Returns a pointer to the dvec if true, otherwise it returns NULL. 00241 * 00242 * BOOL Add_Ref (WN* ref, mINT16 bitpos) 00243 * 00244 * Given a reference and its bitpos in the reflist of the UGS, determine 00245 * if the reference belongs to this locality group. If yes, then add it, 00246 * including updating data structures such as the spread (min/max iter), 00247 * distance (min/max bytes), etc. If this reference is not in this LG, 00248 * return FALSE. 00249 * 00250 * BOOL Add_Group (PF_LG* lg, WN* lref) 00251 * 00252 * Given a group and it's leading reference, determine if the incoming 00253 * group and the this group should be merged together. This is done based 00254 * on the dvec of the two leading references relative to each other. 00255 * If yes, then merge them together, otherwise return NULL> 00256 * 00257 * BOOL Check () 00258 * 00259 * Check that refvecs within this LG has no duplicate references. 00260 * For debugging. 00261 * 00262 * BOOL Check_Ref (mINT16 refnum) 00263 * 00264 * Check that the given ref doesn't already exist in the refvecs for this 00265 * LG. For debugging. 00266 * 00267 * PF_VOLUME Volume () 00268 * 00269 * Compute the volume of this locality group for the loop at depth _depth 00270 * that the LG is for. Volume is computed in bytes. 00271 * 00272 * mINT16 Lines (PF_LEVEL level) 00273 * 00274 * Return the number of cache lines in this LG at the given cache level. 00275 * 00276 * mINT16 Get_Dim () 00277 * 00278 * Return the dimensionality of this array (stored in base array node). 00279 * 00280 * mINT16 Get_Depth () 00281 * 00282 * Get the depth of the loop (outermost loop is 0). 00283 * 00284 * LU_FMAT Get_Hslu () 00285 * 00286 * Get the Hs in LU form for the UGS of this LG. 00287 * 00288 * VECTOR_SPACE<FRAC>* Get_KerH () 00289 * 00290 * Get the kernel for H for the UGS of this LG. 00291 * 00292 * VECTOR_SPACE<FRAC>* Get_KerHs () 00293 * 00294 * Get the kernel for Hs for the UGS of this LG. 00295 * 00296 * PF_LOOPNODE* Get_Loop () 00297 * 00298 * Get the loopnode for the references in this LG. 00299 * 00300 * WN* Get_Ref (INT num) 00301 * 00302 * Get this reference num from the reflist in the UGS. 00303 * 00304 * mINT16 Get_LeadingRef () 00305 * 00306 * Return the refnum of the leading ref of the LG> 00307 * 00308 * mINT16 Get_Stride_One_Loop () 00309 * 00310 * Find the inner-most loop that is also in the stride-one dimension 00311 * (in this UGS). 00312 * 00313 * mINT16 Stride_Forward () 00314 * 00315 * Return 1 if this array reference is travelling forwards in memory 00316 * based on stride-one loop, -1 if travelling backwards, 0 otherwise. 00317 * 00318 * mINT16 Get_Stride_One_Size () 00319 * 00320 * Return the size in bytes travelled in one iteration of the 00321 * stride-one loop along the stride-one dimension. 00322 * 00323 * mINT16 Get_Stride_In_Enclosing_Loop () 00324 * 00325 * Return the size in bytes travelled in one iteration of the 00326 * immediately enclosing loop. Useful for prefetch_ahead. 00327 * 00328 * void Gen_Prefetch (PF_DESC* pfdesc, 00329 * PF_SPLIT_VECTOR* split_vec, 00330 * PF_LEVEL level); 00331 * 00332 * Given the desired prefetch in pfdesc, and the way the loops are 00333 * versioned in split_vec, generate the prefetches for the appropriate 00334 * cache-level. 00335 * 00336 * void Print (FILE* fp) 00337 * 00338 * Print the locality group. 00339 * 00340 * 00341 * Exported Types 00342 * 00343 * PF_UGS 00344 * 00345 * An object corresponding to a uniformly generated set --- i.e. all 00346 * references with the same index expression that differ only in the 00347 * constant terms in any dimension of the array. 00348 * 00349 * Exported Functions 00350 * 00351 * PF_UGS (WN* wn_array, PF_BASE_ARRAY* myba) 00352 * 00353 * Constructor for a UGS. Given an array node and the base array, 00354 * initialize the UGS. 00355 * 00356 * ~PF_UGS () 00357 * 00358 * Destructor for a UGS. Deallocate locality group, H, Hs, KerH, KerHs. 00359 * 00360 * BOOL Add_Ref (WN* ref) 00361 * 00362 * Given a reference, add the reference if it belongs to this UGS, 00363 * otherwise return FALSE. 00364 * 00365 * void Build_Base_LGs () 00366 * 00367 * Build the base locality groups for a depth of (loopdepth+1), i.e. the 00368 * innermost level. 00369 * 00370 * PF_VOLUME Volume (mINT16 depth) 00371 * 00372 * Compute the volume (in bytes) for this UGS at the supplied loopdepth. 00373 * 00374 * void Find_Loc_Space (PF_LOCLOOP locloop) 00375 * 00376 * Given the localized loops in each cache level, find the desired 00377 * prefetch, and the prefetch vector (if spatial) for the references in 00378 * this UGS, into pfdesc. Also count the number of lines that are 00379 * contained within this UGS, store in pfdesc. Used later to determine 00380 * how to version loops. 00381 * 00382 * PF_SPLIT_VECTOR* Find_Split_Vector () 00383 * 00384 * If this UGS has spatial locality prefetch vectors in its pfdesc, then 00385 * return how this UGS would like the loops to be versioned. 00386 * 00387 * void Gen_Prefetch (PF_SPLIT_VECTOR* split_vec) 00388 * 00389 * Given how the loops were versioned, generate prefetches for this UGS. 00390 * 00391 * LU_FMAT *Get_Hslu () 00392 * 00393 * Return Hs in LU form. 00394 * 00395 * VECTOR_SPACE<FRAC> *Get_KerH () 00396 * 00397 * Return the kernel of H 00398 * 00399 * VECTOR_SPACE<FRAC> *Get_KerHs () 00400 * 00401 * Return the Kernel of Hs 00402 * 00403 * PF_BASE_ARRAY *Get_BA () 00404 * 00405 * Return the base array of this UGS 00406 * 00407 * ACCESS_ARRAY *Get_AA () 00408 * 00409 * Return the access array of this UGS 00410 * 00411 * mINT16 Get_Stride_One_Loop () 00412 * 00413 * Return the depth of the stride-one loop for this UGS 00414 * (innermost loop that occurs in stride-one dimension of the array). 00415 * 00416 * mINT16 Stride_Forward () 00417 * 00418 * Return 1 if this array reference is travelling forwards in memory 00419 * based on stride-one loop, -1 if it is travelling backwards, 0 if not 00420 * going anywhere in stride one dimension. 00421 * 00422 * mINT16 Get_Stride_One_Size () 00423 * 00424 * Return the size in bytes travelled in one iteration of the 00425 * stride-one loop along the stride-one dimension. 00426 * 00427 * mINT16 Get_Stride_In_Enclosing_Loop () 00428 * 00429 * Return the size in bytes travelled in one iteration of the 00430 * immediately enclosing loop. Useful for prefetch_ahead. 00431 * 00432 * WN *Get_Ref (INT num) 00433 * 00434 * Return the reference "num" in the list of references in this UGS 00435 * 00436 * mINT16 Get_Depth () 00437 * 00438 * Return depth of the loop for this UGS. 00439 * 00440 * PF_LOOPNODE* Get_Loop () 00441 * 00442 * Return the loopnode for this UGS. 00443 * 00444 * void Print (FILE*) 00445 * 00446 * Print this UGS. 00447 * 00448 * 00449 * Exported Type 00450 * 00451 * PF_BASE_ARRAY 00452 * 00453 * Exported Functions 00454 * 00455 * PF_BASE_ARRAY (SYMBOL* symb, mINT16 dim, PF_LOOPNODE* loopnode) 00456 * 00457 * Constructor for a base array - copy the supplied fields. 00458 * 00459 * ~PF_BASE_ARRAY () 00460 * 00461 * Destructor for the base array. Delete the array base, and all the UGSs. 00462 * 00463 * SYMBOL *Get_Symbol () 00464 * 00465 * Return the base symbol for this PF_BASE_ARRAY 00466 * 00467 * BOOL Add_Ref (WN* wn_array, BOOL do_check = TRUE) 00468 * 00469 * Given a reference, add it to this base array into the appropriate UGS 00470 * (if it belongs here) and return TRUE, otherwise return FALSE. 00471 * If do_check is FALSE then the check for "belonging" is NOT 00472 * performed. This is because the test can sometimes fail for 00473 * a reference known to belong because of incomplete DU-chains. 00474 * Default do_check is TRUE. 00475 * 00476 * PF_VOLUME Volume (mINT16 depth) 00477 * 00478 * Compute the volume of this base array (in bytes) for a loop at given 00479 * depth. 00480 * 00481 * void Build_Base_LGs () 00482 * 00483 * Build the base locality groups for all the UGSs in this base array. 00484 * 00485 * void Find_Loc_Space (PF_LOCLOOP locloop) 00486 * 00487 * Given the localized loops, for each UGS compute the desired prefetch vec 00488 * and the number of lines in each UGS. This is used to determine how to 00489 * version loops. 00490 * 00491 * PF_SPLIT_VECTOR* Find_Split_Vector () 00492 * 00493 * Having voted how to split, compute the best split of all UGSs 00494 * 00495 * void Gen_Prefetch (PF_SPLIT_VECTOR*) 00496 * 00497 * Having versioned the loops, generate prefetches for the UGSs in this 00498 * basearray. 00499 * 00500 * mINT16 Get_Dim() 00501 * 00502 * Return the dimensionality of this base array. 00503 * 00504 * mINT16 Get_Depth () 00505 * 00506 * Return the loopdepth of these references to the base array. 00507 * 00508 * PF_LOOPNODE* Get_Loop () 00509 * 00510 * Return the loopnode for these references. 00511 * 00512 * void Print (FILE*) 00513 * 00514 * Print the base array. 00515 * 00516 * 00517 * Exported Function 00518 * 00519 * BOOL Steady_Base (WN* wn_array) 00520 * Called with an array reference. Return TRUE if the base of the 00521 * array is known to be invariant within the current loop nest, 00522 * FALSE otherwise. 00523 * 00524 */ 00525 00526 #ifndef pf_ref_INCLUDED 00527 #define pf_ref_INCLUDED 00528 00529 #include "defs.h" 00530 #include "cxx_base.h" 00531 #include "cxx_template.h" 00532 #include "wn.h" 00533 #include "cxx_memory.h" 00534 #include "lnopt_main.h" 00535 #include "pf_common.h" 00536 #include "pf_cache.h" 00537 #include "lu_mat.h" 00538 #include "vs.h" 00539 00540 class SYMBOL; 00541 class PF_REFVEC; 00542 class PF_LG; 00543 class PF_UGS; 00544 class PF_BASE_ARRAY; 00545 class PF_LOOPNODE; 00546 00547 typedef STACK<PF_REFVEC*> PF_REFVEC_DA; 00548 typedef STACK<WN*> WN_DA; 00549 typedef STACK<PF_LG*> PF_LG_DA; 00550 00551 class PF_REFVEC { 00552 // TODO: the following types can be unsigned 00553 mINT16 _refnum; // number of this reference in the reflist of a UGS 00554 mINT16 _depth; /* number of elements in _dvec, 00555 * same as maxdepth of parent 00556 */ 00557 INT64 _distance; // distance in bytes from leading reference 00558 FRAC *_dvec; /* distance vector of this ref, 00559 * relative to leading ref of LG 00560 */ 00561 00562 PF_REFVEC (void); 00563 PF_REFVEC (const PF_REFVEC&); 00564 PF_REFVEC* operator= (const PF_REFVEC&); 00565 public: 00566 PF_REFVEC (mINT16 refnum, mINT16 depth, FRAC* dvec, INT64 distance) { 00567 _refnum = refnum; 00568 _depth = depth; 00569 _dvec = CXX_NEW_ARRAY (FRAC, _depth, PF_mpool); 00570 for (INT i=0; i<_depth; i++) { 00571 _dvec[i] = dvec[i]; 00572 } 00573 _distance = distance; 00574 } 00575 PF_REFVEC (PF_REFVEC* refvec) { 00576 _refnum = refvec->_refnum; 00577 _depth = refvec->_depth; 00578 _dvec = CXX_NEW_ARRAY (FRAC, _depth, PF_mpool); 00579 for (INT i=0; i<_depth; i++) { 00580 _dvec[i] = refvec->_dvec[i]; 00581 } 00582 _distance = refvec->_distance; 00583 } 00584 ~PF_REFVEC () { 00585 CXX_DELETE_ARRAY (_dvec, PF_mpool); 00586 } 00587 00588 // update dvec relative to dvec of new leading ref 00589 void Update_dvec (FRAC* lr_dvec) { 00590 // subtract given lr_dvec from stored vec 00591 // Stored: d_lm, s.t. Ad_lm = C_l - C_m (l == leading ref, m == me) 00592 // want to store: d_l'm, s.t. Ad_l'm = C_l' - C_m 00593 // Ad_l'm = (C_l - C_m) - (C_l - C_l') 00594 // or Ad_l'm = Ad_lm - Ad_ll' 00595 // or d_l'm = d_lm - d_ll' 00596 // d_lm is stored here, d_ll' is given as input. 00597 // so subtract incoming vector from stored vector 00598 for (INT i=0; i<_depth; i++) { 00599 _dvec[i] = _dvec[i] - lr_dvec[i]; 00600 } 00601 } 00602 INT64 Distance () const { return _distance; } 00603 mINT16 Refnum () const { return _refnum; } 00604 FRAC* Dvec () const { return _dvec; } 00605 // the following returns the trip separation in the 00606 // innermost loop from leadingref 00607 mINT16 Trips () const { return _dvec[_depth-1].N(); } 00608 void Update_Distance (INT64 dist) { _distance -= dist; } 00609 void Print (FILE* fp) { 00610 fprintf (fp, " Reference number: %hd. Distance %lld. Vector is\n", 00611 _refnum, _distance); 00612 for (INT i=0; i<_depth; i++) 00613 _dvec[i].Print (fp); 00614 fprintf (fp, "\n"); 00615 } 00616 }; 00617 00618 enum PF_KIND { all, none, vec }; 00619 enum PF_LEVEL { level_1, level_2, level_1and2 }; 00620 00621 class PF_DESC { 00622 PF_KIND _pf_p_1L; // prefetch or not, what stride 00623 PF_KIND _pf_p_2L; 00624 mINT16 *_pf_vec_1L; // vector for prefetch, if any 00625 mINT16 *_pf_vec_2L; 00626 mINT16 _numlines; /* number of lines that would miss if no prefetch. 00627 * Based on the outermost cache level. 00628 * Used for voting on loop splitting. 00629 */ 00630 mINT16 _depth; // size of prefetch vectors 00631 // PF_DESC (void); 00632 PF_DESC (const PF_DESC&); 00633 PF_DESC* operator= (const PF_DESC&); 00634 public: 00635 PF_DESC () { 00636 _pf_p_1L = all; 00637 _pf_p_2L = all; 00638 _pf_vec_1L = NULL; 00639 _pf_vec_2L = NULL; 00640 _numlines = 0; 00641 _depth = 0; 00642 } 00643 ~PF_DESC () { 00644 if (_pf_vec_1L) CXX_DELETE_ARRAY (_pf_vec_1L, PF_mpool); 00645 if (_pf_vec_2L) CXX_DELETE_ARRAY (_pf_vec_2L, PF_mpool); 00646 } 00647 void Turn_Off (PF_LEVEL level) { 00648 switch (level) { 00649 case level_1: 00650 _pf_p_1L = none; 00651 break; 00652 case level_2: 00653 _pf_p_2L = none; 00654 break; 00655 default: 00656 Is_True (FALSE, ("Turn_Off: broken level\n")); 00657 } 00658 } 00659 void Turn_On (PF_LEVEL level, mINT16* pf_vec, mINT16 depth) { 00660 switch (level) { 00661 case level_1: 00662 _pf_p_1L = vec; 00663 _pf_vec_1L = pf_vec; 00664 break; 00665 case level_2: 00666 _pf_p_2L = vec; 00667 _pf_vec_2L = pf_vec; 00668 break; 00669 default: 00670 Is_True (FALSE, ("Turn_On: broken level\n")); 00671 } 00672 _depth = depth; 00673 } 00674 void Set_Num_Lines (mINT16 numlines) { _numlines = numlines; } 00675 mINT16 Num_Lines () const { return _numlines; } 00676 PF_KIND Kind (PF_LEVEL level) { 00677 switch (level) { 00678 case level_1: 00679 return _pf_p_1L; 00680 case level_2: 00681 return _pf_p_2L; 00682 default: 00683 Is_True (FALSE, ("Kind: broken level\n")); 00684 return none; 00685 } 00686 } 00687 mINT16* Vec (PF_LEVEL level) const { 00688 switch (level) { 00689 case level_1: 00690 return _pf_vec_1L; 00691 case level_2: 00692 return _pf_vec_2L; 00693 default: 00694 Is_True (FALSE, ("Vec: broken level\n")); 00695 return NULL; 00696 } 00697 } 00698 BOOL Is_On () { 00699 if (Cache.Levels() == 1) return (_pf_p_1L != none); 00700 return ((_pf_p_1L != none) || (_pf_p_2L != none)); 00701 } 00702 void Print (FILE* fp) { 00703 INT i; 00704 fprintf (fp, "Printing pf descriptor (0 == all, 1 == none, 2 == vec)\n"); 00705 fprintf (fp, " 1st level: kind %2d. ", _pf_p_1L); 00706 if (_pf_p_1L == vec) 00707 for (i=0; i<_depth; i++) fprintf (fp, " %3d ", _pf_vec_1L[i]); 00708 fprintf (fp, "\n"); 00709 if (Cache.Levels() > 1) { 00710 fprintf (fp, " 2nd level: kind %2d. ", _pf_p_2L); 00711 if (_pf_p_2L == vec) 00712 for (i=0; i<_depth; i++) fprintf (fp, " %3d ", _pf_vec_2L[i]); 00713 fprintf (fp, "\n"); 00714 } 00715 fprintf (fp, " numlines = %d, _depth = %d\n", _numlines, _depth); 00716 } 00717 }; 00718 00719 struct PF_SORTED_REFS; 00720 class PF_SPLIT_VECTOR; 00721 00722 class PF_LG { 00723 PF_REFVEC_DA _refvecs; // list of references and their dvecs 00724 mINT16 _depth; // loop nesting level that this LG is for 00725 00726 mINT16 _leading_ref; // number, not bitvector 00727 INT64 *_c; // constant vector for representative ref 00728 INT64 _min_iter[LNO_MAX_DO_LOOP_DEPTH]; // spread in the iteration space 00729 INT64 _max_iter[LNO_MAX_DO_LOOP_DEPTH]; 00730 INT64 _min_dist; // spread in bytes for this locality group 00731 INT64 _max_dist; // (distance from leading reference) 00732 PF_UGS *_myugs; // pointer to my UGS 00733 mINT16 _numlines_1L; // for Split_LG 00734 mINT16 _numlines_2L; 00735 00736 void Split_LG (); // split if large spread 00737 void Gen_Pref_Node (PF_SORTED_REFS* srefs, mINT16 start, mINT16 stop, 00738 PF_LEVEL, PF_DESC*, PF_SPLIT_VECTOR*); 00739 INT Get_Bit_Vec (PF_DESC* pfdesc, 00740 PF_LEVEL level, 00741 PF_SPLIT_VECTOR* split_vec); 00742 WN* Get_Ref_Version (WN* ref, INT bitpos); 00743 INT64 Distance_LR (WN* ref, FRAC* dvec); /* Compute distance of ref 00744 * from leading ref. 00745 */ 00746 INT LR_Compare (mINT16 refvecnum1, mINT16 refvecnum2); 00747 void LR_Ordering (PF_SORTED_REFS* srefs, INT start, INT stop); 00748 00749 PF_LG (void); 00750 PF_LG (const PF_LG&); 00751 PF_LG* operator= (const PF_LG&); 00752 public: 00753 // create a new locality group by unioning a given group with this 00754 PF_LG (PF_LG* lg); 00755 00756 // create a new locality group with the given reference 00757 PF_LG (WN* ref, mINT16 bitpos, mINT16 depth, PF_UGS* myugs); 00758 00759 ~PF_LG (); 00760 00761 00762 FRAC* Ref_In_LG (WN* ref); // check if reference is in this LG 00763 BOOL Add_Ref (WN* ref, mINT16 bitpos); // add a reference to PF_REFLIST 00764 BOOL Add_Group (PF_LG* lg, WN* lref); // add a locality group 00765 BOOL Check (); // just check that there are no duplicates. For debugging. 00766 BOOL Check_Ref (mINT16 refnum); /* just check that there are no duplicates. 00767 * For debugging. 00768 */ 00769 00770 // Given the UGS and the loop number, 00771 // compute the volume of this locality group 00772 // try not to do this repeatedly 00773 // need to redo this only when a new locality vecter 00774 // gets added to the sKer in the loop subnest. 00775 PF_VOLUME Volume (); 00776 mINT16 Lines (PF_LEVEL level) const { 00777 switch (level) { 00778 case level_1: 00779 return _numlines_1L; 00780 case level_2: 00781 return _numlines_2L; 00782 default: 00783 Is_True (FALSE, ("Lines in PF_LG: pick a level\n")); 00784 return 0; 00785 } 00786 } 00787 mINT16 Get_Dim (); 00788 mINT16 Get_Depth (); 00789 LU_FMAT *Get_Hslu (); 00790 VECTOR_SPACE<FRAC> *Get_KerH (); 00791 VECTOR_SPACE<FRAC> *Get_KerHs (); 00792 // mINT16 *Get_Stride (); 00793 PF_LOOPNODE *Get_Loop (); 00794 WN *Get_Ref (INT num); 00795 mINT16 Get_LeadingRef () const { return _leading_ref; } 00796 mINT16 Get_Stride_One_Loop (); 00797 mINT16 Get_Stride_One_Size (); 00798 mINT32 Get_Stride_In_Enclosing_Loop (); 00799 #if defined(TARG_IA64) 00800 BOOL Get_Stride_Accurate(); 00801 #endif 00802 mINT16 Stride_Forward (); 00803 void Gen_Prefetch (PF_DESC* pfdesc, 00804 PF_SPLIT_VECTOR* split_vec, 00805 PF_LEVEL level); 00806 void Print (FILE*); 00807 }; 00808 00809 class PF_BASE_ARRAY; 00810 00811 class PF_UGS { 00812 ACCESS_ARRAY* _aa; // access array for this ugs. 00813 // pointer to first ref that comes along 00814 // FMAT *_H; 00815 // LU_FMAT *_Hlu; 00816 FMAT *_Hs; 00817 LU_FMAT *_Hslu; 00818 00819 VECTOR_SPACE<FRAC> *_KerH; 00820 VECTOR_SPACE<FRAC> *_KerHs; 00821 // mINT16 *_stride; 00822 00823 // innermost loop that occurs in stride-one dimension. 00824 // -1 if none. 00825 mINT16 _stride_one_loop; 00826 // 1 if stride direction is +ve, -1 if -ve, 0 otherwise 00827 // i.e. travelling forward or backwards in memory 00828 mINT16 _stride_forward; 00829 // size in bytes travelled per iteration of stride-one loop 00830 // do i a[i] --> (elem_size) 00831 // do i a[2*i] --> (2*elem_size) 00832 // do i=..,..,2 a[i] --> (2*elem_size) 00833 mINT16 _stride_one_size; 00834 // size in bytes travelled per iteration of immediately enclosing loop 00835 mINT32 _stride_in_enclosing_loop; 00836 00837 #if defined(TARG_IA64) 00838 // this variable is used to indicate whether the computation of 00839 // _stride_in_enclosing_loop is accurate or is just a kind of estimation. 00840 BOOL _stride_accurate; 00841 #endif 00842 00843 WN_DA _refs; // list of references in this UGS 00844 PF_LG_DA **_lg; // list of lgs, one for each loop 00845 PF_BASE_ARRAY *_myba; // my base array 00846 PF_DESC _pfdesc; 00847 00848 void ComputePFVec (PF_LEVEL level, PF_LOCLOOP locloop); 00849 void BuildLG (mINT16 depth); 00850 00851 PF_UGS (void); 00852 PF_UGS (const PF_UGS&); 00853 PF_UGS* operator= (const PF_UGS&); 00854 public: 00855 PF_UGS (WN* wn_array, PF_BASE_ARRAY* myba); 00856 ~PF_UGS (); 00857 BOOL Add_Ref (WN* ref); 00858 void Build_Base_LGs (); 00859 PF_VOLUME Volume (mINT16 depth); 00860 00861 void Find_Loc_Space (PF_LOCLOOP locloop); 00862 PF_SPLIT_VECTOR* Find_Split_Vector (); 00863 void Gen_Prefetch (PF_SPLIT_VECTOR*); 00864 00865 LU_FMAT *Get_Hslu () const { return _Hslu; } 00866 VECTOR_SPACE<FRAC> *Get_KerH () { return _KerH; } 00867 VECTOR_SPACE<FRAC> *Get_KerHs () { return _KerHs; } 00868 // mINT16 *Get_Stride () { return _stride; } 00869 PF_BASE_ARRAY *Get_BA () const { return _myba; } 00870 ACCESS_ARRAY *Get_AA () const { return _aa; } 00871 mINT16 Get_Stride_One_Loop () const { return _stride_one_loop; } 00872 mINT16 Get_Stride_One_Size () const { return _stride_one_size; } 00873 mINT32 Get_Stride_In_Enclosing_Loop () const { return _stride_in_enclosing_loop; } 00874 #if defined(TARG_IA64) 00875 BOOL Get_Stride_Accurate () const { return _stride_accurate; } 00876 #endif 00877 mINT16 Stride_Forward () const { return _stride_forward; } 00878 WN *Get_Ref (INT num) const { return _refs.Bottom_nth(num); } 00879 mINT16 Get_Depth (); 00880 PF_LOOPNODE* Get_Loop (); 00881 void Print (FILE*); 00882 }; 00883 00884 typedef STACK<PF_UGS*> PF_UGS_DA; 00885 00886 class PF_LOOPNODE; 00887 00888 class PF_BASE_ARRAY { 00889 SYMBOL *_array_base; 00890 WN *_sample_wn_array; /* symbols aren't good enough, need 00891 * a sample reference 00892 */ 00893 PF_UGS_DA _ugs; 00894 mINT16 _dim; // dimension of this array 00895 PF_LOOPNODE *_myloopnode; 00896 00897 PF_BASE_ARRAY (void); 00898 PF_BASE_ARRAY (const PF_BASE_ARRAY&); 00899 PF_BASE_ARRAY* operator= (const PF_BASE_ARRAY&); 00900 public: 00901 PF_BASE_ARRAY (SYMBOL* symb, WN* wn_array, mINT16 dim, PF_LOOPNODE* loopnode) 00902 : _ugs (PF_mpool) { 00903 _array_base = symb; 00904 _sample_wn_array = wn_array; 00905 _dim = dim; 00906 _myloopnode = loopnode; 00907 } 00908 ~PF_BASE_ARRAY (); 00909 SYMBOL *Get_Symbol () { return _array_base; } 00910 BOOL Add_Ref (WN* wn_array, BOOL do_check = TRUE); 00911 PF_VOLUME Volume (mINT16 depth); 00912 void Build_Base_LGs (); 00913 void Find_Loc_Space (PF_LOCLOOP locloop); 00914 PF_SPLIT_VECTOR* Find_Split_Vector (); 00915 void Gen_Prefetch (PF_SPLIT_VECTOR*); 00916 00917 mINT16 Get_Dim() const { return _dim; } 00918 mINT16 Get_Depth (); 00919 PF_LOOPNODE* Get_Loop () const { return _myloopnode; } 00920 void Print (FILE*); 00921 }; 00922 00923 extern BOOL Steady_Base (WN* wn_array); 00924 00925 #endif // pf_ref_INCLUDED
1.5.6