00001 /* 00002 * Copyright 2004, 2005, 2006 PathScale, Inc. All Rights Reserved. 00003 */ 00004 00005 /* 00006 00007 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00008 00009 This program is free software; you can redistribute it and/or modify it 00010 under the terms of version 2 of the GNU General Public License as 00011 published by the Free Software Foundation. 00012 00013 This program is distributed in the hope that it would be useful, but 00014 WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00016 00017 Further, this software is distributed without any warranty that it is 00018 free of the rightful claim of any third person regarding infringement 00019 or the like. Any license provided herein, whether implied or 00020 otherwise, applies only to this software file. Patent licenses, if 00021 any, provided herein do not apply to combinations of this program with 00022 other software, or any other product whatsoever. 00023 00024 You should have received a copy of the GNU General Public License along 00025 with this program; if not, write the Free Software Foundation, Inc., 59 00026 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00027 00028 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00029 Mountain View, CA 94043, or: 00030 00031 http://www.sgi.com 00032 00033 For further information regarding this notice, see: 00034 00035 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00036 00037 */ 00038 00039 00040 //-*-c++-*- 00041 /* ==================================================================== 00042 * ==================================================================== 00043 * 00044 * Module: cg_swp.h 00045 * 00046 * ==================================================================== 00047 * ==================================================================== 00048 */ 00049 00050 #ifndef cg_swp_INCLUDED 00051 #define cg_swp_INCLUDED "cg_swp.h" 00052 00053 #ifndef USE_STANDARD_TYPES 00054 #define USE_STANDARD_TYPES 00055 #endif 00056 00057 #include <stdio.h> 00058 #include "defs.h" 00059 #include <vector> 00060 #include <map> 00061 #include "mempool.h" 00062 #include "tn.h" 00063 #include "tn_set.h" 00064 #include "op.h" 00065 #include "bb.h" 00066 00067 // #define for SWP INTERNAL USE only 00068 // 00069 #define SWP_DEBUG 0 // do not enable expensive sanity checks 00070 #define SWP_USE_STL 0 // do not use STL instead of hard-coded vector 00071 #define SWP_OPS_OVERHEAD 2 // SWP has 2 overhead OPs (the fake start and stop OPs) 00072 #define SWP_OPS_LIMIT 130 // max # of OPs if STL is not used 00073 00074 00075 // Forward references 00076 class LOOP_DESCR; 00077 class CG_LOOP; 00078 class ROTATING_KEREL_INFO; 00079 00080 00081 //************************************************************************ 00082 // Data structure to perform register assignment 00083 // 00084 // - Initial register allocation for loop variants assigns to each TN* 00085 // a physical pseudo-register location, where the location numbers 00086 // start at zero and are initially unbounded. There is however an 00087 // upper bound on the number of rotating logical registers that can 00088 // be used from each register bank, and if we exceed that number, 00089 // register allocation for a modulo scheduled loop fails. 00090 // 00091 // - ISA registers are registers numbered according to the ISA target 00092 // 00093 // - reg_allocation is a mapping from a TN to a physical pseudo-register. 00094 // In terms of rotating "logical" registers, the physical location is 00095 // the same as the logical register at cycle 0 of the 1st iteration. 00096 // After cycle zero the logical register number for a TN increases by 00097 // one for each successive stage. The rotating logical register number 00098 // is a function of: 00099 // 00100 // i) the physical pseudo-register location 00101 // ii) the sequence of register numbers that can be made to rotate 00102 // iii) where the OP belongs: the prolog, epilog, body 00103 // iv) the stage count of the schedule 00104 // v) omega 00105 // 00106 // The functions: 00107 // 00108 // INT Get_Register_Offset(INT cycle, INT ii, INT omega), 00109 // INT Get_Livein_Register_Offset(INT omega), and 00110 // INT Get_Liveout_Register_Offset(INT sc, INT omega) 00111 // 00112 // computes the register number difference between the physical 00113 // pseudo-register location and the corresponding logical 00114 // register location at the given cycle (or time-unit), without 00115 // taking into regard any upper bound on register numbers (i.e. without 00116 // wrapping the logical registers around a bounded sequence of 00117 // register numbers). The reg_allocation is always >= 0, and since 00118 // the offsets also are >= 0, so are the rotating logical registers. 00119 // 00120 // - reg2tn_map is a mapping from a ISA register into its represented TN. 00121 // 00122 // **************************************** 00123 // Algorithm to perform register assignment 00124 // 00125 // - For each loop we assign a physical pseudo-register location to each 00126 // loop variable TN, by means of Allocate_Loop_Variants(). This 00127 // register assignment is denoted by "reg_allocation", while the minimum 00128 // number of rotating registers required is denoted by "rotating_reg_used". 00129 // 00130 // - For predicate registers we also assign an extra physical pseudo- 00131 // register location for loop control purposes, which should be live 00132 // throughout the sc stages of the schedule. We denote this location 00133 // as the "control_predicate_loc". 00134 // 00135 // - Then, later during SWP_Emit(), we convert the "reg_allocation" and 00136 // "control_predicate_loc" into unbounded rotating logical register 00137 // numbers (by means of SWP_Rename_TNs()) and assign these numbers to 00138 // TN refs and defs in the schedule. 00139 // 00140 // - After all loops have been processed, we have a lower bound on the 00141 // number of rotating registers required by each loop, and at this 00142 // point we can further convert the unbounded rotating logical register 00143 // numbers into bounded rotating logical register numbers by means of 00144 // SWP_Fixup() during Perform_Loop_Optimizations(). At this point we 00145 // can also rotate the predicate register assignment across the 00146 // boundaries to force the special control predicate 00147 // (control_predicate_loc) to be assigned the lower "sc" rotating 00148 // register numbers. 00149 // 00150 //************************************************************************ 00151 00152 // Define an ordering for CLASS_REG_PAIR. 00153 // - required by STL map. 00154 // 00155 inline bool operator<(CLASS_REG_PAIR x, CLASS_REG_PAIR y) { 00156 return CLASS_REG_PAIR_class_n_reg(x) < CLASS_REG_PAIR_class_n_reg(y); 00157 } 00158 00159 inline bool operator==(CLASS_REG_PAIR x, CLASS_REG_PAIR y) { 00160 return memcmp(&x, &y, sizeof(CLASS_REG_PAIR)) == 0; 00161 } 00162 00163 00164 class SWP_OP_vector; // Defined below. 00165 00166 00167 struct SWP_REG_ASSIGNMENT { 00168 typedef std::map<CLASS_REG_PAIR, TN*> REG2TN_MAP; 00169 typedef std::map<TN*, CLASS_REG_PAIR> TN2REG_MAP; 00170 00171 // 1st rotating registers ( 00172 INT rotating_reg_base[ISA_REGISTER_CLASS_MAX+1]; 00173 00174 // Num of available rotating registers (an upper limit on what we can use) 00175 INT rotating_reg_avail[ISA_REGISTER_CLASS_MAX+1]; 00176 00177 // Num of rotating registers used by this allocation (<= rotating_reg_avail) 00178 INT rotating_reg_used[ISA_REGISTER_CLASS_MAX+1]; 00179 00180 // The allocated physical pseudo-register location for the control predicate. 00181 // This may be any register number, and once we know the size of the 00182 // rotating register bank we intend to use, we can add in an appropriate 00183 // offset to force the control_predicate_loc to be a certain value. 00184 // 00185 INT control_predicate_loc; 00186 00187 // Sets of all non-rotating registers 00188 REGISTER_SET non_rotating_reg[ISA_REGISTER_CLASS_MAX+1]; 00189 00190 // reg_allocation is a mapping from a TN into a logical register 00191 // as if it is used at cycle 0 00192 TN2REG_MAP reg_allocation; 00193 00194 // a mapping from a ISA register into the assigned TN. 00195 REG2TN_MAP reg2tn_map; 00196 00197 // Calculate the offset in register numbering, when converting 00198 // from a physical pseudo-register location for a ref with 00199 // the given omega at the given cycle to the corresponding 00200 // (unbounded) rotating register number. 00201 INT Get_Register_Offset(INT cycle, INT ii, INT omega) { 00202 return cycle/ii + omega; 00203 } 00204 00205 // Calculate the offset in register numbering, when converting 00206 // from a physical pseudo-register location for a live-in def to 00207 // the corresponding (unbounded) rotating register number. 00208 INT Get_Livein_Register_Offset(INT omega) { 00209 return omega; 00210 } 00211 00212 // Calculate the offset in register numbering, when converting 00213 // from a physical pseudo-register location for a live-out ref to 00214 // the corresponding (unbounded) rotating register number. 00215 INT Get_Liveout_Register_Offset(INT sc, INT omega) { 00216 return sc + omega; 00217 } 00218 00219 // Determine the rotating register assigned for 'tn', assuming unbounded 00220 // rotating register banks. 00221 // The 'adjustment' is the offset in register numbering, when converting 00222 // from a physical pseudo-register location of the current reference to 00223 // the corresponding (unbounded) rotating register number. 00224 TN *Get_Register_TN(TN *tn, INT adjustment); 00225 00226 TN *Get_Non_Rotating_Register_TN(TN *tn); 00227 00228 TN *Get_Control_Predicate(INT stage) const; 00229 00230 bool Enough_Non_Rotating_Registers(TN_SET *) const; 00231 00232 // Allocate loop variants to physical pseudo register locations 00233 // (see cg_swp_allocator.cxx for the definition of this function). 00234 bool Allocate_Loop_Variants(const SWP_OP_vector& op_state, 00235 BB *head, 00236 BB *tail); 00237 00238 void Print_Allocation(FILE *file); 00239 00240 // Update SWP BB annotation 00241 // - the livein and kill REGSET 00242 void Update_Annotation(ROTATING_KERNEL_INFO *); 00243 00244 SWP_REG_ASSIGNMENT(); 00245 00246 // True when we are to trace register allocation for swp 00247 static bool Trace(); 00248 00249 }; 00250 00251 00252 00253 //************************************************************************ 00254 // SWP_OP 00255 //************************************************************************ 00256 00257 enum SWP_RETURN_CODE { 00258 SWP_OK, // no constraints to disable swp 00259 SWP_PREP_ONLY, // disabled swp due to -SWP:prep_only 00260 SWP_ASM, // disabled swp due to asm statement 00261 SWP_WORKAROUND, // disabled swp due to hardware workaround 00262 SWP_DEDICATED_ROT_REG, // disabled swp due to dedicated rot register 00263 SWP_LOOP_EMPTY, // disabled swp due to empty loop 00264 SWP_LOOP_LIMIT, // disabled swp due to loop too large 00265 REG_ALLOC_FAILED, // swp failed: failure to allocate register 00266 NON_ROT_REG_ALLOC_FAILED, // swp failed: failure to allocate non-rot reg 00267 MOD_SCHED_FAILED, // swp failed: failure to schedule 00268 REG_ALLOC_SUCCEEDED, // reg alloc succeeded 00269 MOD_SCHED_SUCCEEDED, // mod sched succeeded 00270 SWP_LOW_TRIP_COUNT, // disable swp due to low trip count 00271 }; 00272 00273 enum SCHED_DIRECTION { 00274 SWP_UNKOWN, 00275 SWP_TOP_DOWN, 00276 SWP_BOTTOM_UP, 00277 }; 00278 00279 00280 // Use mapping from OP_map_idx to Unique Number 00281 extern INT *swp_map_tbl; 00282 00283 inline INT SWP_index(OP *op) { return swp_map_tbl[OP_map_idx(op)]; } 00284 00285 // inline INT SWP_index(OP *op) { return OP_map_idx(op); } 00286 00287 struct SWP_OP { 00288 SCHED_DIRECTION direction; // direction 00289 OP *op; 00290 bool placed; 00291 bool is_noop; 00292 double scale; 00293 INT cycle; // placed cycle 00294 INT trials; // statistics -- number of trials 00295 INT modulo_cycle; 00296 INT slot; 00297 void Print(FILE *fp) const; 00298 INT Index() const { return SWP_index(op); } 00299 SWP_OP():op(NULL),is_noop(false),placed(false),cycle(0) {} 00300 }; 00301 00302 class SWP_OP_vector { 00303 00304 #if SWP_USE_STL 00305 vector<SWP_OP> v; 00306 #else 00307 INT v_size; 00308 SWP_OP v[SWP_OPS_LIMIT * 4]; 00309 #endif 00310 00311 public: 00312 INT start; 00313 INT stop; 00314 INT branch; 00315 INT num_mops; // number of memory ops 00316 INT num_flops; // number of floating point ops 00317 00318 TN *control_predicate_tn; // for while-loops 00319 bool is_doloop; 00320 bool loop_one_more_time; // the loop will execute at one more time because of entry cond 00321 INT ii; // initiation interval in cycles 00322 INT ii_slots; // initiation interval in slots after bundling 00323 TN_SET *tn_invariants; // set of invariant TNs 00324 TN_SET *tn_non_rotating; // set of TN that must be allocated to non-rotating reg 00325 bool succeeded; 00326 00327 // Statistics 00328 INT min_ii; // min II = max(res_mii, recur_mii) 00329 INT res_min_ii; // resource MII 00330 INT rec_min_ii; // recurrence MII 00331 INT min_sl; // minimium schedule length 00332 INT sl; // found schedule length 00333 INT sc; // found stage count 00334 INT previous_trials; 00335 double prep_time; 00336 double sched_time; 00337 double reg_alloc_time; 00338 double code_gen_time; 00339 00340 #if SWP_USE_STL 00341 INT size() const { return v.size(); } 00342 void push_back(const SWP_OP &op) {v.push_back(op);} 00343 #else 00344 INT size() const { return v_size; } 00345 void push_back(const SWP_OP &op) { v[v_size++] = op; } 00346 #endif 00347 00348 SWP_OP& operator[](INT i) { return v[i]; } 00349 const SWP_OP& operator[](INT i) const { return v[i]; } 00350 void Verify() const; 00351 void Print(FILE *fp) const; 00352 SWP_OP_vector(BB *body, BOOL doloop, MEM_POOL *pool); 00353 }; 00354 00355 00356 //************************************************************************ 00357 // MinDist calculation 00358 //************************************************************************ 00359 00360 class MinDist { 00361 static const INT NEG_INF = -999; 00362 #if SWP_USE_STL 00363 vector< vector<INT> > mindist; 00364 INT size() const { mindist.size(); } 00365 #else 00366 INT mindist[SWP_OPS_LIMIT][SWP_OPS_LIMIT]; 00367 INT mindist_size; 00368 INT size() const { return mindist_size; } 00369 #endif 00370 INT found_ii; 00371 INT Compute(const SWP_OP_vector& v, INT start, INT stop, INT branch, INT ii); 00372 public: 00373 INT operator()(INT i, INT j) const { return mindist[i][j]; } 00374 INT Found_ii() { return found_ii; } 00375 void Print(FILE *fp) const; 00376 MinDist(const SWP_OP_vector& v, INT start, INT stop, INT branch, INT ii); 00377 }; 00378 00379 00380 00381 extern SWP_RETURN_CODE 00382 Modulo_Schedule(SWP_OP_vector& v, INT min_ii, INT max_ii, 00383 double incr_alpha, double incr_beta, 00384 INT budget, bool trace, bool trace_details); 00385 00386 extern void SWP_Emit(SWP_OP_vector& op_state, 00387 SWP_REG_ASSIGNMENT& reg_assign, 00388 TN *trip_count_tn, 00389 BB *head, BB *body, BB *tail, 00390 bool is_doloop, bool trace); 00391 00392 extern void Emit_SWP_Note(BB *bb, FILE *file); 00393 00394 #ifdef TARG_IA64 00395 SWP_RETURN_CODE Detect_SWP_Constraints(CG_LOOP &cl, bool trace); 00396 #endif 00397 00398 // dep_graph_manager class makes sure that CG_DEP_Delete is invoked 00399 // before Perform_SWP exits 00400 class DEP_GRAPH_MANAGER { 00401 private: 00402 BB *_body; 00403 public: 00404 DEP_GRAPH_MANAGER( BB *body, TN_SET *non_rotating, MEM_POOL *pool ); 00405 ~DEP_GRAPH_MANAGER(); 00406 }; 00407 00408 #endif 00409 00410
1.5.6