00001 /* 00002 * Copyright 2004, 2005, 2006 PathScale, Inc. All Rights Reserved. 00003 */ 00004 00005 /* 00006 00007 Copyright (C) 2000, 2001 Silicon Graphics, Inc. All Rights Reserved. 00008 00009 This program is free software; you can redistribute it and/or modify it 00010 under the terms of version 2 of the GNU General Public License as 00011 published by the Free Software Foundation. 00012 00013 This program is distributed in the hope that it would be useful, but 00014 WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00016 00017 Further, this software is distributed without any warranty that it is 00018 free of the rightful claim of any third person regarding infringement 00019 or the like. Any license provided herein, whether implied or 00020 otherwise, applies only to this software file. Patent licenses, if 00021 any, provided herein do not apply to combinations of this program with 00022 other software, or any other product whatsoever. 00023 00024 You should have received a copy of the GNU General Public License along 00025 with this program; if not, write the Free Software Foundation, Inc., 59 00026 Temple Place - Suite 330, Boston MA 02111-1307, USA. 00027 00028 Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pky, 00029 Mountain View, CA 94043, or: 00030 00031 http://www.sgi.com 00032 00033 For further information regarding this notice, see: 00034 00035 http://oss.sgi.com/projects/GenInfo/NoticeExplan 00036 00037 */ 00038 00039 00040 /* ======================================================================= 00041 * ======================================================================= 00042 * 00043 * Module: cg_loop.h 00044 * $Revision: 1.6 $ 00045 * $Date: 05/12/05 08:59:03-08:00 $ 00046 * $Author: bos@eng-24.pathscale.com $ 00047 * $Source: /scratch/mee/2.4-65/kpro64-pending/be/cg/SCCS/s.cg_loop.h $ 00048 * 00049 * Revision comments: 00050 * 00051 * 3-May-1995 - Initial version 00052 * 00053 * Description: 00054 * ============ 00055 * 00056 * Interface to most CG loop analysis and transformation. See also 00057 * "cg_dep_graph.h" for support of cyclic dependence graphs and 00058 * "findloops.h" for loop descriptor documentation. 00059 * 00060 * At the start of loop processing for a PU or region, the following 00061 * initialization function should be called: 00062 * void CG_LOOP_Init() 00063 * 00064 * The trip count of a loop is given by: 00065 * TN *CG_LOOP_Trip_Count(LOOP_DESCR *loop), or 00066 * which may generate prolog code if the trip count TN hasn't yet been 00067 * generated, and which returns NULL if the trip count isn't computable 00068 * before the loop body. 00069 * 00070 * After initialization, if any new loop body OPs are created, the 00071 * following routine must be called to initialize the CG_LOOP data 00072 * structures for each new OP: 00073 * void CG_LOOP_Init_Op(OP *op) 00074 * void CG_LOOP_Init_Ops(OPS *ops) 00075 * 00076 * To see whether an arbitrary OP has CG_LOOP info attached, use: 00077 * BOOL Is_CG_LOOP_Op(OP *op) 00078 * 00079 * 00080 * As CGPREP analyzes and transforms loop bodies, it annotates some 00081 * uses of TNs with iteration distances, or "omegas", to specify the 00082 * relative iteration producing the value to be used. For example, 00083 * TN100[2] means the value of TN100 produced two iterations before 00084 * the current one. An omega of MAX_OMEGA means the value comes from 00085 * at *least* MAX_OMEGA iterations back. An omega of zero means the 00086 * value comes from the current iteration. The omega associated with 00087 * a particular TN use is accessed with: 00088 * UINT8 OP_omega(OP *op, UINT8 opnd) 00089 * void Set_OP_omega(OP *op, UINT8 opnd, UINT8 omega) 00090 * Uses of dedicated TNs cannot have nonzero omegas. Uses of loop- 00091 * invariant TNs cannot have nonzero omegas. 00092 * 00093 * Analogous to omegas for TNs, a use of a spill location can 00094 * have an omega. The omega associated with a restore is accessed with: 00095 * UINT8 OP_restore_omega(OP *op) 00096 * void Set_OP_restore_omega(OP *op, UINT8 omega) 00097 * Note that the restore omega is only valid for restore OPs. 00098 * 00099 * The following routine is provided to check whether a TN is loop- 00100 * invariant: 00101 * BOOL CG_LOOP_TN_Is_Invariant(LOOP_DESCR *loop, TN *tn) 00102 * 00103 * 00104 * Loop unrolling is performed by: 00105 * BB *CG_LOOP_Unroll(LOOP_DESCR *loop) 00106 * which returns a new unrolled body if the loop is unrolled. It 00107 * may also create a "remainder loop" in the prolog or epilog. 00108 * 00109 * CGPREP (and the software pipeliner) may generate prolog and/or 00110 * epilog code to be appended to the original loop head (prolog) or 00111 * prepended to the original loop body (epilog). The following 00112 * global BB pointers are used to hold this code for the current 00113 * loop body being processed: 00114 * BB *CG_LOOP_prolog, *CG_LOOP_epilog; 00115 * These are initially NULL. In general, these may each be a list of 00116 * BBs with arbitrarily complex control flow within. The prolog 00117 * always points to the last BB in the prolog (which must be 00118 * dominated by the first BB in the prolog after any transformation). 00119 * The epilog always points to the first BB in the epilog (which must 00120 * be post-dominated by the last BB in the epilog). The starting 00121 * prolog and ending epilog BBs can be referenced by: 00122 * BB *CG_LOOP_prolog_start, *CG_LOOP_epilog_end; 00123 * 00124 * The following routines are provided to trace the prolog and epilog 00125 * blocks: 00126 * void CG_LOOP_Trace_Prolog(void) 00127 * void CG_LOOP_Trace_Epilog(void) 00128 * 00129 * To trace the whole loop, including the prolog and epilog, 00130 * and preceded by a comment given with "printf"-style args, use: 00131 * void CG_LOOP_Trace_Loop(LOOP_DESCR *loop, const char *fmt, ...) 00132 * 00133 * 00134 * When a use of a TN with a non-zero omega is introduced into the 00135 * loop body, we must "prime the pump" by specifying the initial 00136 * values for TN[1..omega] so that the omega uses are defined during 00137 * the first <omega> iterations. Likewise, upon exit from the loop 00138 * we may want to use some of the TNs defined in the loop (possibly 00139 * from iterations prior to the last one) in the code that follows 00140 * the loop. We relate these initial values and live-out values for 00141 * the body TNs to the TNs used/defined outside the body ("non body" 00142 * TNs) with the CG_LOOP_BACKPATCH data structures. These exist for 00143 * the CG_LOOP_prolog and the CG_LOOP_epilog. They are operated upon 00144 * with: 00145 * 00146 * CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Add(BB *bb, TN *non_body_tn, 00147 * TN *body_tn, UINT8 omega) 00148 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog. 00149 * Add a backpatch entry relating <non_body_tn> with <body_tn>[omega] 00150 * in the prolog/epilog specified by <bb>. Returns the new backpatch. 00151 * 00152 * TN *CG_LOOP_Backpatch_Find_Body_TN(BB *bb, TN *tn, UINT8 *omptr) 00153 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog. 00154 * If <bp> contains an entry for non-body TN <tn>, return the body TN 00155 * related to it, and set <*omptr> to the related omega unless <omptr> 00156 * is NULL. Otherwise return NULL and don't touch <*omptr>. 00157 * 00158 * TN *CG_LOOP_Backpatch_Find_Non_Body_TN(BB *bb, TN *tn, UINT8 om) 00159 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog. 00160 * If the backpatches for <bb> contains an entry for body TN <tn> 00161 * with omega <om>, return the related non-body TN, otherwise return 00162 * NULL. 00163 * 00164 * CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_First(BB *bb, TN *body_tn) 00165 * CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Next(CG_LOOP_BACKPATCH *bp) 00166 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog. 00167 * Iterate over the backpatches for <bb>. If <body_tn> is NULL, 00168 * iterate over all backpatches, otherwise iterate only over the 00169 * backpatches for <body_tn>. The Next function steps to the next 00170 * backpatch. Both functions return NULL to indicate that iteration 00171 * has completed. The following rvalue accessors are provided: 00172 * TN *CG_LOOP_BACKPATCH_body_tn(CG_LOOP_BACKPATCH *bp) 00173 * UINT8 CG_LOOP_BACKPATCH_omega(CG_LOOP_BACKPATCH *bp) 00174 * TN *CG_LOOP_BACKPATCH_non_body_tn(CG_LOOP_BACKPATCH *bp) 00175 * and the following mutators are provided: 00176 * void CG_LOOP_BACKPATCH_Set_body_tn(CG_LOOP_BACKPATCH *bp, TN *tn) 00177 * void CG_LOOP_BACKPATCH_Set_omega(CG_LOOP_BACKPATCH *bp, UINT8 omega) 00178 * void CG_LOOP_BACKPATCH_Set_non_body_tn(CG_LOOP_BACKPATCH *bp, TN *tn) 00179 * 00180 * void CG_LOOP_Backpatch_Delete(BB *bb, CG_LOOP_BACKPATCH *bp) 00181 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog 00182 * <bp> is a backpatch for <bb> 00183 * Remove <bp> from <bb>'s backpatches. 00184 * 00185 * void CG_LOOP_Backpatch_Replace_Body_TN(BB *bb, TN *tn, TN *newtn, 00186 * INT16 om_adj) 00187 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog 00188 * In the backpatches for <bb>, replace each entry with body 00189 * TN <tn>[omega] with <newtn>[omega+om_adj]. 00190 * 00191 * void CG_LOOP_Backpatch_Replace_Non_Body_TN(BB *bb, TN *tn, TN *newtn) 00192 * Requires: bb == CG_LOOP_prolog || bb == CG_LOOP_epilog 00193 * In the backpatches for <bb>, replace each entry with non-body 00194 * TN <tn> with <newtn>. 00195 * 00196 * void CG_LOOP_Backpatch_Trace(BB *bb, CG_LOOP_BACKPATCH *bp) 00197 * Print a representation of <bp> to the trace file. If <bb> 00198 * is CG_LOOP_prolog, the backpatch will be printed as: 00199 * TNbody[omega] <- TNnon_body 00200 * If <bb> is CG_LOOP_epilog, it will be printed as: 00201 * TNnon_body <- TNbody[omega] 00202 * Otherwise (e.g., if <bb> is NULL) it will be printed as: 00203 * TNbody[omega] TNnon_body 00204 * If <bp> is NULL, we print all backpatches for the given <bb>. 00205 * If both <bp> and <bb> are NULL, we print all backpatches 00206 * (indicating which ones are for the prolog and which are for 00207 * the epilog). 00208 * 00209 * 00210 * CG_LOOP provides the per-OP containers for SCC (Strongly Connected 00211 * Component) information. See "cg_loop_scc.h" for per-SCC data 00212 * structures and utility routines. The following accessors are 00213 * provided for the CG_LOOP SCC containers provided here: 00214 * 00215 * CG_LOOP_SCC *OP_scc(OP *op) 00216 * void Set_OP_scc(OP *op, CG_LOOP_SCC *scc) 00217 * Get/set the SCC info for <op>. See "cg_loop_scc.h" for the 00218 * CG_LOOP_SCC description. The Set accessor is intended for 00219 * use only by the cg_loop_scc module. 00220 * 00221 * INT16 OP_scc_index(OP *op) 00222 * void Set_OP_scc_index(OP *op, INT16 idx) 00223 * Get/set the index of <op> within OP_scc(op). The Set accessor 00224 * is intended for use only by the cg_loop_scc module. 00225 * 00226 * ARC_LIST *OP_scc_ancestors(OP *op) 00227 * ARC_LIST *OP_scc_descendents(OP *op) 00228 * Return the list of SCC ancestor/descendent ARCs for <op>. 00229 * 00230 * void CG_LOOP_Add_SCC_Arc(OP *ancestor, OP *descendent, UINT8 omega, 00231 * INT16 latency) 00232 * Construct a new SCC arc from <ancestor> to <descendent> with the 00233 * given <omega> and <latency> and attach it to <ancestor's> descendent 00234 * list and <descendent's> ancestor list. 00235 * 00236 * void CG_LOOP_Clear_SCCs(LOOP_DESCR *loop) 00237 * Delete all SCC arcs for the OPs in <loop>, making them available 00238 * for reuse by the dependence graph builder and SCC arc builder. 00239 * Also set all OP_scc's to NULL. 00240 * 00241 * 00242 * CG_LOOP provides a service for finding loop overhead OPs within a BB: 00243 * 00244 * OP_LIST *CG_LOOP_Identify_Loop_Overhead(LOOP_DESCR *loop, 00245 * INT32 *loop_overhead_count, 00246 * MEM_POOL *pool); 00247 * The function return value is a list of OPs that can be treated 00248 * as overhead in loop body <loop>. The number of OPs on the list 00249 * is returned through the reference parameter <loop_overhead_count>. 00250 * <pool> identifies the pool that the list of OPs should be 00251 * allocated from. 00252 * 00253 * TODO: Describe loh accessors: OP_loh, OP_loh_mii, "set" accessors. 00254 * 00255 * 00256 * Currently the software pipeliner is the only subphase following 00257 * CGPREP that can make use of the loop notations. The following 00258 * routine converts a loop so that the loop notations are no longer 00259 * needed. This conversion may involve generating some copies in the 00260 * loop body and some head/tail code so that the converted loop has 00261 * the same semantics as the original loop plus the notations. The 00262 * BB's for the head/tail code are passed in explicitly since some 00263 * clients (e.g., inner loop unrolling) may want to place this code 00264 * somewhere other than the loop prolog: 00265 * void CG_LOOP_Remove_Notations(LOOP_DESCR *loop, BB *head, BB *tail) 00266 * 00267 * 00268 * Finally, the following function should be called to update the loop 00269 * global liveness info: 00270 * 00271 * void CG_LOOP_Recompute_Liveness(LOOP_DESCR *loop) 00272 * Recompute global liveness info for <loop>. 00273 * 00274 * The following function inserts prolog and epilog BB's around a loop: 00275 * 00276 * BB* CG_LOOP_Gen_And_Prepend_To_Prolog(BB *loop_head, LOOP_DESCR *loop) 00277 * Create and prepend a <new_bb> to loop prologue, retargets all 00278 * the outside loop entrances to this <new_bb>, updates the pred/succ 00279 * and live-ness info for this <new_bb> accordingly. Frequency 00280 * and LOOP_DESCR updation is also done. 00281 * 00282 * BB* CG_LOOP_Append_BB_To_Prolog(BB *loop_prolog, BB *loop_head) 00283 * Create and append a <new_bb> to the <loop_prolog> bb (if necessary), 00284 * such that if it ends in a branch, it creates a fall-thru block and 00285 * puts the branch there. This is used as a placeholder <bb> to keep 00286 * all the instructions which are moved out of the loop. This relies on 00287 * CFLOW pass to later merge them. 00288 * 00289 * double CG_LOOP_Prefetch_Stride(OP *pref) 00290 * Requires: OP_prefetch(pref) 00291 * Returns the stride of prefetch OP <pref>. 00292 * 00293 * CG_LOOP_Coalesce_Backedges(LOOP_DESCR *loop) 00294 * Coalesce all backedge branches in a loop into a single branch, 00295 * i.e. create a block with an unconditional branch to the loop head 00296 * and have all backedges branch to that. This allows if-conversion 00297 * of backedge branches. 00298 * 00299 * ======================================================================= 00300 * ======================================================================= */ 00301 00302 #ifndef CG_LOOP_INCLUDED 00303 #define CG_LOOP_INCLUDED 00304 00305 00306 #include "cg_dep_graph.h" 00307 #include "cg_loop_scc.h" 00308 #include "op_list.h" 00309 #include "whirl2ops.h" 00310 #include "findloops.h" 00311 00312 struct _CG_LOOP_INFO { 00313 /* ---------------------------------------------------- */ 00314 /* All fields are private. */ 00315 /* This 'omega' field of this structure is variable- */ 00316 /* size according to the number of operands of the OP */ 00317 /* to which this structure is attached. However, its */ 00318 /* minimum size is large enough for all fixed size OPs. */ 00319 /* See CG_LOOP_Init_Op() for more info. */ 00320 /* ---------------------------------------------------- */ 00321 CG_LOOP_SCC *scc; 00322 ARC_LIST *scc_ancestors; 00323 ARC_LIST *scc_descendents; 00324 mINT16 loh_mii; 00325 mINT16 scc_index; 00326 mUINT8 restore_omega; 00327 mBOOL loh; 00328 mUINT8 omega[OP_MAX_FIXED_OPNDS]; // THIS FIELD MUST BE LAST (see above) 00329 }; 00330 00331 /* Given an op, return the 'sizeof' the _CG_LOOP_INFO struct needed 00332 * for that OP. For the sake of simplicity, the result of the calculation 00333 * is often slightly larger than necessary and not rounded up to 00334 * the alignment of the struct. 00335 */ 00336 inline UINT _CG_LOOP_info_sizeof(OP *op) 00337 { 00338 UINT sizeof_info = sizeof(_CG_LOOP_INFO); 00339 if (OP_opnds(op) > OP_MAX_FIXED_OPNDS) { 00340 sizeof_info += sizeof(mUINT8) * (OP_opnds(op) - OP_MAX_FIXED_OPNDS); 00341 } 00342 return sizeof_info; 00343 } 00344 00345 /* Internal OP_MAP to hold loop info. 00346 */ 00347 extern OP_MAP _CG_LOOP_info_map; 00348 00349 #define _CG_LOOP_info(op) ((_CG_LOOP_INFO *)OP_MAP_Get(_CG_LOOP_info_map, op)) 00350 00351 /* Internal backpatch data structure representation. When iterating 00352 * over backpatches, we need to know whether we're iterating over the 00353 * backpatches for a single body_tn or all body_tns. We indicate this 00354 * with the least significant bit of the pointer result. The internal 00355 * _CG_LOOP_BP macros below are used to access this. 00356 */ 00357 struct cg_loop_backpatch { 00358 TN *non_body_tn; 00359 TN *body_tn; 00360 struct cg_loop_backpatch *next; 00361 UINT8 omega; 00362 }; 00363 00364 #define _CG_LOOP_BP_iter_limited(bp) ((INTPTR)(bp) & 1) 00365 #define _CG_LOOP_BP_limit_iter(bp) \ 00366 ((struct cg_loop_backpatch *)((INTPTR)(bp) | 1)) 00367 #define _CG_LOOP_BP_actual_ptr(bp) \ 00368 ((struct cg_loop_backpatch *)(((INTPTR)(bp)) & ~1)) 00369 00370 /* Exported types. 00371 */ 00372 typedef struct cg_loop_backpatch CG_LOOP_BACKPATCH; 00373 00374 00375 /* Exported macros. 00376 */ 00377 00378 /* This could be as high as 255, but really large omegas make SWP 00379 * get very slow, so use 32 for now. 00380 */ 00381 #define MAX_OMEGA 32 00382 00383 #define Is_CG_LOOP_Op(op) \ 00384 (_CG_LOOP_info_map && _CG_LOOP_info(op) ? TRUE : FALSE) 00385 #define OP_omega(op, opnd) (_CG_LOOP_info(op)->omega[opnd]+0) 00386 00387 inline void Set_OP_omega(OP *op, UINT8 opnd, UINT8 omega) 00388 { 00389 _CG_LOOP_INFO *info = _CG_LOOP_info(op); 00390 Is_True(omega < 2 || !TN_is_dedicated(OP_opnd(op,opnd)), 00391 ("Trying to put omega of %d on use of dedicated TN", omega)); 00392 info->omega[opnd] = omega; 00393 } 00394 00395 #define OP_restore_omega(op) (_CG_LOOP_info(op)->restore_omega+0) 00396 #define Set_OP_restore_omega(op, omega) \ 00397 (_CG_LOOP_info(op)->restore_omega = (omega)) 00398 00399 #define OP_loh_mii(op) (_CG_LOOP_info(op)->loh_mii+0) 00400 #define Set_OP_loh_mii(op, mii) (_CG_LOOP_info(op)->loh_mii = mii) 00401 00402 #define OP_loh(op) (_CG_LOOP_info(op)->loh+0) 00403 #define Set_OP_loh(op) (_CG_LOOP_info(op)->loh = TRUE) 00404 #define Reset_OP_loh(op) (_CG_LOOP_info(op)->loh = FALSE) 00405 00406 #define OP_scc(op) ((struct cg_loop_scc *)_CG_LOOP_info(op)->scc) 00407 #define Set_OP_scc(op, s) (_CG_LOOP_info(op)->scc = s) 00408 00409 #define OP_scc_index(op) (_CG_LOOP_info(op)->scc_index+0) 00410 #define Set_OP_scc_index(op, idx) (_CG_LOOP_info(op)->scc_index = idx) 00411 00412 #define OP_scc_ancestors(op) (_CG_LOOP_info(op)->scc_ancestors+0) 00413 #define OP_scc_descendents(op) (_CG_LOOP_info(op)->scc_descendents+0) 00414 00415 #define CG_LOOP_Add_SCC_Arc(ancestor, descendent, omega, latency) \ 00416 CG_DEP_Add_SCC_Arc(ancestor, descendent, omega, latency, \ 00417 &_CG_LOOP_info(descendent)->scc_ancestors, \ 00418 &_CG_LOOP_info(ancestor)->scc_descendents) 00419 00420 00421 #define CG_LOOP_BACKPATCH_non_body_tn(bp) \ 00422 (_CG_LOOP_BP_actual_ptr(bp)->non_body_tn+0) 00423 #define CG_LOOP_BACKPATCH_body_tn(bp) \ 00424 (_CG_LOOP_BP_actual_ptr(bp)->body_tn+0) 00425 #define CG_LOOP_BACKPATCH_omega(bp) \ 00426 (_CG_LOOP_BP_actual_ptr(bp)->omega+0) 00427 00428 #define CG_LOOP_BACKPATCH_Set_non_body_tn(bp, newtn) \ 00429 (_CG_LOOP_BP_actual_ptr(bp)->non_body_tn = (newtn)) 00430 #define CG_LOOP_BACKPATCH_Set_body_tn(bp, newtn) \ 00431 (_CG_LOOP_BP_actual_ptr(bp)->body_tn = (newtn)) 00432 #define CG_LOOP_BACKPATCH_Set_omega(bp,om) \ 00433 (_CG_LOOP_BP_actual_ptr(bp)->omega = (om)) 00434 00435 extern BOOL CG_LOOP_TN_Is_Invariant(LOOP_DESCR *loop, TN *tn); 00436 00437 /* Exported global variables. 00438 */ 00439 extern BB *CG_LOOP_prolog; 00440 extern BB *CG_LOOP_epilog; 00441 extern BB *CG_LOOP_prolog_start; 00442 extern BB *CG_LOOP_epilog_end; 00443 00444 extern UINT32 CG_LOOP_unroll_times_max; 00445 extern UINT32 CG_LOOP_unrolled_size_max; 00446 extern BOOL CG_LOOP_unroll_fully; 00447 extern BOOL CG_LOOP_unroll_remainder_fully; 00448 extern UINT32 CG_LOOP_unroll_min_trip; 00449 extern BOOL CG_LOOP_unroll_analysis; 00450 extern BOOL CG_LOOP_ooo_unroll_heuristics; 00451 extern BOOL CG_LOOP_ooo_unroll_heuristics_set; 00452 extern UINT32 CG_LOOP_reorder_buffer_size; 00453 extern UINT32 CG_LOOP_cache_miss_threshold; 00454 extern BOOL CG_LOOP_multi_bb_unroll_analysis; 00455 extern UINT32 CG_LOOP_unroll_analysis_threshold; 00456 extern BOOL CG_LOOP_unroll_multi_bb; 00457 extern BOOL CG_LOOP_unroll_non_trip_countable; 00458 extern BOOL CG_LOOP_create_loop_prologs; 00459 extern BOOL CG_LOOP_create_loop_epilogs; 00460 extern INT32 CG_LOOP_force_ifc; 00461 00462 extern BOOL CG_LOOP_optimize_lno_winddown_cache; 00463 extern BOOL CG_LOOP_optimize_lno_winddown_reg; 00464 extern BOOL CG_LOOP_optimize_non_innermost; 00465 extern BOOL CG_LOOP_optimize_multi_targ; 00466 extern BOOL CG_LOOP_optimize_non_trip_countable; 00467 00468 #ifdef KEY 00469 extern INT32 CG_Enable_Loop_Opt_Limit; 00470 #endif 00471 00472 /* Exported functions. 00473 */ 00474 extern void CG_LOOP_Init(); 00475 extern void CG_LOOP_Finish(); 00476 00477 inline TN *CG_LOOP_Trip_Count(LOOP_DESCR *loop) 00478 { 00479 LOOPINFO *info = LOOP_DESCR_loopinfo(loop); 00480 return info ? LOOPINFO_trip_count_tn(info) : NULL; 00481 } 00482 00483 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Add(BB *bb, TN *non_body_tn, 00484 TN *body_tn, UINT8 omega); 00485 00486 TN *CG_LOOP_Backpatch_Find_Body_TN(BB *bb, TN *tn, UINT8 *omptr); 00487 TN *CG_LOOP_Backpatch_Find_Non_Body_TN(BB *bb, TN *tn, UINT8 om); 00488 00489 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_First(BB *bb, TN *body_tn); 00490 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Next(CG_LOOP_BACKPATCH *bp); 00491 00492 void CG_LOOP_Backpatch_Delete(BB *bb, CG_LOOP_BACKPATCH *bp); 00493 00494 void CG_LOOP_Backpatch_Replace_Non_Body_TN(BB *bb, TN *tn, TN *newtn); 00495 void CG_LOOP_Backpatch_Replace_Body_TN(BB *bb, TN *tn, TN *newtn, 00496 INT16 om_adj); 00497 00498 void CG_LOOP_Backpatch_Trace(BB *bb, CG_LOOP_BACKPATCH *bp); 00499 00500 void CG_LOOP_Remove_Notations(LOOP_DESCR *loop, BB *head, BB *tail); 00501 00502 OP_LIST *CG_LOOP_Identify_Loop_Overhead(LOOP_DESCR *loop, 00503 INT32 *loop_overhead_count, 00504 MEM_POOL *pool); 00505 00506 void CG_LOOP_Clear_SCCs(LOOP_DESCR *loop); 00507 00508 void CG_LOOP_Init_Op(OP *op); 00509 void CG_LOOP_Init_OPS(OPS *ops); 00510 00511 void CG_LOOP_Recompute_Liveness(LOOP_DESCR *loop); 00512 00513 BB *CG_LOOP_Unroll(LOOP_DESCR *loop, BOOL); 00514 void CG_LOOP_Prune_Prefetches(BB *bb, INT32 offset, BOOL before_swp, BOOL swp_wind); 00515 00516 void CG_LOOP_Trace_Prolog(void); 00517 void CG_LOOP_Trace_Epilog(void); 00518 void CG_LOOP_Trace_Loop(LOOP_DESCR *loop, const char *fmt, ...); 00519 00520 void CG_LOOP_Remove_Prolog_OPs(BB *head); 00521 void CG_LOOP_Remove_Epilog_OPs(BB *tail); 00522 BB* CG_LOOP_Gen_And_Prepend_To_Prolog(BB *loop_head, LOOP_DESCR* loop); 00523 BB* CG_LOOP_Append_BB_To_Prolog(BB *loop_prolog, BB *loop_head); 00524 00525 void CG_LOOP_Coalesce_Backedges(LOOP_DESCR *loop); 00526 00527 TN *CG_LOOP_unroll_names_get(TN *tn, UINT8 unrolling); 00528 extern INT Branch_Target_Operand(OP *br_op); 00529 00530 inline UINT32 CG_LOOP_Prefetch_Stride(OP *pref) 00531 { 00532 WN *pref_wn = Get_WN_From_Memory_OP(pref); 00533 UINT32 s1 = WN_pf_stride_1L(pref_wn); 00534 UINT32 s2 = WN_pf_stride_2L(pref_wn); 00535 Is_True(s1 == 0 || s2 == 0, ("prefetch has L1 and L2 strides")); 00536 return s1 + s2; 00537 } 00538 00539 00540 void unroll_do_loop(LOOP_DESCR *, UINT32); 00541 void unroll_do_loop_fully(LOOP_DESCR *, UINT32); 00542 void unroll_dowhile_loop(LOOP_DESCR *, UINT32); 00543 00544 00545 // A class to keep trace of all global states needed by loop optimizations. 00546 // 00547 // Currently the content of this class mirrows the global variables 00548 // needed by existing functions. 00549 // 00550 enum CG_LOOP_FLAGS { 00551 CG_LOOP_NONE = 0x0, 00552 CG_LOOP_HAS_PROLOG = 0x1, 00553 CG_LOOP_HAS_EPILOG = 0x2, 00554 CG_LOOP_EPILOG_REACHABLE = 0x4, // epilog reachable from loop 00555 }; 00556 00557 class CG_LOOP { 00558 00559 private: 00560 LOOP_DESCR *loop; 00561 BOOL unroll_fully; 00562 INT32 unroll_factor; 00563 OP_MAP op_map; 00564 BB *unroll_remainder_bb; 00565 INT32 flags; 00566 BB *prolog_start; 00567 BB *prolog_end; 00568 BB *epilog_start; 00569 BB *epilog_end; 00570 BB *trip_count_bb; // The BB contains the trip count expr 00571 #ifdef TARG_IA64 00572 INT32 acyclic_len; 00573 INT32 acyclic_len_wo_dspec; 00574 #endif 00575 00576 void Attach_Prolog_And_Epilog(LOOP_DESCR *loop); 00577 00578 public: 00579 BOOL Has_prolog() const { return flags & CG_LOOP_HAS_PROLOG; } 00580 BOOL Has_epilog() const { return flags & CG_LOOP_HAS_EPILOG; } 00581 BOOL Has_prolog_epilog() const { return Has_prolog() && Has_epilog(); } 00582 void Set_has_prolog() { flags |= CG_LOOP_HAS_PROLOG; } 00583 void Set_has_epilog() { flags |= CG_LOOP_HAS_EPILOG; } 00584 BB *Prolog_start() const { return prolog_start; } 00585 BB *Prolog_end() const { return prolog_end; } 00586 BB *Epilog_start() const { return epilog_start; } 00587 BB *Epilog_end() const { return epilog_end; } 00588 BB *Trip_count_bb() const { return trip_count_bb; } 00589 BB *Loop_header() const { return LOOP_DESCR_loophead(loop); } 00590 TN *Trip_count_tn() const { return CG_LOOP_Trip_Count(loop); } 00591 00592 LOOP_DESCR *Loop() const { return loop; } 00593 OP_MAP Op_map() const { return op_map; } 00594 BOOL Single_BB() const { return BB_SET_Size(LOOP_DESCR_bbset(loop)) == 1; } 00595 BOOL Unroll_fully() const { return unroll_fully; } 00596 void Set_unroll_fully() { unroll_fully = TRUE; } 00597 INT32 Unroll_factor() const { return unroll_factor; } 00598 void Set_unroll_factor(INT32 n) { unroll_factor = n; } 00599 #ifdef TARG_IA64 00600 INT32 Acyclic_len (void) const { return acyclic_len; } 00601 void Set_acyclic_len(INT32 len) { acyclic_len = len; } 00602 INT32 Acyclic_len_wo_dspec (void) const { return acyclic_len_wo_dspec; } 00603 void Set_acyclic_len_wo_dspec (INT32 len) { acyclic_len_wo_dspec = len; } 00604 #endif 00605 00606 void Recompute_Liveness(); 00607 bool Determine_Unroll_Fully(); 00608 void Determine_Unroll_Factor(); 00609 void Determine_SWP_Unroll_Factor(); 00610 void Build_CG_LOOP_Info(); 00611 void EBO_Before_Unrolling(); 00612 void EBO_After_Unrolling(); 00613 void Print(FILE *fp); 00614 void Verify(); 00615 00616 CG_LOOP(LOOP_DESCR *); 00617 ~CG_LOOP(); 00618 }; 00619 00620 // Data structure for fixup SWP loops 00621 // 00622 struct SWP_FIXUP { 00623 BB *prolog; 00624 BB *body; 00625 BB *epilog; 00626 INT control_loc; 00627 SWP_FIXUP(BB *bb1, BB *bb2, BB *bb3, INT cntrl_loc): 00628 prolog(bb1),body(bb2),epilog(bb3), control_loc(cntrl_loc) {} 00629 }; 00630 00631 typedef vector<SWP_FIXUP> SWP_FIXUP_VECTOR; 00632 00633 00634 #include "tn_map.h" 00635 00636 // Create the mapping from a TN to its first definition 00637 // in the loop body (only for single BB loop). 00638 // 00639 struct CG_LOOP_DEF { 00640 TN_MAP tn_map; 00641 OP *Get(TN *tn); 00642 BOOL Is_invariant(TN *tn); 00643 CG_LOOP_DEF(BB *bb); 00644 ~CG_LOOP_DEF(); 00645 }; 00646 00647 00648 // Construct a vector of OP*. 00649 // and then uses the index to represent the OP. 00650 // 00651 struct OP_VECTOR { 00652 typedef int index_type; 00653 typedef vector<OP *>::iterator iterator; 00654 vector<OP *> op_vec; 00655 00656 iterator begin() { return op_vec.begin(); } 00657 00658 iterator end() { return op_vec.end(); } 00659 00660 index_type size() const { return op_vec.size(); } 00661 00662 OP* operator[](int i) const { return op_vec[i]; } 00663 00664 OP* operator[](int i) { return op_vec[i]; } 00665 00666 OP_VECTOR(BB *body) { 00667 OP *op; 00668 INT op_num = 0; 00669 FOR_ALL_BB_OPs(body, op) { 00670 op_vec.push_back(op); 00671 op_num++; 00672 } 00673 } 00674 }; 00675 00676 00677 extern CG_LOOP *Current_CG_LOOP; 00678 00679 #if defined(TARG_IA64) || defined(TARG_SL) || defined(TARG_MIPS) 00680 extern void Perform_Loop_Optimizations(void *rgn_loop_update=NULL); 00681 00682 extern BOOL CG_LOOP_Optimize(LOOP_DESCR *loop, SWP_FIXUP_VECTOR& fixup, 00683 void **par_rgn, void *rgn_loop_update); 00684 #else 00685 extern void Perform_Loop_Optimizations(); 00686 00687 extern BOOL CG_LOOP_Optimize(LOOP_DESCR *loop, SWP_FIXUP_VECTOR& fixup); 00688 #endif 00689 00690 #if defined(TARG_SL) 00691 #define TRACE_ZDL_GEN 0x1 00692 #define TRACE_ZDL_IR 0x2 00693 #define TRACE_ZDL_SEQ_NO 0x4 00694 #define TRACE_ZDL_ALL 0x7 00695 extern void CG_LOOP_zero_delay_loop_gen(); 00696 #endif 00697 00698 extern BOOL Perform_SWP(CG_LOOP& cl, SWP_FIXUP_VECTOR& fixup, bool is_doloop); 00699 00700 extern void SWP_Fixup(SWP_FIXUP& fixup); 00701 00702 #endif /* CG_LOOP_INCLUDED */
1.5.6