00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifdef USE_PCH
00039 #include "lno_pch.h"
00040 #endif // USE_PCH
00041 #pragma hdrstop
00042
00043 #include <sys/types.h>
00044 #include <alloca.h>
00045 #include "pu_info.h"
00046 #include "lwn_util.h"
00047 #include "lnoutils.h"
00048 #include "prompf.h"
00049 #include "config.h"
00050 #include "debug.h"
00051 #include "glob.h"
00052 #include "lnopt_main.h"
00053 #include "reduc.h"
00054 #include "wind_down.h"
00055 #include "snl_utils.h"
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082 extern void Wind_Down_Dep_V(WN* orig,
00083 WN* copy,
00084 HASH_TABLE<VINDEX16,VINDEX16>* hash_table,
00085 ARRAY_DIRECTED_GRAPH16* dg)
00086 {
00087 if (orig == NULL) {
00088 FmtAssert(copy == NULL, ("Bad call to Wind_Down_Dep_V()"));
00089 return;
00090 }
00091
00092 FmtAssert(copy,
00093 ("Copy null for non-null orig, opcode %d", WN_opcode(orig)));
00094 FmtAssert(WN_opcode(copy) == WN_opcode(orig),
00095 ("opcode orig = %d, opcode copy = %d",
00096 WN_opcode(copy), WN_opcode(orig)));
00097
00098 OPCODE op = WN_opcode(orig);
00099
00100 switch (op) {
00101 case OPC_BLOCK:
00102 {
00103 WN *orig_kid = WN_first(orig);
00104 WN *copy_kid = WN_first(copy);
00105 while (orig_kid) {
00106 Wind_Down_Dep_V(orig_kid, copy_kid, hash_table, dg);
00107 orig_kid = WN_next(orig_kid);
00108 copy_kid = WN_next(copy_kid);
00109 }
00110 }
00111 break;
00112
00113 default:
00114 {
00115 if (OPCODE_is_load(op) || OPCODE_is_store(op) || OPCODE_is_call(op)) {
00116 VINDEX16 origv = dg->Get_Vertex(orig);
00117 if (origv) {
00118 VINDEX16 copyv = dg->Get_Vertex(copy);
00119 FmtAssert(copyv, ("Missing corresponding vertex"));
00120 hash_table->Enter(origv, copyv);
00121 }
00122 }
00123 for (INT kidno = 0; kidno < WN_kid_count(orig); kidno++)
00124 Wind_Down_Dep_V(WN_kid(orig,kidno),WN_kid(copy,kidno),hash_table,dg);
00125 }
00126 }
00127 }
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139 static DEPV_ARRAY* Wind_Down_Shorten(DEPV_ARRAY* orig_dv,
00140 INT num_dim,
00141 INT num_unused_dim,
00142 BOOL allow_zerovec)
00143 {
00144 if (num_dim <= 0)
00145 return NULL;
00146
00147 DEPV_ARRAY* copy_dv = Create_DEPV_ARRAY(orig_dv->Num_Vec(),
00148 num_dim, num_unused_dim,
00149 &LNO_default_pool);
00150 INT vnew = 0;
00151
00152 for (INT v = 0; v < orig_dv->Num_Vec(); v++) {
00153 DEPV* odv = orig_dv->Depv(v);
00154 DEPV* ndv = copy_dv->Depv(vnew);
00155
00156
00157
00158
00159
00160 BOOL zerovec = TRUE;
00161 for (INT dd = 0; dd < num_dim; dd++) {
00162 DEPV_Dep(ndv, dd) = DEPV_Dep(odv, dd);
00163 zerovec = zerovec && DEP_Direction(DEPV_Dep(ndv,dd)) == DIR_EQ;
00164 }
00165
00166 if (zerovec) {
00167 if (allow_zerovec) {
00168 switch (DEP_Direction(DEPV_Dep(odv, num_dim))) {
00169 case DIR_POS:
00170 case DIR_POSEQ:
00171 vnew++;
00172 }
00173 }
00174 }
00175 else
00176 vnew++;
00177 }
00178
00179 if (vnew == 0)
00180 return NULL;
00181 else if (vnew == copy_dv->Num_Vec())
00182 return copy_dv;
00183 else {
00184 DEPV_ARRAY* copy2_dv = Create_DEPV_ARRAY(vnew, num_dim, num_unused_dim,
00185 &LNO_default_pool);
00186 for (INT v = 0; v < vnew; v++) {
00187 DEPV* ndv = copy_dv->Depv(v);
00188 DEPV* n2dv = copy2_dv->Depv(v);
00189 for (INT dd = 0; dd < num_dim; dd++)
00190 DEPV_Dep(n2dv, dd) = DEPV_Dep(ndv, dd);
00191 }
00192 return copy2_dv;
00193 }
00194 }
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205 extern BOOL Wind_Down_Dep_E(HASH_TABLE<VINDEX16,VINDEX16>* hash_table,
00206 INT d,
00207 ARRAY_DIRECTED_GRAPH16* dg)
00208 {
00209
00210 HASH_TABLE_ITER<VINDEX16,VINDEX16> htit(hash_table);
00211 VINDEX16 origv;
00212 VINDEX16 copyv;
00213
00214 DYN_ARRAY<EINDEX16> interesting_edges(&LNO_local_pool);
00215 while (htit.Step(&origv, ©v)) {
00216 FmtAssert(origv && copyv, ("broken vertex table %d %d", origv, copyv));
00217 for (EINDEX16 edge = dg->Get_Out_Edge(origv); edge;
00218 edge = dg->Get_Next_Out_Edge(edge)) {
00219
00220 VINDEX16 orig_sinkv = dg->Get_Sink(edge);
00221 VINDEX16 new_sinkv = hash_table->Find(orig_sinkv);
00222
00223
00224 if (new_sinkv) {
00225 INT idx = interesting_edges.Newidx();
00226 interesting_edges[idx] = edge;
00227 }
00228 }
00229 }
00230
00231 for (INT i = interesting_edges.Elements() - 1; i >= 0; i--) {
00232 EINDEX16 edge = interesting_edges[i];
00233 VINDEX16 orig_sinkv = dg->Get_Sink(edge);
00234 VINDEX16 new_sinkv = hash_table->Find(orig_sinkv);
00235 origv = dg->Get_Source(edge);
00236 copyv = hash_table->Find(origv);
00237
00238 FmtAssert(new_sinkv, ("impossible"));
00239
00240
00241
00242 DEPV_ARRAY* orig_dv = dg->Depv_Array(edge);
00243 Is_True(orig_dv, ("pro-blem: edge has no arc"));
00244 INT num_dim = orig_dv->Num_Dim();
00245 INT num_unused_dim = orig_dv->Num_Unused_Dim();
00246 FmtAssert(num_unused_dim <= d, ("Bug1 in Wind_Down_Dep_E"));
00247 FmtAssert(d < num_unused_dim + num_dim,
00248 ("Bug2 in Wind_Down_Dep_E: %d,%d,%d",
00249 d, num_unused_dim, num_dim));
00250 num_dim = d - num_unused_dim;
00251
00252 if (num_dim) {
00253 DEPV_ARRAY* copy_dv;
00254 copy_dv = Wind_Down_Shorten(orig_dv, num_dim, num_unused_dim, TRUE);
00255 if (copy_dv) {
00256 if (!dg->Add_Edge(origv, new_sinkv, copy_dv))
00257 return FALSE;
00258 }
00259
00260 copy_dv = Wind_Down_Shorten(orig_dv, num_dim, num_unused_dim, FALSE);
00261 if (copy_dv) {
00262 if (!dg->Add_Edge(copyv, orig_sinkv, copy_dv))
00263 return FALSE;
00264 }
00265 }
00266 }
00267 return TRUE;
00268 }
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279 static void Set_Winddown_Annotations_B(WN* newbody,
00280 BOOL cache_winddown,
00281 EST_REGISTER_USAGE est_register_usage)
00282 {
00283 FmtAssert(WN_opcode(newbody) == OPC_BLOCK, ("Bad newbody"));
00284
00285 for (WN* wn = WN_first(newbody); wn; wn = WN_next(wn)) {
00286 switch (WN_opcode(wn)) {
00287 case OPC_IF:
00288 Set_Winddown_Annotations_B(WN_then(wn), cache_winddown,
00289 est_register_usage);
00290 Set_Winddown_Annotations_B(WN_else(wn), cache_winddown,
00291 est_register_usage);
00292 break;
00293 case OPC_DO_LOOP:
00294 Set_Winddown_Annotations(wn, cache_winddown,
00295 est_register_usage, FALSE);
00296 break;
00297 case OPC_DO_WHILE:
00298 case OPC_WHILE_DO:
00299 Set_Winddown_Annotations_B(WN_while_body(wn), cache_winddown,
00300 est_register_usage);
00301 default:
00302 break;
00303 }
00304 }
00305 }
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316 extern void Set_Winddown_Annotations(WN* newloop,
00317 BOOL cache_winddown,
00318 EST_REGISTER_USAGE est_register_usage,
00319 BOOL outer)
00320 {
00321 DO_LOOP_INFO* dli = Get_Do_Loop_Info(newloop);
00322
00323 if (outer) {
00324 if (cache_winddown)
00325 dli->Set_Cache_Winddown();
00326 else
00327 dli->Set_Register_Winddown();
00328 }
00329 else {
00330 if (cache_winddown)
00331 dli->Set_In_Cache_Winddown();
00332 else
00333 dli->Set_In_Register_Winddown();
00334 }
00335
00336 if (dli->Is_Inner)
00337 dli->Est_Register_Usage = est_register_usage;
00338 else
00339 Set_Winddown_Annotations_B(WN_do_body(newloop), cache_winddown,
00340 est_register_usage);
00341 }
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374 extern WN* Wind_Down(WN* loop,
00375 INT est_num_iterations,
00376 BOOL cache_winddown,
00377 EST_REGISTER_USAGE est_register_usage)
00378 {
00379 const INT bufsz = 64;
00380 char buf[bufsz];
00381 INT bufcnt;
00382
00383 ARRAY_DIRECTED_GRAPH16* dg = Array_Dependence_Graph;
00384
00385 WN* newloop = LWN_Copy_Tree(loop, TRUE, LNO_Info_Map);
00386 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00387 STACK<WN*> st_old(&PROMPF_pool);
00388 STACK<WN*> st_new(&PROMPF_pool);
00389 Prompf_Assign_Ids(loop, newloop, &st_old, &st_new, FALSE);
00390 INT nloops = st_old.Elements();
00391 INT* old_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
00392 INT* new_ids = CXX_NEW_ARRAY(INT, nloops, &PROMPF_pool);
00393 for (INT i = 0; i < nloops; i++) {
00394 old_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_old.Bottom_nth(i));
00395 new_ids[i] = WN_MAP32_Get(Prompf_Id_Map, st_new.Bottom_nth(i));
00396 }
00397 Prompf_Info->Register_Winddown(old_ids, new_ids, nloops);
00398 }
00399 WN* fake_unroll_bodies[2];
00400 fake_unroll_bodies[0] = loop;
00401 fake_unroll_bodies[1] = newloop;
00402 if (red_manager) red_manager->Unroll_Update(fake_unroll_bodies, 2);
00403 Unrolled_DU_Update(fake_unroll_bodies, 2, Do_Loop_Depth(loop)-1, TRUE, FALSE); if (dg->Add_Deps_To_Copy_Block(loop, newloop)) {
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414 HASH_TABLE<VINDEX16,VINDEX16>
00415 hash_table(MIN(dg->Get_Vertex_Count(),512),&LNO_local_pool);
00416
00417 Wind_Down_Dep_V(loop, newloop, &hash_table, dg);
00418
00419 if (!Wind_Down_Dep_E(&hash_table, Do_Depth(loop), dg)) {
00420 SNL_DEBUG0(0, "Wind_Down_Dep_E() failed -- continuing");
00421 LWN_Update_Dg_Delete_Tree(newloop, dg);
00422 LNO_Erase_Dg_From_Here_In(newloop, dg);
00423 Unmapped_Vertices_Here_Out(LWN_Get_Parent(loop));
00424 }
00425 } else {
00426 SNL_DEBUG0(0, "Add_Deps_To_Copy_Block() failed -- continuing");
00427 LWN_Update_Dg_Delete_Tree(newloop, dg);
00428 LNO_Erase_Dg_From_Here_In(newloop, dg);
00429 Unmapped_Vertices_Here_Out(LWN_Get_Parent(loop));
00430 }
00431
00432 LWN_Insert_Block_After(LWN_Get_Parent(loop), loop, newloop);
00433 ST* st0 = WN_st(WN_index(loop));
00434 WN_OFFSET offset0 = WN_offset(WN_index(loop));
00435 TYPE_ID wtype0 = WN_desc(WN_start(loop));
00436 bufcnt = sprintf(buf, "$wd_");
00437 (SYMBOL(WN_index(loop))).Name(buf+bufcnt, bufsz-bufcnt);
00438 Replace_Symbol(newloop, SYMBOL(st0, offset0, wtype0),
00439 Create_Preg_Symbol(buf, wtype0), NULL, newloop);
00440 Fix_Do_Du_Info(newloop, NULL, TRUE, NULL, 1);
00441
00442 WN* newbegin = WN_start(newloop);
00443 LWN_Delete_Tree(WN_kid0(newbegin));
00444 WN_kid0(newbegin) = LWN_CreateLdid(OPCODE_make_op(OPR_LDID, wtype0, wtype0),
00445 WN_start(loop));
00446 LWN_Set_Parent(WN_kid0(newbegin), newbegin);
00447 Fix_Do_Du_Info(newbegin, NULL, FALSE, loop, 0);
00448
00449 DO_LOOP_INFO* dli = Get_Do_Loop_Info(newloop);
00450 if (Cur_PU_Feedback && dli->Est_Num_Iterations>0) {
00451 INT32 org_count = WN_MAP32_Get(WN_MAP_FEEDBACK, WN_start(loop));
00452 float ratio = ((float) est_num_iterations)
00453 / ((float) dli->Est_Num_Iterations);
00454 LWN_Scale_Frequency(WN_step(newloop), ratio);
00455 LWN_Scale_Frequency_Tree(WN_do_body(newloop), ratio);
00456 }
00457 dli->Est_Num_Iterations = est_num_iterations;
00458 dli->Num_Iterations_Symbolic = FALSE;
00459 dli->Num_Iterations_Profile = FALSE;
00460 dli->Is_Ivdep = Get_Do_Loop_Info(loop)->Is_Ivdep;
00461 dli->Is_Concurrent_Call = Get_Do_Loop_Info(loop)->Is_Concurrent_Call;
00462 dli->Concurrent_Directive = Get_Do_Loop_Info(loop)->Concurrent_Directive;
00463 dli->LB->Too_Messy = TRUE;
00464 if (cache_winddown)
00465 dli->Set_Cache_Winddown();
00466 else
00467 dli->Set_Register_Winddown();
00468
00469 Set_Winddown_Annotations(newloop, cache_winddown, est_register_usage, TRUE);
00470
00471 DOLOOP_STACK shortstack(&LNO_local_pool);
00472 Build_Doloop_Stack(LWN_Get_Parent(newloop), &shortstack);
00473 LNO_Build_Access(newloop, &shortstack, &LNO_default_pool);
00474
00475 return newloop;
00476 }
00477