00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 #define USE_STANDARD_TYPES
00057 #include "defs.h"
00058 #include <vector>
00059 #include <utility>
00060 #include <iterator>
00061 #include <math.h>
00062 #include "cg_swp.h"
00063 #include "cg_swp_options.h"
00064 #include "mempool.h"
00065 #include "errors.h"
00066 #include "tracing.h"
00067 #include "op.h"
00068 #include "cg_dep_graph.h"
00069 #include "ti_res_res.h"
00070 #include "cg_loop_mii.h"
00071 #ifdef TARG_IA64
00072 #include "cache_analysis.h"
00073 #endif
00074 #if defined(_LP64) && defined(_SGI_COMPILER_VERSION)
00075
00076 #define min(a,b) ((a < b) ? a : b)
00077 #endif
00078
00079 void MinDist::Print(FILE *fp) const
00080 {
00081 const int n_col = 16;
00082 fprintf(fp, "MinDist %dx%d:\n", size(), size());
00083 fprintf(fp, " ");
00084 for (INT j = 0; j < size(); j++) {
00085 if (j != 0 && j % n_col == 0)
00086 fprintf(fp, "\n ");
00087 fprintf(fp,"%4d", j);
00088 }
00089 fprintf(fp, "\n");
00090 for (INT i = 0; i < size(); i++) {
00091 fprintf(fp, "%3d: ", i);
00092 for (INT j = 0; j < size(); j++) {
00093 if (j != 0 && j % n_col == 0)
00094 fprintf(fp, "\n ");
00095 fprintf(fp,"%4d", mindist[i][j]);
00096 }
00097 fprintf(fp, "\n");
00098 }
00099 }
00100
00101 MinDist::MinDist(const SWP_OP_vector& v, INT start, INT stop, INT branch, INT ii)
00102 {
00103 int n_ops = v.size();
00104 mindist_size = n_ops;
00105
00106 INT step = ii;
00107 INT ii_lower = ii;
00108 INT ii_try;
00109
00110 while (1) {
00111
00112 ii_try = ii_lower;
00113 while (Compute(v, start, stop, branch, ii_try) > 0) {
00114 ii_lower = ii_try + 1;
00115 ii_try += step;
00116 }
00117
00118
00119
00120 if (ii_lower == ii_try) {
00121 found_ii = ii_lower;
00122 return;
00123 }
00124 step = MAX(1, step / 2);
00125 }
00126 }
00127
00128
00129 INT MinDist::Compute(const SWP_OP_vector& v, INT start, INT stop, INT branch, INT ii)
00130 {
00131 int n_ops = v.size();
00132 INT i, j, k;
00133
00134
00135
00136 for (i = 0; i < n_ops; i++)
00137 for (j = 0; j < n_ops; j++)
00138 mindist[i][j] = NEG_INF;
00139
00140 for (i = 0; i < v.size(); i++) {
00141 OP *op = v[i].op;
00142 if (op) {
00143 for ( ARC_LIST *al = OP_succs(op) ; al; al = ARC_LIST_rest(al) ) {
00144 ARC *arc = ARC_LIST_first(al);
00145
00146
00147 if (ARC_kind(arc) == CG_DEP_PREBR)
00148 continue;
00149 OP *succ = ARC_succ(arc);
00150 mindist[i][SWP_index(succ)] =
00151 MAX(mindist[i][SWP_index(succ)], ARC_latency(arc) - ARC_omega(arc) * ii);
00152 Is_True(succ == v[SWP_index(succ)].op, ("MinDIST: wrong SWP_index."));
00153 }
00154 mindist[start][i] = 0;
00155 mindist[i][stop] = 0;
00156 #ifdef TARG_IA64
00157 mindist[i][branch] = MAX(mindist[i][branch], 0);
00158 #endif
00159 }
00160 }
00161
00162
00163
00164 for (k = 0; k < n_ops; k++)
00165 for (i = 0; i < n_ops; i++)
00166 for (j = 0; j < n_ops; j++) {
00167 #if SWP_DEBUG
00168
00169 if (mindist[i][k] != NEG_INF && mindist[k][j] != NEG_INF)
00170 #endif
00171 mindist[i][j] = MAX(mindist[i][j], mindist[i][k] + mindist[k][j]);
00172 }
00173
00174 for (i = 0; i < n_ops; i++) {
00175 if (mindist[i][i] > 0)
00176 return mindist[i][i];
00177 else
00178 mindist[i][i] = 0;
00179 }
00180 return 0;
00181 }
00182
00183
00184
00185
00186
00187
00188
00189 class MinLT {
00190 std::vector<INT> minlt;
00191 public:
00192 INT size() const { return minlt.size(); }
00193 INT operator()(INT i) const { return minlt[i]; }
00194 void Print(FILE *fp) const;
00195 MinLT(const SWP_OP_vector& v, INT ii, const MinDist& mindist);
00196 };
00197
00198 void MinLT::Print(FILE *fp) const
00199 {
00200 fprintf(fp, "MinLT: ");
00201 for (INT i = 0; i < minlt.size(); i++) {
00202 if (minlt[i] >= 0)
00203 fprintf(fp, "(%d,%d) ", i, minlt[i]);
00204 }
00205 fprintf(fp, "\n");
00206 }
00207
00208 MinLT::MinLT(const SWP_OP_vector& v, INT ii, const MinDist& mindist)
00209 :minlt(v.size(), 0)
00210 {
00211 for (INT i = 0; i < v.size(); i++) {
00212 minlt[i] = -1;
00213 OP *op = v[i].op;
00214 if (op) {
00215 minlt[i] = 1;
00216 for ( ARC_LIST *al = OP_succs(op) ; al; al = ARC_LIST_rest(al) ) {
00217 ARC *arc = ARC_LIST_first(al);
00218 if (ARC_kind(arc) == CG_DEP_REGIN) {
00219 OP *succ = ARC_succ(arc);
00220 INT succ_idx = SWP_index(succ);
00221 if (OP_opnd(succ,ARC_opnd(arc)) == OP_result(op,0)) {
00222 INT live_range = ARC_omega(arc) * ii + mindist(i,succ_idx);
00223 minlt[i] = MAX(minlt[i], live_range);
00224 }
00225 }
00226 }
00227 }
00228 }
00229 }
00230
00231
00232
00233
00234
00235
00236 INT MinAvg(INT ii, const MinLT& minlt)
00237 {
00238 INT minavg = 0;
00239 for (INT i = 0; i < minlt.size(); i++) {
00240 if (minlt(i) > 0)
00241 minavg += minlt(i);
00242 }
00243 return (INT) ceil(minavg/ii);
00244 }
00245
00246
00247
00248
00249
00250
00251 class Slack {
00252 std::vector<INT> estart;
00253 std::vector<INT> lstart;
00254 INT start;
00255 INT stop;
00256 bool trace;
00257
00258 public:
00259 INT Start() const { return start; }
00260 INT Stop() const { return stop; }
00261 INT Estart(INT i) const { return estart[i]; }
00262 INT Lstart(INT i) const { return lstart[i]; }
00263 INT operator()(INT i) const { return lstart[i] - estart[i]; }
00264 void Verify() const;
00265 void Print(FILE *fp) const;
00266 void Print(FILE *fp, const SWP_OP_vector& v) const;
00267 void Set_last_cycle(const SWP_OP_vector& v, INT last_cycle, const MinDist& mindist);
00268 void Relax_Precedence(const SWP_OP_vector& v, const std::vector<INT>& unplaced, const std::vector<INT>& need_relax, const MinDist& mindist);
00269 void Update_Slack_From_Placed_Ops(const SWP_OP_vector& v, const MinDist& mindist);
00270 void Update_Slack_From_Placed_Op(INT candidate,
00271 const SWP_OP_vector& v, const MinDist& mindist);
00272 Slack(const SWP_OP_vector& v, INT start_idx, INT stop_index, INT ii, const MinDist& m, bool trace);
00273 };
00274
00275
00276
00277 void Slack::Verify() const
00278 {
00279 INT min_cycle = estart[start];
00280 INT max_cycle = lstart[stop];
00281 FmtAssert(min_cycle >= 0, ("Slack: min_cycle (%d) < 0", min_cycle));
00282 for (INT i = 0; i < estart.size(); i++) {
00283 FmtAssert(estart[i] <= lstart[i],
00284 ("Slack: estart (%d) > lstart (%d) for OP %d\n",estart[i], lstart[i], i));
00285 FmtAssert(min_cycle <= estart[i],
00286 ("Slack: min_cycle (%d) > estart (%d) for OP %d\n", min_cycle, estart[i], i));
00287 FmtAssert(max_cycle >= lstart[i],
00288 ("Slack: max_cycle (%d) < lstart (%d) for OP %d\n", max_cycle, lstart[i], i));
00289 }
00290 }
00291
00292 void Slack::Print(FILE *fp) const
00293 {
00294 for (INT i = 0; i < estart.size(); i++) {
00295 fprintf(fp, "[%d] e=%d l=%d s=%d\n", i, estart[i], lstart[i],
00296 lstart[i] - estart[i]);
00297 }
00298 }
00299
00300
00301 void Slack::Print(FILE *fp, const SWP_OP_vector& v) const
00302 {
00303 for (INT i = 0; i < v.size(); i++) {
00304 fprintf(fp, "[%d] e=%d l=%d s=%d", i, estart[i], lstart[i],
00305 lstart[i] - estart[i]);
00306 if (v[i].placed)
00307 fprintf(fp, ", cycle=%d", v[i].cycle);
00308 fprintf(fp, "\n");
00309 }
00310 }
00311
00312 void Slack::Set_last_cycle(const SWP_OP_vector& v, INT last_cycle, const MinDist& mindist)
00313 {
00314 for (INT i = 0; i < v.size(); i++) {
00315 OP *op = v[i].op;
00316 if (op) {
00317 estart[i] = mindist(start, i);
00318 lstart[i] = last_cycle - mindist(i, stop);
00319 } else {
00320 estart[i] = 0;
00321 lstart[i] = 0;
00322 }
00323 }
00324 estart[start] = 0;
00325 lstart[start] = last_cycle - mindist(start, stop);
00326 estart[stop] = mindist(start, stop);
00327 lstart[stop] = last_cycle;
00328 if (trace) {
00329 fprintf(TFile, "set_last_cycle:\n");
00330 Print(TFile, v);
00331 }
00332 }
00333
00334 void Slack::Relax_Precedence(const SWP_OP_vector& v, const std::vector<INT>& unplaced,
00335 const std::vector<INT>& need_relax, const MinDist& mindist)
00336 {
00337 if (trace) {
00338 fprintf(TFile, "before relax_precedence:\n");
00339 Print(TFile, v);
00340 }
00341 std::vector<bool> processed(v.size(), false);
00342 std::vector<INT> need_process;
00343 {
00344 for (INT u = 0; u < unplaced.size(); u++) {
00345 INT i = unplaced[u];
00346 if (v[i].op)
00347 need_process.push_back(i);
00348 }
00349 for (INT r = 0; r < need_relax.size(); r++) {
00350 INT i = need_relax[r];
00351 if (v[i].op)
00352 need_process.push_back(i);
00353 }
00354 while (need_process.size() > 0) {
00355 INT i = need_process.back();
00356 need_process.pop_back();
00357 processed[i] = true;
00358 ARC_LIST* al;
00359 for (al = OP_preds(v[i].op) ; al; al = ARC_LIST_rest(al) ) {
00360 ARC *arc = ARC_LIST_first(al);
00361 OP *pred = ARC_pred(arc);
00362 INT pred_idx = SWP_index(pred);
00363 if (!v[pred_idx].placed && !processed[pred_idx]) {
00364 processed[pred_idx] = true;
00365 need_process.push_back(pred_idx);
00366 }
00367 }
00368 for (al = OP_succs(v[i].op) ; al; al = ARC_LIST_rest(al) ) {
00369 ARC *arc = ARC_LIST_first(al);
00370 OP *succ = ARC_succ(arc);
00371 INT succ_idx = SWP_index(succ);
00372 if (!v[succ_idx].placed && !processed[succ_idx]) {
00373 processed[succ_idx] = true;
00374 need_process.push_back(succ_idx);
00375 }
00376 }
00377 }
00378 }
00379
00380
00381
00382
00383 INT last_cycle = lstart[stop];
00384 for (INT i=0; i < processed.size(); i++) {
00385 if (!processed[i]) continue;
00386 if (trace) {
00387 fprintf(TFile, "relax_precedence for %d\n", i);
00388 }
00389 estart[i] = mindist(start, i);
00390 lstart[i] = last_cycle - mindist(i, stop);
00391 for (INT j = 0; j < v.size(); j++) {
00392 if (v[j].placed) {
00393 INT cycle = v[j].cycle;
00394 estart[i] = MAX(estart[i], cycle + mindist(j, i));
00395 lstart[i] = MIN(lstart[i], cycle - mindist(i, j));
00396 }
00397 }
00398 }
00399 if (trace) {
00400 fprintf(TFile, "after relax_precedence:\n");
00401 Print(TFile, v);
00402 }
00403 }
00404
00405 void Slack::Update_Slack_From_Placed_Op(INT candidate,
00406 const SWP_OP_vector& v, const MinDist& mindist)
00407 {
00408 INT cycle = v[candidate].cycle;
00409 if (trace) {
00410 fprintf(TFile, "update_slack_from_placed_op:\n");
00411 fprintf(TFile, "candidate = %d, cycle = %d\n", candidate, cycle);
00412 }
00413 for (INT i = 0; i < v.size(); i++) {
00414 if (!v[i].placed) {
00415 estart[i] = MAX(estart[i], cycle + mindist(candidate, i));
00416 lstart[i] = MIN(lstart[i], cycle - mindist(i, candidate));
00417 }
00418 }
00419 if (trace) {
00420 Print(TFile, v);
00421 }
00422 }
00423
00424 void Slack::Update_Slack_From_Placed_Ops(const SWP_OP_vector& v, const MinDist& mindist)
00425 {
00426 INT last_cycle = lstart[stop];
00427 if (trace) {
00428 fprintf(TFile, "update_slack_from_placed_ops:\n");
00429 fprintf(TFile, "last_cycle = %d, start = %d, stop = %d\n", last_cycle, start, stop);
00430 }
00431 for (INT i = 0; i < v.size(); i++) {
00432 if (v[i].op && !v[i].placed) {
00433 estart[i] = mindist(start, i);
00434 lstart[i] = last_cycle - mindist(i, stop);
00435 for (INT j = 0; j < v.size(); j++) {
00436 if (v[j].placed) {
00437 INT cycle = v[j].cycle;
00438 estart[i] = MAX(estart[i], cycle + mindist(j, i));
00439 lstart[i] = MIN(lstart[i], cycle - mindist(i, j));
00440 }
00441 }
00442 }
00443 }
00444 if (trace) {
00445 Print(TFile, v);
00446 }
00447 }
00448
00449
00450 Slack::Slack(const SWP_OP_vector& v, INT start_idx, INT stop_idx, INT ii, const MinDist &mindist, bool trace)
00451 :start(start_idx),stop(stop_idx),estart(stop_idx+1,0),lstart(stop_idx+1,0),trace(trace)
00452 {
00453
00454
00455
00456 INT len = (INT) ceil(((double) mindist(start, stop) + 1) / ii) * ii;
00457 if (mindist(start, stop) > 8) {
00458 len = MAX(mindist(start,stop) + 1 + 2 , len);
00459 len = (INT) ceil(((double) len) / ii) * ii;
00460 }
00461 Set_last_cycle(v, len-1, mindist);
00462 if (trace) {
00463 fprintf(TFile, "slack:\n");
00464 Print(TFile, v);
00465 }
00466 }
00467
00468
00469
00470
00471
00472
00473 class MRT {
00474 INT ii;
00475 INT grainy_resources_length;
00476 TI_RES_RES *resources;
00477
00478 public:
00479 TI_RES_RES *Res() { return resources; }
00480
00481 void Reserve_Op_Resources(const SWP_OP& swp_op, INT cycle) {
00482 TI_RES_RES_Reserve_Resources(resources, OP_code(swp_op.op), cycle);
00483 }
00484 void Unreserve_Op_Resources(const SWP_OP& swp_op) {
00485 TI_RES_RES_Unreserve_Resources(resources, OP_code(swp_op.op), swp_op.cycle);
00486 }
00487 bool Resources_Available(const SWP_OP& swp_op, INT cycle) const {
00488 return TI_RES_RES_Resources_Available(resources, OP_code(swp_op.op), cycle);
00489 }
00490 bool Resources_Grainy(const SWP_OP& swp_op) const {
00491
00492 return TI_RES_RES_Resources_Length(resources, OP_code(swp_op.op)) >= grainy_resources_length;
00493 }
00494 bool Resources_Equivalent(const SWP_OP& sop1, const SWP_OP& sop2) const {
00495 return
00496 sop1.op == sop2.op ||
00497 TI_RES_RES_Resources_Equivalent(resources, OP_code(sop1.op), OP_code(sop2.op));
00498 }
00499 bool Resources_Relevant(const SWP_OP& sop1, const SWP_OP& sop2) const {
00500 if (sop1.cycle < sop2.cycle)
00501 return TI_RES_RES_Resources_Relevant(resources, OP_code(sop1.op), OP_code(sop2.op),
00502 sop2.cycle - sop1.cycle);
00503 else
00504 return TI_RES_RES_Resources_Relevant(resources, OP_code(sop2.op), OP_code(sop1.op),
00505 sop1.cycle - sop2.cycle);
00506 }
00507 void Verify() const {
00508 FmtAssert(! TI_RES_RES_Is_Bad_II(resources, ii), ("MRT: bad II."));
00509 }
00510 INT Find_Resources_In_Range(INT candidate, const SWP_OP_vector& v,
00511 INT earliest, INT latest, bool top_down) const;
00512 void Verify2(const SWP_OP_vector& v);
00513 MRT(const SWP_OP_vector& v, INT _ii, MEM_POOL *pool);
00514 };
00515 #ifdef TARG_IA64
00516 BOOL Violate_forbid_latency(INT candidate, const SWP_OP_vector& v, INT cycle, INT loop_cycle)
00517 {
00518
00519 if (OP_load(v[candidate].op)) {
00520 for (INT i=0; i<v.size();i++){
00521 if (!v[i].op || !v[i].placed) continue;
00522 if (v[i].cycle != cycle) continue;
00523 if (!OP_load(v[i].op) && i!=candidate) continue;
00524 if (Cache_Has_Conflict(v[candidate].op,v[i].op, CG_DEP_MEMREAD))
00525 return TRUE;
00526 }
00527 }
00528 return FALSE;
00529 }
00530 #endif
00531
00532 INT MRT::Find_Resources_In_Range(INT candidate, const SWP_OP_vector& v,
00533 INT earliest, INT latest, bool top_down) const {
00534 INT incr = top_down ? 1 : -1;
00535 INT begin = top_down ? earliest : latest;
00536 INT finish = top_down ? (latest+1) : (earliest-1);
00537 INT cycle;
00538 Is_True (earliest <= latest, ("swp sched: earliest %d > latest %d", earliest, latest));
00539 for (cycle = begin; cycle != finish; cycle += incr) {
00540 #ifdef TARG_IA64
00541 if (Violate_forbid_latency(candidate, v, cycle, ii)) continue;
00542 #endif
00543 if (Resources_Available(v[candidate], cycle))
00544 return cycle;
00545 }
00546 return cycle;
00547 }
00548
00549
00550 void MRT::Verify2(const SWP_OP_vector& v)
00551 {
00552 CXX_MEM_POOL local_mem_pool("MRT verify", FALSE);
00553 MRT mrt_verify(v, ii, local_mem_pool());
00554 for (INT i = 0; i < v.size(); i++) {
00555 OP *op = v[i].op;
00556 if (op && v[i].placed)
00557 mrt_verify.Reserve_Op_Resources(v[i], v[i].cycle);
00558 }
00559 FmtAssert(TI_RES_RES_Equal(Res(), mrt_verify.Res()),
00560 ("resource table inconsistent state."));
00561 }
00562
00563
00564 MRT::MRT(const SWP_OP_vector& v, INT _ii, MEM_POOL *pool):ii(_ii)
00565 {
00566 resources = TI_RES_RES_Alloc(TRUE , pool);
00567 for (INT i = 0; i < v.size(); i++) {
00568 OP *op = v[i].op;
00569 if (op)
00570 TI_RES_RES_Has_TOP(resources, OP_code(op));
00571 }
00572 TI_RES_RES_Set_BB_Cycle_Count(resources, ii);
00573 grainy_resources_length = SWP_Options.Grainy_Resources_Length;
00574 }
00575
00576
00577
00578
00579
00580
00581
00582 class LT_Heuristics {
00583
00584 bool trace;
00585 bool trace_details;
00586 bool min_retry;
00587 std::vector<INT> unplaced;
00588 std::vector<INT> need_relax;
00589 INT max_sched_length;
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600 bool Sched_Top_Down(INT candidate, SWP_OP_vector& v, const Slack& slack)
00601 {
00602 if (slack.Estart(candidate) == slack.Lstart(candidate)) return true;
00603 if (v[candidate].direction == SWP_TOP_DOWN) return true;
00604 if (v[candidate].direction == SWP_BOTTOM_UP) return false;
00605 INT placed_preds = 0;
00606 INT placed_succs = 0;
00607 ARC_LIST* al;
00608 for (al = OP_preds(v[candidate].op) ; al; al = ARC_LIST_rest(al) ) {
00609 ARC *arc = ARC_LIST_first(al);
00610 OP *pred = ARC_pred(arc);
00611 INT pred_idx = SWP_index(pred);
00612 if (v[pred_idx].placed)
00613 placed_preds++;
00614 }
00615 for (al = OP_succs(v[candidate].op) ; al; al = ARC_LIST_rest(al) ) {
00616 ARC *arc = ARC_LIST_first(al);
00617 OP *succ = ARC_succ(arc);
00618 INT succ_idx = SWP_index(succ);
00619 if (v[succ_idx].placed)
00620 placed_succs++;
00621 }
00622
00623
00624
00625
00626 if (placed_preds >= placed_succs) {
00627 v[candidate].direction = SWP_TOP_DOWN;
00628 return true;
00629 } else {
00630 v[candidate].direction = SWP_BOTTOM_UP;
00631 return false;
00632 }
00633 }
00634
00635 public:
00636
00637
00638
00639
00640
00641
00642
00643
00644 void Init_SWP_OP_state(SWP_OP_vector& v, INT ii,
00645 const Slack& slack, const MinLT& minlt, const MRT& mrt,
00646 bool mop_critical, bool flop_critical)
00647 {
00648 INT start = v.start;
00649 INT stop = v.stop;
00650
00651
00652 v[start].placed = true;
00653 v[start].cycle = slack.Estart(start);
00654 v[stop].placed = true;
00655 v[stop].cycle = slack.Lstart(stop);
00656 max_sched_length = slack.Lstart(stop) + SWP_Options.Max_Schedule_Incr;
00657
00658 for (INT i = 0; i < v.size(); i++) {
00659 OP *op = v[i].op;
00660 if (!op) continue;
00661
00662 v[i].placed = false;
00663 v[i].cycle = 0;
00664 v[i].scale = 1.0;
00665 v[i].trials = 0;
00666
00667
00668 if (mrt.Resources_Grainy(v[i]))
00669 v[i].scale *= 0.5;
00670
00671
00672 if (mop_critical && OP_memory(op))
00673 v[i].scale *= 0.1;
00674 if (flop_critical && OP_flop(op))
00675 v[i].scale *= 0.2;
00676
00677 if (SWP_Options.Sched_Direction == 1)
00678 v[i].direction = SWP_TOP_DOWN;
00679 else if (SWP_Options.Sched_Direction == 2)
00680 v[i].direction = SWP_BOTTOM_UP;
00681 else {
00682
00683 INT stretchable_inputs = 0;
00684 INT stretchable_outputs = 1;
00685 for (INT j = 0; j < OP_opnds(op); j++) {
00686
00687
00688 for (INT k = j+1; k < OP_opnds(op); k++) {
00689 if (OP_opnd(op, j) == OP_opnd(op, k))
00690 goto next_opnd;
00691 }
00692
00693 ARC *arc;
00694 arc = ARC_LIST_Find_First(OP_preds(op), CG_DEP_REGIN, j);
00695 if (arc == NULL)
00696 goto next_opnd;
00697 if (ARC_pred(arc) == op)
00698 goto next_opnd;
00699
00700
00701 INT pred_op_idx;
00702 pred_op_idx = SWP_index(ARC_pred(arc));
00703 if (slack.Estart(pred_op_idx) + minlt(pred_op_idx) <
00704 ARC_omega(arc) * ii + slack.Lstart(i))
00705 stretchable_inputs++;
00706
00707 next_opnd: ;
00708 }
00709
00710
00711
00712 if (stretchable_inputs > stretchable_outputs)
00713 v[i].direction = SWP_TOP_DOWN;
00714 else if (stretchable_inputs < stretchable_outputs)
00715 v[i].direction = SWP_BOTTOM_UP;
00716 else
00717 v[i].direction = SWP_UNKOWN;
00718 }
00719 }
00720 }
00721
00722
00723
00724 INT Choose_Op(const SWP_OP_vector& v, const Slack& slack)
00725 {
00726 INT candidate = -1;
00727 double highest_priority = 100000;
00728 switch (SWP_Options.Heuristics) {
00729 case 0:
00730 {
00731
00732
00733 INT candidate_lstart = 0;
00734 INT trials = 0;
00735 for (INT i = 0; i < v.size(); i++) {
00736 if (v[i].op == NULL) continue;
00737 if (v[i].placed) continue;
00738 if (i == slack.Start()) continue;
00739 if (i == slack.Stop()) continue;
00740
00741 INT pri = (INT) (slack(i) * v[i].scale);
00742 if (pri < highest_priority ||
00743 pri == highest_priority &&
00744 (slack.Lstart(i) < candidate_lstart ||
00745 (slack.Lstart(i) == candidate_lstart &&
00746 v[i].trials < trials))) {
00747 highest_priority = pri;
00748 candidate_lstart = slack.Lstart(i);
00749 trials = v[i].trials;
00750 candidate = i;
00751 }
00752 }
00753 }
00754 #ifdef TARG_IA64
00755 break;
00756 #endif
00757 case 1:
00758 {
00759
00760
00761 INT candidate_slack = 0;
00762 for (INT i = 0; i < v.size(); i++) {
00763 if (v[i].op == NULL) continue;
00764 if (v[i].placed) continue;
00765 if (i == slack.Start()) continue;
00766 if (i == slack.Stop()) continue;
00767 INT pri = slack.Lstart(i);
00768 if (pri < highest_priority ||
00769 pri == highest_priority && slack(i) < candidate_slack) {
00770 highest_priority = pri;
00771 candidate_slack = slack(i);
00772 candidate = i;
00773 }
00774 }
00775 }
00776 #ifdef TARG_IA64
00777 break;
00778 #endif
00779 case 2:
00780 {
00781
00782
00783 INT last_cycle = slack.Lstart(slack.Stop());
00784 INT candidate_slack = 0;
00785 for (INT i = 0; i < v.size(); i++) {
00786 if (v[i].op == NULL) continue;
00787 if (v[i].placed) continue;
00788 if (i == slack.Start()) continue;
00789 if (i == slack.Stop()) continue;
00790 INT pri = last_cycle - slack.Estart(i);
00791 if (pri < highest_priority ||
00792 pri == highest_priority && slack(i) < candidate_slack) {
00793 highest_priority = pri;
00794 candidate_slack = slack(i);
00795 candidate = i;
00796 }
00797 }
00798 }
00799 }
00800 return candidate;
00801 }
00802
00803
00804
00805
00806
00807 std::pair<bool, bool>
00808 Choose_Issue_Cycle(INT candidate, SWP_OP_vector& v, INT ii,
00809 const Slack& slack, const MRT& mrt)
00810 {
00811 v[candidate].trials++;
00812 v[candidate].placed = true;
00813 INT earliest = slack.Estart(candidate);
00814 INT latest = MIN(earliest + ii - 1, slack.Lstart(candidate));
00815 bool top_down = Sched_Top_Down(candidate, v, slack);
00816
00817 Is_True(earliest < 128 * ii, ("SWP Choose_Issue_Cycle: earliest=%d\n", earliest));
00818 Is_True(latest < 128 * ii, ("SWP Choose_Issue_Cycle: latest=%d\n", latest));
00819
00820
00821
00822 if (min_retry && v[candidate].trials > 1) {
00823 INT e = top_down ? MAX(earliest, v[candidate].cycle+1) : earliest;
00824 INT l = top_down ? latest : MIN(latest, v[candidate].cycle-1);
00825 if (e <= l) {
00826 INT cycle = mrt.Find_Resources_In_Range(candidate, v, e, l, top_down);
00827 if (e <= cycle && cycle <= l) {
00828 v[candidate].cycle = cycle;
00829 return std::pair<bool,bool>(false, false);
00830 }
00831 }
00832 }
00833
00834
00835 INT cycle = mrt.Find_Resources_In_Range(candidate, v, earliest, latest, top_down);
00836 if (earliest <= cycle && cycle <= latest) {
00837 v[candidate].cycle = cycle;
00838 return std::pair<bool,bool>(false, false);
00839 }
00840
00841
00842
00843 if (v[candidate].trials == 1)
00844 v[candidate].cycle = top_down ? earliest : latest;
00845 else
00846 top_down ? ++v[candidate].cycle : --v[candidate].cycle;
00847 return std::pair<bool,bool>(v[candidate].cycle < earliest || v[candidate].cycle > latest
00848 ,
00849 !mrt.Resources_Available(v[candidate], v[candidate].cycle)
00850 );
00851 }
00852
00853
00854
00855 void Eject_Precedence_Conflict_OPs(INT candidate, SWP_OP_vector& v,
00856 const Slack& slack, const MinDist& mindist, MRT& mrt)
00857 {
00858 std::insert_iterator<std::vector<INT> > ui(unplaced, unplaced.end());
00859 std::insert_iterator<std::vector<INT> > ri(need_relax, need_relax.end());
00860 INT sched_cycle = v[candidate].cycle;
00861 for (INT i = 0; i < v.size(); i++) {
00862 if (v[i].placed) {
00863 INT estart = sched_cycle + mindist(candidate,i);
00864 INT lstart = sched_cycle - mindist(i, candidate);
00865 if (v[i].cycle < estart || v[i].cycle > lstart) {
00866 *ui = i;
00867 v[i].placed = false;
00868 if (trace_details)
00869 fprintf(TFile, " eject OP %d due to precedence constraints.\n", i);
00870 }
00871 }
00872 else if (v[i].op) {
00873
00874
00875
00876
00877 INT estart = sched_cycle + mindist(candidate,i);
00878 INT lstart = sched_cycle - mindist(i, candidate);
00879 if (slack.Estart(i) > estart || slack.Lstart(i) < lstart) {
00880 *ri = i;
00881 if (trace_details)
00882 fprintf(TFile, " add OP %d to relaxed due to precedence constraints.\n", i);
00883 }
00884 }
00885 }
00886 }
00887
00888
00889 void Eject_Resources_Conflict_OPs(INT candidate, SWP_OP_vector& v, INT ii, MRT& mrt)
00890 {
00891 std::insert_iterator<std::vector<INT> > ins(unplaced, unplaced.end());
00892 INT sched_cycle = v[candidate].cycle;
00893 bool ops_unplaced = false;
00894 for (INT i = 0; i < v.size(); i++) {
00895 if (v[i].placed &&
00896 v[i].op &&
00897 (v[i].cycle - sched_cycle) % ii == 0 &&
00898 i != candidate &&
00899 mrt.Resources_Equivalent(v[i], v[candidate])) {
00900 *ins = i;
00901 v[i].placed = false;
00902 ops_unplaced = true;
00903 if (trace_details)
00904 fprintf(TFile, " eject OP %d due to equivalent resources.\n", i);
00905 }
00906 }
00907 if (!ops_unplaced) {
00908 for (INT i = 0; i < v.size(); i++) {
00909 if (v[i].placed &&
00910 v[i].op &&
00911 i != candidate &&
00912 mrt.Resources_Relevant(v[i], v[candidate])) {
00913 *ins = i;
00914 v[i].placed = false;
00915 ops_unplaced = true;
00916 if (trace_details)
00917 fprintf(TFile, " eject OP %d due to relevant resources.\n", i);
00918 }
00919 }
00920 }
00921 if (!ops_unplaced) {
00922
00923 for (INT i = 0; i < v.size(); i++) {
00924 if (v[i].placed &&
00925 v[i].op &&
00926 i != candidate &&
00927 ((v[i].cycle - v[candidate].cycle) % ii) == 0) {
00928 *ins = i;
00929 v[i].placed = false;
00930 ops_unplaced = true;
00931 if (trace_details)
00932 fprintf(TFile, " eject OP %d due to common resources.\n", i);
00933 }
00934 }
00935 }
00936 Is_True(ops_unplaced, ("Eject_Resources_Conflict_OPs: cannot eject."));
00937 }
00938
00939
00940 void Update_Resources(INT candidate, SWP_OP_vector& v, MRT& mrt)
00941 {
00942 for (INT u = 0; u < unplaced.size(); u++) {
00943 INT i = unplaced[u];
00944 if (v[i].op)
00945 mrt.Unreserve_Op_Resources(v[i]);
00946 }
00947 Is_True(mrt.Resources_Available(v[candidate], v[candidate].cycle),
00948 ("Update_Reources: cannot eject enough resources for OP %d.", candidate));
00949 mrt.Reserve_Op_Resources(v[candidate], v[candidate].cycle);
00950 }
00951
00952
00953 bool Update_Precedence(INT candidate, SWP_OP_vector& v, Slack& slack, const MinDist& mindist, MRT& mrt)
00954 {
00955 if (unplaced.size() > 0 || need_relax.size() > 0)
00956 slack.Relax_Precedence(v, unplaced, need_relax, mindist);
00957 slack.Update_Slack_From_Placed_Op(candidate, v, mindist);
00958
00959 INT start = slack.Start();
00960 INT stop = slack.Stop();
00961
00962 if (!v[start].placed || !v[stop].placed) {
00963 #pragma mips_frequency_hint NEVER
00964 INT adjustment = MAX(slack.Estart(start) - slack.Lstart(start), 0);
00965 INT sched_len = MAX(slack.Estart(stop), slack.Lstart(stop) + adjustment);
00966 if (sched_len > max_sched_length)
00967 return false;
00968 if (trace)
00969 fprintf(TFile, "Increase sched_len to %d. Adjust start by %d.\n", sched_len, adjustment);
00970 if (adjustment > 0) {
00971 INT i;
00972 for (i = 0; i < v.size(); i++) {
00973 OP *op = v[i].op;
00974 if (op && v[i].placed) {
00975 mrt.Unreserve_Op_Resources(v[i]);
00976 v[i].cycle += adjustment;
00977 if (trace)
00978 fprintf(TFile, "Adjust OP %d to cycle %d\n", i, v[i].cycle);
00979 }
00980 }
00981 for (i = 0; i < v.size(); i++) {
00982 OP *op = v[i].op;
00983 if (op && v[i].placed)
00984 mrt.Reserve_Op_Resources(v[i], v[i].cycle);
00985 }
00986 }
00987 slack.Set_last_cycle(v, sched_len, mindist);
00988
00989 v[start].placed = true;
00990 v[start].cycle = slack.Estart(start);
00991 v[stop].placed = true;
00992 v[stop].cycle = slack.Lstart(stop);
00993 slack.Update_Slack_From_Placed_Ops(v, mindist);
00994 }
00995 unplaced.erase(unplaced.begin(), unplaced.end());
00996 need_relax.erase(need_relax.begin(), need_relax.end());
00997 return true;
00998 }
00999
01000 void Print(FILE *fp) {
01001 if (unplaced.size() > 0) {
01002 fprintf(fp, "unplaced Ops: ");
01003 for (INT i = 0; i < unplaced.size(); i++)
01004 fprintf(fp, "%d ", unplaced[i]);
01005 fprintf(fp, "\n");
01006 }
01007 if (need_relax.size() > 0) {
01008 fprintf(fp, "need_relax Ops: ");
01009 for (INT i = 0; i < need_relax.size(); i++)
01010 fprintf(fp, "%d ", need_relax[i]);
01011 fprintf(fp, "\n");
01012 }
01013 }
01014
01015 LT_Heuristics(bool t, bool d):trace(t), trace_details(d), min_retry(SWP_Options.Min_Retry) {}
01016 };
01017
01018
01019 void Modulo_Schedule_Verify(SWP_OP_vector &v, INT ii, MRT& mrt)
01020 {
01021
01022 mrt.Verify2(v);
01023
01024
01025 for (INT i = 0; i < v.size(); i++) {
01026 if (v[i].op) {
01027 FmtAssert(v[i].placed, ("op is not placed."));
01028 INT sched_cycle = v[i].cycle;
01029 ARC_LIST *al;
01030 for (al = OP_preds(v[i].op) ; al; al = ARC_LIST_rest(al) ) {
01031 ARC *arc = ARC_LIST_first(al);
01032 OP *pred = ARC_pred(arc);
01033 INT pred_idx = SWP_index(pred);
01034
01035 FmtAssert(sched_cycle - v[pred_idx].cycle >= ARC_latency(arc) - ARC_omega(arc) * ii,
01036 ("OP %d at cycle %d and OP %d at cycle %d violated precedence constraints of %d cycles.",
01037 pred_idx, v[pred_idx].cycle, i, v[i].cycle, ARC_latency(arc)));
01038 }
01039 for (al = OP_succs(v[i].op) ; al; al = ARC_LIST_rest(al) ) {
01040 ARC *arc = ARC_LIST_first(al);
01041 OP *succ = ARC_succ(arc);
01042 INT succ_idx = SWP_index(succ);
01043 FmtAssert(v[succ_idx].cycle - sched_cycle >= ARC_latency(arc) - ARC_omega(arc) * ii,
01044 ("OP %d at cycle %d and OP %d at cycle %d violated precedence constraints of %d cycles.",
01045 i, v[i].cycle, succ_idx, v[succ_idx].cycle, ARC_latency(arc)));
01046 }
01047 }
01048 }
01049 }
01050
01051
01052 void Modulo_Schedule_Succeeded(SWP_OP_vector &v,
01053 INT ii, INT min_ii, INT max_ii, const MinDist& mindist,
01054 MRT& mrt, bool trace)
01055 {
01056 #ifdef Is_True_On
01057 Modulo_Schedule_Verify(v, ii, mrt);
01058 #endif
01059
01060
01061 v.succeeded = true;
01062 v.min_ii = CG_LOOP_min_ii;
01063 v.res_min_ii = CG_LOOP_res_min_ii;
01064 v.rec_min_ii = CG_LOOP_rec_min_ii;
01065 v.ii = ii;
01066 v.min_sl = mindist(v.start,v.stop)+1;
01067
01068
01069 INT adjustment = ((v[v.branch].cycle + ii) / ii) * ii - 1 - v[v.branch].cycle;
01070 INT max_cycle;
01071 INT min_cycle;
01072 do {
01073 min_cycle = 1000;
01074 max_cycle = 0;
01075 for (INT i = 0; i < v.size(); i++) {
01076 v[i].cycle += adjustment;
01077
01078
01079
01080
01081
01082 if (v[i].op && (i != v.branch || !v.is_doloop)) {
01083 min_cycle = MIN(min_cycle, v[i].cycle);
01084 max_cycle = MAX(max_cycle, v[i].cycle);
01085 }
01086 if (v[i].op && OP_dummy(v[i].op))
01087 v[i].op = NULL;
01088 }
01089 adjustment = -ii;
01090 } while (min_cycle >= ii);
01091
01092 v.sc = max_cycle / ii + 1;
01093
01094 max_cycle = MAX(max_cycle, v[v.branch].cycle);
01095 v.sl = max_cycle - min_cycle + 1;
01096
01097 Is_True(min_cycle < ii, ("first operation is not at stage 0."));
01098
01099 if (!v.is_doloop) {
01100 v.loop_one_more_time = true;
01101
01102
01103
01104 INT branch = v.branch;
01105 OP *br_op = v[branch].op;
01106 if (v[branch].cycle / ii == 0)
01107 v.loop_one_more_time = false;
01108 if (trace)
01109 fprintf(TFile, "while-loop has cmp and branch %s at stage 0.\n",
01110 v.loop_one_more_time ? "" : "not");
01111 }
01112 }
01113
01114
01115
01116
01117
01118 SWP_RETURN_CODE
01119 Modulo_Schedule(SWP_OP_vector &swp_op_vector, INT min_ii, INT max_ii,
01120 double incr_alpha, double incr_beta,
01121 INT budget, bool trace, bool trace_details)
01122 {
01123 #ifdef Is_True_On
01124 swp_op_vector.Verify();
01125 #endif
01126 swp_op_vector.ii = 0;
01127
01128 bool trace_slack = Get_Trace(TP_SWPIPE, 16);
01129 if (swp_op_vector.size() >= SWP_Options.OPS_Limit) {
01130 if (trace)
01131 fprintf(TFile, "MOD SCHED FAILED: loop too big!");
01132 return MOD_SCHED_FAILED;
01133 }
01134
01135 for (INT ii = min_ii;
01136 ii <= max_ii;
01137 ii = MAX(ii+1, (INT)((ii + incr_alpha) * incr_beta - incr_alpha))) {
01138
01139 if (trace)
01140 fprintf(TFile, "============================\nSWP SCHED with ii %d\n", ii);
01141
01142 bool mop_critical = ((double) swp_op_vector.num_mops / ii >
01143 2 * SWP_Options.Critical_Threshold / 100.0);
01144 bool flop_critical = ((double) swp_op_vector.num_flops / ii >
01145 2 * SWP_Options.Critical_Threshold / 100.0);
01146 if (trace) {
01147 fprintf(TFile, "swp: %d memop is %s critical\n",
01148 swp_op_vector.num_mops, mop_critical ? "" : "not");
01149 fprintf(TFile, "swp: %d flop is %s critical\n",
01150 swp_op_vector.num_flops, flop_critical ? "" : "not");
01151 }
01152
01153 CXX_MEM_POOL local_mem_pool("modulo schedule pool", FALSE);
01154 MRT mrt(swp_op_vector, ii, local_mem_pool());
01155 MinDist mindist(swp_op_vector, swp_op_vector.start, swp_op_vector.stop, swp_op_vector.branch, ii);
01156 Is_True(ii == mindist.Found_ii(), ("Modulo_Schedule: MinDist found a different ii."));
01157
01158 MinLT minlt(swp_op_vector, ii, mindist);
01159 Slack slack(swp_op_vector, swp_op_vector.start, swp_op_vector.stop, ii, mindist, trace_slack);
01160 Slack slack_static(swp_op_vector, swp_op_vector.start, swp_op_vector.stop, ii, mindist, trace_slack);
01161 LT_Heuristics heur(trace, trace_details);
01162 heur.Init_SWP_OP_state(swp_op_vector, ii, slack, minlt, mrt,
01163 mop_critical, flop_critical);
01164
01165 slack.Verify();
01166 mrt.Verify();
01167
01168 if (trace) {
01169 swp_op_vector.Print(TFile);
01170 mindist.Print(TFile);
01171 slack.Print(TFile, swp_op_vector);
01172 minlt.Print(TFile);
01173 }
01174
01175 INT num_placed;
01176 for (num_placed = 0; true; num_placed++) {
01177
01178 #ifdef Is_True_On
01179
01180 slack.Verify();
01181 mrt.Verify2(swp_op_vector);
01182 #endif
01183
01184 if (trace_details)
01185 slack.Print(TFile, swp_op_vector);
01186
01187 INT candidate = heur.Choose_Op(swp_op_vector, slack);
01188
01189 if (candidate < 0) {
01190 Modulo_Schedule_Succeeded(swp_op_vector, ii, min_ii, max_ii, mindist, mrt, trace);
01191 return MOD_SCHED_SUCCEEDED;
01192 }
01193
01194 std::pair<bool,bool> issue = heur.Choose_Issue_Cycle(candidate, swp_op_vector, ii, slack, mrt);
01195
01196 if (SWP_Options.Opt_Level == 0) {
01197 if (swp_op_vector[candidate].cycle > slack_static.Lstart(candidate) ||
01198 swp_op_vector[candidate].cycle < slack_static.Estart(candidate)) {
01199 if (trace)
01200 fprintf(TFile,
01201 "MOD SCHED FAILED: OP %d at cycle %d exceed estart (%d) lstart (%d) range.\n",
01202 candidate, swp_op_vector[candidate].cycle,
01203 slack_static.Estart(candidate), slack_static.Lstart(candidate));
01204 break;
01205 }
01206 }
01207 if (swp_op_vector[candidate].trials > budget) {
01208 if (trace)
01209 fprintf(TFile, "MOD SCHED FAILED: exceed trial budget.\n");
01210 break;
01211 }
01212
01213 if (trace) {
01214 fprintf(TFile, "placed OP %d at cycle %d estart %d lstart %d\n",
01215 candidate, swp_op_vector[candidate].cycle,
01216 slack.Estart(candidate), slack.Lstart(candidate));
01217 }
01218
01219 if (issue.second)
01220 heur.Eject_Resources_Conflict_OPs(candidate, swp_op_vector, ii, mrt);
01221
01222 if (issue.first)
01223 heur.Eject_Precedence_Conflict_OPs(candidate, swp_op_vector, slack, mindist, mrt);
01224
01225 if (trace)
01226 heur.Print(TFile);
01227
01228 heur.Update_Resources(candidate, swp_op_vector, mrt);
01229
01230 bool satisfiable = heur.Update_Precedence(candidate, swp_op_vector, slack, mindist, mrt);
01231 if (!satisfiable) {
01232 if (trace) {
01233 swp_op_vector.Print(TFile);
01234 slack.Print(TFile, swp_op_vector);
01235 fprintf(TFile, "failed to SWP due to exceeding max sched length.\n");
01236 fprintf(TFile, "current II=%d max II=%d\n", ii, max_ii);
01237 }
01238 break;
01239 }
01240 }
01241 swp_op_vector.previous_trials += num_placed;
01242 }
01243 if (trace)
01244 fprintf(TFile, "MOD SCHED FAILED: cannot find any schedule smaller than maxII cycles.\n");
01245 return MOD_SCHED_FAILED;
01246 }