00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <stdint.h>
00036
00037 #define USE_STANDARD_TYPES
00038 #include <vector>
00039 #include <list>
00040 #include "defs.h"
00041 #include "errors.h"
00042 #include "tracing.h"
00043 #include "matrix.h"
00044 #include "mempool_allocator.h"
00045 #include "cg.h"
00046 #include "cg_swp.h"
00047 #include "cg_swp_options.h"
00048 #include "cg_swp_target.h"
00049 #include "cgprep.h"
00050 #include "glob.h"
00051 #include "op.h"
00052 #include "cg_loop.h"
00053 #include "cgtarget.h"
00054 #include "ti_si.h"
00055 #include "cg_grouping.h"
00056 #include "be_util.h"
00057 #include "cggrp_microsched.h"
00058
00059 static const INT SWP_INVALID_OP_IDX = -1024;
00060
00061 static bool Trace_Swp_Bundling = false;
00062
00063 typedef mempool_allocator<bool> BOOL_MEMALLOC;
00064 typedef MATRIX<INT32, INT32_MEMALLOC> INT32_MATRIX;
00065 typedef MATRIX<bool, BOOL_MEMALLOC> BOOL_MATRIX;
00066 typedef std::list<INT32, INT32_MEMALLOC> INT32_LIST;
00067
00068
00069
00070
00071
00072
00073 class SLOT_ORDER_CMP
00074 {
00075 private:
00076
00077 const SWP_OP_vector *_op_state;
00078 const CG_GROUPING *_grouping;
00079 const BOOL_MATRIX *_dep_order;
00080 const INT32_VECTOR *_resource_slack;
00081
00082 bool _is_loop_variant(TN *tn) const
00083 {
00084 return (TN_is_register(tn) &&
00085 !TN_is_dedicated(tn) &&
00086 !TN_SET_MemberP(_op_state->tn_invariants, tn));
00087 }
00088
00089 bool _loop_variant_lhs(OP *op) const
00090 {
00091 bool is_variant = (OP_results(op) > 0);
00092 for (INT k = 0; k < OP_results(op); k++)
00093 is_variant = (is_variant && _is_loop_variant(OP_result(op, k)));
00094 return is_variant;
00095 }
00096
00097 bool _schedule_early(INT i) const
00098 {
00099 return (*_op_state)[i].direction == SWP_TOP_DOWN;
00100 }
00101
00102 bool _schedule_late(INT i) const
00103 {
00104 return (*_op_state)[i].direction == SWP_BOTTOM_UP;
00105 }
00106
00107 public:
00108
00109 SLOT_ORDER_CMP(const SWP_OP_vector &op_state,
00110 const CG_GROUPING &grouping,
00111 const BOOL_MATRIX &dep_order,
00112 const INT32_VECTOR &resource_slack):
00113 _op_state(&op_state),
00114 _grouping(&grouping),
00115 _dep_order(&dep_order),
00116 _resource_slack(&resource_slack)
00117 {}
00118
00119 bool operator ()(INT32 i, INT32 j) const
00120 {
00121 bool cmp = (*_dep_order)(i, j);
00122
00123 if (!cmp)
00124 {
00125
00126
00127 OP * const op1 = (*_op_state)[i].op;
00128 OP * const op2 = (*_op_state)[j].op;
00129 const TOP top1 = OP_code(op1);
00130 const TOP top2 = OP_code(op2);
00131
00132 if (_grouping->is_branch(top1) || _grouping->is_branch(top2))
00133 {
00134
00135
00136 cmp = _grouping->is_branch(top2) && !_grouping->is_branch(top1);
00137 }
00138 else if ((OP_l_group(op2) && !OP_l_group(op1)) ||
00139 (OP_f_group(op1) && !OP_f_group(op2)) ||
00140 ((*_resource_slack)[i] < (*_resource_slack)[j]) ||
00141 #ifdef SWP_BUNDLE_APPLY_LIFETIME_HEURISTICS
00142
00143
00144 (_schedule_early(i) && !_schedule_early(j)) ||
00145 (_schedule_late(j) && !_schedule_late(i)) ||
00146 (_loop_variant_lhs(op2) && !_loop_variant_lhs(op1)) ||
00147 #endif
00148 (_grouping->bundling_order(top1) <
00149 _grouping->bundling_order(top2)))
00150
00151 {
00152
00153
00154
00155
00156
00157
00158
00159 cmp = true;
00160 }
00161 }
00162 return cmp;
00163 }
00164 };
00165
00166
00167 inline TI_BUNDLE *
00168 TI_BUNDLE_Create(MEM_POOL *pool)
00169 {
00170 TI_BUNDLE *b = TYPE_MEM_POOL_ALLOC (TI_BUNDLE, pool);
00171 TI_BUNDLE_bundle_info(b) = TYPE_MEM_POOL_ALLOC(ISA_BUNDLE_INFO, pool);
00172 TI_BUNDLE_Clear(b);
00173 return b;
00174 }
00175
00176
00177 inline void
00178 SWP_Reserve_Bundle_Slot(TI_BUNDLE *bundle,
00179 INT32 next_slotpos,
00180 ISA_EXEC_UNIT_PROPERTY op_exec_unit,
00181 TOP top)
00182 {
00183 TI_BUNDLE_Reserve_Slot(bundle, next_slotpos, op_exec_unit);
00184 if (Trace_Swp_Bundling)
00185 fprintf(TFile, "Bundle slot %d = %s\n", next_slotpos, TOP_Name(top));
00186
00187 }
00188
00189 static void
00190 SWP_Init_Samecycle_Dep_Order(SWP_OP_vector &op_state, BOOL_MATRIX &dep_order)
00191 {
00192
00193
00194
00195
00196 for (INT i = 0; i < op_state.size(); i++)
00197 {
00198 OP * const op = op_state[i].op;
00199
00200 if (op != NULL)
00201 {
00202 for (ARC_LIST *al = OP_succs(op) ; al != NULL; al = ARC_LIST_rest(al) )
00203 {
00204 ARC * const arc = ARC_LIST_first(al);
00205 OP * const op2 = ARC_succ(arc);
00206 const INT i2 = SWP_index(op2);
00207
00208 if (op_state[i].cycle == op_state[i2].cycle && ARC_omega(arc) == 0)
00209 {
00210
00211
00212
00213
00214 if (ARC_kind(arc) != CG_DEP_MEMREAD) {
00215 dep_order(i, i2) = true;
00216 if (Trace_Swp_Bundling)
00217 fprintf(TFile, " dep_order(%d, %d)\n", i, i2);
00218 }
00219 }
00220 }
00221 }
00222 }
00223 }
00224
00225 static void
00226 SWP_Verify_Samecycle_Dep_Order(SWP_OP_vector &op_state)
00227 {
00228 for (INT i = 0; i < op_state.size(); i++) {
00229 OP * const op = op_state[i].op;
00230 if (op != NULL && !op_state[i].is_noop) {
00231 for (ARC_LIST *al = OP_succs(op) ; al != NULL; al = ARC_LIST_rest(al) ) {
00232 ARC * const arc = ARC_LIST_first(al);
00233 OP * const op2 = ARC_succ(arc);
00234 const INT i2 = SWP_index(op2);
00235
00236
00237
00238 if (ARC_is_reg(arc) && ARC_is_anti(arc)) continue;
00239
00240 if (ARC_omega(arc) == 0) {
00241 FmtAssert(i == i2 ||
00242 op_state[i].cycle != op_state[i2].cycle ||
00243 op_state[i].slot < op_state[i2].slot,
00244 ("OP%d cycle=%d slot=%d violates dependences with OP%d cycle=%d slot=%d",
00245 i, op_state[i].cycle, op_state[i].slot,
00246 i2, op_state[i2].cycle, op_state[i2].slot));
00247 }
00248
00249 }
00250 }
00251 }
00252 }
00253
00254
00255 inline INT32
00256 SWP_Num_Remaining_Slots(TI_BUNDLE *bundle)
00257 {
00258 INT32 i = TI_BUNDLE_slot_count(bundle) - 1;
00259 while (i >= 0 && !TI_BUNDLE_slot_filled(bundle, i)) --i;
00260 return (TI_BUNDLE_slot_count(bundle) - (i + 1));
00261 }
00262
00263
00264 static INT32
00265 SWP_Min_Slot_Count(SWP_OP_vector &op_state,
00266 INT32_MATRIX::iterator opset,
00267 INT32_MATRIX::iterator opset_end,
00268 const CG_GROUPING &grouping)
00269 {
00270
00271
00272 INT32 count = 0;
00273 for (INT32_MATRIX::iterator it = opset; it < opset_end; ++it)
00274 {
00275 if (op_state[*it].op != NULL)
00276 count += grouping.num_slots(OP_code(op_state[*it].op));
00277 }
00278 return count;
00279 }
00280
00281
00282 static std::pair<bool, ISA_EXEC_UNIT_PROPERTY>
00283 SWP_Noop_Property(CG_GROUPING &grouping,
00284 TI_BUNDLE *bundle,
00285 INT32 slot_pos,
00286 bool end_of_group)
00287 {
00288 INT preference;
00289 bool found_prop = false;
00290 ISA_EXEC_UNIT_PROPERTY prop;
00291
00292 #if 0
00293
00294
00295
00296 if (end_of_group)
00297 {
00298 prop = ISA_EXEC_PROPERTY_B_Unit;
00299 found_prop = TI_BUNDLE_Slot_Available(bundle, prop, slot_pos);
00300 }
00301 #endif
00302
00303
00304
00305
00306 for (preference = 0;
00307 !found_prop &&
00308 preference < grouping.no_of_preferred_slot_kinds(slot_pos);
00309 ++preference)
00310 {
00311 prop = grouping.get_preferred_slot_kind(slot_pos, preference);
00312 if (grouping.avail_resource_units(prop) > 0)
00313 found_prop = TI_BUNDLE_Slot_Available(bundle, prop, slot_pos);
00314 }
00315
00316
00317
00318
00319 for (preference = 0;
00320 !found_prop &&
00321 preference < grouping.no_of_preferred_slot_kinds(slot_pos);
00322 ++preference)
00323 {
00324 prop = grouping.get_preferred_slot_kind(slot_pos, preference);
00325 found_prop = TI_BUNDLE_Slot_Available(bundle, prop, slot_pos);
00326 }
00327
00328 return std::pair<bool, ISA_EXEC_UNIT_PROPERTY>(found_prop, prop);
00329 }
00330
00331
00332 static INT32
00333 SWP_Append_Noop(SWP_OP_vector &op_state,
00334 CG_GROUPING &grouping,
00335 INT32 append_after_op_idx,
00336 INT32 next_op_idx,
00337 TI_BUNDLE *bundle,
00338 INT32 slot_pos)
00339 {
00340
00341
00342
00343
00344 Is_True(append_after_op_idx >= 0,
00345 ("Must have a previous op for SWP_Append_Noop"));
00346
00347
00348
00349
00350
00351 bool found_prop = false;
00352 ISA_EXEC_UNIT_PROPERTY slot_prop;
00353
00354 if (next_op_idx != SWP_INVALID_OP_IDX)
00355 {
00356
00357
00358
00359
00360 for (INT preference = 0;
00361 !found_prop &&
00362 preference < grouping.no_of_preferred_slot_kinds(slot_pos);
00363 ++preference)
00364 {
00365 slot_prop = grouping.get_preferred_slot_kind(slot_pos, preference);
00366 if (grouping.avail_resource_units(slot_prop) > 0) {
00367 if (slot_pos+1 < TI_BUNDLE_slot_count(bundle)) {
00368 ISA_EXEC_UNIT_PROPERTY tmp_slot_prop;
00369 TI_BUNDLE_Reserve_Slot(bundle, slot_pos, slot_prop);
00370 found_prop =
00371 CGTARG_Bundle_Slot_Available(bundle,
00372 op_state[next_op_idx].op,
00373 slot_pos+1,
00374 &tmp_slot_prop,
00375 FALSE,
00376 &grouping);
00377 }
00378 }
00379 }
00380 }
00381
00382 if (!found_prop)
00383 {
00384
00385
00386
00387
00388 const std::pair<bool, ISA_EXEC_UNIT_PROPERTY> prop =
00389 SWP_Noop_Property(grouping, bundle, slot_pos,
00390 next_op_idx == SWP_INVALID_OP_IDX && slot_pos > 0);
00391
00392 found_prop = prop.first;
00393 slot_prop = prop.second;
00394 }
00395
00396 Is_True(found_prop,
00397 ("Cannot find suitable ISA_EXEC_UNIT_PROPERTY for slot %d"
00398 " in %s bundle in SWP_Append_Noop",
00399 slot_pos,
00400 (ISA_EXEC_Name(TI_BUNDLE_pack_code(bundle)) != NULL?
00401 ISA_EXEC_Name(TI_BUNDLE_pack_code(bundle)): "unknown")));
00402
00403
00404
00405 const TOP ntop = CGTARG_Noop_Top(slot_prop);
00406 OP *noop = Mk_OP(ntop, True_TN, Gen_Literal_TN(0, 4));
00407 SWP_OP swp_noop = op_state[append_after_op_idx];
00408
00409 swp_noop.op = noop;
00410 swp_noop.is_noop = true;
00411 swp_noop.slot = grouping.absolute_slot_no(slot_pos);
00412
00413
00414
00415
00416 op_state.push_back(swp_noop);
00417
00418 BB_Insert_Op_After(OP_bb(op_state[append_after_op_idx].op),
00419 op_state[append_after_op_idx].op,
00420 noop);
00421 OP_scycle(noop) = -1;
00422
00423 SWP_Reserve_Bundle_Slot(bundle, slot_pos, slot_prop, ntop);
00424 grouping.reserve_resource(slot_prop);
00425 Set_OP_bundled(noop);
00426 if (ntop == TOP_nop_m)
00427 Set_OP_m_unit(noop);
00428
00429
00430
00431 if (TI_BUNDLE_stop_bit(bundle, slot_pos))
00432 Set_OP_end_group(noop);
00433
00434 return op_state.size() - 1;
00435 }
00436
00437
00438 static void
00439 SWP_Set_Stop_At(CG_GROUPING &grouping,
00440 TI_BUNDLE *bundle,
00441 OP *op,
00442 INT32 slotpos)
00443 {
00444 const bool start_next_group_in_this_bundle =
00445 (slotpos < (TI_BUNDLE_slot_count(bundle) - 1));
00446
00447 TI_BUNDLE_Reserve_Stop_Bit(bundle, slotpos);
00448 Set_OP_end_group(op);
00449 grouping.start_new_group(start_next_group_in_this_bundle);
00450
00451 if (Trace_Swp_Bundling)
00452 fprintf(TFile, "<Stop-bit>\n");
00453 }
00454
00455
00456 static void
00457 SWP_Fillup_Bundle(SWP_OP_vector &op_state,
00458 CG_GROUPING &grouping,
00459 INT32 &prev_op_idx,
00460 TI_BUNDLE *bundle,
00461 INT32 from_slotpos)
00462 {
00463 Is_True(from_slotpos > 0 && TI_BUNDLE_slot_filled(bundle, from_slotpos - 1),
00464 ("Can only call SWP_Fillup_Bundle with partially filled bundle!"));
00465
00466
00467
00468
00469 Reset_OP_end_group(op_state[prev_op_idx].op);
00470
00471 if (Trace_Swp_Bundling && TI_BUNDLE_stop_bit(bundle, from_slotpos - 1))
00472 fprintf(TFile, "... moving <Stop-bit> to end of bundle\n");
00473 TI_BUNDLE_Unreserve_Stop_Bit(bundle, from_slotpos - 1);
00474
00475 for (INT32 i = from_slotpos; i < TI_BUNDLE_slot_count(bundle); ++i)
00476 {
00477 prev_op_idx = SWP_Append_Noop(op_state,
00478 grouping,
00479 prev_op_idx, SWP_INVALID_OP_IDX, bundle, i);
00480 }
00481 SWP_Set_Stop_At(grouping, bundle, op_state[prev_op_idx].op,
00482 TI_BUNDLE_slot_count(bundle) - 1);
00483
00484
00485 }
00486
00487
00488 static INT32
00489 SWP_Insert_Stop_Bit(SWP_OP_vector &op_state,
00490 CG_GROUPING &grouping,
00491 INT32 &op_idx,
00492 TI_BUNDLE *bundle,
00493 INT32 slotpos)
00494 {
00495
00496
00497
00498
00499 INT32 slot = slotpos;
00500
00501 Is_True(TI_BUNDLE_slot_filled(bundle, slot),
00502 ("Expected filled slot in SWP_Insert_Stop_Bit"));
00503
00504 if (TI_BUNDLE_stop_bit(bundle, slot))
00505 {
00506
00507 }
00508 else if (TI_BUNDLE_Stop_Bit_Present(bundle))
00509 {
00510 SWP_Fillup_Bundle(op_state, grouping, op_idx, bundle, slot + 1);
00511 slot = TI_BUNDLE_slot_count(bundle) - 1;
00512 }
00513 else
00514 {
00515 while(!CGTARG_Bundle_Stop_Bit_Available(bundle, slot))
00516 {
00517 ++slot;
00518 op_idx = SWP_Append_Noop(op_state,
00519 grouping,
00520 op_idx, SWP_INVALID_OP_IDX, bundle, slot);
00521 }
00522
00523 Is_True (slot < TI_BUNDLE_slot_count(bundle),
00524 ("Could not find position for stop bit in SWP_Insert_Stop_Bit"));
00525
00526 SWP_Set_Stop_At(grouping, bundle, op_state[op_idx].op, slot);
00527
00528
00529 }
00530 return slot + 1;
00531 }
00532
00533
00534 static bool
00535 SWP_Bundle_First_In_Group(SWP_OP_vector &op_state,
00536 CG_GROUPING &grouping,
00537 TI_BUNDLE *bundle,
00538 INT32 prev_op_idx,
00539 INT32 op_idx,
00540 INT32 at_slotpos)
00541 {
00542
00543
00544
00545
00546 bool ok = false;
00547 OP * const op = op_state[op_idx].op;
00548
00549 if (OP_f_group(op))
00550 {
00551 OP * const prev_op = op_state[prev_op_idx].op;
00552 ISA_EXEC_UNIT_PROPERTY op_exec_unit;
00553
00554 if (at_slotpos == 0)
00555 {
00556
00557
00558 ok = CGTARG_Bundle_Slot_Available(bundle, op, at_slotpos,
00559 &op_exec_unit, FALSE,
00560 &grouping);
00561
00562
00563
00564
00565 if (!grouping.in_first_bundle_of_group())
00566 {
00567 if (Trace_Swp_Bundling)
00568 fprintf(TFile, "<Stop-bit>\n");
00569 Set_OP_end_group(op_state[prev_op_idx].op);
00570 grouping.start_new_group(true);
00571 }
00572 }
00573 else
00574 {
00575
00576
00577
00578 ok = CGTARG_Bundle_Slot_Available(bundle, op, at_slotpos,
00579 &op_exec_unit, TRUE,
00580 &grouping);
00581
00582 if (ok)
00583 SWP_Set_Stop_At(grouping, bundle,
00584 op_state[prev_op_idx].op, at_slotpos - 1);
00585 }
00586
00587 if (ok)
00588 {
00589 Set_OP_bundled(op);
00590 op_state[op_idx].slot = grouping.absolute_slot_no(at_slotpos);
00591 SWP_Reserve_Bundle_Slot(bundle, at_slotpos, op_exec_unit, OP_code(op));
00592 }
00593 }
00594 return ok;
00595 }
00596
00597
00598 static bool
00599 SWP_Bundle_Next_In_Group(SWP_OP_vector &op_state,
00600 CG_GROUPING &grouping,
00601 TI_BUNDLE *bundle,
00602 INT32 op_idx,
00603 INT32 next_slotpos,
00604 ISA_EXEC_UNIT_PROPERTY preferred_exec_unit)
00605 {
00606
00607
00608
00609
00610 OP * const op = op_state[op_idx].op;
00611 ISA_EXEC_UNIT_PROPERTY op_exec_unit = ISA_EXEC_Unit_Prop(OP_code(op));
00612
00613
00614
00615
00616
00617
00618
00619 const bool ok = ((preferred_exec_unit & op_exec_unit) &&
00620 (!OP_l_group(op) ||
00621 CGTARG_Bundle_Stop_Bit_Available(bundle, next_slotpos)) &&
00622 CGTARG_Bundle_Slot_Available(bundle, op, next_slotpos,
00623 &op_exec_unit,
00624 FALSE,
00625 &grouping) &&
00626 op_exec_unit == preferred_exec_unit);
00627 if (ok)
00628 {
00629 Set_OP_bundled(op);
00630 op_state[op_idx].slot = grouping.absolute_slot_no(next_slotpos);
00631 SWP_Reserve_Bundle_Slot(bundle, next_slotpos, op_exec_unit, OP_code(op));
00632 }
00633
00634
00635
00636 if (OP_l_group(op))
00637 SWP_Set_Stop_At(grouping, bundle, op, next_slotpos);
00638
00639 return ok;
00640 }
00641
00642
00643 static bool
00644 SWP_Violates_Dep_Order(BOOL_MATRIX &dep_order,
00645 INT32_LIST::const_iterator first,
00646 INT32_LIST::const_iterator current)
00647 {
00648 bool violation = false;
00649 for (INT32_LIST::const_iterator it = first;
00650 it != current && !violation;
00651 ++it)
00652 {
00653 if (dep_order(*it, *current))
00654 violation = true;
00655 }
00656 return violation;
00657 }
00658
00659
00660 static void
00661 SWP_Pack_A_Bundle(SWP_OP_vector &op_state,
00662 CG_GROUPING &grouping,
00663 BOOL_MATRIX &dep_order,
00664 INT32 &prev_op_idx,
00665 TI_BUNDLE *bundle,
00666 INT32_LIST &ops_list)
00667 {
00668
00669
00670
00671
00672 INT slot = 0;
00673
00674 grouping.start_new_bundle(bundle);
00675
00676 for (slot = 0;
00677 slot < TI_BUNDLE_slot_count(bundle) && !ops_list.empty();
00678 ++slot)
00679 {
00680 INT32_LIST::iterator placed_op = ops_list.end();
00681
00682
00683
00684
00685 for (INT preference = 0;
00686 (placed_op == ops_list.end() &&
00687 preference < grouping.no_of_preferred_slot_kinds(slot));
00688 ++preference)
00689 {
00690 ISA_EXEC_UNIT_PROPERTY slot_kind =
00691 grouping.get_preferred_slot_kind(slot, preference);
00692
00693
00694
00695
00696 for (INT32_LIST::iterator it = ops_list.begin();
00697 placed_op == ops_list.end() && it != ops_list.end();
00698 ++it)
00699 {
00700 const TOP top = OP_code(op_state[*it].op);
00701 const INT remaining_slots = TI_BUNDLE_slot_count(bundle) - slot;
00702
00703
00704
00705
00706
00707 if ((!grouping.is_branch(top) || remaining_slots >= ops_list.size()) &&
00708 !SWP_Violates_Dep_Order(dep_order, ops_list.begin(), it) &&
00709 (remaining_slots >= grouping.num_slots(top)))
00710 {
00711 const bool first_in_group = SWP_Bundle_First_In_Group(op_state,
00712 grouping,
00713 bundle,
00714 prev_op_idx,
00715 *it,
00716 slot);
00717 const bool next_in_group = (!first_in_group &&
00718 SWP_Bundle_Next_In_Group(op_state,
00719 grouping,
00720 bundle,
00721 *it,
00722 slot,
00723 slot_kind));
00724 if (first_in_group || next_in_group)
00725 placed_op = it;
00726 }
00727 }
00728 }
00729
00730
00731
00732
00733
00734 if (placed_op != ops_list.end())
00735 {
00736 const TOP top = OP_code(op_state[*placed_op].op);
00737
00738 prev_op_idx = *placed_op;
00739 ops_list.erase(placed_op);
00740 if (grouping.has_resource_choice(top))
00741 grouping.reserve_resource(TI_BUNDLE_exec_property(bundle, slot));
00742 }
00743 else
00744 {
00745 const INT insert_noop_after =
00746 (prev_op_idx < 0? ops_list.front() : prev_op_idx);
00747
00748 prev_op_idx =
00749 SWP_Append_Noop(op_state,
00750 grouping,
00751 insert_noop_after, ops_list.front(), bundle, slot);
00752 }
00753 }
00754
00755
00756
00757 if (ops_list.empty())
00758 SWP_Insert_Stop_Bit(op_state, grouping, prev_op_idx, bundle, slot - 1);
00759
00760 }
00761
00762
00763
00764 struct Order_Bundled_OPs_By_slot {
00765 const SWP_OP_vector& state;
00766 bool operator()(INT i, INT j) {
00767 return state[i].slot < state[j].slot;
00768 }
00769 Order_Bundled_OPs_By_slot(const SWP_OP_vector& op_state):state(op_state) {}
00770 };
00771
00772
00773 struct OP_PARTIAL_BACKUP{
00774 OP* prev;
00775 OP* next;
00776 BOOL valid;
00777 };
00778
00779 void SWP_Delete_Noop(SWP_OP_vector &op_state,
00780 std::vector<INT> &exist_noops)
00781 {
00782
00783 for (INT i=0; i<exist_noops.size(); i++){
00784 INT nop_idx = exist_noops[i];
00785
00786 BB_Remove_Op(op_state[nop_idx].op->bb, op_state[nop_idx].op);
00787 op_state[nop_idx].op = NULL;
00788 op_state[nop_idx].is_noop = FALSE;
00789 }
00790 }
00791
00792 void SWP_Update_OP_slot(SWP_OP_vector &op_state,
00793 OPS *ops,
00794 const INT slot_yardstick,
00795 INT32 &prev_op_idx)
00796 {
00797 OP* cur_op;
00798 INT i = 0;
00799 FOR_ALL_OPS_OPs(ops, cur_op) {
00800 if (!OP_noop(cur_op))
00801
00802 {
00803 INT idx = SWP_index(cur_op);
00804 op_state[idx].slot = slot_yardstick - (OPS_length(ops)-1-i);
00805 OP_scycle(op_state[idx].op) = 0;
00806 Set_OP_bundled(cur_op);
00807
00808 }
00809 else {
00810
00811 SWP_OP swp_noop = op_state[prev_op_idx];
00812
00813 swp_noop.op = cur_op;
00814 swp_noop.is_noop = true;
00815 swp_noop.slot = slot_yardstick - (OPS_length(ops)-1-i);
00816
00817 op_state.push_back(swp_noop);
00818 OP_scycle(cur_op) = -1;
00819
00820 Set_OP_bundled(cur_op);
00821 }
00822 i++;
00823 }
00824
00825
00826 OP* last_op = OPS_last(ops);
00827 if (!OP_noop(last_op))
00828 prev_op_idx = SWP_index(last_op);
00829 else
00830 prev_op_idx = op_state.size() - 1 ;
00831
00832 }
00833
00834
00835 BOOL SWP_Slot_Helper(SWP_OP_vector &op_state,
00836 CG_GROUPING &grouping,
00837 BOOL_MATRIX &dep_order,
00838 INT32 &prev_op_idx,
00839 TI_BUNDLE *bundle,
00840 INT32_LIST &ops_list)
00841
00842
00843
00844
00845
00846 {
00847
00848 std::vector<INT> bundled_ops, exist_noops;
00849 INT slot_yardstick = op_state[prev_op_idx].slot;
00850 INT32_LIST backup_ops_list = ops_list;
00851
00852 BOOL int_ld_exist=FALSE;
00853 BOOL fload_ld_exist=FALSE;
00854
00855
00856
00857
00858 std::vector<OP_PARTIAL_BACKUP> OP_Partial_Backup(op_state.size());
00859 for (INT i=0; i<OP_Partial_Backup.size(); i++) {
00860 OP_Partial_Backup[i].prev=OP_Partial_Backup[i].next=NULL;
00861 OP_Partial_Backup[i].valid=FALSE;
00862 }
00863 for (INT32_LIST::iterator it = ops_list.begin();
00864 it != ops_list.end();
00865 ++it) {
00866
00867 OP_Partial_Backup[*it].prev = OP_prev(op_state[*it].op);
00868 OP_Partial_Backup[*it].next = OP_next(op_state[*it].op);
00869 OP_Partial_Backup[*it].valid =TRUE;
00870
00871
00872
00873 OP *op = op_state[*it].op;
00874 OP_scycle(op) = op_state[*it].cycle;
00875
00876
00877 if (OP_Is_Float_Mem(op)) fload_ld_exist=TRUE;
00878 if (OP_load(op) && !OP_Is_Float_Mem(op)) int_ld_exist=TRUE;
00879
00880
00881 }
00882
00883
00884
00885
00886 INT low = op_state[prev_op_idx].slot - ISA_MAX_SLOTS * ISA_MAX_ISSUE_BUNDLES + 1 ;
00887 INT high = op_state[prev_op_idx].slot ;
00888
00889 for (INT i = 0; i < op_state.size(); i++) {
00890 if ( low <= op_state[i].slot && op_state[i].slot <= high &&
00891 op_state[i].op!=NULL &&
00892 OP_bundled(op_state[i].op) ) {
00893
00894
00895 bundled_ops.push_back(i);
00896
00897
00898
00899 OP_Partial_Backup[i].prev = OP_prev(op_state[i].op);
00900 OP_Partial_Backup[i].next = OP_next(op_state[i].op);
00901 OP_Partial_Backup[i].valid = TRUE;
00902 }
00903 }
00904
00905
00906 std::sort(bundled_ops.begin(), bundled_ops.end(), Order_Bundled_OPs_By_slot(op_state));
00907
00908
00909
00910 for (INT i = 0; i < bundled_ops.size(); i++) {
00911 INT idx = bundled_ops[i];
00912
00913 if (op_state[idx].is_noop)
00914 exist_noops.push_back(idx);
00915
00916
00917 OP* op = op_state[idx].op;
00918 if (OP_Is_Float_Mem(op)) fload_ld_exist=TRUE;
00919 if (OP_load(op) && !OP_Is_Float_Mem(op)) int_ld_exist=TRUE;
00920
00921 }
00922
00923
00924 if (int_ld_exist && fload_ld_exist) {
00925 return FALSE;
00926 }
00927
00928
00929 OPS ops = OPS_EMPTY;
00930 for (INT i = 0; i < bundled_ops.size(); i++) {
00931 OP *op = op_state[bundled_ops[i]].op;
00932 OP_scycle(op) = op_state[bundled_ops[i]].cycle;
00933 OPS_Append_Op(&ops, op);
00934 }
00935
00936
00937
00938 BOOL abandoned=FALSE;
00939 INT split_op_idx;
00940 OP* op;
00941 while (!ops_list.empty() && abandoned==FALSE ) {
00942 split_op_idx = ops_list.front();
00943 op = op_state[split_op_idx].op;
00944
00945 if (CGGRP_Bundle_OPS(&ops, op, 0, TRUE)==FALSE) {
00946 abandoned=TRUE;
00947 break;
00948 }
00949 else {
00950 ops_list.erase(ops_list.begin());
00951
00952
00953 OP *tmp;
00954 if (OP_bb(op))
00955 FOR_ALL_OPS_OPs(&ops, tmp)
00956 tmp->bb = OP_bb(op);
00957 }
00958 }
00959
00960
00961
00962 if (abandoned) {
00963
00964
00965 ops_list = backup_ops_list;
00966
00967
00968 for (INT i = 0; i < OP_Partial_Backup.size(); i++) {
00969 if (OP_Partial_Backup[i].valid) {
00970 op_state[i].op->prev = OP_Partial_Backup[i].prev;
00971 op_state[i].op->next = OP_Partial_Backup[i].next;
00972 }
00973 }
00974
00975 return FALSE;
00976 }
00977
00978
00979
00980 if (ops_list.empty()) {
00981
00982 SWP_Update_OP_slot(op_state, &ops, slot_yardstick, prev_op_idx);
00983
00984
00985
00986 for (INT i = 0; i < OP_Partial_Backup.size(); i++) {
00987 if (OP_Partial_Backup[i].valid) {
00988
00989 op_state[i].op->prev = OP_Partial_Backup[i].prev;
00990 op_state[i].op->next = OP_Partial_Backup[i].next;
00991 }
00992 }
00993
00994
00995
00996 if(op_state.size() > OP_Partial_Backup.size()) {
00997 OP* new_noop = op_state[op_state.size()-1].op;
00998 BB_Insert_Op_After(OP_bb(new_noop), new_noop->prev, new_noop);
00999 }
01000
01001
01002 SWP_Delete_Noop(op_state, exist_noops);
01003
01004 return TRUE;
01005 }
01006
01007
01008 return FALSE;
01009 }
01010
01011
01012
01013 static void
01014 SWP_Pack_Into_New_Bundles(SWP_OP_vector &op_state,
01015 CG_GROUPING &grouping,
01016 BOOL_MATRIX &dep_order,
01017 INT32 &prev_op_idx,
01018 TI_BUNDLE *bundle,
01019 INT32_LIST &ops_list)
01020 {
01021
01022
01023
01024 while (!ops_list.empty())
01025 {
01026 SWP_Pack_A_Bundle(op_state, grouping, dep_order,
01027 prev_op_idx, bundle, ops_list);
01028
01029
01030
01031
01032
01033
01034 if (!ops_list.empty() && grouping.new_bundle_will_break_cycle())
01035 {
01036 DevWarn("SWP: Bundling group will be split into 2 cycles!");
01037
01038 if (Trace_Swp_Bundling) {
01039 for (INT32_LIST::iterator it = ops_list.begin();
01040 it != ops_list.end();
01041 ++it) {
01042 OP* op = op_state[*it].op;
01043 TOP top = OP_code(op);
01044 fprintf(TFile, "PU%d|BB:%d|Freq:%f|Cycle%d|SPLIT_OP%d -- %s (UNIT_Prop=%d)\n",
01045 Current_PU_Count(), BB_id(OP_bb(op)), BB_freq(OP_bb(op)),
01046 op_state[*it].modulo_cycle,
01047 *it, TOP_Name(top), ISA_EXEC_Unit_Prop(top));
01048 }
01049 }
01050
01051
01052
01053 OP * op = op_state[ops_list.front()].op;
01054 if (BB_freq(OP_bb(op))>20 && !OP_flop(op)) {
01055
01056 if ( SWP_Slot_Helper(op_state, grouping, dep_order,
01057 prev_op_idx, bundle, ops_list) ) {
01058 DevWarn("SWP: Trying to fix split cycle......fixed!");
01059 if (Trace_Swp_Bundling) {
01060 fprintf(TFile, "SWP: Trying to fix split cycle......fixed!\n");
01061 }
01062 }
01063 else {
01064 DevWarn("SWP: Trying to fix split cycle......abandoned!");
01065 if (Trace_Swp_Bundling) {
01066 fprintf(TFile, "SWP: Trying to fix split cycle......abandoned!\n");
01067 }
01068 }
01069
01070 }
01071
01072
01073
01074 SWP_Insert_Stop_Bit(op_state, grouping, prev_op_idx,
01075 bundle, TI_BUNDLE_slot_count(bundle) - 1);
01076
01077
01078
01079 }
01080 }
01081 }
01082
01083
01084 static void
01085 SWP_Pack_Partially_Into_Prev_Bundle(SWP_OP_vector &op_state,
01086 CG_GROUPING &grouping,
01087 BOOL_MATRIX &dep_order,
01088 INT32 &prev_op_idx,
01089 TI_BUNDLE *bundle,
01090 INT32 next_avail_slot,
01091 INT32_LIST &ops_list)
01092 {
01093
01094
01095
01096
01097
01098 SWP_Fillup_Bundle(op_state, grouping, prev_op_idx, bundle, next_avail_slot);
01099 SWP_Pack_Into_New_Bundles(op_state,
01100 grouping,
01101 dep_order,
01102 prev_op_idx,
01103 bundle,
01104 ops_list);
01105 }
01106
01107
01108 static void
01109 SWP_Bundle_Next_Cycle(SWP_OP_vector &op_state,
01110 CG_GROUPING &grouping,
01111 BOOL_MATRIX &dep_order,
01112 INT32_VECTOR &resource_slack,
01113 INT32_MATRIX::iterator opset,
01114 INT32_MATRIX::iterator opset_end,
01115 INT32 &prev_op_idx,
01116 TI_BUNDLE *current_bundle,
01117 MEM_POOL *mpool)
01118 {
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134 INT32_MATRIX::iterator it, it2;
01135
01136
01137
01138
01139
01140
01141
01142
01143 SLOT_ORDER_CMP in_order =
01144 SLOT_ORDER_CMP(op_state, grouping, dep_order, resource_slack);
01145
01146
01147 for (it = opset; it != opset_end; ++it) {
01148 for (it2 = it + 1; it2 != opset_end; ++it2)
01149 if (!in_order(*it, *it2))
01150 std::swap(*it, *it2);
01151 }
01152
01153
01154 for (it = opset; it != opset_end; ++it) {
01155 for (it2 = it + 1; it2 != opset_end; ++it2)
01156 if (dep_order(*it2, *it))
01157 std::swap(*it2, *it);
01158 }
01159
01160 if (Trace_Swp_Bundling && opset != opset_end)
01161 {
01162 fprintf(TFile, "\nSWP Grouping order for cycle %d: ",
01163 op_state[*opset].modulo_cycle);
01164 for (it = opset; it != opset_end; ++it)
01165 fprintf(TFile, "OP%d %s (%d) ", *it, TOP_Name(OP_code(op_state[*it].op)),
01166 op_state[*it].cycle);
01167 fprintf(TFile, "\n");
01168
01169
01170 fprintf(TFile,"\n");
01171 for (it = opset; it != opset_end; ++it)
01172 Print_OP_No_SrcLine(op_state[*it].op);
01173
01174 }
01175
01176
01177
01178
01179
01180 const INT32 num_needed_slots = SWP_Min_Slot_Count(op_state,
01181 opset, opset_end,
01182 grouping);
01183 INT32 num_remaining_slots = SWP_Num_Remaining_Slots(current_bundle);
01184 INT32 next_avail_slot = (TI_BUNDLE_slot_count(current_bundle) -
01185 num_remaining_slots);
01186 INT32_LIST ops_list(opset, opset_end, INT32_LIST::allocator_type(mpool));
01187
01188
01189
01190 BOOL fillup_bundle = (num_remaining_slots > 0) ;
01191
01192
01193
01194
01195
01196
01197
01198 if (fillup_bundle)
01199 {
01200
01201
01202 SWP_Fillup_Bundle(op_state, grouping,
01203 prev_op_idx, current_bundle, next_avail_slot);
01204 }
01205
01206 grouping.clear_reserved();
01207 for (it = opset; it != opset_end; ++it)
01208 {
01209 TOP top = OP_code(op_state[*it].op);
01210 if (!grouping.has_resource_choice(top))
01211 {
01212 const bool ok = grouping.inorder_reserve_resource(top);
01213 Is_True(ok, ("Running out of resources in SWP_Bundle_Next_Cycle()"));
01214 }
01215 }
01216
01217 if (num_remaining_slots > 0)
01218 {
01219 if (fillup_bundle) {
01220 SWP_Pack_Into_New_Bundles(op_state,
01221 grouping,
01222 dep_order,
01223 prev_op_idx,
01224 current_bundle,
01225 ops_list);
01226 } else {
01227 SWP_Pack_Partially_Into_Prev_Bundle(op_state,
01228 grouping,
01229 dep_order,
01230 prev_op_idx,
01231 current_bundle,
01232 next_avail_slot,
01233 ops_list);
01234 }
01235 } else {
01236 SWP_Pack_Into_New_Bundles(op_state,
01237 grouping,
01238 dep_order,
01239 prev_op_idx,
01240 current_bundle,
01241 ops_list);
01242 }
01243
01244
01245
01246
01247
01248
01249
01250
01251 }
01252
01253
01254 void
01255 SWP_Bundle(SWP_OP_vector& op_state, bool trace)
01256 {
01257
01258
01259
01260 MEM_POOL bundle_pool;
01261
01262 Trace_Swp_Bundling = trace;
01263
01264 MEM_POOL_Initialize(&bundle_pool, "CG_SWP_ALLOCATOR POOL", FALSE);
01265 MEM_POOL_Push(&bundle_pool);
01266 {
01267
01268
01269
01270
01271
01272 const INT32 empty_slot = -1;
01273 INT32 i;
01274 CG_GROUPING grouping(&bundle_pool, SI_resource_count);
01275 INT32_MATRIX cycle2op(op_state.ii,
01276 grouping.max_slots_per_group(),
01277 empty_slot,
01278 INT32_MATRIX::allocator_type(&bundle_pool));
01279 INT32_VECTOR numops(op_state.ii,
01280 0,
01281 INT32_VECTOR::allocator_type(&bundle_pool));
01282 INT32_VECTOR resource_slack(op_state.size(), INT32_MAX,
01283 INT32_VECTOR::allocator_type(&bundle_pool));
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294 for (i = 0; i < op_state.size(); ++i)
01295 {
01296 if (op_state[i].op != NULL)
01297 {
01298 const INT32 mcycle = op_state[i].cycle % op_state.ii;
01299 op_state[i].modulo_cycle = mcycle;
01300 cycle2op(mcycle, numops[mcycle]++) = i;
01301 resource_slack[i] = grouping.resource_slack(OP_code(op_state[i].op));
01302 }
01303 }
01304
01305
01306
01307
01308
01309 BOOL_MATRIX dep_order(op_state.size(), op_state.size(), false,
01310 BOOL_MATRIX::allocator_type(&bundle_pool));
01311 SWP_Init_Samecycle_Dep_Order(op_state, dep_order);
01312
01313
01314
01315
01316
01317 INT32 prev_op_idx = SWP_INVALID_OP_IDX;
01318 TI_BUNDLE *current_bundle = TI_BUNDLE_Create(&bundle_pool);
01319
01320
01321
01322
01323 TI_BUNDLE_Clear(current_bundle);
01324 for (INT slot = 0; slot < TI_BUNDLE_slot_count(current_bundle); ++slot)
01325 TI_BUNDLE_slot_filled(current_bundle, slot) = true;
01326
01327 for (INT32 c = 0; c < op_state.ii; c++)
01328 {
01329 if (numops[c] > 0)
01330 {
01331 MEM_POOL_Push(&bundle_pool);
01332 SWP_Bundle_Next_Cycle(op_state,
01333 grouping,
01334 dep_order,
01335 resource_slack,
01336 cycle2op.pos(c, 0),
01337 cycle2op.pos(c, numops[c] - 1) + 1,
01338 prev_op_idx,
01339 current_bundle,
01340 &bundle_pool);
01341 MEM_POOL_Pop(&bundle_pool);
01342 }
01343 }
01344
01345
01346
01347 SWP_Fillup_Bundle(op_state, grouping,
01348 prev_op_idx, current_bundle,
01349 (TI_BUNDLE_slot_count(current_bundle) -
01350 SWP_Num_Remaining_Slots(current_bundle)));
01351
01352 op_state.ii_slots = grouping.total_no_of_bundles() * ISA_MAX_SLOTS;
01353
01354 }
01355
01356 SWP_Verify_Samecycle_Dep_Order(op_state);
01357
01358 MEM_POOL_Pop(&bundle_pool);
01359 MEM_POOL_Delete(&bundle_pool);
01360 }
01361
01362
01363 void
01364 SWP_Dont_Bundle(SWP_OP_vector& op_state)
01365 {
01366
01367
01368
01369 op_state.ii_slots = op_state.ii;
01370 for (INT i = 0; i < op_state.size(); i++) {
01371 if (op_state[i].op) {
01372 op_state[i].modulo_cycle = op_state[i].cycle % op_state.ii;
01373 op_state[i].slot = op_state[i].modulo_cycle;
01374 }
01375 }
01376 }
01377
01378
01379 void
01380 SWP_Undo_Bundle(SWP_OP_vector& op_state, BB *body)
01381 {
01382 for (INT i = 0; i < op_state.size(); i++) {
01383 if (op_state[i].is_noop && op_state[i].op) {
01384 BB_Remove_Op(body, op_state[i].op);
01385 op_state[i].op = NULL;
01386 op_state[i].is_noop = FALSE;
01387 }
01388 if (op_state[i].op) {
01389 Reset_OP_end_group(op_state[i].op);
01390 Reset_OP_bundled(op_state[i].op);
01391 }
01392 }
01393 }