00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "defs.h"
00038 #include "tracing.h"
00039 #include "cg_flags.h"
00040 #include "cgprep.h"
00041 #include "cg_loop.h"
00042 #include "cg_loop_mii.h"
00043 #include "cg_loop_recur.h"
00044 #include "cg_dep_graph.h"
00045 #include "cg_sched_est.h"
00046 #include "cg_swp.h"
00047 #include "cg_swp_target.h"
00048 #include "cgexp.h"
00049 #include "const.h"
00050 #include "tn_set.h"
00051 #include "ti_res_count.h"
00052
00053 typedef std::pair<TN*, OP*> tn_def_op;
00054 typedef std::vector<tn_def_op> op_vec_type;
00055
00056
00057 bool operator<(tn_def_op& t1, tn_def_op& t2)
00058 {
00059 return (t1.first < t2.first);
00060 }
00061
00062 class REPLACE_TN_LIST {
00063
00064 struct REPLACE_TN {
00065 TN *old_tn;
00066 TN *new_tn;
00067 OP *begin;
00068 OP *end;
00069 REPLACE_TN(TN *o, TN *n, OP *b, OP *e):
00070 old_tn(o), new_tn(n), begin(b), end(e) {}
00071 };
00072
00073 std::vector<REPLACE_TN> v;
00074
00075 public:
00076
00077 void record(TN *o, TN *n, OP *b, OP *e) {
00078 v.push_back(REPLACE_TN(o, n, b, e));
00079 }
00080
00081
00082 void commit() {
00083 for (INT32 i = 0; i < v.size(); i++) {
00084 TN *old_tn = v[i].old_tn;
00085 TN *new_tn = v[i].new_tn;
00086 for (OP *op = v[i].begin; op != v[i].end; op = OP_next(op)) {
00087 INT32 j = 0;
00088 for (j = 0; j < OP_opnds(op); j++)
00089 if (OP_opnd(op, j) == old_tn)
00090 Set_OP_opnd(op, j, new_tn);
00091 for (j = 0; j < OP_results(op); j++)
00092 if (OP_result(op, j) == old_tn)
00093 Set_OP_result(op, j, new_tn);
00094 }
00095 }
00096 }
00097 };
00098
00099
00100 void Interleave_Base_Update(op_vec_type::iterator first, op_vec_type::iterator last,
00101 BB *prolog, BB *body, BB *epilog, bool trace)
00102 {
00103
00104 if (last - first <= 1) return;
00105
00106
00107
00108
00109
00110 TN *tn = (*first).first;
00111 op_vec_type::iterator p;
00112 INT32 incr = 0;
00113 INT32 first_incr = 0;
00114 INT32 liveout_adj = 0;
00115 bool need_reg_update = false;
00116
00117 for (p = first; p != last; p++) {
00118 OP *op = (*p).second;
00119 BASE_UPDATE kind = OP_base_update_kind(op);
00120 if (kind == NO_BASE_UPDATE || OP_cond_def(op))
00121 return;
00122
00123 INT32 base_opnd_num = OP_base_opnd_num(op);
00124 INT32 base_res_num = OP_base_res_num(op);
00125 INT32 incr_opnd_num = OP_incr_opnd_num(op);
00126
00127 Is_True(base_opnd_num >= 0 && base_res_num >= 0,
00128 ("can't find base opnd/result."));
00129
00130 if (OP_result(op, base_res_num) != tn)
00131 return;
00132
00133 Is_True(OP_result(op, base_res_num) == OP_opnd(op, base_opnd_num),
00134 ("opnd != res in base update form."));
00135
00136 if ((p != first && OP_omega(op, base_opnd_num) != 0) ||
00137 (p == first && OP_omega(op, base_opnd_num) != 1))
00138
00139 return;
00140
00141 if (kind == REG_BASE_UPDATE) {
00142 need_reg_update = true;
00143 } else {
00144 INT32 value = TN_value(OP_opnd(op, incr_opnd_num));
00145 liveout_adj = -incr;
00146 incr += value;
00147 if (p == first)
00148 first_incr = value;
00149 }
00150 }
00151 for (p = first; p != last; p++) {
00152 OP *op = (*p).second;
00153 if (!Imm_Value_In_Range(op, incr))
00154 need_reg_update = true;
00155 }
00156
00157 if (need_reg_update) {
00158 DevWarn("recurrence breaking does not support register base update form.");
00159 return;
00160 }
00161
00162
00163
00164 {
00165 OP *op;
00166 FOR_ALL_BB_OPs(body, op) {
00167 if (OP_Defs_TN(op, tn)) continue;
00168 if (OP_Refs_TN(op, tn)) return;
00169 }
00170 }
00171
00172
00173 CG_LOOP_BACKPATCH *bp = CG_LOOP_Backpatch_First(prolog, NULL);
00174 while (bp && CG_LOOP_BACKPATCH_body_tn(bp) != tn )
00175 bp = CG_LOOP_Backpatch_Next(bp);
00176 Is_True(bp, ("unable to find backpatch."));
00177 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
00178 CG_LOOP_Backpatch_Delete(prolog, bp);
00179
00180 REPLACE_TN_LIST use_update;
00181 TN *incr_tn = Gen_Literal_TN(incr, 4);
00182 OPS prolog_ops = OPS_EMPTY;
00183 INT32 partial_sum = 0;
00184
00185 for (p = first; p != last; p++) {
00186
00187 OP *op = (*p).second;
00188 TN *newtn = Dup_TN(tn);
00189 (*p).first = newtn;
00190
00191 if (trace) {
00192 fprintf(TFile, "<recur> TN%d replaces the %d-th occurrence of TN%d\n",
00193 TN_number(newtn), (INT)(p - first), TN_number(tn));
00194 }
00195
00196
00197 {
00198 op_vec_type::iterator q = p + 1;
00199 if (q >= last) q -= (last - first);
00200 OP *op = (*p).second;
00201 if (q != first)
00202 use_update.record(tn, newtn, OP_next(op), (*q).second);
00203 else {
00204 use_update.record(tn, newtn, OP_next(op), NULL);
00205 use_update.record(tn, newtn, BB_first_op(body), op);
00206 }
00207 }
00208
00209
00210 {
00211 TN *prolog_tn = Dup_TN(tn);
00212 CG_LOOP_Backpatch_Add(prolog, prolog_tn, newtn, 1);
00213 Build_OP(TOP_adds,
00214 prolog_tn,
00215 True_TN,
00216 Gen_Literal_TN(partial_sum, 4),
00217 non_body_tn,
00218 &prolog_ops);
00219 }
00220
00221
00222 {
00223 INT32 base_opnd_num = OP_base_opnd_num(op);
00224 INT32 base_res_num = OP_base_res_num(op);
00225 INT32 incr_opnd_num = OP_incr_opnd_num(op);
00226 partial_sum += TN_value(OP_opnd(op, incr_opnd_num));
00227 Set_OP_opnd(op, incr_opnd_num, incr_tn);
00228 Set_OP_opnd(op, base_opnd_num, newtn);
00229 Set_OP_result(op, base_res_num, newtn);
00230 Set_OP_omega(op, base_opnd_num, 1);
00231 }
00232
00233 {
00234 INT32 base_opnd_num = OP_base_opnd_num(op);
00235 for (int j = 0; j < OP_opnds(op); j++)
00236 if (j != base_opnd_num && OP_opnd(op, j) == tn){
00237 Set_OP_opnd(op, j, newtn);
00238 Set_OP_omega(op,j, 1);
00239 }
00240 }
00241 }
00242 use_update.commit();
00243 BB_Append_Ops(prolog, &prolog_ops);
00244
00245
00246 {
00247 CG_LOOP_BACKPATCH *bp = CG_LOOP_Backpatch_First(epilog, NULL);
00248 while (bp && CG_LOOP_BACKPATCH_body_tn(bp) != tn )
00249 bp = CG_LOOP_Backpatch_Next(bp);
00250 if (bp) {
00251 CG_LOOP_BACKPATCH_Set_body_tn(bp, (*(last-1)).first);
00252 OPS epilog_ops = OPS_EMPTY;
00253 Build_OP(TOP_adds,
00254 non_body_tn,
00255 True_TN,
00256 Gen_Literal_TN(liveout_adj, 4),
00257 non_body_tn,
00258 &epilog_ops);
00259 BB_Prepend_Ops(epilog, &epilog_ops);
00260 }
00261 }
00262 }
00263
00264
00265
00266 TYPE_ID Mtype_from_opc(TOP top)
00267 {
00268 switch (top) {
00269 case TOP_fma:
00270 case TOP_fnma:
00271 case TOP_fadd:
00272 case TOP_fsub:
00273 return MTYPE_F10;
00274
00275 case TOP_fma_d:
00276 case TOP_fnma_d:
00277 case TOP_fadd_d:
00278 case TOP_fsub_d:
00279 return MTYPE_F8;
00280
00281 case TOP_fma_s:
00282 case TOP_fnma_s:
00283 case TOP_fadd_s:
00284 case TOP_fsub_s:
00285 return MTYPE_F4;
00286
00287 case TOP_add:
00288 case TOP_adds:
00289 case TOP_sub:
00290 return MTYPE_I8;
00291 }
00292
00293 Is_True(FALSE, ("Mtype_from_opc: unknown opcode."));
00294 return MTYPE_UNKNOWN;
00295 }
00296
00297
00298 enum RECUR_ACTION {
00299 RECUR_NONE,
00300 RECUR_BACK_SUB_INVARIANT,
00301 RECUR_INTERLEAVE,
00302 RECUR_BACK_SUB_VARIANT
00303 };
00304
00305
00306
00307
00308
00309
00310 class RECUR_OP_DESC {
00311 RECUR_ACTION action;
00312 INT invar_opnd_num;
00313 INT second_invar_opnd_num;
00314 INT reg_opnd_num;
00315 INT res_num;
00316 INT new_omega;
00317 OP *op;
00318 TYPE_ID mtype;
00319 bool is_add;
00320 bool has_one_invar_opnd;
00321 bool allow_back_sub_variant;
00322 bool need_copy_variant;
00323 TN *identity;
00324
00325 public:
00326 OP *Op() { return op; }
00327 RECUR_ACTION Action() { return action; }
00328 INT Invar_opnd_num() { return invar_opnd_num; }
00329 INT Second_invar_opnd_num() { return second_invar_opnd_num; }
00330 INT Reg_opnd_num() { return reg_opnd_num; }
00331 INT Res_num() { return res_num; }
00332 TYPE_ID Mtype() { return mtype; }
00333 bool Is_add() { return is_add; }
00334 INT New_omega() { return new_omega; }
00335 TN *Identity() { return identity; }
00336 bool Need_copy_variant() { return need_copy_variant; }
00337
00338 RECUR_OP_DESC(const RECUR_OP_DESC &r) { *this = r; }
00339
00340 RECUR_OP_DESC(BB *body, BB *epilog, OP *operation, CG_LOOP_DEF& tn_def,
00341 TN_SET *multi_def, double estimate_ResMII, bool trace) {
00342
00343 op = operation;
00344 action = RECUR_NONE;
00345
00346
00347 if (OP_results(op) != 1) return;
00348
00349
00350 if (TN_SET_MemberP(multi_def, OP_result(op, 0))) return;
00351
00352
00353 if (TN_is_dedicated(OP_result(op,0))) return;
00354
00355
00356
00357 INT i, j;
00358 INT8 ref_count = 0;
00359 for (i = 0; i < OP_results(op); i++) {
00360 TN *result_tn = OP_result(op, i);
00361 for (j = 0; j < OP_opnds(op); j++) {
00362 TN *opnd_tn = OP_opnd(op, j);
00363 if (TN_is_constant(opnd_tn)) continue;
00364 if (opnd_tn == result_tn) ref_count++;
00365 }
00366 }
00367 if (ref_count > 1) return;
00368
00369 res_num = -1;
00370 has_one_invar_opnd = true;
00371 allow_back_sub_variant = false;
00372 need_copy_variant = false;
00373 identity = NULL;
00374
00375 switch (OP_code(op)) {
00376 case TOP_fma:
00377 case TOP_fma_d:
00378 case TOP_fma_s:
00379 has_one_invar_opnd = false;
00380 if (Roundoff_Level >= ROUNDOFF_ASSOC) {
00381 is_add = true;
00382 if (OP_result(op, 0) == OP_opnd(op, 4)) {
00383 invar_opnd_num = 2;
00384 second_invar_opnd_num = 3;
00385 reg_opnd_num = 4;
00386 res_num = 0;
00387 }
00388 mtype = Mtype_from_opc(OP_code(op));
00389 }
00390 break;
00391
00392 case TOP_fnma:
00393 case TOP_fnma_d:
00394 case TOP_fnma_s:
00395 has_one_invar_opnd = false;
00396 if (Roundoff_Level >= ROUNDOFF_ASSOC) {
00397 if (OP_result(op, 0) == OP_opnd(op, 4)) {
00398 invar_opnd_num = 2;
00399 second_invar_opnd_num = 3;
00400 reg_opnd_num = 4;
00401 res_num = 0;
00402 }
00403 mtype = Mtype_from_opc(OP_code(op));
00404 }
00405 break;
00406
00407 case TOP_fadd:
00408 case TOP_fadd_d:
00409 case TOP_fadd_s:
00410 allow_back_sub_variant = true;
00411 if (Roundoff_Level >= ROUNDOFF_ASSOC) {
00412 is_add = true;
00413 if (OP_result(op, 0) == OP_opnd(op, 3)) {
00414 invar_opnd_num = 2;
00415 reg_opnd_num = 3;
00416 res_num = 0;
00417 } else if (OP_result(op, 0) == OP_opnd(op, 2)) {
00418 invar_opnd_num = 3;
00419 reg_opnd_num = 2;
00420 res_num = 0;
00421 }
00422 mtype = Mtype_from_opc(OP_code(op));
00423 }
00424 break;
00425
00426 case TOP_fsub:
00427 case TOP_fsub_d:
00428 case TOP_fsub_s:
00429 allow_back_sub_variant = true;
00430 if (Roundoff_Level >= ROUNDOFF_ASSOC) {
00431 is_add = false;
00432 if (OP_result(op, 0) == OP_opnd(op, 2)) {
00433 invar_opnd_num = 3;
00434 reg_opnd_num = 2;
00435 res_num = 0;
00436 }
00437 mtype = Mtype_from_opc(OP_code(op));
00438 }
00439 break;
00440
00441 case TOP_sub:
00442 is_add = false;
00443 if (OP_result(op, 0) == OP_opnd(op, 1)) {
00444 invar_opnd_num = 2;
00445 reg_opnd_num = 1;
00446 res_num = 0;
00447 }
00448 mtype = Mtype_from_opc(OP_code(op));
00449 break;
00450
00451 case TOP_add:
00452 case TOP_adds:
00453 is_add = true;
00454 if (OP_result(op, 0) == OP_opnd(op, 2)) {
00455 invar_opnd_num = 1;
00456 reg_opnd_num = 2;
00457 res_num = 0;
00458 } else if (OP_result(op, 0) == OP_opnd(op, 1)) {
00459 invar_opnd_num = 2;
00460 reg_opnd_num = 1;
00461 res_num = 0;
00462 }
00463 mtype = Mtype_from_opc(OP_code(op));
00464 break;
00465 }
00466
00467
00468 if (res_num >= 0 && !TN_is_dedicated(OP_result(op, res_num))) {
00469
00470 bool can_interleave = true;
00471 bool can_back_sub_variant = true;
00472
00473 if (OP_omega(op, reg_opnd_num) == 1) {
00474 if (tn_def.Is_invariant(OP_opnd(op, invar_opnd_num)) && has_one_invar_opnd)
00475 action = RECUR_BACK_SUB_INVARIANT;
00476 else {
00477 OP *other_op;
00478 TN *use = OP_opnd(op, reg_opnd_num);
00479 FOR_ALL_BB_OPs(body, other_op) {
00480 if (other_op != op &&
00481 OP_Refs_TN(other_op, use)) {
00482 can_interleave = false;
00483 break;
00484 }
00485 }
00486 if (can_interleave) {
00487 for (CG_LOOP_BACKPATCH *bp = CG_LOOP_Backpatch_First(epilog, NULL);
00488 bp != NULL;
00489 bp = CG_LOOP_Backpatch_Next(bp)) {
00490 if (CG_LOOP_BACKPATCH_body_tn(bp) == use &&
00491 CG_LOOP_BACKPATCH_omega(bp) != 0) {
00492 can_interleave = false;
00493 break;
00494 }
00495 }
00496 }
00497 if (can_interleave)
00498 action = RECUR_INTERLEAVE;
00499 }
00500 }
00501
00502 if (allow_back_sub_variant && action == RECUR_NONE) {
00503
00504
00505 INT old_omega = OP_omega(op, reg_opnd_num);
00506 TN *body_tn = OP_opnd(op, reg_opnd_num);
00507 for (int i = 1+old_omega; i <= old_omega * 2; i++) {
00508 CG_LOOP_BACKPATCH *bp;
00509 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
00510 bp != NULL;
00511 bp = CG_LOOP_Backpatch_Next(bp)) {
00512 if (CG_LOOP_BACKPATCH_body_tn(bp) == body_tn &&
00513 CG_LOOP_BACKPATCH_omega(bp) == i) {
00514 can_back_sub_variant = false;
00515 break;
00516 }
00517 }
00518 }
00519 if (can_back_sub_variant) {
00520 action = RECUR_BACK_SUB_VARIANT;
00521 TN *body_tn = OP_opnd(op, invar_opnd_num);
00522 need_copy_variant = OP_omega(op, invar_opnd_num) != 0;
00523
00524
00525
00526
00527
00528
00529 if (!need_copy_variant) {
00530 OP *other_op;
00531 INT def_count = 0;
00532 FOR_ALL_BB_OPs(body, other_op) {
00533 if (OP_Defs_TN(other_op, body_tn)) {
00534 if (OP_cond_def(other_op) ||
00535 def_count++ > 0) {
00536 need_copy_variant = true;
00537 break;
00538 }
00539 }
00540 }
00541 }
00542 if (!need_copy_variant) {
00543 CG_LOOP_BACKPATCH *bp;
00544 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
00545 bp != NULL;
00546 bp = CG_LOOP_Backpatch_Next(bp)) {
00547 if (CG_LOOP_BACKPATCH_body_tn(bp) == body_tn) {
00548 need_copy_variant = true;
00549 break;
00550 }
00551 }
00552 }
00553 }
00554 }
00555 }
00556
00557
00558
00559
00560 if (action == RECUR_BACK_SUB_INVARIANT &&
00561 CG_LOOP_Backpatch_Find_Non_Body_TN(CG_LOOP_prolog,
00562 OP_opnd(op, reg_opnd_num), 2)) {
00563 action = RECUR_NONE;
00564 }
00565
00566
00567
00568 if (!CG_LOOP_back_substitution && action == RECUR_BACK_SUB_INVARIANT)
00569 action = RECUR_NONE;
00570 if (!CG_LOOP_back_substitution_variant && action == RECUR_BACK_SUB_VARIANT)
00571 action = RECUR_NONE;
00572 if (!CG_LOOP_interleave_reductions && action == RECUR_INTERLEAVE)
00573 action = RECUR_NONE;
00574
00575 if (action != RECUR_NONE && action != RECUR_BACK_SUB_VARIANT) {
00576 INT latency = CG_DEP_Latency(op, op, CG_DEP_REGIN, reg_opnd_num);
00577 INT illegal_omega_value = 20;
00578 for (new_omega = MAX(1, CG_LOOP_recurrence_min_omega); new_omega < illegal_omega_value; new_omega++) {
00579
00580 if (latency <= new_omega * estimate_ResMII)
00581 break;
00582 }
00583 if (trace)
00584 fprintf(TFile, "<recur> new_omega=%d, latency=%d, ResMII=%g\n",
00585 new_omega, latency, estimate_ResMII);
00586
00587 Is_True(new_omega < illegal_omega_value, ("RECUR_OP_DESC: check latency and ResMII"));
00588 if (new_omega == 1)
00589 action = RECUR_NONE;
00590 }
00591 if (action != RECUR_NONE)
00592 identity = TN_is_float( OP_result(op, res_num)) ? FZero_TN : Zero_TN;
00593 }
00594 };
00595
00596
00597
00598
00599 void Apply_Back_Sub_Invariant(OP *op, INT new_omega,
00600 BB *prolog, RECUR_OP_DESC& op_desc)
00601 {
00602 INT invar_opnd_num = op_desc.Invar_opnd_num();
00603 INT reg_opnd_num = op_desc.Reg_opnd_num();
00604 INT res_num = op_desc.Res_num();
00605 TYPE_ID mtype = op_desc.Mtype();
00606
00607 TN *invar_tn = OP_opnd(op, invar_opnd_num);
00608 Is_True(OP_omega(op, reg_opnd_num) == 1,
00609 ("Increase_Recurrence_Omega: expecting omega = 1."));
00610 Set_OP_omega(op, reg_opnd_num, new_omega);
00611 OPS prolog_ops = OPS_EMPTY;
00612
00613
00614 switch (OP_code(op)) {
00615 case TOP_fadd_s:
00616 case TOP_fadd_d:
00617 case TOP_fadd:
00618 case TOP_fsub_s:
00619 case TOP_fsub_d:
00620 case TOP_fsub:
00621 {
00622 TN *new_invar_tn = Dup_TN(OP_result(op, res_num));
00623 WN *const_tree = Make_Const(Host_To_Targ_Float(mtype, new_omega));
00624 TYPE_ID mtype = WN_rtype(const_tree);
00625 TN *const_tn = Build_TN_Of_Mtype(mtype);
00626 Exp_Load(mtype, mtype, const_tn, WN_st(const_tree), 0,
00627 &prolog_ops, V_NONE);
00628 Exp_MPY(Mtype_from_opc(OP_code(op)), new_invar_tn, invar_tn, const_tn, &prolog_ops);
00629 Set_OP_opnd(op, invar_opnd_num, new_invar_tn);
00630 break;
00631 }
00632
00633 case TOP_add:
00634 case TOP_sub:
00635 {
00636 TN *new_invar_tn = Dup_TN(OP_result(op, res_num));
00637 Exp_MPY(mtype, new_invar_tn, invar_tn, Gen_Literal_TN(new_omega, 4), &prolog_ops);
00638 Set_OP_opnd(op, invar_opnd_num, new_invar_tn);
00639 break;
00640 }
00641
00642 case TOP_adds:
00643 {
00644 INT64 imm = TN_value(invar_tn);
00645 TN *new_imm_tn = Gen_Literal_TN(imm * new_omega, 4);
00646 if (TOP_Can_Have_Immediate(imm * new_omega, OP_code(op)))
00647 Set_OP_opnd(op, invar_opnd_num, new_imm_tn);
00648 else {
00649
00650 OP_Change_Opcode(op, TOP_add);
00651 TN *new_tn = Dup_TN(OP_result(op, res_num));
00652 Set_OP_opnd(op, invar_opnd_num, new_tn);
00653 Exp_COPY(new_tn, new_imm_tn, &prolog_ops);
00654 }
00655 break;
00656 }
00657
00658 default:
00659 Is_True(FALSE, ("Interleave_Associative_Op: op not supported."));
00660 }
00661
00662
00663 TN *body_tn = OP_result(op, res_num);
00664 TN *non_body_tn = CG_LOOP_Backpatch_Find_Non_Body_TN(prolog, body_tn, 1);
00665 Is_True(non_body_tn, ("Interleave_Associative_Op: can't find non-body tn"));
00666
00667
00668 for (INT i = 2; i <= new_omega; i++) {
00669 TN *new_tn = Dup_TN(non_body_tn);
00670 if (op_desc.Is_add())
00671 Exp_SUB(mtype, new_tn, non_body_tn, invar_tn, &prolog_ops);
00672 else
00673 Exp_ADD(mtype, new_tn, non_body_tn, invar_tn, &prolog_ops);
00674 CG_LOOP_Backpatch_Add(prolog, new_tn, body_tn, i);
00675 non_body_tn = new_tn;
00676 }
00677
00678 BB_Append_Ops(prolog, &prolog_ops);
00679 }
00680
00681
00682
00683
00684
00685 bool Apply_Back_Sub_Variant(RECUR_OP_DESC& op_desc, BB *prolog, BB *body, CG_SCHED_EST *loop_se, bool trace)
00686 {
00687 OP *op = op_desc.Op();
00688 INT invar_opnd_num = op_desc.Invar_opnd_num();
00689 INT reg_opnd_num = op_desc.Reg_opnd_num();
00690 INT res_num = op_desc.Res_num();
00691 TYPE_ID mtype = op_desc.Mtype();
00692 TN *identity = op_desc.Identity();
00693 bool need_copy_variant = op_desc.Need_copy_variant();
00694
00695
00696 switch (OP_code(op)) {
00697 case TOP_fadd_s:
00698 case TOP_fadd_d:
00699 case TOP_fadd:
00700 case TOP_fsub_s:
00701 case TOP_fsub_d:
00702 case TOP_fsub:
00703 {
00704 INT old_omega = OP_omega(op, reg_opnd_num);
00705 INT new_omega = old_omega * 2;
00706
00707 CG_SCHED_EST_Add_Op_Resources(loop_se, TOP_fadd_s);
00708 if (need_copy_variant)
00709 CG_SCHED_EST_Add_Op_Resources(loop_se, TOP_mov_f);
00710
00711 INT latency = CG_DEP_Latency(op, op, CG_DEP_REGIN, reg_opnd_num);
00712 double ResMII = CG_SCHED_EST_Resources_Min_Cycles(loop_se);
00713
00714 if (trace)
00715 fprintf(TFile, "<back_sub_variant> latency=%d old_omega=%d ResMII=%g\n",
00716 latency, old_omega, ResMII);
00717
00718 if (((double)latency / old_omega) <= ResMII) {
00719 CG_SCHED_EST_Subtract_Op_Resources(loop_se, TOP_fadd_s);
00720 if (need_copy_variant)
00721 CG_SCHED_EST_Subtract_Op_Resources(loop_se, TOP_mov_f);
00722 return false;
00723 }
00724
00725 OPS body_ops = OPS_EMPTY;
00726 OPS copy_ops = OPS_EMPTY;
00727
00728 TN *invar_tn_0 = OP_opnd(op, invar_opnd_num);
00729 TN *invar_tn_1 = invar_tn_0;
00730 INT invar_tn_1_omega = 0;
00731 if (need_copy_variant) {
00732 invar_tn_1 = Dup_TN(invar_tn_1);
00733 invar_tn_1_omega = OP_omega(op, invar_opnd_num);
00734 Exp_COPY(invar_tn_1, invar_tn_0, ©_ops);
00735 }
00736
00737 TN *temp_tn = Dup_TN(OP_opnd(op, reg_opnd_num));
00738 Set_OP_opnd(op, invar_opnd_num, temp_tn);
00739 Set_OP_omega(op, invar_opnd_num, 0);
00740 Set_OP_omega(op, reg_opnd_num, new_omega);
00741
00742 Exp_OP2 (OPCODE_make_op(OPR_ADD,mtype,MTYPE_V), temp_tn, invar_tn_0, invar_tn_1, &body_ops);
00743
00744 BB_Insert_Ops_Before(body, op, &body_ops);
00745
00746 OP *add_op = OP_prev(op);
00747 Is_True(OP_code(add_op) == TOP_fadd_s || OP_code(add_op) == TOP_fadd_d ||
00748 OP_code(add_op) == TOP_fadd,
00749 ("Apply_Back_Sub_Variant: must be fadd."));
00750 CGPREP_Init_Op(add_op);
00751 CG_LOOP_Init_Op(add_op);
00752 Set_OP_omega(add_op, 2, old_omega);
00753 if (need_copy_variant)
00754 Set_OP_omega(add_op, 3, 1);
00755
00756 if (need_copy_variant) {
00757 BB_Insert_Ops_Before(body, op, ©_ops);
00758 OP *mov_op = OP_prev(op);
00759 Is_True(OP_code(mov_op) == TOP_mov_f,
00760 ("Apply_Back_Sub_Variant: must be mov_f."));
00761 CGPREP_Init_Op(mov_op);
00762 CG_LOOP_Init_Op(mov_op);
00763 Set_OP_omega(mov_op, 1, invar_tn_1_omega);
00764 }
00765
00766 TN *body_tn = OP_result(op, res_num);
00767 for (int i = 1; i <= old_omega; i++) {
00768 CG_LOOP_BACKPATCH *bp;
00769 TN *non_body_tn = NULL;
00770 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
00771 bp != NULL;
00772 bp = CG_LOOP_Backpatch_Next(bp)) {
00773 if (CG_LOOP_BACKPATCH_body_tn(bp) == body_tn &&
00774 CG_LOOP_BACKPATCH_omega(bp) == i) {
00775 non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
00776 break;
00777 }
00778 }
00779 Is_True(non_body_tn, ("can't find backpatch for TN%d[%d]\n",
00780 TN_number(body_tn), i));
00781 CG_LOOP_Backpatch_Add(prolog, non_body_tn, body_tn, i + old_omega);
00782 }
00783
00784 {
00785 for (int i = 1; i <= old_omega; i++)
00786 CG_LOOP_Backpatch_Add(prolog, identity, invar_tn_1, i);
00787 }
00788
00789 return true;
00790 }
00791
00792 default:
00793 Is_True(FALSE, ("Apply_Back_Sub_Variant: op not supported."));
00794 }
00795 return false;
00796 }
00797
00798
00799
00800
00801
00802 void Apply_Interleave(OP *op, INT new_omega,
00803 BB *prolog, BB *epilog, RECUR_OP_DESC& op_desc)
00804 {
00805 INT reg_opnd_num = op_desc.Reg_opnd_num();
00806 INT res_num = op_desc.Res_num();
00807 TYPE_ID mtype = op_desc.Mtype();
00808 TN *identity = op_desc.Identity();
00809
00810 Is_True(OP_omega(op, reg_opnd_num) == 1,
00811 ("Increase_Recurrence_Omega: expecting omega = 1."));
00812 Set_OP_omega(op, reg_opnd_num, new_omega);
00813
00814 OPS epilog_ops = OPS_EMPTY;
00815
00816
00817 TN *body_tn = OP_result(op, res_num);
00818 for (INT i = 2; i <= new_omega; i++)
00819 CG_LOOP_Backpatch_Add(prolog, identity, body_tn, i);
00820
00821
00822 TN *non_body_tn = CG_LOOP_Backpatch_Find_Non_Body_TN(epilog, body_tn, 0);
00823 if (!non_body_tn) {
00824 DevWarn("Interleave_Associative_Op: can't find non-body tn");
00825 } else {
00826 std::vector<TN*> backpatch_tns;
00827 backpatch_tns.push_back(non_body_tn);
00828 INT i;
00829 for (i = 1; i < new_omega; i++) {
00830 TN *new_tn = Dup_TN(non_body_tn);
00831 backpatch_tns.push_back(new_tn);
00832 Is_True(CG_LOOP_Backpatch_Find_Non_Body_TN(epilog, body_tn, i) == NULL,
00833 ("Apply_Interleave: epilog backpatch existed."));
00834 CG_LOOP_Backpatch_Add(epilog, new_tn, body_tn, i);
00835 }
00836 for (int size = backpatch_tns.size(); size > 1; size = (size + 1) / 2) {
00837 for (int i = 0; 2 * i < size; i++) {
00838 TN *dst = backpatch_tns[2 * i];
00839 if (2 * i + 1 < size) {
00840 TN *src = backpatch_tns[2 * i + 1];
00841 Exp_ADD(mtype, dst, dst, src, &epilog_ops);
00842 }
00843 backpatch_tns[i] = dst;
00844 }
00845 }
00846 }
00847
00848 BB_Append_Ops(epilog, &epilog_ops);
00849 }
00850
00851
00852
00853
00854
00855 class Critical_Recurrence {
00856 static const INT NEG_INF = -999;
00857 std::vector< std::vector<INT> > mindist;
00858 INT size() const {return mindist.size(); }
00859 public:
00860 INT operator()(INT i) const { return mindist[i][i] > 0; }
00861 void Print(FILE *fp) const;
00862 Critical_Recurrence(const SWP_OP_vector& v, INT start, INT stop, INT branch, INT ii);
00863 };
00864
00865
00866 void Critical_Recurrence::Print(FILE *fp) const
00867 {
00868 const int n_col = 16;
00869 fprintf(fp, "Critical_Recurrence %dx%d:\n", size(), size());
00870 fprintf(fp, " ");
00871 for (INT j = 0; j < size(); j++) {
00872 if (j != 0 && j % n_col == 0)
00873 fprintf(fp, "\n ");
00874 fprintf(fp,"%4d", j);
00875 }
00876 fprintf(fp, "\n");
00877 for (INT i = 0; i < size(); i++) {
00878 fprintf(fp, "%3d: ", i);
00879 for (INT j = 0; j < size(); j++) {
00880 if (j != 0 && j % n_col == 0)
00881 fprintf(fp, "\n ");
00882 fprintf(fp,"%4d", mindist[i][j]);
00883 }
00884 fprintf(fp, "\n");
00885 }
00886 }
00887
00888
00889 Critical_Recurrence::Critical_Recurrence(const SWP_OP_vector& v,
00890 INT start, INT stop,
00891 INT branch, INT ii)
00892 :mindist(v.size(), std::vector<INT>(v.size(), NEG_INF))
00893 {
00894 int n_ops = v.size();
00895
00896
00897
00898 INT i;
00899 for (i = 0; i < v.size(); i++) {
00900 OP *op = v[i].op;
00901 if (op) {
00902 for ( ARC_LIST *al = OP_succs(op) ; al; al = ARC_LIST_rest(al) ) {
00903 ARC *arc = ARC_LIST_first(al);
00904
00905
00906 if (ARC_kind(arc) == CG_DEP_PREBR)
00907 continue;
00908 OP *succ = ARC_succ(arc);
00909 mindist[i][SWP_index(succ)] =
00910 MAX(mindist[i][SWP_index(succ)], ARC_latency(arc) - ARC_omega(arc) * ii);
00911 Is_True(succ == v[SWP_index(succ)].op, ("MinDIST: wrong SWP_index."));
00912 }
00913 mindist[start][i] = 0;
00914 mindist[i][stop] = 0;
00915 mindist[i][branch] = MAX(mindist[i][branch], 0);
00916 }
00917 }
00918
00919
00920
00921 for (INT k = 0; k < n_ops; k++)
00922 for (INT i = 0; i < n_ops; i++)
00923 for (INT j = 0; j < n_ops; j++)
00924 mindist[i][j] = MAX(mindist[i][j], mindist[i][k] + mindist[k][j]);
00925 }
00926
00927
00928
00929
00930
00931 bool TOP_is_associative(TOP top)
00932 {
00933 switch (top) {
00934 case TOP_add:
00935 case TOP_sub:
00936 return true;
00937
00938 case TOP_fadd:
00939 case TOP_fadd_d:
00940 case TOP_fadd_s:
00941 case TOP_fsub:
00942 case TOP_fsub_d:
00943 case TOP_fsub_s:
00944 case TOP_fmpy:
00945 case TOP_fmpy_d:
00946 case TOP_fmpy_s:
00947 return (Roundoff_Level >= ROUNDOFF_ASSOC);
00948 }
00949 return false;
00950 }
00951
00952
00953
00954
00955 TOP Get_Opposite_TOP(TOP top)
00956 {
00957 switch (top) {
00958 case TOP_add:
00959 return TOP_sub;
00960
00961 case TOP_fadd:
00962 return TOP_fsub;
00963
00964 case TOP_fadd_d:
00965 return TOP_fsub_d;
00966
00967 case TOP_fadd_s:
00968 return TOP_fsub_s;
00969
00970 case TOP_sub:
00971 return TOP_add;
00972
00973 case TOP_fsub:
00974 return TOP_fadd;
00975
00976 case TOP_fsub_d:
00977 return TOP_fadd_d;
00978
00979 case TOP_fsub_s:
00980 return TOP_fadd_s;
00981 }
00982
00983 return TOP_UNDEFINED;
00984 }
00985
00986
00987
00988
00989
00990 bool OPND_can_be_reassociated(TOP top, int opnd)
00991 {
00992 switch (top) {
00993 case TOP_add:
00994 return (opnd == 1 || opnd == 2);
00995
00996 case TOP_sub:
00997 return (opnd == 1);
00998
00999 case TOP_fadd:
01000 case TOP_fadd_d:
01001 case TOP_fadd_s:
01002 case TOP_fmpy:
01003 case TOP_fmpy_d:
01004 case TOP_fmpy_s:
01005 return (opnd == 2 || opnd == 3);
01006
01007 case TOP_fsub:
01008 case TOP_fsub_d:
01009 case TOP_fsub_s:
01010 return (opnd == 2);
01011 }
01012
01013 return false;
01014 }
01015
01016
01017
01018
01019 bool OPs_can_be_reassociated(OP *op1, OP *op2)
01020 {
01021 TOP top1 = OP_code(op1);
01022 TOP top2 = OP_code(op2);
01023
01024 if (top1 == top2 ||
01025 top2 == Get_Opposite_TOP(top1)) {
01026
01027 if (OP_opnd(op1, OP_PREDICATE_OPND) ==
01028 OP_opnd(op2, OP_PREDICATE_OPND))
01029 return true;
01030 }
01031
01032 return false;
01033 }
01034
01035
01036 bool OP_is_multiplication(OP *op)
01037 {
01038 TOP top = OP_code(op);
01039 return (top == TOP_fmpy ||
01040 top == TOP_fmpy_s ||
01041 top == TOP_fmpy_d);
01042 }
01043
01044
01045 bool OP_is_addition(OP *op)
01046 {
01047 TOP top = OP_code(op);
01048 return (top == TOP_add ||
01049 top == TOP_fadd ||
01050 top == TOP_fadd_s ||
01051 top == TOP_fadd_d);
01052 }
01053
01054
01055
01056
01057 INT Other_opnd(OP *op, INT this_opnd)
01058 {
01059 switch (OP_code(op)) {
01060 case TOP_add:
01061 case TOP_sub:
01062 if (this_opnd == 2) return 1;
01063 if (this_opnd == 1) return 2;
01064 break;
01065
01066 case TOP_fadd:
01067 case TOP_fadd_d:
01068 case TOP_fadd_s:
01069 case TOP_fsub:
01070 case TOP_fsub_d:
01071 case TOP_fsub_s:
01072 case TOP_fmpy:
01073 case TOP_fmpy_d:
01074 case TOP_fmpy_s:
01075 if (this_opnd == 2) return 3;
01076 if (this_opnd == 3) return 2;
01077 break;
01078 }
01079 Is_True(FALSE, ("Other_opnd: wrong opnd num"));
01080 return 0;
01081 }
01082
01083
01084
01085
01086 void Exchange_Opnd(OP *op1, int op1_opnd_num, OP *op2, int op2_opnd_num)
01087 {
01088 int op1_omega = OP_omega(op1, op1_opnd_num);
01089 TN *op1_tn = OP_opnd(op1, op1_opnd_num);
01090
01091 int op2_omega = OP_omega(op2, op2_opnd_num);
01092 TN *op2_tn = OP_opnd(op2, op2_opnd_num);
01093
01094 Set_OP_omega(op1, op1_opnd_num, op2_omega);
01095 Set_OP_opnd(op1, op1_opnd_num, op2_tn);
01096
01097 Set_OP_omega(op2, op2_opnd_num, op1_omega);
01098 Set_OP_opnd(op2, op2_opnd_num, op1_tn);
01099 }
01100
01101
01102
01103
01104
01105 bool OPs_Are_Dependent(OP *op1, OP *op2)
01106 {
01107 for (int i = 0; i < OP_results(op1); i++) {
01108 TN *tn = OP_result(op1,i);
01109 if (TN_is_register(tn) && !TN_is_const_reg(tn)) {
01110 if (OP_Defs_TN(op2, tn) ||
01111 OP_Refs_TN(op2, tn))
01112 return true;
01113 }
01114 }
01115 for (int i = 0; i < OP_opnds(op1); i++) {
01116 TN *tn = OP_opnd(op1,i);
01117 if (TN_is_register(tn) && !TN_is_const_reg(tn)) {
01118 if (OP_Defs_TN(op2, tn))
01119 return true;
01120 }
01121 }
01122 return false;
01123 }
01124
01125
01126
01127
01128 bool OP_Can_Sink_Before(OP *sink_op, OP *point)
01129 {
01130 for (OP *op = OP_next(sink_op); op != point; op = OP_next(op)) {
01131 if (OPs_Are_Dependent(sink_op, op))
01132 return false;
01133 }
01134 return true;
01135 }
01136
01137
01138
01139
01140 bool OPND_is_not_critical(OP *succ, int opnd, Critical_Recurrence& critical_recurrence)
01141 {
01142 ARC_LIST *al;
01143 for (al = OP_preds(succ) ; al; al = ARC_LIST_rest(al) ) {
01144 ARC *arc = ARC_LIST_first(al);
01145 OP *pred = ARC_pred(arc);
01146 INT pred_idx = SWP_index(pred);
01147 if (ARC_opnd(arc) == opnd &&
01148 critical_recurrence(pred_idx))
01149 return false;
01150 }
01151 return true;
01152 }
01153
01154
01155
01156
01157 void Reassociate(BB *body,
01158 SWP_OP_vector& v,
01159 int op_idx,
01160 int opnd,
01161 Critical_Recurrence& critical_recurrence,
01162 bool trace)
01163 {
01164 OP *op = v[op_idx].op;
01165 if (OPND_can_be_reassociated(OP_code(op), opnd)) {
01166 INT use_count = 0;
01167 ARC *critical_arc = NULL;
01168 for ( ARC_LIST *al = OP_succs(op) ; al; al = ARC_LIST_rest(al) ) {
01169 ARC *arc = ARC_LIST_first(al);
01170 OP *succ = ARC_succ(arc);
01171 INT succ_idx = SWP_index(succ);
01172 if (ARC_kind(arc) == CG_DEP_REGIN) {
01173 use_count++;
01174 if (ARC_omega(arc) == 0 &&
01175 critical_recurrence(succ_idx))
01176 critical_arc = arc;
01177 }
01178 }
01179 bool succeeded = false;
01180 INT succ_idx;
01181 INT succ_opnd;
01182 if (use_count == 1 &&
01183 critical_arc) {
01184 OP *succ = ARC_succ(critical_arc);
01185 succ_idx = SWP_index(succ);
01186 if (OPs_can_be_reassociated(op, succ) &&
01187 OP_Can_Sink_Before(op, succ)) {
01188
01189
01190
01191
01192 INT critical_opnd = ARC_opnd(critical_arc);
01193 succ_opnd = Other_opnd(succ, critical_opnd);
01194
01195 if (OPND_is_not_critical(succ, succ_opnd, critical_recurrence) &&
01196 OPND_can_be_reassociated(OP_code(succ), critical_opnd)) {
01197
01198 Exchange_Opnd(op, opnd, succ, succ_opnd);
01199 if (OP_next(op) != succ)
01200 BB_Sink_Op_Before(body, op, succ);
01201
01202
01203 if (!OP_is_multiplication(op)) {
01204
01205 if (OP_is_addition(op))
01206 if (OP_is_addition(succ)) {
01207
01208 } else {
01209
01210 OP_Change_Opcode(op, Get_Opposite_TOP(OP_code(op)));
01211 OP_Change_Opcode(succ, Get_Opposite_TOP(OP_code(succ)));
01212
01213
01214 if (OPND_can_be_reassociated(OP_code(op), opnd))
01215 Exchange_Opnd(op, opnd, op, Other_opnd(op, opnd));
01216 }
01217 else
01218 if (OP_is_addition(succ)) {
01219
01220 } else {
01221
01222 OP_Change_Opcode(op, Get_Opposite_TOP(OP_code(op)));
01223 Exchange_Opnd(succ, succ_opnd, succ, Other_opnd(succ, succ_opnd));
01224 succ_opnd = Other_opnd(succ, succ_opnd);
01225 }
01226 }
01227
01228 succeeded = true;
01229 }
01230 }
01231 }
01232 if (trace)
01233 fprintf(TFile, "<reassoc> %s reassoc OP%d opnd%d\n",
01234 succeeded ? "" : "not", op_idx, opnd);
01235
01236 if (succeeded)
01237 Reassociate(body, v, succ_idx, succ_opnd, critical_recurrence, trace);
01238
01239 BB_Update_OP_Order(body);
01240 }
01241 }
01242
01243
01244
01245
01246 void Shorten_Critical_Recurrence_By_Reassociation(CG_LOOP& cl,
01247 BOOL is_doloop,
01248 BOOL trace)
01249 {
01250 CXX_MEM_POOL local_pool("local pool", FALSE);
01251
01252 BB *body = cl.Loop_header();
01253
01254
01255 OP *op;
01256 INT count = 0;
01257 FOR_ALL_BB_OPs(body, op) {
01258 Reset_OP_loh(op);
01259 if (OP_code(op) == TOP_adds &&
01260 OP_result(op, 0) == OP_opnd(op, 2))
01261 Set_OP_loh(op);
01262 if (OP_code(op) == TOP_add &&
01263 (OP_result(op, 0) == OP_opnd(op, 1) ||
01264 OP_result(op, 0) == OP_opnd(op, 2)))
01265 Set_OP_loh(op);
01266 count++;
01267 }
01268
01269 if (count + SWP_OPS_OVERHEAD > SWP_OPS_LIMIT) return;
01270
01271 SWP_OP_vector v(body, is_doloop, local_pool());
01272
01273
01274 CG_SCHED_EST *loop_se = CG_SCHED_EST_Create(body, local_pool(),
01275 SCHED_EST_FOR_UNROLL |
01276 SCHED_EST_IGNORE_LOH_OPS |
01277 SCHED_EST_IGNORE_PREFETCH);
01278
01279 int ii = CG_SCHED_EST_Resource_Cycles(loop_se);
01280
01281
01282 CYCLIC_DEP_GRAPH manager( body, local_pool());
01283
01284
01285 Critical_Recurrence critical_recurrence(v,
01286 v.start,
01287 v.stop,
01288 v.branch,
01289 ii);
01290
01291 if (trace) {
01292 fprintf(TFile, "Shorten Critical Recurrence by Reassociation:\n");
01293 CG_DEP_Trace_Graph(body);
01294 for (int i = 0; i < v.size(); i++) {
01295 if (v[i].op) {
01296 fprintf(TFile, "%3d: ", i);
01297 Print_OP_No_SrcLine(v[i].op);
01298 }
01299 }
01300 fprintf(TFile,"II is %d\n", ii);
01301 fprintf(TFile, "<ti resource count> ");
01302 TI_RES_COUNT_Print(TFile, loop_se->res_count);
01303 fprintf(TFile, "\n");
01304 critical_recurrence.Print(TFile);
01305 }
01306
01307 for (int i = 0; i < v.size(); i++) {
01308 if (v[i].op &&
01309 TOP_is_associative(OP_code(v[i].op)) &&
01310 critical_recurrence(i)) {
01311
01312 Is_True(OP_results(v[i].op) == 1,
01313 ("associative OP must have 1 result."));
01314
01315 if (trace)
01316 fprintf(TFile, "<critical recurrence> OP%d is critical.\n", i);
01317
01318 ARC_LIST *al;
01319 for (al = OP_preds(v[i].op) ; al; al = ARC_LIST_rest(al) ) {
01320 ARC *arc = ARC_LIST_first(al);
01321 OP *pred = ARC_pred(arc);
01322 INT pred_idx = SWP_index(pred);
01323 if (ARC_kind(arc) == CG_DEP_REGIN &&
01324 ARC_omega(arc) == 1 &&
01325 critical_recurrence(pred_idx))
01326 Reassociate(body, v, i, ARC_opnd(arc), critical_recurrence, trace);
01327 }
01328 }
01329 }
01330 }
01331
01332
01333
01334 void Fix_Recurrences_Before_Unrolling(CG_LOOP& cl)
01335 {
01336 if (!CG_LOOP_fix_recurrences) return;
01337
01338 bool trace = Get_Trace(TP_CGLOOP, 0x800);
01339
01340 if (CG_LOOP_reassociate)
01341 Shorten_Critical_Recurrence_By_Reassociation(cl, TRUE, trace);
01342
01343 BB *body = cl.Loop_header();
01344 if (BB_length(body) > CG_maxinss) return;
01345
01346 CXX_MEM_POOL local_pool("fix recurrence pool", FALSE);
01347 TN_SET *tn_def = TN_SET_Create_Empty(Last_TN + 1, local_pool());
01348 TN_SET *multi_def = TN_SET_Create_Empty(Last_TN + 1, local_pool());
01349 OP *op;
01350 FOR_ALL_BB_OPs(body, op) {
01351 for (INT i = 0; i < OP_results(op); i++) {
01352 TN *res = OP_result(op,i);
01353 if (TN_is_register(res) && !TN_is_const_reg(res)) {
01354 if (TN_SET_MemberP(tn_def, res)) {
01355 multi_def = TN_SET_Union1D(multi_def, res, local_pool());
01356 } else
01357 tn_def = TN_SET_Union1D(tn_def, res, local_pool());
01358 }
01359 }
01360 }
01361
01362 BB *prolog = CG_LOOP_prolog;
01363 BB *epilog = CG_LOOP_epilog;
01364 CG_LOOP_DEF loop_def(body);
01365
01366
01367
01368 CG_SCHED_EST *loop_se = CG_SCHED_EST_Create(body, local_pool(),
01369 SCHED_EST_FOR_UNROLL |
01370 SCHED_EST_IGNORE_BRANCH |
01371 SCHED_EST_IGNORE_INT_OPS |
01372 SCHED_EST_IGNORE_PREFETCH);
01373
01374 double estimate_ResMII = CG_SCHED_EST_Resources_Min_Cycles(loop_se);
01375
01376
01377
01378 if (estimate_ResMII < 0.1) {
01379 loop_se = CG_SCHED_EST_Create(body, local_pool(),
01380 SCHED_EST_FOR_UNROLL |
01381 SCHED_EST_IGNORE_BRANCH |
01382 SCHED_EST_IGNORE_PREFETCH);
01383 estimate_ResMII = CG_SCHED_EST_Resources_Min_Cycles(loop_se);
01384 }
01385
01386 std::vector<RECUR_OP_DESC> delay_processing;
01387
01388 FOR_ALL_BB_OPs(body, op) {
01389
01390 if (! OP_has_predicate(op) ||
01391 ! TN_is_true_pred(OP_opnd(op, OP_PREDICATE_OPND)))
01392 {
01393 continue;
01394 }
01395
01396 RECUR_OP_DESC op_desc(body, epilog, op, loop_def, multi_def, estimate_ResMII, trace);
01397
01398 INT new_omega = op_desc.New_omega();
01399
01400 switch (op_desc.Action()) {
01401
01402 case RECUR_BACK_SUB_INVARIANT:
01403 Apply_Back_Sub_Invariant(op, new_omega, prolog, op_desc);
01404
01405 if (trace)
01406 fprintf(TFile, "<back_sub_invar> TN%d, %s\n",
01407 TN_number(OP_result(op, op_desc.Res_num())), TOP_Name(OP_code(op)));
01408
01409 break;
01410
01411 case RECUR_INTERLEAVE:
01412
01413 Apply_Interleave(op, new_omega, prolog, epilog, op_desc);
01414
01415 if (trace)
01416 fprintf(TFile, "<interleave> TN%d, %s\n",
01417 TN_number(OP_result(op, op_desc.Res_num())), TOP_Name(OP_code(op)));
01418
01419 break;
01420
01421 case RECUR_BACK_SUB_VARIANT:
01422
01423 delay_processing.push_back(op_desc);
01424 if (trace)
01425 fprintf(TFile, "<back_sub_variant> TN%d, %s\n",
01426 TN_number(OP_result(op, op_desc.Res_num())), TOP_Name(OP_code(op)));
01427
01428 break;
01429 }
01430 }
01431
01432 bool changed = true;
01433 while (changed) {
01434 changed = false;
01435 for (int i = 0; i < delay_processing.size(); i++) {
01436 RECUR_OP_DESC& op_desc = delay_processing[i];
01437 changed |= Apply_Back_Sub_Variant(op_desc, prolog, body, loop_se, trace);
01438 }
01439 }
01440 }
01441
01442
01443
01444 void Fix_Recurrences_After_Unrolling(CG_LOOP& cl)
01445 {
01446 if (!CG_LOOP_fix_recurrences)
01447 return;
01448
01449 if (!CG_LOOP_interleave_posti)
01450 return;
01451
01452 LOOP_DESCR *loop = cl.Loop();
01453 BB *body = cl.Loop_header();
01454 BB *prolog = CG_LOOP_prolog;
01455 BB *epilog = CG_LOOP_epilog;
01456 op_vec_type op_vec;
01457
01458 if (BB_length(body) > CG_maxinss) return;
01459
01460 bool trace = Get_Trace(TP_CGLOOP, 0x800);
01461
01462
01463 OP *op;
01464 FOR_ALL_BB_OPs(body, op) {
01465 for (INT32 i = 0; i < OP_results(op); i++) {
01466 TN *tn = OP_result(op, i);
01467 op_vec.push_back(tn_def_op(tn, op));
01468 }
01469 }
01470
01471
01472 stable_sort(op_vec.begin(), op_vec.end());
01473
01474 if (trace) {
01475 for (int i = 0; i < op_vec.size(); i++) {
01476 TN *tn = op_vec[i].first;
01477 OP *op = op_vec[i].second;
01478 fprintf(TFile, "<recur>: TN%d, %s ", TN_number(tn), TOP_Name(OP_code(op)));
01479 Print_OP_No_SrcLine(op);
01480 }
01481 }
01482
01483 op_vec_type::iterator p = op_vec.begin();
01484
01485 while (p != op_vec.end()) {
01486 TN *cur_tn = (*p).first;
01487 op_vec_type::iterator q = p;
01488 while (q != op_vec.end() && cur_tn == (*q).first) q++;
01489
01490
01491
01492 Interleave_Base_Update(p, q, prolog, body, epilog, trace);
01493
01494 p = q;
01495 }
01496 }
01497
01498
01499