00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123 #define __STDC_LIMIT_MACROS
00124 #include <stdint.h>
00125 #include <math.h>
00126 #include <stdarg.h>
00127 #include <set>
00128 #include <vector>
00129 #include <list>
00130 #include <utility>
00131
00132 #include "defs.h"
00133 #include "config.h"
00134 #include "errors.h"
00135 #include "mempool.h"
00136 #include "cg_flags.h"
00137 #include "cgir.h"
00138 #include "tracing.h"
00139 #include "config_asm.h"
00140 #include "tn_map.h"
00141 #include "cio_rwtran.h"
00142 #include "cgprep.h"
00143 #include "op.h"
00144 #include "op_list.h"
00145 #include "bb.h"
00146 #include "cgtarget.h"
00147 #include "cg_swp_target.h"
00148 #include "wn.h"
00149 #include "wn_core.h"
00150 #include "wn_util.h"
00151 #include "whirl2ops.h"
00152
00153 #include "dep_graph.h"
00154 #include "cg.h"
00155 #include "register.h"
00156 #include "tn_set.h"
00157 #include "gra.h"
00158 #include "gra_live.h"
00159 #include "bb_set.h"
00160 #include "gtn_universe.h"
00161 #include "gtn_set.h"
00162 #include "pf_cg.h"
00163 #include "note.h"
00164 #include "cgexp.h"
00165 #include "cio.h"
00166 #include "cio_rwtran.h"
00167 #include "cg_cflow.h"
00168 #include "resource.h"
00169 #include "cg_db_op.h"
00170 #include "dominate.h"
00171 #include "ti_res_count.h"
00172 #include "ti_latency.h"
00173 #include "cg_loop_scc.h"
00174 #include "cg_loop_recur.h"
00175 #include "cg_loop_mii.h"
00176 #include "freq.h"
00177 #include "cg_region.h"
00178 #include "cg_sched_est.h"
00179 #include "cg_loop.h"
00180 #include "cg_swp.h"
00181 #include "cg_swp_options.h"
00182 #include "findloops.h"
00183 #include "dominate.h"
00184 #include "ebo.h"
00185 #include "hb.h"
00186 #include "gra_live.h"
00187
00188 #if defined(TARG_SL)
00189 #include "tag.h"
00190 #include "label_util.h"
00191 #include "profile_util.h"
00192 #endif
00193
00194 #if defined(TARG_IA64) || defined(TARG_SL) || defined(TARG_MIPS)
00195 #include "region.h"
00196 #include "region_bb_util.h"
00197 #endif
00198 #if defined(TARG_IA64)
00199 #include "ipfec_options.h"
00200 #include "if_conv.h"
00201 #include "region_bb_util.h"
00202
00203 #include "targ_issue_port.h"
00204 #endif
00205
00206
00207 #define FB_TOL 0.05
00208
00209 CG_LOOP *Current_CG_LOOP;
00210
00211 BB *CG_LOOP_prolog;
00212 BB *CG_LOOP_epilog;
00213 BB *CG_LOOP_prolog_start;
00214 BB *CG_LOOP_epilog_end;
00215
00216 OP_MAP _CG_LOOP_info_map;
00217
00218 BOOL CG_LOOP_unroll_fully = TRUE;
00219 #if defined(TARG_SL)
00220 BOOL CG_LOOP_unroll_remainder_fully = FALSE;
00221 #else
00222 BOOL CG_LOOP_unroll_remainder_fully = TRUE;
00223 #endif
00224 UINT32 CG_LOOP_unroll_min_trip = 5;
00225
00226 #ifdef MIPS_UNROLL
00227 BOOL CG_LOOP_unroll_analysis = TRUE;
00228 #else
00229 BOOL CG_LOOP_unroll_analysis = FALSE;
00230 #endif
00231 BOOL CG_LOOP_multi_bb_unroll_analysis = TRUE;
00232 UINT32 CG_LOOP_unroll_analysis_threshold = 10;
00233 BOOL CG_LOOP_ooo_unroll_heuristics = FALSE;
00234 BOOL CG_LOOP_ooo_unroll_heuristics_set = FALSE;
00235 UINT32 CG_LOOP_reorder_buffer_size = 16;
00236 UINT32 CG_LOOP_cache_miss_threshold = 33;
00237 #ifdef KEY
00238 INT32 CG_Enable_Loop_Opt_Limit=-1;
00239 #endif
00240
00241 BOOL CG_LOOP_unroll_multi_bb = TRUE;
00242 BOOL CG_LOOP_unroll_non_trip_countable = TRUE;
00243 BOOL CG_LOOP_unroll_multi_make_remainder_loop = TRUE;
00244 BOOL CG_LOOP_create_loop_prologs;
00245 BOOL CG_LOOP_create_loop_epilogs = TRUE;
00246 INT32 CG_LOOP_force_ifc = 1;
00247
00248 BOOL CG_LOOP_optimize_non_trip_countable = TRUE;
00249 BOOL CG_LOOP_optimize_non_innermost = TRUE;
00250 BOOL CG_LOOP_optimize_multi_targ = FALSE;
00251 BOOL CG_LOOP_optimize_lno_winddown_cache = TRUE;
00252 BOOL CG_LOOP_optimize_lno_winddown_reg = TRUE;
00253
00254
00255
00256
00257 UINT32 CG_LOOP_unroll_times_max;
00258 UINT32 CG_LOOP_unrolled_size_max;
00259
00260 static BOOL unroll_names_valid = FALSE;
00261
00262 static CG_LOOP_BACKPATCH *prolog_backpatches, *epilog_backpatches;
00263
00264
00265 #if MAX_OMEGA > 255
00266 error OP_omega/Set_OP_omega assume MAX_OMEGA <= 255
00267 #endif
00268
00269 _CG_LOOP_INFO *free_loop_info;
00270 #define next_free_loop_info(info) (*(_CG_LOOP_INFO **)(info))
00271
00272
00273 void CG_LOOP_Init(void)
00274
00275
00276
00277
00278 {
00279 free_loop_info = NULL;
00280
00281 if (CG_opt_level < 2) return;
00282 unroll_names_valid = FALSE;
00283 #ifdef TARG_SL
00284 if (CG_sl2)
00285 CG_zdl_enabled_level = 4;
00286 else
00287 CG_zdl_enabled_level = 2;
00288 #endif
00289 }
00290
00291 void CG_LOOP_Init_Op(OP *op)
00292
00293
00294
00295
00296 {
00297 _CG_LOOP_INFO *info = free_loop_info;
00298 INT sizeof_info = _CG_LOOP_info_sizeof(op);
00299 if (info && OP_opnds(op) <= OP_MAX_FIXED_OPNDS) {
00300
00301
00302
00303
00304
00305
00306
00307 free_loop_info = next_free_loop_info(info);
00308 } else {
00309 info = (_CG_LOOP_INFO *)MEM_POOL_Alloc(&MEM_phase_nz_pool, sizeof_info);
00310 }
00311 bzero(info, sizeof_info);
00312 OP_MAP_Set(_CG_LOOP_info_map, op, info);
00313 }
00314
00315
00316 void CG_LOOP_Init_OPS(OPS *ops)
00317 {
00318 for (OP *op = OPS_first(ops); op != NULL; op = OP_next(op))
00319 CG_LOOP_Init_Op(op);
00320 }
00321
00322
00323 INT Branch_Target_Operand(OP *br_op)
00324
00325
00326
00327
00328 {
00329 Is_True(br_op, ("br_op is NULL."));
00330 INT opnd = OP_find_opnd_use(br_op, OU_target);
00331 Is_True(opnd >= 0,
00332 ("couldn't find branch target for %s", TOP_Name(OP_code(br_op))));
00333 return opnd;
00334 }
00335
00336
00337 static BOOL Negate_Branch(OP *br)
00338
00339
00340
00341
00342 {
00343 TOP top = OP_code(br);
00344 TOP new_top = CGTARG_Invert(top);
00345
00346 if (new_top != TOP_UNDEFINED) {
00347 OP_Change_Opcode(br, new_top);
00348 return TRUE;
00349 }
00350
00351 if (OP_has_predicate(br)) {
00352 OP *cmp;
00353 TN *tn1, *tn2;
00354 BB *br_bb = OP_bb(br);
00355 CGTARG_Analyze_Compare(br, &tn1, &tn2, &cmp);
00356 if (cmp != NULL && OP_results(cmp) == 2) {
00357 BB *cmp_bb = OP_bb(cmp);
00358 TN *r0 = OP_result(cmp,0);
00359 TN *r1 = OP_result(cmp,1);
00360 TN *pred = OP_opnd(br,OP_PREDICATE_OPND);
00361 TN *neg_tn = r0 == pred ? r1 : r0;
00362
00363 if (neg_tn == True_TN) {
00364 DevWarn("negative cmp result is a sink");
00365 return FALSE;
00366 }
00367
00368 Set_OP_opnd(br, OP_PREDICATE_OPND, neg_tn);
00369
00370 if (br_bb != cmp_bb && !CG_localize_tns) {
00371 GRA_LIVE_Compute_Local_Info(br_bb);
00372 GRA_LIVE_Region_Start();
00373 GRA_LIVE_Region_Entry(cmp_bb);
00374 GRA_LIVE_Region_Exit(br_bb);
00375 GRA_LIVE_Region_Compute_Global_Live_Info();
00376 }
00377
00378 return TRUE;
00379 }
00380 }
00381
00382 return FALSE;
00383 }
00384
00385
00386 static void loop_info_detach(OP *op)
00387
00388
00389
00390
00391 {
00392 _CG_LOOP_INFO *info = (_CG_LOOP_INFO *)OP_MAP_Get(_CG_LOOP_info_map, op);
00393 Is_True(info, ("no loop info to delete"));
00394 next_free_loop_info(info) = free_loop_info;
00395 free_loop_info = info;
00396 }
00397
00398 static float sum_succ_probs(BB *bb)
00399
00400
00401
00402
00403
00404 {
00405 BBLIST *succs;
00406 float sum = 0.0;
00407
00408 FOR_ALL_BB_SUCCS(bb, succs) {
00409 if (CG_warn_bad_freqs && !BB_freq_fb_based(bb)
00410 && BBLIST_prob(succs) == 0.0)
00411 DevWarn("succ edge from BB:%d to BB:%d has probability 0",
00412 BB_id(bb), BB_id(BBLIST_item(succs)));
00413 sum += BBLIST_prob(succs);
00414 }
00415 return sum;
00416 }
00417
00418 static void retarget_loop_exits(LOOP_DESCR *loop, BB *from, BB *to)
00419
00420
00421
00422
00423
00424
00425
00426 {
00427 BBLIST *preds = BB_preds(from);
00428 while (preds) {
00429 BB *pred = BBLIST_item(preds);
00430 preds = BBLIST_next(preds);
00431 if (BB_SET_MemberP(LOOP_DESCR_bbset(loop), pred))
00432 if (!BB_Retarget_Branch(pred, from, to))
00433
00434 Change_Succ(pred, from, to);
00435 }
00436 }
00437
00438 static void insert_fall_thru(BB *pred, BB *new_ftbb)
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448 {
00449 BOOL freqs = FREQ_Frequencies_Computed();
00450 #ifdef TARG_IA64
00451 Remove_Explicit_Branch(pred);
00452 #endif
00453 BB *old_ftbb = BB_Fall_Thru_Successor(pred);
00454 float other_succ_probs = 0.0;
00455 OP *new_br_op = BB_branch_op(new_ftbb);
00456
00457 Is_True(old_ftbb, ("<pred> has no fall-through successor"));
00458 Is_True(new_br_op == NULL || OP_cond(new_br_op),
00459 ("<new_ftbb> BB:%d doesn't fall-through to anything",
00460 BB_id(new_ftbb)));
00461 Is_True(BB_next(new_ftbb) == NULL,
00462 ("<new_ftbb> BB:%d has a BB_next", BB_id(new_ftbb)));
00463 Is_True(BB_prev(new_ftbb) == NULL,
00464 ("<new_ftbb> BB:%d has a BB_prev", BB_id(new_ftbb)));
00465 if (freqs) {
00466 other_succ_probs = sum_succ_probs(new_ftbb);
00467 if (CG_warn_bad_freqs && 1.0 - other_succ_probs <= 0.0)
00468 DevWarn("existing succ edge probabilities from BB:%d total %s1.0",
00469 BB_id(new_ftbb), other_succ_probs > 1.0 ? ">" : "");
00470 } else if (BB_freq_fb_based(old_ftbb)) {
00471
00472 UINT32 len = BBlist_Len(BB_succs(new_ftbb)) + 1;
00473 other_succ_probs = (len - 1.0) / len;
00474 }
00475 Chain_BBs(pred, new_ftbb);
00476 Change_Succ(pred, old_ftbb, new_ftbb);
00477 Chain_BBs(new_ftbb, old_ftbb);
00478 Link_Pred_Succ_with_Prob(new_ftbb, old_ftbb, 1.0-other_succ_probs);
00479 if (freqs || BB_freq_fb_based(old_ftbb))
00480 BB_freq(old_ftbb) += BB_freq(new_ftbb) * (1.0 - other_succ_probs);
00481 }
00482
00483
00484 inline void append_to_prolog(BB *bb)
00485
00486
00487
00488
00489
00490
00491 {
00492 GRA_LIVE_Compute_Local_Info(bb);
00493 insert_fall_thru(CG_LOOP_prolog, bb);
00494 CG_LOOP_prolog = bb;
00495 }
00496
00497
00498
00499 inline void extend_prolog(void)
00500
00501
00502
00503
00504
00505
00506 {
00507 BB *new_bb = Gen_BB_Like(CG_LOOP_prolog);
00508 if (BB_freq_fb_based(CG_LOOP_prolog)) Set_BB_freq_fb_based(new_bb);
00509 append_to_prolog(new_bb);
00510 }
00511
00512
00513
00514 static void prepend_to_epilog(LOOP_DESCR *loop, BB *bb)
00515
00516
00517
00518
00519
00520
00521
00522 {
00523 BB *ftp = BB_Fall_Thru_Predecessor(CG_LOOP_epilog);
00524 GRA_LIVE_Compute_Local_Info(bb);
00525 if (ftp) {
00526 insert_fall_thru(ftp, bb);
00527 } else {
00528 Chain_BBs(BB_prev(CG_LOOP_epilog), bb);
00529 Chain_BBs(bb, CG_LOOP_epilog);
00530 }
00531 retarget_loop_exits(loop, CG_LOOP_epilog, bb);
00532 CG_LOOP_epilog = bb;
00533 }
00534
00535
00536
00537 inline void extend_epilog(LOOP_DESCR *loop)
00538
00539
00540
00541
00542
00543
00544
00545 {
00546 BB *new_bb = Gen_BB_Like(CG_LOOP_epilog);
00547 if (BB_freq_fb_based(CG_LOOP_epilog)) Set_BB_freq_fb_based(new_bb);
00548 prepend_to_epilog(loop, new_bb);
00549 }
00550
00551
00552
00553 void CG_LOOP_Trace_Prolog(void)
00554
00555
00556
00557
00558 {
00559 BB *bb;
00560 fprintf(TFile, "<prolog> loop prolog:\n");
00561 for (bb = CG_LOOP_prolog_start;
00562 bb && BB_prev(bb) != CG_LOOP_prolog;
00563 bb = BB_next(bb))
00564 Print_BB(bb);
00565 }
00566
00567
00568
00569 void CG_LOOP_Trace_Epilog(void)
00570
00571
00572
00573
00574 {
00575 BB *bb;
00576 fprintf(TFile, "<epilog> loop epilog:\n");
00577 for (bb = CG_LOOP_epilog;
00578 bb && BB_prev(bb) != CG_LOOP_epilog_end;
00579 bb = BB_next(bb))
00580 Print_BB(bb);
00581 }
00582
00583
00584 static BB_SET *
00585 mark_prolog_epilog_bbs(
00586 BB *self,
00587 BB *limit,
00588 BB_SET *visited
00589 )
00590 {
00591 BB *pred;
00592 BBLIST *lst;
00593
00594 visited = BB_SET_Union1(visited,self,&MEM_local_pool);
00595
00596 if ( self != limit ) {
00597 FOR_ALL_BB_PREDS (self, lst) {
00598 pred = BBLIST_item(lst);
00599 if ( ! BB_SET_MemberP(visited,pred) ) {
00600 visited = mark_prolog_epilog_bbs(pred,limit,
00601 visited);
00602 }
00603 }
00604 }
00605
00606 return visited;
00607 }
00608
00609
00610
00611
00612
00613
00614
00615
00616 static void
00617 remove_prolog_or_epilog_bbs(BB *head, BB *tail, BB *keep, BB_SET *inp_visited)
00618 {
00619 BB *bb;
00620 BB *bb_next;
00621 BB_SET *visited;
00622 BB *pred;
00623 BB *succ;
00624 BBLIST *lst;
00625 BBLIST *lst_next;
00626 RID *bbrid;
00627
00628 if ( inp_visited == NULL ) {
00629 MEM_POOL_Push(&MEM_local_pool);
00630 visited = BB_SET_Create_Empty(PU_BB_Count+2,&MEM_local_pool);
00631 visited = mark_prolog_epilog_bbs(tail,head,visited);
00632 } else {
00633 visited = inp_visited;
00634 }
00635
00636 for (bb = REGION_First_BB; bb; bb = bb_next) {
00637 bb_next = BB_next(bb);
00638
00639 if ( ( bbrid = BB_rid( bb ) )
00640 && ( RID_level( bbrid ) >= RL_CGSCHED ) )
00641
00642
00643
00644 continue;
00645
00646 if ( bb != keep ) {
00647 if ( BB_SET_MemberP(visited,bb) ) {
00648 for ( lst = BB_succs(bb); lst != NULL; lst = lst_next ) {
00649 lst_next = BBLIST_next(lst);
00650 if (succ = BBLIST_item(lst))
00651 Unlink_Pred_Succ(bb, succ);
00652 }
00653
00654 for ( lst = BB_preds(bb); lst != NULL; lst = lst_next ) {
00655 lst_next = BBLIST_next(lst);
00656 if (pred = BBLIST_item(lst))
00657 Unlink_Pred_Succ(pred, bb);
00658 }
00659 BB_Remove_All(bb);
00660 Remove_BB(bb);
00661 }
00662 }
00663 }
00664
00665 if ( inp_visited == NULL )
00666 MEM_POOL_Pop(&MEM_local_pool);
00667
00668 }
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681 void CG_LOOP_Remove_Prolog_OPs(BB *head)
00682 {
00683 BB *succ;
00684 BBLIST *lst;
00685 BBLIST *lst_next;
00686 float prob;
00687
00688
00689
00690
00691 lst = BB_Find_Succ(CG_LOOP_prolog, head);
00692 if (lst) {
00693 prob = BBLIST_prob(lst);
00694 } else {
00695 Is_True(FALSE, ("couldn't find BB:%d => BB:%d edge",
00696 BB_id(CG_LOOP_prolog), BB_id(head)));
00697 prob = 0;
00698 }
00699
00700 remove_prolog_or_epilog_bbs(CG_LOOP_prolog_start,
00701 CG_LOOP_prolog,
00702 CG_LOOP_prolog_start,
00703 NULL);
00704
00705 for ( lst = BB_succs(CG_LOOP_prolog_start); lst != NULL; lst = lst_next ) {
00706 lst_next = BBLIST_next(lst);
00707 if (succ = BBLIST_item(lst))
00708 Unlink_Pred_Succ(CG_LOOP_prolog_start, succ);
00709 }
00710
00711 Unlink_Pred_Succ(CG_LOOP_prolog, head);
00712 Link_Pred_Succ_with_Prob(CG_LOOP_prolog_start, head, prob);
00713 BB_next(CG_LOOP_prolog_start) = head;
00714 BB_prev(head) = CG_LOOP_prolog_start;
00715
00716 CG_LOOP_prolog = CG_LOOP_prolog_start;
00717
00718 }
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730 void CG_LOOP_Remove_Epilog_OPs(BB *tail)
00731 {
00732 BB *pred;
00733 BBLIST *lst;
00734 BBLIST *lst_next;
00735 float prob;
00736
00737 BB_Remove_All(CG_LOOP_epilog_end);
00738
00739
00740
00741
00742 lst = BB_Find_Succ(tail, CG_LOOP_epilog);
00743 if (lst) {
00744 prob = BBLIST_prob(lst);
00745 } else {
00746 Is_True(FALSE, ("couldn't find BB:%d => BB:%d edge",
00747 BB_id(tail), BB_id(CG_LOOP_epilog)));
00748 prob = 0;
00749 }
00750
00751 remove_prolog_or_epilog_bbs(CG_LOOP_epilog,
00752 CG_LOOP_epilog_end,
00753 CG_LOOP_epilog_end,
00754 NULL);
00755
00756 for ( lst = BB_preds(CG_LOOP_epilog_end); lst != NULL; lst = lst_next ) {
00757 lst_next = BBLIST_next(lst);
00758 if (pred = BBLIST_item(lst))
00759 Unlink_Pred_Succ(pred, CG_LOOP_epilog_end);
00760 }
00761
00762 Unlink_Pred_Succ(tail, CG_LOOP_epilog);
00763 Link_Pred_Succ_with_Prob(tail, CG_LOOP_epilog_end, prob);
00764 BB_next(tail) = CG_LOOP_epilog_end;
00765 BB_prev(CG_LOOP_epilog_end) = tail;
00766 CG_LOOP_epilog = CG_LOOP_epilog_end;
00767
00768 }
00769
00770
00771 void CG_LOOP_Trace_Loop(LOOP_DESCR *loop, const char *fmt, ...)
00772
00773
00774
00775
00776 {
00777 BB *bb;
00778 va_list args;
00779
00780 fputs(DBar, TFile);
00781 va_start(args, fmt);
00782 vfprintf(TFile, fmt, args);
00783 va_end(args);
00784 fputs("\n", TFile);
00785
00786 CG_LOOP_Trace_Prolog();
00787 fputs(SBar, TFile);
00788 CG_LOOP_Backpatch_Trace(CG_LOOP_prolog, NULL);
00789 fputs("Loop body:\n", TFile);
00790
00791 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb) Print_BB(bb);
00792
00793 fputs(SBar, TFile);
00794 if (CG_LOOP_epilog == NULL) {
00795 fputs("<epilog> no loop epilog\n", TFile);
00796 } else {
00797 CG_LOOP_Backpatch_Trace(CG_LOOP_epilog, NULL);
00798 fputs(SBar, TFile);
00799 CG_LOOP_Trace_Epilog();
00800 }
00801
00802 fputs(DBar, TFile);
00803 }
00804
00805
00806 static BOOL any_bbs_in(BBLIST *list, BB_SET *bbs)
00807
00808
00809
00810
00811 {
00812 BBLIST *item;
00813 FOR_ALL_BBLIST_ITEMS(list, item)
00814 if (BB_SET_MemberP(bbs, BBLIST_item(item))) return TRUE;
00815 return FALSE;
00816 }
00817
00818
00819 static BOOL all_bbs_in(BBLIST *list, BB_SET *bbs)
00820
00821
00822
00823
00824 {
00825 BBLIST *item;
00826 FOR_ALL_BBLIST_ITEMS(list, item)
00827 if (!BB_SET_MemberP(bbs, BBLIST_item(item))) return FALSE;
00828 return TRUE;
00829 }
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839 void CG_LOOP::Attach_Prolog_And_Epilog(LOOP_DESCR *loop)
00840 {
00841 BB *loop_head = LOOP_DESCR_loophead(loop);
00842 BB *loop_tail = LOOP_DESCR_Find_Unique_Tail(loop), *bb;
00843 const char *trace_pfx = Get_Trace(TP_CGLOOP, 1) ? "<cgprep> " : NULL;
00844 BBLIST *preds;
00845 BOOL freqs = FREQ_Frequencies_Computed();
00846
00847
00848
00849
00850
00851
00852
00853
00854 if (loop_tail == NULL)
00855 return;
00856
00857
00858 if (CG_LOOP_Trip_Count(loop) != NULL) {
00859 preds = BB_preds(loop_head);
00860 while (preds && BB_SET_MemberP(LOOP_DESCR_bbset(loop), BBLIST_item(preds)))
00861 preds = BBLIST_next(preds);
00862 if (preds)
00863 trip_count_bb = BBLIST_item(preds);
00864 Is_True(trip_count_bb,
00865 ("unable to identify trip count bb."));
00866 }
00867
00868
00869 prolog_backpatches = epilog_backpatches = NULL;
00870
00871 CG_LOOP_prolog = CG_LOOP_prolog_start = NULL;
00872
00873 Set_has_prolog();
00874 CG_LOOP_prolog = CG_LOOP_Gen_And_Prepend_To_Prolog(loop_head, loop);
00875 CG_LOOP_prolog_start = CG_LOOP_prolog;
00876 GRA_LIVE_Compute_Liveness_For_BB(CG_LOOP_prolog);
00877 if (trace_pfx)
00878 fprintf(TFile, "%screating loop prolog BB:%d\n", trace_pfx,
00879 BB_id(CG_LOOP_prolog));
00880
00881
00882
00883
00884
00885
00886
00887 CG_LOOP_epilog = CG_LOOP_epilog_end = NULL;
00888 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb) {
00889 BBLIST *succs;
00890 FOR_ALL_BB_SUCCS(bb, succs) {
00891 BB *succ = BBLIST_item(succs);
00892 if (!BB_SET_MemberP(LOOP_DESCR_bbset(loop), succ)) {
00893 if (CG_LOOP_epilog && succ != CG_LOOP_epilog) {
00894 CG_LOOP_epilog = NULL;
00895 if (trace_pfx)
00896 fprintf(TFile, "%scan't find or create suitable loop epilog; "
00897 "disallows some optimization\n", trace_pfx);
00898 return;
00899 }
00900 CG_LOOP_epilog = succ;
00901 }
00902 }
00903 }
00904 Set_has_epilog();
00905 if (CG_LOOP_epilog == NULL) {
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915 BB *last_bb = REGION_First_BB;
00916 while (BB_next(last_bb)) last_bb = BB_next(last_bb);
00917 CG_LOOP_epilog = Gen_And_Insert_BB_After(last_bb);
00918 if (trace_pfx)
00919 fprintf(TFile, "%screating loop epilog BB:%d\n", trace_pfx,
00920 BB_id(CG_LOOP_epilog));
00921 BB_rid(CG_LOOP_epilog) = BB_rid(loop_head);
00922 if (!CG_localize_tns) Set_BB_gra_spill(CG_LOOP_epilog);
00923 if (BB_freq_fb_based(CG_LOOP_prolog)) Set_BB_freq_fb_based(CG_LOOP_epilog);
00924 GRA_LIVE_Compute_Liveness_For_BB(CG_LOOP_epilog);
00925 } else {
00926 BB *next = CG_LOOP_epilog;
00927 BB *ftp = BB_Fall_Thru_Predecessor(next);
00928 BBKIND ftp_kind = ftp ? BB_kind(ftp) : BBKIND_UNKNOWN;
00929 LOOP_DESCR *enclosing = LOOP_DESCR_Next_Enclosing_Loop(loop);
00930 CG_LOOP_epilog = Gen_And_Insert_BB_Before(next);
00931 if (trace_pfx)
00932 fprintf(TFile, "%screating loop epilog BB:%d\n", trace_pfx,
00933 BB_id(CG_LOOP_epilog));
00934 BB_rid(CG_LOOP_epilog) = BB_rid(loop_head);
00935 if (!CG_localize_tns) Set_BB_gra_spill(CG_LOOP_epilog);
00936 if (BB_freq_fb_based(CG_LOOP_prolog)) Set_BB_freq_fb_based(CG_LOOP_epilog);
00937 if (ftp && !BB_SET_MemberP(LOOP_DESCR_bbset(loop), ftp)) {
00938
00939
00940
00941
00942
00943
00944 if (ftp_kind == BBKIND_GOTO) {
00945 BBLIST *bbl = BB_Find_Succ(ftp, next);
00946 Add_Goto(ftp, next);
00947 BBLIST_prob(bbl) = 1.0F;
00948 } else {
00949 BB *new_bb = Gen_And_Insert_BB_After(ftp);
00950 BB_rid(new_bb) = BB_rid(loop_head);
00951 if (BB_freq_fb_based(CG_LOOP_epilog)) Set_BB_freq_fb_based(new_bb);
00952 Change_Succ(ftp, next, new_bb);
00953 Add_Goto(new_bb, next);
00954 GRA_LIVE_Compute_Liveness_For_BB(new_bb);
00955 }
00956 }
00957 retarget_loop_exits(loop, next, CG_LOOP_epilog);
00958 Link_Pred_Succ_with_Prob(CG_LOOP_epilog, next, 1.0);
00959 if (freqs || BB_freq_fb_based(next))
00960 BB_freq(next) += BB_freq(CG_LOOP_epilog);
00961 GRA_LIVE_Compute_Liveness_For_BB(CG_LOOP_epilog);
00962
00963
00964
00965
00966 if (enclosing &&
00967 all_bbs_in(BB_preds(CG_LOOP_epilog), LOOP_DESCR_bbset(enclosing)))
00968 LOOP_DESCR_Add_BB(enclosing, CG_LOOP_epilog);
00969 }
00970 CG_LOOP_epilog_end = CG_LOOP_epilog;
00971
00972 if (CG_warn_bad_freqs && CG_LOOP_epilog &&
00973 (freqs || BB_freq_fb_based(CG_LOOP_prolog)) &&
00974 !FREQ_Match(BB_freq(CG_LOOP_prolog), BB_freq(CG_LOOP_epilog)))
00975 DevWarn("CG_LOOP prolog (BB:%d) and epilog (BB:%d) frequencies disagree",
00976 BB_id(CG_LOOP_prolog), BB_id(CG_LOOP_epilog));
00977 }
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988 OP *CG_LOOP_DEF::Get(TN *tn)
00989 {
00990 if (TN_is_register(tn) && !TN_is_const_reg(tn))
00991 return (OP*) TN_MAP_Get(tn_map, tn);
00992 else
00993 return NULL;
00994 }
00995
00996
00997
00998
00999
01000
01001
01002
01003 BOOL CG_LOOP_DEF::Is_invariant(TN *tn)
01004 {
01005 return Get(tn) == NULL;
01006 }
01007
01008
01009
01010
01011
01012
01013
01014
01015 CG_LOOP_DEF::CG_LOOP_DEF(BB *body)
01016 {
01017 tn_map = TN_MAP_Create();
01018 OP *op;
01019 #ifdef TARG_X8664
01020 static TN* rflags = Rflags_TN();
01021 #endif
01022
01023 FOR_ALL_BB_OPs(body, op) {
01024
01025 for (INT i = 0; i < OP_results(op); i++) {
01026 TN *res = OP_result(op,i);
01027 if (TN_is_register(res) &&
01028 !TN_is_const_reg(res) &&
01029 !Get(res))
01030 TN_MAP_Set(tn_map, res, op);
01031 #ifdef TARG_X8664
01032 if( TOP_is_change_rflags( OP_code(op) ) &&
01033 !Get( rflags ) ){
01034 TN_MAP_Set( tn_map, rflags, op );
01035 }
01036 #endif
01037 }
01038 }
01039 }
01040
01041
01042
01043
01044
01045
01046
01047
01048 CG_LOOP_DEF::~CG_LOOP_DEF()
01049 {
01050 TN_MAP_Delete(tn_map);
01051 }
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065 void CG_LOOP::Build_CG_LOOP_Info()
01066 {
01067 Is_True(Single_BB(), ("LOOP has multiple BB."));
01068
01069
01070 if (_CG_LOOP_info_map) {
01071 OP_MAP_Delete(_CG_LOOP_info_map);
01072 _CG_LOOP_info_map = NULL;
01073 }
01074
01075 Recompute_Liveness();
01076
01077 _CG_LOOP_info_map = OP_MAP_Create();
01078 op_map = _CG_LOOP_info_map;
01079
01080 BB *body = Loop_header();
01081 BB *prolog = Prolog_end();
01082 BB *epilog = Epilog_start();
01083
01084 CG_LOOP_DEF tn_def(body);
01085
01086
01087
01088
01089
01090 OP *op;
01091 FOR_ALL_BB_OPs(body, op) {
01092 CG_LOOP_Init_Op(op);
01093 for (INT i = 0; i < OP_results(op); i++) {
01094 TN *res = OP_result(op,i);
01095 if (TN_is_register(res) &&
01096 !TN_is_const_reg(res)) {
01097 if (GTN_SET_MemberP(BB_live_in(epilog), res)) {
01098 Is_True(TN_is_global_reg(res), ("TN in GTN_SET not a global_reg."));
01099
01100 if (!TN_is_dedicated(res))
01101 CG_LOOP_Backpatch_Add(epilog, res, res, 0);
01102
01103 if (GTN_SET_MemberP(BB_live_in(body), res))
01104 if (!TN_is_dedicated(res))
01105 CG_LOOP_Backpatch_Add(prolog, res, res, 1);
01106 }
01107 }
01108 }
01109 for (INT opnd = 0; opnd < OP_opnds(op); opnd++) {
01110 TN *tn = OP_opnd(op,opnd);
01111 if (TN_is_register(tn) &&
01112 !TN_is_const_reg(tn)) {
01113 OP *def_op = tn_def.Get(tn);
01114
01115
01116 if (def_op &&
01117 !OP_Precedes(def_op, op)) {
01118 if ( !CG_LOOP_Backpatch_Find_Non_Body_TN(prolog, tn, 1) )
01119 if (!TN_is_dedicated(tn))
01120 CG_LOOP_Backpatch_Add(prolog, tn, tn, 1);
01121 Set_OP_omega(op, opnd, 1);
01122 }
01123 }
01124 }
01125 }
01126
01127 #ifdef Is_True_On
01128 Verify();
01129 #endif
01130 }
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141 CG_LOOP::CG_LOOP(LOOP_DESCR *loop)
01142 {
01143 Current_CG_LOOP = this;
01144
01145 BB *body = LOOP_DESCR_loophead(loop);
01146
01147 unroll_fully = FALSE;
01148 unroll_factor = -1;
01149 flags = CG_LOOP_NONE;
01150 trip_count_bb = NULL;
01151 prolog_start = prolog_end = NULL;
01152 epilog_start = epilog_end = NULL;
01153
01154 Attach_Prolog_And_Epilog(loop);
01155
01156 prolog_start = CG_LOOP_prolog_start;
01157 prolog_end = CG_LOOP_prolog;
01158 epilog_start = CG_LOOP_epilog;
01159 epilog_end = CG_LOOP_epilog_end;
01160
01161
01162 op_map = NULL;
01163 _CG_LOOP_info_map = NULL;
01164
01165 prolog_backpatches = epilog_backpatches = NULL;
01166
01167
01168 this->loop = loop;
01169 this->unroll_remainder_bb = NULL;
01170
01171 #ifdef TARG_IA64
01172 acyclic_len = 0;
01173 acyclic_len_wo_dspec = 0;
01174 #endif
01175
01176 }
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186 CG_LOOP::~CG_LOOP()
01187 {
01188 if (_CG_LOOP_info_map) {
01189 OP_MAP_Delete(_CG_LOOP_info_map);
01190 _CG_LOOP_info_map = NULL;
01191 }
01192 prolog_backpatches = epilog_backpatches = NULL;
01193 Current_CG_LOOP = NULL;
01194 }
01195
01196
01197
01198
01199
01200
01201
01202
01203 void CG_LOOP::Verify()
01204 {
01205 BB *body = Loop_header();
01206 if (_CG_LOOP_info_map) {
01207 OP *op;
01208 FOR_ALL_BB_OPs(body, op) {
01209
01210
01211 for (INT opnd = 0; opnd < OP_opnds(op); opnd++) {
01212 TN *tn = OP_opnd(op,opnd);
01213 if (TN_is_dedicated(tn))
01214 FmtAssert(OP_omega(op, opnd) <= 1,
01215 ("CG_LOOP: Dedicated TN cannot have omega > 1."));
01216 }
01217 }
01218 }
01219 }
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230 void
01231 CG_LOOP::Recompute_Liveness()
01232 {
01233 BB *entry = Trip_count_bb() ? Trip_count_bb() : Prolog_start();
01234 BB_REGION region(Malloc_Mem_Pool);
01235 region.entries.push_back(entry);
01236 BBLIST *succs;
01237 FOR_ALL_BB_SUCCS(Epilog_end(), succs) {
01238 BB *succ = BBLIST_item(succs);
01239 region.exits.push_back(succ);
01240 }
01241
01242
01243 if (_CG_LOOP_info_map) {
01244 BB *body = Loop_header();
01245 OP *op;
01246 FOR_ALL_BB_OPs(body, op) {
01247 for (INT opnd = 0; opnd < OP_opnds(op); opnd++) {
01248 TN *tn = OP_opnd(op,opnd);
01249 if (Is_CG_LOOP_Op(op) &&
01250 OP_omega(op, opnd) != 0 &&
01251 !TN_is_dedicated(tn) &&
01252 !TN_is_global_reg(tn))
01253 GTN_UNIVERSE_Add_TN(tn);
01254 }
01255 }
01256 }
01257
01258 GRA_LIVE_Init_Loop(CG_LOOP_prolog,
01259 _CG_LOOP_info_map ? Loop_header() : NULL,
01260 CG_LOOP_epilog,
01261 prolog_backpatches,
01262 epilog_backpatches);
01263
01264 BB_REGION_Recompute_Global_Live_Info(region, TRUE );
01265
01266 GRA_LIVE_Fini_Loop();
01267 }
01268
01269
01270
01271 void
01272 CG_LOOP_Recompute_Liveness(LOOP_DESCR *loop)
01273 {
01274 Is_True(loop == Current_CG_LOOP->Loop(),
01275 ("loop != Current_CG_LOOP"));
01276 Current_CG_LOOP->Recompute_Liveness();
01277 }
01278
01279
01280 TN *CG_LOOP_Backpatch_Find_Body_TN(BB *bb, TN *tn, UINT8 *omptr)
01281
01282
01283
01284
01285 {
01286 CG_LOOP_BACKPATCH *bp = CG_LOOP_Backpatch_First(bb, NULL);
01287 while (bp && CG_LOOP_BACKPATCH_non_body_tn(bp) != tn)
01288 bp = CG_LOOP_Backpatch_Next(bp);
01289 if (bp && omptr) *omptr = CG_LOOP_BACKPATCH_omega(bp);
01290 return bp ? CG_LOOP_BACKPATCH_body_tn(bp) : NULL;
01291 }
01292
01293
01294 TN *CG_LOOP_Backpatch_Find_Non_Body_TN(BB *bb, TN *tn, UINT8 om)
01295
01296
01297
01298
01299 {
01300 CG_LOOP_BACKPATCH *bp = CG_LOOP_Backpatch_First(bb, tn);
01301 while (bp && CG_LOOP_BACKPATCH_omega(bp) != om)
01302 bp = CG_LOOP_Backpatch_Next(bp);
01303 return bp ? CG_LOOP_BACKPATCH_non_body_tn(bp) : NULL;
01304 }
01305
01306 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Add(BB *bb, TN *non_body_tn,
01307 TN *body_tn, UINT8 omega)
01308
01309
01310
01311
01312 {
01313 BOOL isprolog = (bb == CG_LOOP_prolog);
01314 CG_LOOP_BACKPATCH *bps = isprolog ? prolog_backpatches : epilog_backpatches;
01315 CG_LOOP_BACKPATCH *bp;
01316
01317 Is_True(bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01318 ("bb not CG_LOOP_prolog or CG_LOOP_epilog"));
01319
01320
01321 if (!isprolog && TN_is_zero_reg(non_body_tn)) return NULL;
01322
01323 for (bp = bps; bp; bp = CG_LOOP_Backpatch_Next(bp)) {
01324 if (isprolog && bp->body_tn == body_tn && bp->omega == omega) {
01325 FmtAssert(bp->non_body_tn == non_body_tn,
01326 ("conflicting prolog backpatch for TN%d[%d]",
01327 TN_number(body_tn), omega));
01328 return bp;
01329 } else if (!isprolog && bp->non_body_tn == non_body_tn) {
01330 FmtAssert(bp->body_tn == body_tn && bp->omega == omega,
01331 ("conflicting epilog backpatch for TN%d",
01332 TN_number(non_body_tn)));
01333 return bp;
01334 }
01335 }
01336
01337 bp = TYPE_MEM_POOL_ALLOC(CG_LOOP_BACKPATCH, &MEM_phase_nz_pool);
01338 bp->non_body_tn = non_body_tn;
01339 bp->body_tn = body_tn;
01340 bp->omega = omega;
01341 bp->next = bps;
01342 if (isprolog)
01343 prolog_backpatches = bp;
01344 else
01345 epilog_backpatches = bp;
01346
01347 return bp;
01348 }
01349
01350
01351 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_First(BB *bb, TN *body_tn)
01352
01353
01354
01355
01356 {
01357 CG_LOOP_BACKPATCH *bp;
01358 Is_True(bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01359 ("bb not CG_LOOP_prolog or CG_LOOP_epilog"));
01360 bp = (bb == CG_LOOP_prolog) ? prolog_backpatches : epilog_backpatches;
01361 while (bp && body_tn && bp->body_tn != body_tn)
01362 bp = bp->next;
01363 if (bp == NULL)
01364 return bp;
01365 return body_tn ? _CG_LOOP_BP_limit_iter(bp) : bp;
01366 }
01367
01368
01369 CG_LOOP_BACKPATCH *CG_LOOP_Backpatch_Next(CG_LOOP_BACKPATCH *bp)
01370
01371
01372
01373
01374 {
01375 if (_CG_LOOP_BP_iter_limited(bp)) {
01376 TN *body_tn;
01377 bp = _CG_LOOP_BP_actual_ptr(bp);
01378 body_tn = bp->body_tn;
01379 do {
01380 bp = bp->next;
01381 } while (bp && bp->body_tn != body_tn);
01382 if (bp == NULL)
01383 return bp;
01384 return _CG_LOOP_BP_limit_iter(bp);
01385 } else {
01386 return bp->next;
01387 }
01388 }
01389
01390
01391 void CG_LOOP_Backpatch_Delete(BB *bb, CG_LOOP_BACKPATCH *the_bp)
01392
01393
01394
01395
01396 {
01397 CG_LOOP_BACKPATCH *bp;
01398 CG_LOOP_BACKPATCH *prev_bp;
01399 CG_LOOP_BACKPATCH **bpp;
01400
01401 Is_True(bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01402 ("bb not CG_LOOP_prolog or CG_LOOP_epilog"));
01403
01404 the_bp = _CG_LOOP_BP_actual_ptr(the_bp);
01405 bpp = (bb == CG_LOOP_prolog) ? &prolog_backpatches : &epilog_backpatches;
01406
01407 prev_bp = NULL;
01408 for (bp = *bpp; bp; bp = bp->next) {
01409 if (bp == the_bp) break;
01410 prev_bp = bp;
01411 }
01412
01413 if (!bp) return;
01414
01415 if (prev_bp) {
01416 prev_bp->next = bp->next;
01417 } else {
01418 *bpp = bp->next;
01419 }
01420 }
01421
01422
01423
01424 void CG_LOOP_Backpatch_Replace_Non_Body_TN(BB *bb, TN *tn, TN *newtn)
01425
01426
01427
01428
01429 {
01430 CG_LOOP_BACKPATCH *bp;
01431
01432 Is_True(bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01433 ("bb not CG_LOOP_prolog or CG_LOOP_epilog"));
01434
01435 bp = (bb == CG_LOOP_prolog) ? prolog_backpatches : epilog_backpatches;
01436
01437 for ( ; bp ; bp = bp->next )
01438 if ( bp->non_body_tn == tn ) bp->non_body_tn = newtn;
01439
01440 return;
01441 }
01442
01443 void CG_LOOP_Backpatch_Replace_Body_TN(BB *bb, TN *tn, TN *newtn,
01444 INT16 om_adj)
01445
01446
01447
01448
01449 {
01450 CG_LOOP_BACKPATCH *bp;
01451
01452 Is_True(bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01453 ("bb not CG_LOOP_prolog or CG_LOOP_epilog"));
01454
01455 bp = (bb == CG_LOOP_prolog) ? prolog_backpatches : epilog_backpatches;
01456
01457 for ( ; bp ; bp = bp->next )
01458 if ( bp->body_tn == tn ) {
01459 bp->body_tn = newtn;
01460 bp->omega+= om_adj;
01461 }
01462
01463 return;
01464
01465 }
01466
01467
01468 void CG_LOOP_Backpatch_Trace(BB *bb, CG_LOOP_BACKPATCH *bp)
01469
01470
01471
01472
01473 {
01474 Is_True(bb == NULL || bb == CG_LOOP_prolog || bb == CG_LOOP_epilog,
01475 ("bb not NULL, CG_LOOP_prolog or CG_LOOP_epilog"));
01476
01477 bp = _CG_LOOP_BP_actual_ptr(bp);
01478 if (bp) {
01479 if (bb == CG_LOOP_epilog) {
01480 fprintf(TFile, "<bp> TN%d <- TN%d[%d]\n", TN_number(bp->non_body_tn),
01481 TN_number(bp->body_tn), bp->omega);
01482 } else {
01483 fprintf(TFile, "<bp> TN%d[%d] %s TN%d\n", TN_number(bp->body_tn),
01484 bp->omega, bb == CG_LOOP_prolog ? "<-" : "",
01485 TN_number(bp->non_body_tn));
01486 }
01487 } else if (bb) {
01488 BOOL prolog = bb == CG_LOOP_prolog;
01489 fprintf(TFile, "<bp> %slog backpatches:\n", prolog ? "pro" : "epi");
01490 for (bp = prolog ? prolog_backpatches : epilog_backpatches; bp;
01491 bp = bp->next)
01492 CG_LOOP_Backpatch_Trace(bb, bp);
01493 fprintf(TFile,"\n");
01494 } else {
01495 CG_LOOP_Backpatch_Trace(CG_LOOP_prolog, NULL);
01496 CG_LOOP_Backpatch_Trace(CG_LOOP_epilog, NULL);
01497 }
01498 }
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630 static TN_LIST *Count_Copies_Needed(BB *body, hTN_MAP tn_def_map,
01631 hTN_MAP32 tn_copies_map,
01632 MEM_POOL *pool)
01633 {
01634 TN_LIST *need_copies = NULL;
01635
01636
01637
01638
01639 for (OP *op = BB_first_op(body); op != NULL; op = OP_next(op)) {
01640
01641
01642
01643
01644 for (INT res = OP_results(op) - 1; res >= 0; --res) {
01645 TN *tn = OP_result(op, res);
01646 if (hTN_MAP_Get(tn_def_map, tn) == NULL)
01647 hTN_MAP_Set(tn_def_map, tn, (void *) op);
01648 }
01649
01650 for (INT opnd = OP_opnds(op) - 1; opnd >= 0; --opnd) {
01651 TN *tn = OP_opnd(op, opnd);
01652 if ( ! TN_is_register(tn)) continue;
01653 INT copies = OP_omega(op, opnd);
01654 OP *tn_def_op = (OP *) hTN_MAP_Get(tn_def_map, tn);
01655 if (tn_def_op == NULL || (tn_def_op == op && copies == 1))
01656 --copies;
01657 if (copies > 0) {
01658 INT max_copies = hTN_MAP32_Get(tn_copies_map, tn);
01659 if (copies > max_copies) {
01660 hTN_MAP32_Set(tn_copies_map, tn, copies);
01661 if (max_copies == 0)
01662 need_copies = TN_LIST_Push(tn, need_copies, pool);
01663 }
01664 }
01665 }
01666 }
01667
01668
01669
01670
01671 CG_LOOP_BACKPATCH *bp;
01672 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL);
01673 bp; bp = CG_LOOP_Backpatch_Next(bp)) {
01674 INT copies = CG_LOOP_BACKPATCH_omega(bp);
01675 if (copies > 0) {
01676 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01677 INT max_copies = hTN_MAP32_Get(tn_copies_map, body_tn);
01678 if (copies > max_copies) {
01679 hTN_MAP32_Set(tn_copies_map, body_tn, copies);
01680 if (max_copies == 0)
01681 need_copies = TN_LIST_Push(body_tn, need_copies, pool);
01682 }
01683 }
01684 }
01685
01686
01687
01688
01689 if (need_copies == NULL) {
01690 CG_LOOP_BACKPATCH *bp_next;
01691 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
01692 bp; bp = bp_next) {
01693 bp_next = CG_LOOP_Backpatch_Next(bp);
01694 if (CG_LOOP_BACKPATCH_omega(bp) > 1)
01695 CG_LOOP_Backpatch_Delete(CG_LOOP_prolog, bp);
01696 }
01697 } else {
01698 CG_LOOP_BACKPATCH *bp_next;
01699 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
01700 bp; bp = bp_next) {
01701 bp_next = CG_LOOP_Backpatch_Next(bp);
01702 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01703 INT omega = CG_LOOP_BACKPATCH_omega(bp);
01704 if (hTN_MAP32_Get(tn_copies_map, body_tn) < omega - 1)
01705 CG_LOOP_Backpatch_Delete(CG_LOOP_prolog, bp);
01706 }
01707 }
01708
01709 return need_copies;
01710 }
01711
01712
01713 void Generate_Copy_TNs(BB *body, hTN_MAP32 tn_copies_map,
01714 hTN_MAP new_tn_map, TN_LIST *need_copies,
01715 MEM_POOL *pool)
01716 {
01717
01718
01719 TN_LIST *p;
01720 for (p = need_copies; p != NULL; p = TN_LIST_rest(p)) {
01721 TN *tn = TN_LIST_first(p);
01722 INT copies = hTN_MAP32_Get(tn_copies_map, tn);
01723
01724
01725 Is_True(copies > 0, ("Generate_Copy_TNs: need_copies has TN%d with"
01726 " copies == 0", TN_number(tn)));
01727 Is_True(hTN_MAP_Get(new_tn_map, tn) == NULL,
01728 ("Generate_Copy_TNs: need_copies contains duplicates"));
01729
01730 TN **new_tn_array = TYPE_MEM_POOL_ALLOC_N(TN *, pool, copies);
01731 hTN_MAP_Set(new_tn_map, tn, (void *) new_tn_array);
01732 for (INT i = copies - 1; i >= 0; --i)
01733 new_tn_array[i] = NULL;
01734 }
01735
01736
01737
01738
01739
01740 CG_LOOP_BACKPATCH *bp;
01741 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
01742 bp = CG_LOOP_Backpatch_Next(bp)) {
01743 INT copy = CG_LOOP_BACKPATCH_omega(bp);
01744 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01745 if (copy > 0 && ! TN_is_global_reg(non_body_tn)) {
01746 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01747 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
01748 if (new_tn_array[copy - 1] == NULL) {
01749 GTN_UNIVERSE_Add_TN(non_body_tn);
01750 new_tn_array[copy - 1] = non_body_tn;
01751 }
01752 }
01753 }
01754
01755
01756
01757
01758
01759
01760 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL); bp;
01761 bp = CG_LOOP_Backpatch_Next(bp)) {
01762 INT copy = CG_LOOP_BACKPATCH_omega(bp) - 1;
01763 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01764 if (copy > 0 && TN_is_register(non_body_tn)
01765 && ! TN_is_dedicated(non_body_tn)
01766 && ! TN_is_global_reg(non_body_tn)) {
01767 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01768 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
01769 if (new_tn_array[copy - 1] == NULL) {
01770 GTN_UNIVERSE_Add_TN(non_body_tn);
01771 new_tn_array[copy - 1] = non_body_tn;
01772 }
01773 }
01774 }
01775
01776
01777
01778 for (p = need_copies; p != NULL; p = TN_LIST_rest(p)) {
01779 TN *tn = TN_LIST_first(p);
01780 INT copies = hTN_MAP32_Get(tn_copies_map, tn);
01781 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, tn);
01782 for (INT i = 0; i < copies; ++i)
01783 if (new_tn_array[i] == NULL)
01784 new_tn_array[i] = Build_TN_Like(tn);
01785 }
01786 }
01787
01788
01789 void Remove_Notations_Without_Copies(BB *head, BB *tail)
01790 {
01791
01792
01793
01794
01795
01796
01797 CG_LOOP_BACKPATCH *bp, *bp_next;
01798 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
01799 bp = CG_LOOP_Backpatch_Next(bp)) {
01800 Is_True(CG_LOOP_BACKPATCH_omega(bp) == 0,
01801 ("Remove_Notations_Without_Copies: omega > 0"));
01802 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01803 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01804
01805
01806 if (non_body_tn != body_tn) {
01807 CGPREP_Copy_TN_Into_BB(non_body_tn, body_tn, tail, NULL, 0, TRUE);
01808 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
01809 }
01810 }
01811
01812
01813
01814
01815 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL); bp;
01816 bp = bp_next) {
01817 bp_next = CG_LOOP_Backpatch_Next(bp);
01818 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01819 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01820
01821
01822 if (body_tn != non_body_tn) {
01823 CGPREP_Copy_TN_Into_BB(body_tn, non_body_tn,
01824 head, BB_last_op(head), 0, FALSE);
01825 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
01826 }
01827 }
01828 }
01829
01830
01831 void Remove_Notations_With_Copies(BB *body, BB *head, BB *tail,
01832 hTN_MAP tn_def_map, hTN_MAP32 tn_copies_map,
01833 TN_LIST *need_copies, hTN_MAP new_tn_map)
01834 {
01835
01836
01837
01838 CG_LOOP_BACKPATCH *bp, *bp_next;
01839 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
01840 bp = CG_LOOP_Backpatch_Next(bp)) {
01841
01842
01843 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01844 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01845 INT omega = CG_LOOP_BACKPATCH_omega(bp);
01846 if (omega > 0) {
01847 CG_LOOP_BACKPATCH_Set_omega(bp, 0);
01848 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
01849 body_tn = new_tn_array[omega - 1];
01850 CG_LOOP_BACKPATCH_Set_body_tn(bp, body_tn);
01851 }
01852
01853
01854 if (non_body_tn != body_tn) {
01855 CGPREP_Copy_TN_Into_BB(non_body_tn, body_tn, tail, NULL, 0, TRUE);
01856 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
01857 }
01858 }
01859
01860
01861
01862 for (OP *op = BB_first_op(body); op != NULL; op = OP_next(op))
01863 for (INT opnd = OP_opnds(op) - 1; opnd >= 0; --opnd) {
01864
01865
01866 INT omega = OP_omega(op, opnd);
01867 if (omega == 0) continue;
01868 TN *tn = OP_opnd(op, opnd);
01869 OP *def_op = (OP *) hTN_MAP_Get(tn_def_map, tn);
01870 Is_True(def_op, ("CG_LOOP_Remove_Notations: NULL def_op"));
01871 if (omega == 1 && op == def_op) continue;
01872
01873
01874 if (OP_Precedes(op, def_op)) {
01875 if (omega == 1) continue;
01876 --omega;
01877 Set_OP_omega(op, opnd, 1);
01878 } else
01879 Set_OP_omega(op, opnd, 0);
01880
01881
01882 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, tn);
01883 Set_OP_opnd(op, opnd, new_tn_array[omega - 1]);
01884 }
01885
01886
01887
01888 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
01889 bp; bp = bp_next) {
01890 bp_next = CG_LOOP_Backpatch_Next(bp);
01891
01892
01893 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01894 INT omega = CG_LOOP_BACKPATCH_omega(bp);
01895 if (omega > 1) {
01896
01897 CG_LOOP_BACKPATCH_Set_omega(bp, 1);
01898 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
01899 body_tn = new_tn_array[omega - 2];
01900 CG_LOOP_BACKPATCH_Set_body_tn(bp, body_tn);
01901 }
01902
01903
01904 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01905 if (body_tn != non_body_tn) {
01906 CGPREP_Copy_TN_Into_BB(body_tn, non_body_tn,
01907 head, BB_last_op(head), 0, FALSE);
01908 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
01909 }
01910 }
01911
01912
01913
01914 for (TN_LIST *p = need_copies; p != NULL; p = TN_LIST_rest(p)) {
01915 TN *tn = TN_LIST_first(p);
01916 OP *op = (OP *) hTN_MAP_Get(tn_def_map, tn);
01917 INT copies = hTN_MAP32_Get(tn_copies_map, tn);
01918 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, tn);
01919
01920
01921
01922 for (INT cp = copies - 1; cp > 0; --cp)
01923 CGPREP_Copy_TN(new_tn_array[cp], new_tn_array[cp - 1], op, 1, TRUE);
01924 CGPREP_Copy_TN(new_tn_array[0], tn, op, 1, TRUE);
01925 }
01926 }
01927
01928 #ifdef TARG_IA64
01929 void CG_LOOP_Remove_Notations(CG_LOOP& cl, BB *head, BB *tail)
01930 #else
01931 void CG_LOOP_Remove_Notations(LOOP_DESCR* loop, BB *head, BB *tail)
01932 #endif
01933 {
01934 #ifdef TARG_IA64
01935 LOOP_DESCR *loop = cl.Loop();
01936 BB *body = cl.Loop_header();
01937 #else
01938 BB *body = LOOP_DESCR_loophead(loop);
01939 #endif
01940
01941 if (Get_Trace(TP_CGLOOP, 0x4)) {
01942 #pragma mips_frequency_hint NEVER
01943 CG_LOOP_Trace_Loop(loop, "Before CG_LOOP_Remove_Notations");
01944 }
01945
01946
01947
01948
01949
01950
01951
01952
01953
01954 CXX_MEM_POOL pool("CG_LOOP_Remove_Notations", FALSE);
01955 hTN_MAP tn_def_map = hTN_MAP_Create(pool());
01956 hTN_MAP32 tn_copies_map = hTN_MAP32_Create(pool());
01957 TN_LIST *need_copies = Count_Copies_Needed(body, tn_def_map,
01958 tn_copies_map, pool());
01959
01960 if (need_copies == NULL) {
01961
01962
01963
01964 Remove_Notations_Without_Copies(head, tail);
01965
01966 } else {
01967
01968
01969
01970
01971 hTN_MAP new_tn_map = hTN_MAP_Create(pool());
01972 Generate_Copy_TNs(body, tn_copies_map, new_tn_map, need_copies, pool());
01973
01974 Remove_Notations_With_Copies(body, head, tail, tn_def_map,
01975 tn_copies_map, need_copies, new_tn_map);
01976 }
01977
01978 if (Get_Trace(TP_CGLOOP, 0x4)) {
01979 #pragma mips_frequency_hint NEVER
01980 CG_LOOP_Trace_Loop( loop, "After CG_LOOP_Remove_Notations" );
01981 }
01982 }
01983
01984
01985
01986
01987
01988
01989
01990
01991
01992
01993
01994
01995
01996
01997
01998
01999
02000
02001
02002
02003
02004
02005
02006 static BOOL
02007 Readers_Reach(
02008 OP *op,
02009 INT32 count
02010 )
02011 {
02012 INT64 offset;
02013 INT32 store_offset;
02014 TN *liv_tn, *lit_tn;
02015 ARC_LIST *al;
02016 TN *iv_tn = OP_result(op,0 );
02017
02018 if ( OP_opnd(op,0 ) == iv_tn )
02019 liv_tn = OP_opnd(op,1 );
02020 else
02021 liv_tn = OP_opnd(op,0 );
02022
02023 for ( al = OP_succs(op) ; al; al = ARC_LIST_rest(al) ) {
02024 ARC *arc = ARC_LIST_first(al);
02025 OP *reader = ARC_succ(arc);
02026
02027 if ( reader == op )
02028 continue;
02029
02030 if ( OP_br(reader) )
02031 continue;
02032
02033 if ( CGTARG_Have_Indexed_Mem_Insts()
02034 && OP_Is_Float_Mem(reader)
02035 ) {
02036
02037
02038
02039 continue;
02040 }
02041
02042
02043
02044
02045
02046
02047 store_offset = OP_store(reader) ? 1 : 0;
02048
02049 if ( OP_opnd(reader,store_offset) == iv_tn )
02050 lit_tn = OP_opnd(reader,store_offset + 1);
02051 else
02052 lit_tn = OP_opnd(reader,store_offset);
02053
02054 FmtAssert(OP_iadd(op) || OP_isub(op),("Expected iadd or isub."));
02055
02056 if ( OP_iadd(op) )
02057 offset = TN_value(lit_tn) + TN_value(liv_tn) * count;
02058 else
02059 offset = TN_value(lit_tn) - TN_value(liv_tn) * count;
02060
02061 if ( ! TOP_Can_Have_Immediate(offset,OP_code(reader)) )
02062 return FALSE;
02063 }
02064
02065 return TRUE;
02066 }
02067
02068
02069
02070 void CG_LOOP_Clear_SCCs(LOOP_DESCR *loop)
02071
02072
02073
02074
02075 {
02076 BB *bb = LOOP_DESCR_loophead(loop);
02077 OP *op;
02078 FOR_ALL_BB_OPs(bb, op) {
02079 _CG_LOOP_INFO *info = _CG_LOOP_info(op);
02080
02081
02082
02083
02084
02085
02086 CG_DEP_Delete_SCC_Arcs(info->scc_ancestors);
02087 info->scc_ancestors = NULL;
02088 info->scc_descendents = NULL;
02089 info->scc = NULL;
02090 }
02091 }
02092
02093
02094
02095
02096
02097
02098
02099
02100
02101
02102
02103
02104
02105
02106
02107
02108
02109
02110
02111
02112
02113
02114
02115
02116
02117
02118
02119
02120
02121
02122
02123 static TN_MAP unroll_names;
02124
02125
02126 static void unroll_names_finish(void)
02127 {
02128 TN_MAP_Delete(unroll_names);
02129 unroll_names_valid = FALSE;
02130 }
02131
02132 #ifdef KEY
02133
02134
02135
02136 static BOOL Has_Nonzero_Omega_in_Epilog( TN* tn )
02137 {
02138 if( !TN_is_gra_homeable(tn) )
02139 return FALSE;
02140
02141 CG_LOOP_BACKPATCH* bpatches = CG_LOOP_Backpatch_First(CG_LOOP_epilog,tn);
02142 while( bpatches != NULL ){
02143 const TN* body_tn = CG_LOOP_BACKPATCH_body_tn(bpatches);
02144 if( body_tn == tn ){
02145 if( CG_LOOP_BACKPATCH_omega(bpatches) > 0 )
02146 return TRUE;
02147 }
02148 bpatches = CG_LOOP_Backpatch_Next(bpatches);
02149 }
02150
02151 return FALSE;
02152 }
02153 #endif
02154
02155 static void unroll_names_init_tn(TN *result, UINT16 ntimes, MEM_POOL *pool)
02156 {
02157 TN **entry = TYPE_MEM_POOL_ALLOC_N(TN *, pool, ntimes);
02158 UINT16 unrolling;
02159 TN_MAP_Set(unroll_names, result, entry);
02160 #ifdef KEY
02161 const BOOL reset_gra_home =
02162 !TN_is_dedicated(result) && Has_Nonzero_Omega_in_Epilog(result);
02163 #endif // KEY
02164 for (unrolling = 0; unrolling < ntimes; ++unrolling){
02165 if (TN_is_dedicated(result)){
02166 entry[unrolling] = result;
02167 }
02168 #ifdef KEY
02169
02170
02171
02172
02173
02174
02175
02176
02177 else if( unrolling == ntimes - 1 &&
02178 TN_is_gra_homeable(result) ){
02179 entry[unrolling] = result;
02180 }
02181 #endif // KEY
02182 else{
02183 entry[unrolling] = Dup_TN(result);
02184 #ifdef KEY
02185
02186
02187
02188
02189
02190 if( reset_gra_home &&
02191 unrolling < ntimes - 1 ){
02192 Reset_TN_is_gra_homeable( entry[unrolling] );
02193 Set_TN_home( entry[unrolling], NULL );
02194 }
02195 #endif // KEY
02196 }
02197 }
02198 }
02199
02200
02201 #ifdef KEY
02202 static BOOL TN_is_cond_def_of_another_op(BB *bb, TN *tn, OP *cand_op)
02203 {
02204 OP *op;
02205 FOR_ALL_BB_OPs(bb, op) {
02206 if (cand_op == op)
02207 break;
02208 if (!OP_cond_def(op))
02209 continue;
02210 for (INT i = 0; i < OP_results(op); ++i) {
02211 TN *result_tn = OP_result(op,i);
02212 if (result_tn == tn)
02213 return TRUE;
02214 }
02215 }
02216 return FALSE;
02217 }
02218 #endif
02219
02220 static void unroll_names_init(LOOP_DESCR *loop, UINT16 ntimes, MEM_POOL *pool)
02221
02222
02223
02224
02225 {
02226 BB_SET *bbs = LOOP_DESCR_bbset(loop);
02227 Is_True(BB_SET_Size(bbs) == 1, ("unroll_names_init: only support single BB loop."));
02228
02229 BB *bb = LOOP_DESCR_loophead(loop);
02230 OP *op;
02231
02232 Is_True(!unroll_names_valid, ("unroll_names_finish not called."));
02233
02234 unroll_names = TN_MAP_Create();
02235 unroll_names_valid = TRUE;
02236
02237 FOR_ALL_BB_OPs(bb, op) {
02238 for (INT i = 0; i < OP_results(op); ++i) {
02239 TN *result_tn = OP_result(op,i);
02240
02241 if (!OP_cond_def(op) ||
02242 !TN_is_global_reg(result_tn)) {
02243
02244 if (OP_base_update_kind(op) == NO_BASE_UPDATE ||
02245 (OP_load(op) && i == 0))
02246 #ifdef KEY
02247 if (!TN_MAP_Get(unroll_names, result_tn) && !TN_is_cond_def_of_another_op(bb, result_tn, op))
02248 #else
02249 if (!TN_MAP_Get(unroll_names, result_tn))
02250 #endif
02251 unroll_names_init_tn(result_tn, ntimes, pool);
02252 }
02253 }
02254 }
02255
02256 #ifdef Is_True_On
02257 FOR_ALL_BB_OPs(bb, op) {
02258 for (INT i = 0; i < OP_opnds(op); ++i) {
02259 TN *tn = OP_opnd(op,i);
02260 if (TN_is_register(tn) &&
02261 OP_omega(op, i) >= 2)
02262 Is_True(TN_MAP_Get(unroll_names, tn),
02263 ("unroll_names_init: must rename omega TN."));
02264 }
02265 }
02266 #endif
02267 }
02268
02269
02270 inline TN *unroll_names_get(TN *tn, UINT16 unrolling)
02271 {
02272 if (TN_is_register(tn)) {
02273 TN **entry = (TN **)TN_MAP_Get(unroll_names, tn);
02274 return entry ? entry[unrolling] : tn;
02275 }
02276 return tn;
02277 }
02278
02279
02280 TN *CG_LOOP_unroll_names_get(TN *tn, UINT16 unrolling)
02281 {
02282 if ( tn == NULL ) return NULL;
02283 return unroll_names_get(tn, unrolling);
02284 }
02285
02286
02287
02288
02289
02290
02291
02292 typedef struct {
02293 mUINT16 ntimes;
02294 mBOOL const_trip;
02295 } NOTE_REMAINDER_HEAD;
02296
02297 typedef struct {
02298 char *reason;
02299 } NOTE_NOT_UNROLLED;
02300
02301
02302 #define NOTE_ntimes(n) ((n)->ntimes)
02303 #define NOTE_const_trip(n) ((n)->const_trip)
02304 #define NOTE_reason(n) ((n)->reason)
02305
02306
02307 static void remainder_head_note_handler(NOTE_ACTION action, NOTE_INFO *info,
02308 FILE *file)
02309
02310
02311
02312
02313 {
02314 NOTE_REMAINDER_HEAD *info_u = (NOTE_REMAINDER_HEAD *)info;
02315 UINT16 ntimes = NOTE_ntimes(info_u);
02316 BOOL ctrip = NOTE_const_trip(info_u);
02317
02318 switch (action) {
02319 case NOTE_PRINT_TO_FILE:
02320 if (ctrip) {
02321 fprintf(file,
02322 "%s<loop> Unrolling remainder loop (%d iteration%s)\n",
02323 ASM_CMNT_LINE, ntimes, ntimes == 1 ? "" : "s");
02324 } else {
02325 fprintf(file,
02326 "%s<loop> Unrolling remainder loop (at most %d iteration%s)\n",
02327 ASM_CMNT_LINE, ntimes-1, ntimes-1 == 1 ? "" : "s");
02328 }
02329 if (ntimes == 0)
02330 DevWarn("Found remainder head note with ntimes = 0");
02331 break;
02332 case NOTE_PRINT_TO_ANL_FILE:
02333
02334 break;
02335 case NOTE_PRINT_HANDLER_NAME_TO_FILE:
02336 fprintf(file, "remainder_head_note_handler");
02337 break;
02338 default:
02339 Is_True(FALSE, ("Didn't recognize action"));
02340 }
02341 }
02342
02343
02344
02345 static void note_remainder_head(BB *head, UINT16 ntimes, UINT16 trips_if_const)
02346
02347
02348
02349
02350
02351
02352 {
02353 NOTE_REMAINDER_HEAD *note;
02354 note = TYPE_MEM_POOL_ALLOC(NOTE_REMAINDER_HEAD, &MEM_pu_nz_pool);
02355 NOTE_const_trip(note) = trips_if_const > 0;
02356 NOTE_ntimes(note) = trips_if_const ? trips_if_const : ntimes;
02357 NOTE_Add_To_BB(head, remainder_head_note_handler, (NOTE_INFO *)note);
02358 }
02359
02360
02361
02362 static void not_unrolled_note_handler(NOTE_ACTION action, NOTE_INFO *info,
02363 FILE *file)
02364
02365
02366
02367
02368 {
02369 NOTE_NOT_UNROLLED *info_u = (NOTE_NOT_UNROLLED *)info;
02370 char *reason = NOTE_reason(info_u);
02371
02372 switch (action) {
02373 case NOTE_PRINT_TO_FILE:
02374 fprintf(file, "%s<loop> Not unrolled: %s\n", ASM_CMNT_LINE, reason);
02375 break;
02376 case NOTE_PRINT_TO_ANL_FILE:
02377
02378 break;
02379 case NOTE_PRINT_HANDLER_NAME_TO_FILE:
02380 fprintf(file, "not_unrolled_note_handler");
02381 break;
02382 default:
02383 Is_True(FALSE, ("Didn't recognize action"));
02384 }
02385 }
02386
02387
02388
02389 static void note_not_unrolled(BB *head, const char *reason, ...)
02390
02391
02392
02393
02394
02395
02396
02397 {
02398 NOTE_NOT_UNROLLED *note;
02399 va_list args;
02400 char buf[256];
02401
02402
02403
02404
02405 va_start(args, reason);
02406 vsprintf(buf, reason, args);
02407 va_end(args);
02408 Is_True(strlen(buf) < 256, ("note_not_unrolled buffer overrun"));
02409 buf[255] = (char)0;
02410
02411 note = TYPE_MEM_POOL_ALLOC(NOTE_NOT_UNROLLED, &MEM_pu_nz_pool);
02412 NOTE_reason(note) = TYPE_MEM_POOL_ALLOC_N(char, &MEM_pu_nz_pool,
02413 strlen(buf)+1);
02414 strcpy(NOTE_reason(note), buf);
02415 NOTE_Add_To_BB(head, not_unrolled_note_handler, (NOTE_INFO *)note);
02416 }
02417
02418
02419
02420
02421
02422
02423 #define is_power_of_two(i) (((i) & ((i)-1)) == 0)
02424
02425
02426
02427 inline UINT16 log2(UINT32 n)
02428
02429
02430
02431
02432
02433
02434 {
02435 UINT16 result = 0;
02436 Is_True(n > 0, ("can't take logarithm of zero"));
02437 while ((1 << result) <= n)
02438 ++result;
02439 return --result;
02440 }
02441
02442
02443
02444 static void unroll_guard_unrolled_body(LOOP_DESCR *loop,
02445 LOOPINFO *unrolled_info,
02446 TN *orig_trip_count_tn,
02447 UINT32 ntimes)
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457 {
02458 if (!TN_is_constant(orig_trip_count_tn)) {
02459 INT64 trip_est = WN_loop_trip_est(LOOPINFO_wn(unrolled_info));
02460 TN *new_trip_count_tn = Build_TN_Like(orig_trip_count_tn);
02461 INT32 trip_size = TN_size(orig_trip_count_tn);
02462 float ztrip_prob = 1.0 / MAX(trip_est, 1);
02463 float orig_post_prolog_freq = BB_freq(BB_next(CG_LOOP_prolog));
02464 OPS ops = OPS_EMPTY;
02465 BB *continuation_bb;
02466 LABEL_IDX continuation_lbl;
02467
02468 Is_True(is_power_of_two(ntimes), ("not power of two"));
02469 Is_True(!TN_is_constant(orig_trip_count_tn), ("trip count is constant"));
02470
02471 LOOPINFO_trip_count_tn(unrolled_info) = new_trip_count_tn;
02472
02473 extend_epilog(loop);
02474 continuation_bb = CG_LOOP_epilog;
02475 continuation_lbl = Gen_Label_For_BB(continuation_bb);
02476
02477
02478
02479
02480
02481
02482
02483 Exp_OP2(trip_size == 4 ? OPC_I4ASHR : OPC_I8ASHR,
02484 new_trip_count_tn,
02485 orig_trip_count_tn,
02486 Gen_Literal_TN(log2(ntimes), trip_size),
02487 &ops);
02488 #ifdef TARG_X8664 //bug 12956: x8664/exp_branch.cxx does not accept Zero_TN
02489 Exp_OP3v(OPC_FALSEBR,
02490 NULL,
02491 Gen_Label_TN(continuation_lbl,0),
02492 new_trip_count_tn,
02493 Gen_Literal_TN(0,4),
02494 trip_size == 4 ? V_BR_I4EQ : V_BR_I8EQ,
02495 &ops);
02496 #else
02497 Exp_OP3v(OPC_FALSEBR,
02498 NULL,
02499 Gen_Label_TN(continuation_lbl,0),
02500 new_trip_count_tn,
02501 Zero_TN,
02502 V_BR_I8EQ,
02503 &ops);
02504 #endif
02505 BB_Append_Ops(CG_LOOP_prolog, &ops);
02506 Link_Pred_Succ_with_Prob(CG_LOOP_prolog, continuation_bb, ztrip_prob);
02507 Change_Succ_Prob(CG_LOOP_prolog, BB_next(CG_LOOP_prolog), 1.0 - ztrip_prob);
02508 BB_freq(BB_next(CG_LOOP_prolog)) = orig_post_prolog_freq;
02509
02510
02511
02512
02513 extend_prolog();
02514 extend_epilog(loop);
02515 }
02516 }
02517
02518
02519
02520 static void unroll_xfer_annotations(BB *unrolled_bb, BB *orig_bb)
02521
02522
02523
02524
02525
02526
02527
02528
02529
02530
02531
02532
02533
02534
02535 {
02536 if (BB_has_pragma(orig_bb)) {
02537 BB_Copy_Annotations(unrolled_bb, orig_bb, ANNOT_PRAGMA);
02538 }
02539
02540 if (BB_exit(orig_bb)) {
02541 ANNOTATION *ant = ANNOT_Get(BB_annotations(orig_bb), ANNOT_EXITINFO);
02542 if (ant == NULL) {
02543 DevWarn("BB_exit(BB:%d) set but no ANNOT_EXITINFO attached",
02544 BB_id(orig_bb));
02545 } else {
02546 EXITINFO *orig_info = ANNOT_exitinfo(ant);
02547 EXITINFO *unrolled_info = TYPE_PU_ALLOC(EXITINFO);
02548 *unrolled_info = *orig_info;
02549 if (EXITINFO_sp_adj(orig_info)) {
02550 OP *sp_adj;
02551 FOR_ALL_BB_OPs_REV(unrolled_bb, sp_adj)
02552 if (OP_code(sp_adj) == TOP_spadjust) break;
02553 if (sp_adj == NULL)
02554 DevWarn("missing spadjust OP in unrolled BB:%d", BB_id(unrolled_bb));
02555 EXITINFO_sp_adj(unrolled_info) = sp_adj;
02556 }
02557 BB_Add_Annotation(unrolled_bb, ANNOT_PRAGMA, orig_info);
02558 }
02559 }
02560
02561 if (BB_call(orig_bb)) {
02562 BB_Copy_Annotations(unrolled_bb, orig_bb, ANNOT_CALLINFO);
02563 }
02564
02565 if (BB_has_note(orig_bb)) {
02566 BB_Copy_Annotations(unrolled_bb, orig_bb, ANNOT_NOTE);
02567 }
02568 }
02569
02570
02571 static BB *Unroll_Replicate_Body(LOOP_DESCR *loop, INT32 ntimes, BOOL unroll_fully)
02572
02573
02574
02575
02576
02577
02578
02579
02580
02581
02582
02583 {
02584 BB *body = LOOP_DESCR_loophead(loop);
02585 BB *unrolled_body = Gen_BB_Like(body);
02586 UINT16 unrolling;
02587 OP *op;
02588 ANNOTATION *annot = ANNOT_Get(BB_annotations(body), ANNOT_LOOPINFO);
02589 LOOPINFO *info = ANNOT_loopinfo(annot);
02590 TN *trip_count = LOOPINFO_trip_count_tn(info);
02591 INT16 new_trip_count_val;
02592 LOOPINFO *unrolled_info = TYPE_P_ALLOC(LOOPINFO);
02593 WN *wn = WN_COPY_Tree(LOOPINFO_wn(info));
02594 TYPE_ID ttype = WN_rtype(WN_loop_trip(wn));
02595 OPCODE opc_intconst = OPCODE_make_op(OPR_INTCONST, ttype, MTYPE_V);
02596 WN *ntimes_wn = WN_CreateIntconst(opc_intconst, ntimes);
02597 OPCODE opc_div = OPCODE_make_op(OPR_DIV, ttype, MTYPE_V);
02598
02599
02600 Is_True(BB_SET_Size(LOOP_DESCR_bbset(loop)) == 1,
02601 ("unroll_replicate_body passed multi-bb loop body"));
02602
02603 if (BB_freq_fb_based(body)) Set_BB_freq_fb_based(unrolled_body);
02604
02605 if (TN_is_constant(trip_count)) {
02606 new_trip_count_val = TN_value(trip_count) / ntimes;
02607 Is_True((unroll_fully && new_trip_count_val == 1) ||
02608 (!unroll_fully && new_trip_count_val > 1),
02609 ("new_trip_count_val does not make sense."));
02610 }
02611
02612
02613
02614 WN_set_loop_trip(wn, WN_CreateExp2(opc_div, WN_loop_trip(wn), ntimes_wn));
02615 WN_loop_trip_est(wn) = WN_loop_trip_est(wn) / ntimes;
02616 LOOPINFO_wn(unrolled_info) = wn;
02617 LOOPINFO_srcpos(unrolled_info) = LOOPINFO_srcpos(info);
02618 if (TN_is_constant(trip_count))
02619 LOOPINFO_trip_count_tn(unrolled_info) =
02620 Gen_Literal_TN(new_trip_count_val, TN_size(trip_count));
02621 Set_BB_unrollings(unrolled_body, ntimes);
02622 if (unroll_fully) Set_BB_unrolled_fully(unrolled_body);
02623 BB_Add_Annotation(unrolled_body, ANNOT_LOOPINFO, unrolled_info);
02624
02625 bool trace_pref = Get_Trace(TP_CGLOOP, 2);
02626
02627
02628
02629
02630 for (unrolling = 0; unrolling < ntimes; unrolling++) {
02631 FOR_ALL_BB_OPs(body, op) {
02632
02633
02634 if (OP_prefetch(op)) {
02635
02636 WN *mem_wn = Get_WN_From_Memory_OP(op);
02637 Is_True(WN_operator(mem_wn) == OPR_PREFETCH,
02638 ("wrong prefetch WHIRL node."));
02639
02640 if (trace_pref && unrolling == 0)
02641 if (mem_wn)
02642 fprintf(TFile, "<cgpref> - 1L cache stride = %d, 2L cache stride = %d,"
02643 " confidence = %d\n",
02644 WN_pf_stride_1L(mem_wn),
02645 WN_pf_stride_2L(mem_wn),
02646 WN_pf_confidence(mem_wn));
02647 else
02648 fprintf(TFile, "<cgpref> pref wn not found.\n");
02649
02650 if (Prefetch_Kind_Enabled(mem_wn)) {
02651 #ifdef TARG_IA64
02652 int stride = WN_pf_stride_2L( mem_wn ) ? WN_pf_stride_2L( mem_wn ) : WN_pf_stride_1L(mem_wn);
02653 #else
02654 int stride = WN_pf_stride_2L( wn ) ? WN_pf_stride_2L( wn ) : WN_pf_stride_1L(wn);
02655 #endif
02656 if (stride != 0 && (unrolling % stride) != 0) {
02657 if (trace_pref)
02658 fprintf(TFile, "<cgpref> pref pruned at unrolling %d.\n", unrolling);
02659 continue;
02660 }
02661 }
02662 }
02663 if (!OP_br(op) || !unroll_fully && unrolling == ntimes-1) {
02664 UINT8 opnd;
02665 UINT8 res;
02666 OP *new_op = Dup_OP(op);
02667 CGPREP_Init_Op(new_op);
02668 CG_LOOP_Init_Op(new_op);
02669 Copy_WN_For_Memory_OP(new_op, op);
02670
02671 if (OP_loh(op))
02672 Set_OP_loh(new_op);
02673
02674 Set_OP_unrolling(new_op, unrolling);
02675 Set_OP_orig_idx(new_op, OP_map_idx(op));
02676 Set_OP_unroll_bb(new_op, unrolled_body);
02677 for (res = 0; res < OP_results(op); ++res) {
02678 TN *new_res = unroll_names_get(OP_result(op,res), unrolling);
02679 Set_OP_result(new_op, res, new_res);
02680 }
02681 for (opnd = 0; opnd < OP_opnds(op); opnd++) {
02682 INT omega = OP_omega(op,opnd);
02683 INT adjust = omega + (ntimes - unrolling - 1);
02684 INT new_omega = adjust / ntimes;
02685 INT which = ntimes - 1 - (adjust - (new_omega * ntimes));
02686
02687
02688 Is_True(new_omega * ntimes + (unrolling - which) == omega,
02689 ("new_omega * ntimes + (unrolling - which) == omega."));
02690
02691 TN *new_tn = unroll_names_get(OP_opnd(op,opnd), which);
02692 Is_True(omega == 0 || new_tn != NULL,
02693 ("Replicate_Unroll_Body: TN with non-zero omega must be renamed."));
02694 Set_OP_opnd(new_op, opnd, new_tn);
02695
02696
02697 if (OP_br(op))
02698 new_omega = omega;
02699
02700 Set_OP_omega(new_op, opnd, new_omega);
02701 }
02702 BB_Append_Op(unrolled_body, new_op);
02703 }
02704 }
02705 }
02706
02707 unroll_xfer_annotations(unrolled_body, body);
02708
02709 if (!unroll_fully) {
02710
02711
02712
02713
02714 op = BB_branch_op(unrolled_body);
02715 Is_True(op, ("didn't insert loopback branch correctly"));
02716 Set_OP_opnd(op,
02717 Branch_Target_Operand(op),
02718 Gen_Label_TN(Gen_Label_For_BB(unrolled_body), 0));
02719 INT loop_count = WN_loop_trip_est(wn) ? WN_loop_trip_est(wn) : 100;
02720 Link_Pred_Succ_with_Prob(unrolled_body, unrolled_body,
02721 (loop_count - 1.0) / loop_count);
02722 }
02723
02724 float new_body_freq = BB_freq(body) / ntimes;
02725
02726
02727 Chain_BBs(BB_prev(body), unrolled_body);
02728 Chain_BBs(unrolled_body, BB_next(body));
02729 LOOP_DESCR_Retarget_Loop_Entrances(loop, unrolled_body);
02730 Unlink_Pred_Succ(body, BB_next(body));
02731 INT loop_count = WN_loop_trip_est(wn) ? WN_loop_trip_est(wn) : 100;
02732 Link_Pred_Succ_with_Prob(unrolled_body, BB_next(body),
02733 1.0 / loop_count);
02734 BB_next(body) = BB_prev(body) = NULL;
02735
02736 if (FREQ_Frequencies_Computed() || BB_freq_fb_based(body)) {
02737 BB_freq(unrolled_body) = new_body_freq;
02738 }
02739
02740
02741
02742 GRA_LIVE_Compute_Local_Info(unrolled_body);
02743
02744 return unrolled_body;
02745 }
02746
02747
02748
02749 void Unroll_Make_Remainder_Loop(CG_LOOP& cl, INT32 ntimes)
02750
02751
02752
02753
02754
02755
02756
02757
02758
02759
02760 {
02761 LOOP_DESCR *loop = cl.Loop();
02762 BB *body = LOOP_DESCR_loophead(loop);
02763 ANNOTATION *annot = ANNOT_Get(BB_annotations(body), ANNOT_LOOPINFO);
02764 LOOPINFO *info = ANNOT_loopinfo(annot);
02765 TN *trip_count = LOOPINFO_trip_count_tn(info);
02766 TN *new_trip_count, *label_tn;
02767 OP *br_op = BB_branch_op(body);
02768 BB *remainder_tail;
02769 LABEL_IDX continuation_label = (LABEL_IDX)0;
02770 CG_LOOP_BACKPATCH *bp;
02771 BOOL freqs = FREQ_Frequencies_Computed();
02772 float loop_entry_freq;
02773 if (freqs)
02774 loop_entry_freq = BB_freq(CG_LOOP_prolog);
02775
02776 Is_True(info == LOOP_DESCR_loopinfo(loop),
02777 ("LOOPINFO for head BB disagrees with that in loop descriptor"));
02778 Is_True(BB_unrollings(body) < 2,
02779 ("unrolled loophead passed to unroll_make_remainder_loop"));
02780
02781 if (Get_Trace(TP_CGLOOP, 0x4)) {
02782 #pragma mips_frequency_hint NEVER
02783 CG_LOOP_Trace_Loop(loop, "Before Unroll_Make_Remainder_Loop");
02784 }
02785
02786
02787
02788
02789
02790
02791
02792
02793
02794
02795
02796
02797
02798
02799
02800
02801
02802
02803
02804
02805
02806
02807
02808
02809
02810
02811
02812
02813
02814
02815
02816
02817
02818
02819
02820 CXX_MEM_POOL pool("Unroll_Make_Remainder_Loop", FALSE);
02821 hTN_MAP tn_def_map = hTN_MAP_Create(pool());
02822 hTN_MAP32 tn_copies_map = hTN_MAP32_Create(pool());
02823 TN_LIST *need_copies = Count_Copies_Needed(body, tn_def_map,
02824 tn_copies_map, pool());
02825 hTN_MAP new_tn_map = NULL;
02826
02827
02828
02829
02830 if (need_copies == NULL) {
02831
02832 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL); bp;
02833 bp = CG_LOOP_Backpatch_Next(bp)) {
02834
02835
02836 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
02837 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
02838 if (non_body_tn != body_tn) {
02839 CGPREP_Copy_TN_Into_BB(body_tn, non_body_tn, CG_LOOP_prolog,
02840 BB_last_op(CG_LOOP_prolog), 0, FALSE);
02841 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
02842 }
02843 }
02844
02845 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
02846 bp = CG_LOOP_Backpatch_Next(bp)) {
02847
02848
02849 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
02850 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
02851 if (non_body_tn != body_tn) {
02852 CGPREP_Copy_TN_Into_BB(non_body_tn, body_tn,
02853 CG_LOOP_epilog, NULL, 0, TRUE);
02854 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
02855 }
02856 }
02857
02858 } else {
02859
02860
02861
02862
02863 new_tn_map = hTN_MAP_Create(pool());
02864 Generate_Copy_TNs(body, tn_copies_map, new_tn_map, need_copies, pool());
02865
02866
02867
02868
02869 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL); bp;
02870 bp = CG_LOOP_Backpatch_Next(bp)) {
02871
02872
02873 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
02874 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
02875 INT omega = CG_LOOP_BACKPATCH_omega(bp) - 1;
02876 if (omega > 0) {
02877 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
02878 body_tn = new_tn_array[omega - 1];
02879 }
02880
02881
02882 if (non_body_tn != body_tn) {
02883 CGPREP_Copy_TN_Into_BB(body_tn, non_body_tn, CG_LOOP_prolog,
02884 BB_last_op(CG_LOOP_prolog), 0, FALSE);
02885 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
02886 }
02887 }
02888
02889
02890
02891
02892 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
02893 bp = CG_LOOP_Backpatch_Next(bp)) {
02894
02895
02896 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
02897 TN *non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
02898 INT omega = CG_LOOP_BACKPATCH_omega(bp);
02899 if (omega > 0) {
02900 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
02901 body_tn = new_tn_array[omega - 1];
02902 }
02903
02904
02905 if (non_body_tn != body_tn) {
02906 CGPREP_Copy_TN_Into_BB(non_body_tn, body_tn,
02907 CG_LOOP_epilog, NULL, 0, TRUE);
02908 CG_LOOP_BACKPATCH_Set_non_body_tn(bp, body_tn);
02909 }
02910 }
02911 }
02912
02913 if (Get_Trace(TP_CGLOOP, 0x4)) {
02914 #pragma mips_frequency_hint NEVER
02915 CG_LOOP_Trace_Loop(loop, "1st Check Unroll_Make_Remainder_Loop");
02916 }
02917
02918
02919
02920
02921
02922
02923
02924 CG_LOOP_BACKPATCH *remainder_prolog_backpatches = prolog_backpatches;
02925 CG_LOOP_BACKPATCH *remainder_epilog_backpatches = NULL;
02926 CG_LOOP_BACKPATCH *main_loop_prolog_backpatches = NULL;
02927 CG_LOOP_BACKPATCH *main_loop_epilog_backpatches = epilog_backpatches;
02928
02929
02930
02931 prolog_backpatches = NULL;
02932 epilog_backpatches = NULL;
02933
02934
02935
02936
02937 CG_LOOP_DEF tn_def(body);
02938
02939 OP *op;
02940 FOR_ALL_BB_OPs(body, op) {
02941
02942
02943 for (INT res = 0; res < OP_results(op); res++) {
02944 TN *tn = OP_result(op, res);
02945 if (TN_is_register(tn) && ! TN_is_dedicated(tn)
02946 && ! TN_is_const_reg(tn)
02947 && GTN_SET_MemberP(BB_live_in(CG_LOOP_epilog), tn)) {
02948
02949 #ifdef TARG_IA64
02950
02951
02952 if (! GTN_SET_MemberP(BB_live_def(body), tn)
02953 || GTN_SET_MemberP(BB_live_use(body), tn)
02954 || GTN_SET_MemberP(BB_live_out(CG_LOOP_epilog), tn)) {
02955 CG_LOOP_Backpatch_Add(CG_LOOP_epilog, tn, tn, 0);
02956 CG_LOOP_Backpatch_Add(CG_LOOP_prolog, tn, tn, 1);
02957 }else {
02958 DevWarn("Eliminate useless backpatchs for TN%d", TN_number(tn));
02959 }
02960 #else
02961 CG_LOOP_Backpatch_Add(CG_LOOP_epilog, tn, tn, 0);
02962 CG_LOOP_Backpatch_Add(CG_LOOP_prolog, tn, tn, 1);
02963 #endif
02964 }
02965 }
02966
02967
02968 for (INT opnd = 0; opnd < OP_opnds(op); opnd++) {
02969 TN *tn = OP_opnd(op, opnd);
02970 if (TN_is_register(tn) && ! TN_is_dedicated(tn) &&
02971 ! TN_is_const_reg(tn)) {
02972 OP *def_op = tn_def.Get(tn);
02973
02974 if (def_op && ! OP_Precedes(def_op, op)
02975 && ! CG_LOOP_Backpatch_Find_Non_Body_TN(CG_LOOP_prolog, tn, 1) ) {
02976 CG_LOOP_Backpatch_Add(CG_LOOP_epilog, tn, tn, 0);
02977 CG_LOOP_Backpatch_Add(CG_LOOP_prolog, tn, tn, 1);
02978 }
02979 }
02980 }
02981 }
02982
02983
02984
02985 if (need_copies) {
02986
02987 for (TN_LIST *p = need_copies; p != NULL; p = TN_LIST_rest(p)) {
02988 TN *body_tn = TN_LIST_first(p);
02989 INT copies = hTN_MAP32_Get(tn_copies_map, body_tn);
02990 TN **new_tn_array = (TN **) hTN_MAP_Get(new_tn_map, body_tn);
02991
02992 for (INT cp = copies; cp > 0; --cp) {
02993 TN *tn = new_tn_array[cp - 1];
02994 CG_LOOP_Backpatch_Add(CG_LOOP_epilog, tn, body_tn, cp);
02995 CG_LOOP_Backpatch_Add(CG_LOOP_prolog, tn, body_tn, cp + 1);
02996 }
02997 if (! CG_LOOP_Backpatch_Find_Non_Body_TN(CG_LOOP_prolog, body_tn, 1)) {
02998 CG_LOOP_Backpatch_Add(CG_LOOP_epilog, body_tn, body_tn, 0);
02999 CG_LOOP_Backpatch_Add(CG_LOOP_prolog, body_tn, body_tn, 1);
03000 }
03001 }
03002 }
03003
03004 remainder_epilog_backpatches = epilog_backpatches;
03005 main_loop_prolog_backpatches = prolog_backpatches;
03006
03007
03008 remainder_tail = Gen_BB_Like(body);
03009 if (BB_freq_fb_based(CG_LOOP_prolog))
03010 Set_BB_freq_fb_based(remainder_tail);
03011
03012 prolog_backpatches = remainder_prolog_backpatches;
03013 epilog_backpatches = remainder_epilog_backpatches;
03014
03015 if (Get_Trace(TP_CGLOOP, 0x4)) {
03016 #pragma mips_frequency_hint NEVER
03017 CG_LOOP_Trace_Loop(loop, "2nd Check Unroll_Make_Remainder_Loop");
03018 }
03019
03020
03021 if (need_copies == NULL) {
03022
03023
03024
03025 Remove_Notations_Without_Copies(CG_LOOP_prolog, remainder_tail);
03026
03027 } else {
03028
03029 Remove_Notations_With_Copies(body, CG_LOOP_prolog, remainder_tail,
03030 tn_def_map,
03031 tn_copies_map, need_copies, new_tn_map);
03032 }
03033
03034
03035 cl.Recompute_Liveness();
03036
03037 if (Get_Trace(TP_CGLOOP, 0x4)) {
03038 #pragma mips_frequency_hint NEVER
03039 CG_LOOP_Trace_Loop(loop, "3rd Check Unroll_Make_Remainder_Loop");
03040 }
03041
03042 OPS zero_trip_guard_ops = OPS_EMPTY;
03043 OPS prolog_ops = OPS_EMPTY;
03044 BOOL const_trip = TN_is_constant(trip_count);
03045
03046 if (const_trip) {
03047
03048
03049
03050
03051
03052 INT16 new_trip_count_val = TN_value(trip_count) % ntimes;
03053 Is_True(new_trip_count_val > 0,
03054 ("unroll_make_remainder_loop: trip count is negative or zero"));
03055 new_trip_count = Gen_Literal_TN(new_trip_count_val, 4);
03056
03057 } else {
03058
03059
03060
03061
03062
03063
03064
03065
03066 float ztrip_prob = 1.0 / ntimes;
03067 INT32 trip_size = TN_size(trip_count);
03068 new_trip_count = Build_TN_Like(trip_count);
03069 if (is_power_of_two(ntimes))
03070 Exp_OP2(trip_size == 4 ? OPC_U4BAND : OPC_U8BAND,
03071 new_trip_count,
03072 trip_count,
03073 Gen_Literal_TN(ntimes-1, trip_size),
03074 &prolog_ops);
03075 else
03076 Exp_OP2(trip_size == 4 ? OPC_U4MOD : OPC_U8MOD,
03077 new_trip_count,
03078 trip_count,
03079 Gen_Literal_TN(ntimes, trip_size),
03080 &prolog_ops);
03081
03082 continuation_label = Gen_Label_For_BB(remainder_tail);
03083 #ifdef TARG_X8664
03084 Exp_OP3v(OPC_FALSEBR,
03085 NULL,
03086 Gen_Label_TN(continuation_label,0),
03087 new_trip_count,
03088 Gen_Literal_TN(0,4),
03089 trip_size == 4 ? V_BR_I4EQ : V_BR_I8EQ,
03090 &zero_trip_guard_ops);
03091 #else
03092 Exp_OP3v(OPC_FALSEBR,
03093 NULL,
03094 Gen_Label_TN(continuation_label,0),
03095 new_trip_count,
03096 Zero_TN,
03097 V_BR_I8EQ,
03098 &zero_trip_guard_ops);
03099 #endif
03100
03101 Link_Pred_Succ_with_Prob(CG_LOOP_prolog, remainder_tail, ztrip_prob);
03102 if (freqs || BB_freq_fb_based(CG_LOOP_prolog))
03103 BB_freq(remainder_tail) += loop_entry_freq * ztrip_prob;
03104 if (freqs)
03105 Change_Succ_Prob(CG_LOOP_prolog, BB_next(CG_LOOP_prolog),
03106 1.0 - ztrip_prob);
03107 }
03108
03109
03110
03111
03112
03113 Is_True(br_op && OP_br(br_op), ("loop body doesn't end in branch"));
03114
03115 label_tn = OP_opnd(br_op, Branch_Target_Operand(br_op));
03116 Is_True(TN_is_label(label_tn), ("branch operand not a label"));
03117 BB_Remove_Op(body, br_op);
03118
03119 BB *first_remainder_body = body;
03120 float body_freq = 0.0;
03121
03122 if (const_trip && TN_value(new_trip_count) < 2) {
03123
03124
03125
03126
03127 BB_annotations(body) = ANNOT_Unlink(BB_annotations(body), annot);
03128 Unlink_Pred_Succ(body, body);
03129 Set_BB_loop_head_bb(body, NULL);
03130 body_freq = loop_entry_freq;
03131
03132 } else {
03133
03134 if (ntimes == 2 || CG_LOOP_unroll_remainder_fully) {
03135
03136 INT32 unroll_times = (const_trip ? (TN_value(new_trip_count) % ntimes)
03137 : (ntimes - 1));
03138 BB *unrolled_body = Gen_BB_Like(body);
03139 first_remainder_body = unrolled_body;
03140
03141 OPS body_ops = OPS_EMPTY;
03142 if (!const_trip) {
03143 BB_Append_Ops(CG_LOOP_prolog, &prolog_ops);
03144 BB_Append_Ops(CG_LOOP_prolog, &zero_trip_guard_ops);
03145
03146 INT32 trip_size = TN_size(new_trip_count);
03147 Exp_OP2(trip_size == 4 ? OPC_I4ADD : OPC_I8ADD,
03148 new_trip_count,
03149 new_trip_count,
03150 Gen_Literal_TN(-1, trip_size),
03151 &body_ops);
03152 #ifdef TARG_X8664
03153 Exp_OP3v(OPC_TRUEBR,
03154 NULL,
03155 Gen_Label_TN(continuation_label,0),
03156 new_trip_count,
03157 Gen_Literal_TN(0,4),
03158 trip_size == 4 ? V_BR_I4EQ : V_BR_I8EQ,
03159 &body_ops);
03160 #else
03161 Exp_OP3v(OPC_TRUEBR,
03162 NULL,
03163 Gen_Label_TN(continuation_label,0),
03164 new_trip_count,
03165 Zero_TN,
03166 V_BR_I8EQ,
03167 &body_ops);
03168 #endif
03169 }
03170 else {
03171 Set_BB_unrollings(unrolled_body, unroll_times);
03172 }
03173
03174 for (INT32 unrolling = 1; unrolling <= unroll_times; unrolling++) {
03175 OP *op;
03176 FOR_ALL_BB_OPs(body, op) {
03177
03178 if (OP_prefetch(op) && unrolling != unroll_times)
03179 continue;
03180 OP *new_op = Dup_OP(op);
03181 if (const_trip) {
03182 Set_OP_unrolling(new_op, unrolling-1);
03183 Set_OP_orig_idx(new_op, OP_map_idx(op));
03184 Set_OP_unroll_bb(new_op, unrolled_body);
03185 }
03186 #ifndef TARG_IA64
03187 CG_LOOP_Init_Op(new_op);
03188 #endif
03189 Copy_WN_For_Memory_OP(new_op, op);
03190 BB_Append_Op(unrolled_body, new_op);
03191 }
03192
03193 if (unrolling < unroll_times && !const_trip) {
03194 FOR_ALL_OPS_OPs(&body_ops, op) {
03195 OP *new_op = Dup_OP(op);
03196 Copy_WN_For_Memory_OP(new_op, op);
03197 BB_Append_Op(unrolled_body, new_op);
03198 }
03199 Link_Pred_Succ_with_Prob(unrolled_body,
03200 remainder_tail,
03201 1.0 / (unroll_times - unrolling + 1.0));
03202 append_to_prolog(unrolled_body);
03203 if (freqs || BB_freq_fb_based(body))
03204 BB_freq(unrolled_body)
03205 = loop_entry_freq * (unroll_times + 1.0 - unrolling)
03206 / (unroll_times + 1.0);
03207 unrolled_body = Gen_BB_Like(body);
03208 }
03209 }
03210 if (freqs || BB_freq_fb_based(body))
03211 body_freq = const_trip ?
03212 BB_freq(CG_LOOP_prolog) : loop_entry_freq / (unroll_times + 1.0);
03213 body = unrolled_body;
03214
03215 } else {
03216
03217
03218
03219 INT64 trip_est = (const_trip ? TN_value(new_trip_count)
03220 : MIN((1 + ntimes) / 2, 2));
03221 if (info) {
03222 WN *wn = LOOPINFO_wn(info);
03223 TYPE_ID ttype = WN_rtype(WN_loop_trip(wn));
03224 OPCODE opc_intconst = OPCODE_make_op(OPR_INTCONST, ttype, MTYPE_V);
03225 WN *ntimes_wn = WN_CreateIntconst(opc_intconst, ntimes);
03226 OPCODE opc_rem = OPCODE_make_op(OPR_REM, ttype, MTYPE_V);
03227 WN_set_loop_trip(wn,
03228 WN_CreateExp2(opc_rem, WN_loop_trip(wn), ntimes_wn));
03229 WN_loop_trip_est(wn) = trip_est;
03230 WN_Set_Loop_Unimportant_Misc(wn);
03231 LOOPINFO_trip_count_tn(info) = new_trip_count;
03232 }
03233
03234 OPS body_ops = OPS_EMPTY;
03235 #ifdef TARG_IA64
03236 CGTARG_Generate_Remainder_Branch(new_trip_count, label_tn,
03237 &prolog_ops, &body_ops);
03238 #else
03239 OP* op = NULL;
03240 BB* new_body = Gen_BB_Like( body );
03241 TN* var_trip_count = new_trip_count;
03242
03243 if( const_trip ){
03244 var_trip_count = Build_TN_Like( trip_count );
03245 Exp_COPY( var_trip_count, new_trip_count, &prolog_ops );
03246 }
03247
03248 FOR_ALL_BB_OPs( body, op ){
03249 OP* new_op = Dup_OP(op);
03250 CG_LOOP_Init_Op( new_op );
03251 Copy_WN_For_Memory_OP( new_op, op );
03252 OPS_Append_Op( &body_ops, new_op );
03253 }
03254
03255 const INT32 trip_size = TN_size(var_trip_count);
03256 Exp_OP2( trip_size == 4 ? OPC_I4ADD : OPC_I8ADD,
03257 var_trip_count,
03258 var_trip_count,
03259 Gen_Literal_TN(-1, trip_size),
03260 &body_ops );
03261
03262 #ifdef TARG_X8664 //bug 12956: x8664/exp_branch.cxx does not accept Zero_TN
03263 Exp_OP3v( OPC_TRUEBR,
03264 NULL,
03265 Gen_Label_TN( Gen_Label_For_BB(new_body),0 ),
03266 var_trip_count,
03267 Gen_Literal_TN(0,4),
03268 trip_size == 4 ? V_BR_I4NE : V_BR_I8NE,
03269 &body_ops );
03270 #else
03271 Exp_OP3v( OPC_TRUEBR,
03272 NULL,
03273 Gen_Label_TN( Gen_Label_For_BB(new_body),0 ),
03274 var_trip_count,
03275 Zero_TN,
03276 V_BR_I8NE,
03277 &body_ops );
03278 #endif
03279 float exit_prob = 1.0 / ntimes;
03280 Link_Pred_Succ_with_Prob( new_body, new_body, 1.0 - exit_prob );
03281
03282
03283 body = new_body;
03284 #endif
03285
03286 #if defined(TARG_SL)
03287 if(info)
03288 BB_Add_Annotation(body, ANNOT_LOOPINFO, info);
03289 #endif
03290 BB_Append_Ops(CG_LOOP_prolog, &prolog_ops);
03291 BB_Append_Ops(CG_LOOP_prolog, &zero_trip_guard_ops);
03292 BB_Append_Ops(body, &body_ops);
03293
03294 #ifdef TARG_IA64
03295 if (freqs || BB_freq_fb_based(body)) {
03296
03297 body_freq = BB_freq(CG_LOOP_prolog) * (trip_est - 1);
03298 if (freqs && trip_est > 0)
03299 BBLIST_prob(BB_succs(body)) = (trip_est - 1.0) / trip_est;
03300 }
03301 #endif
03302 if (Get_Trace(TP_CGLOOP, 0x4)) {
03303 #pragma mips_frequency_hint NEVER
03304 CG_LOOP_Trace_Loop( loop, "Remainder Loop Structure" );
03305 }
03306 }
03307 }
03308
03309
03310
03311 append_to_prolog(body);
03312 if (freqs || BB_freq_fb_based(body))
03313 BB_freq(body) = body_freq;
03314
03315
03316
03317
03318
03319
03320 append_to_prolog(remainder_tail);
03321 if (freqs || BB_freq_fb_based(body))
03322 BB_freq(remainder_tail) = loop_entry_freq;
03323
03324
03325
03326 note_remainder_head(first_remainder_body, ntimes,
03327 TN_is_constant(new_trip_count)
03328 ? TN_value(new_trip_count) : 0);
03329
03330
03331 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL); bp;
03332 bp = CG_LOOP_Backpatch_Next(bp))
03333 CG_LOOP_Backpatch_Delete(CG_LOOP_prolog, bp);
03334 for (bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL); bp;
03335 bp = CG_LOOP_Backpatch_Next(bp))
03336 CG_LOOP_Backpatch_Delete(CG_LOOP_epilog, bp);
03337
03338
03339 prolog_backpatches = main_loop_prolog_backpatches;
03340 epilog_backpatches = main_loop_epilog_backpatches;
03341
03342 if (Get_Trace(TP_CGLOOP, 0x4)) {
03343 #pragma mips_frequency_hint NEVER
03344 CG_LOOP_Trace_Loop( loop, "After Unroll_Make_Remainder_Loop" );
03345 }
03346 }
03347
03348
03349
03350 void unroll_rename_backpatches(CG_LOOP_BACKPATCH *bpatches, UINT16 n,
03351 UINT16 ntimes)
03352
03353
03354
03355
03356
03357 {
03358 while (bpatches) {
03359 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bpatches);
03360 UINT16 adjust = CG_LOOP_BACKPATCH_omega(bpatches) + (ntimes - n - 1);
03361 UINT8 new_omega = adjust / ntimes;
03362 UINT8 which = ntimes - 1 - (adjust - (new_omega * ntimes));
03363 TN *new_body_tn = unroll_names_get(body_tn, which);
03364 CG_LOOP_BACKPATCH_Set_body_tn(bpatches, new_body_tn);
03365 CG_LOOP_BACKPATCH_Set_omega(bpatches, new_omega);
03366 bpatches = CG_LOOP_Backpatch_Next(bpatches);
03367 }
03368 }
03369
03370
03371 void unroll_remove_notations(BB *fully_unrolled_body)
03372
03373
03374
03375
03376
03377
03378
03379 {
03380 CG_LOOP_BACKPATCH *bp;
03381 OP *op;
03382
03383
03384
03385
03386
03387 FOR_ALL_BB_OPs(fully_unrolled_body, op) {
03388 UINT8 i;
03389 for (i = 0; i < OP_opnds(op); i++) {
03390 UINT8 omega = OP_omega(op,i);
03391 if (omega) {
03392 TN *old_tn = OP_opnd(op,i);
03393 TN *new_tn;
03394 #ifdef KEY
03395
03396 if (!TN_is_register(old_tn) || TN_is_dedicated(old_tn))
03397 new_tn = old_tn;
03398 else
03399 #endif
03400 new_tn = CG_LOOP_Backpatch_Find_Non_Body_TN(CG_LOOP_prolog,
03401 old_tn, omega);
03402 #ifdef TARG_X8664
03403 if( new_tn == NULL &&
03404 old_tn == X87_cw_TN() ){
03405
03406 new_tn = old_tn;
03407 }
03408 #endif
03409 Is_True(new_tn, ("missing prolog backpatch for TN%d[%d]",
03410 TN_number(old_tn), omega));
03411 Set_OP_opnd(op, i, new_tn);
03412 Set_OP_omega(op, i, 0);
03413 }
03414 }
03415 }
03416
03417
03418
03419
03420
03421
03422
03423
03424 bp = CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL);
03425 while (bp) {
03426 CG_LOOP_BACKPATCH *next = CG_LOOP_Backpatch_Next(bp);
03427 TN *src_tn = CG_LOOP_BACKPATCH_body_tn(bp);
03428 UINT8 omega = CG_LOOP_BACKPATCH_omega(bp);
03429 TN *dest_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
03430 if (omega) {
03431 TN *new_src_tn = CG_LOOP_Backpatch_Find_Non_Body_TN(CG_LOOP_prolog,
03432 src_tn, omega);
03433 Is_True(new_src_tn, ("missing prolog backpatch for TN%d[%d]",
03434 TN_number(src_tn), omega));
03435 src_tn = new_src_tn;
03436 }
03437 if (src_tn != dest_tn) {
03438 CGPREP_Copy_TN(dest_tn, src_tn, BB_last_op(fully_unrolled_body), 0,
03439 FALSE);
03440 }
03441 CG_LOOP_Backpatch_Delete(CG_LOOP_epilog, bp);
03442 bp = next;
03443 }
03444
03445
03446
03447
03448 bp = CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL);
03449 while (bp) {
03450 CG_LOOP_BACKPATCH *next = CG_LOOP_Backpatch_Next(bp);
03451 CG_LOOP_Backpatch_Delete(CG_LOOP_prolog, bp);
03452 bp = next;
03453 }
03454 }
03455
03456
03457 static void trace_loop_cflow(LOOP_DESCR *loop, const char *prefix)
03458 {
03459 BB *bb;
03460
03461 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb) {
03462 BBLIST *bbl;
03463 fprintf(TFile, "%sBB:%d preds = { ", prefix, BB_id(bb));
03464 FOR_ALL_BB_PREDS(bb, bbl)
03465 fprintf(TFile, "BB:%d ", BB_id(BBLIST_item(bbl)));
03466 fprintf(TFile, "} succs = { ");
03467 FOR_ALL_BB_SUCCS(bb, bbl)
03468 fprintf(TFile, "BB:%d ", BB_id(BBLIST_item(bbl)));
03469 fprintf(TFile, "}\n");
03470 }
03471 }
03472
03473
03474 static BOOL sort_topologically(LOOP_DESCR *loop, BB **result)
03475
03476
03477
03478
03479
03480
03481 {
03482 BB_SET *bbs = LOOP_DESCR_bbset(loop);
03483 UINT32 num_bbs = BB_SET_Size(bbs);
03484 BB_MAP topo_map = BB_Topological_Map(bbs, LOOP_DESCR_loophead(loop));
03485 BB *bb;
03486
03487 FOR_ALL_BB_SET_members(bbs, bb) {
03488 INT32 i = BB_MAP32_Get(topo_map, bb);
03489 Is_True(i >= 0 && i <= num_bbs, ("bad <topo_map> value"));
03490 if (i == 0) {
03491 BB_MAP_Delete(topo_map);
03492 return FALSE;
03493 }
03494 result[i-1] = bb;
03495 }
03496
03497 if (Get_Trace(TP_CGLOOP, 2)) {
03498 UINT32 bbi;
03499 fprintf(TFile, "<unroll> topological sort of loop BBs:");
03500 for (bbi = 0; bbi < num_bbs; bbi++)
03501 fprintf(TFile, " %d", BB_id(result[bbi]));
03502 fprintf(TFile, "\n");
03503 }
03504
03505 BB_MAP_Delete(topo_map);
03506
03507 return TRUE;
03508 }
03509
03510 static BOOL unroll_multi_make_remainder_loop(LOOP_DESCR *loop, UINT8 ntimes,
03511 BB **topo_vec, UINT32 num_bbs)
03512
03513
03514
03515
03516
03517
03518
03519
03520
03521
03522
03523
03524
03525
03526
03527
03528
03529
03530
03531
03532
03533 {
03534 BB *head = LOOP_DESCR_loophead(loop), *tail = topo_vec[num_bbs-1];
03535 ANNOTATION *annot = ANNOT_Get(BB_annotations(head), ANNOT_LOOPINFO);
03536 LOOPINFO *info = ANNOT_loopinfo(annot);
03537 TN *trip_count = LOOPINFO_trip_count_tn(info), *trip_counter;
03538 TN *new_trip_count = TN_is_constant(trip_count) ?
03539 Gen_Literal_TN(TN_value(trip_count) % ntimes, 4) : Build_TN_Like(trip_count);
03540 OP *op, *backedge_br_op = BB_branch_op(tail);
03541 BOOL freqs = FREQ_Frequencies_Computed();
03542 float ztrip_prob = !freqs || TN_is_constant(trip_count) ? 0.0 : 1.0 / ntimes;
03543 INT64 orig_trip_est = WN_loop_trip_est(LOOPINFO_wn(info));
03544 INT64 trip_est = TN_is_constant(new_trip_count) ?
03545 TN_value(new_trip_count) : MIN((1 + ntimes) / 2, 2);
03546 float orig_prolog_freq = BB_freq(CG_LOOP_prolog);
03547 float orig_post_prolog_freq = BB_freq(BB_next(CG_LOOP_prolog));
03548 float freq_factor = (1.0 - ztrip_prob) * trip_est / MAX(orig_trip_est, 1);
03549 float fts_freq, head_freq;
03550 BB *remainder_epilog = NULL, *fts, *orig_prolog = CG_LOOP_prolog;
03551 INT16 dbnum = 0;
03552 UINT32 bbi;
03553
03554 if (TN_is_constant(new_trip_count) && TN_value(new_trip_count) % ntimes == 0)
03555 return FALSE;
03556
03557 Is_True(CG_LOOP_epilog != NULL,
03558 ("remainder loop generation requires non-NULL CG_LOOP_epilog"));
03559 Is_True(BB_unrollings(head) < 2,
03560 ("unrolled loophead passed to unroll_multi_make_remainder_loop"));
03561 Is_True(topo_vec[0] == head, ("<head> not first in <topo_vec>"));
03562 Is_True(LOOP_DESCR_Find_Unique_Tail(loop),
03563 ("<loop> has no unique tail"));
03564 Is_True(tail == LOOP_DESCR_Find_Unique_Tail(loop),
03565 ("<loop> tail not last in <topo_vec>"));
03566
03567
03568
03569
03570
03571
03572 if (TN_is_constant(trip_count)) {
03573 INT16 new_trip_count_val = TN_value(trip_count) % ntimes;
03574 if (new_trip_count_val < 0)
03575 DevWarn("unroll_multi_make_remainder_loop: trip count is negative");
03576 new_trip_count = Gen_Literal_TN(new_trip_count_val, 4);
03577 } else {
03578
03579
03580
03581
03582
03583
03584
03585 LABEL_IDX continuation_label;
03586 INT32 trip_size = TN_size(trip_count);
03587 OPS ops = OPS_EMPTY;
03588 remainder_epilog = Gen_BB_Like(CG_LOOP_prolog);
03589 if (BB_freq_fb_based(CG_LOOP_prolog))
03590 Set_BB_freq_fb_based(remainder_epilog);
03591 continuation_label = Gen_Label_For_BB(remainder_epilog);
03592 Is_True(is_power_of_two(ntimes), ("unroll amount not power of two"));
03593 new_trip_count = Build_TN_Like(trip_count);
03594 Exp_OP2(trip_size == 4 ? OPC_U4BAND : OPC_U8BAND,
03595 new_trip_count,
03596 trip_count,
03597 Gen_Literal_TN(ntimes-1, trip_size),
03598 &ops);
03599 #ifdef TARG_X8664 //bug 12956: x8664/exp_branch.cxx does not accept Zero_TN
03600 Exp_OP3v(OPC_FALSEBR,
03601 NULL,
03602 Gen_Label_TN(continuation_label,0),
03603 new_trip_count,
03604 Gen_Literal_TN(0,4),
03605 trip_size == 4 ? V_BR_I4EQ : V_BR_I8EQ,
03606 &ops);
03607 #else
03608 Exp_OP3v(OPC_FALSEBR,
03609 NULL,
03610 Gen_Label_TN(continuation_label,0),
03611 new_trip_count,
03612 Zero_TN,
03613 V_BR_I8EQ,
03614 &ops);
03615 #endif
03616 BB_Append_Ops(CG_LOOP_prolog, &ops);
03617 Link_Pred_Succ_with_Prob(CG_LOOP_prolog, remainder_epilog, ztrip_prob);
03618 if (freqs)
03619 Change_Succ_Prob(CG_LOOP_prolog, BB_next(CG_LOOP_prolog),
03620 1.0 - ztrip_prob);
03621 }
03622
03623
03624
03625
03626
03627
03628 #ifdef TARG_IA64
03629 Remove_Explicit_Branch(CG_LOOP_prolog);
03630 #endif
03631 fts = BB_Fall_Thru_Successor(CG_LOOP_prolog);
03632 Is_True(fts, ("CG_LOOP_prolog has no fall-thru successor"));
03633 fts_freq = BB_freq(fts);
03634 head_freq = BB_freq(head);
03635 Change_Succ(CG_LOOP_prolog, fts, head);
03636 BB_freq(fts) = fts_freq;
03637 BB_freq(head) = head_freq;
03638 #ifdef TARG_IA64
03639 Remove_Explicit_Branch(tail);
03640 #endif
03641 fts = BB_Fall_Thru_Successor(tail);
03642 FmtAssert(fts,
03643
03644 ("trip-countable loop (line %d) tail has no fall-thru successor",
03645 BB_Loop_Lineno(head)));
03646 Unlink_Pred_Succ(tail, fts);
03647 for (bbi = 0; bbi < num_bbs; bbi++) {
03648 BB *loop_bb = topo_vec[bbi];
03649 BB *post_prolog = BB_next(CG_LOOP_prolog);
03650 BB *prev = BB_prev(loop_bb);
03651 BB *next = BB_next(loop_bb);
03652 RID *rid = BB_rid(loop_bb);
03653 #ifdef TARG_IA64
03654 Remove_Explicit_Branch(loop_bb);
03655 #endif
03656 BB *fall_thru = BB_Fall_Thru_Successor(loop_bb);
03657 Is_True(BB_SET_MemberP(LOOP_DESCR_bbset(loop), loop_bb),
03658 ("topo_vec[%d] = BB:%d not in LOOP_DESCR_bbset",
03659 bbi, BB_id(loop_bb)));
03660 if (prev && BB_next(prev) == loop_bb) BB_next(prev) = next;
03661 if (next && BB_prev(next) == loop_bb) BB_prev(next) = prev;
03662 BB_prev(loop_bb) = BB_next(loop_bb) = NULL;
03663 if (REGION_First_BB == loop_bb) REGION_First_BB = next;
03664 if (rid && RID_cginfo(rid)) {
03665
03666 CGRIN *cgrin = RID_cginfo(rid);
03667 if (CGRIN_first_bb(cgrin) == loop_bb) CGRIN_first_bb(cgrin) = next;
03668 if (CGRIN_last_bb(cgrin) == loop_bb) CGRIN_last_bb(cgrin) = prev;
03669 }
03670 Chain_BBs(CG_LOOP_prolog, loop_bb);
03671 Chain_BBs(loop_bb, post_prolog);
03672 BB_freq(loop_bb) *= freq_factor;
03673 CG_LOOP_prolog = loop_bb;
03674 if (bbi != num_bbs -1 && fall_thru && fall_thru != topo_vec[bbi+1]) {
03675
03676 BB *new_bb = Gen_And_Insert_BB_After(loop_bb);
03677 BB_rid(new_bb) = BB_rid(loop_bb);
03678 if (BB_freq_fb_based(loop_bb)) Set_BB_freq_fb_based(new_bb);
03679 Add_Goto(new_bb, fall_thru);
03680 Change_Succ(loop_bb, fall_thru, new_bb);
03681 if (freqs || BB_freq_fb_based(fall_thru))
03682 BB_freq(fall_thru) += BB_freq(loop_bb);
03683 if (BB_SET_MemberP(LOOP_DESCR_bbset(loop), fall_thru))
03684 Set_BB_loop_head_bb(new_bb, head);
03685 CG_LOOP_prolog = new_bb;
03686 }
03687 }
03688
03689
03690
03691
03692
03693 if (backedge_br_op) {
03694 if (Is_DB_OP_Init(backedge_br_op))
03695 dbnum = OP_dbnum(backedge_br_op);
03696 BB_Remove_Op(tail, backedge_br_op);
03697 Unlink_Pred_Succ(tail, head);
03698 }
03699
03700 if (ntimes == 2 ||
03701 TN_is_constant(new_trip_count) && TN_value(new_trip_count) < 2) {
03702
03703
03704
03705 BB_annotations(head) = ANNOT_Unlink(BB_annotations(head), annot);
03706 for (bbi = 0; bbi < num_bbs; bbi++)
03707 Set_BB_loop_head_bb(topo_vec[bbi], NULL);
03708 Link_Pred_Succ_with_Prob(tail, BB_next(tail), 1.0);
03709
03710 } else {
03711
03712
03713
03714 OPS ops = OPS_EMPTY;
03715 INT32 trip_size = TN_size(new_trip_count);
03716 WN *wn = LOOPINFO_wn(info);
03717 TYPE_ID ttype = WN_rtype(WN_loop_trip(wn));
03718 OPCODE opc_intconst = OPCODE_make_op(OPR_INTCONST, ttype, MTYPE_V);
03719 WN *ntimes_wn = WN_CreateIntconst(opc_intconst, ntimes);
03720 OPCODE opc_rem = OPCODE_make_op(OPR_REM, ttype, MTYPE_V);
03721 float backedge_prob = (trip_est - 1.0) / trip_est;
03722 WN_set_loop_trip(wn,
03723 WN_CreateExp2(opc_rem, WN_loop_trip(wn), ntimes_wn));
03724 WN_loop_trip_est(wn) = trip_est;
03725 WN_Set_Loop_Unimportant_Misc(wn);
03726 LOOPINFO_trip_count_tn(info) = new_trip_count;
03727
03728
03729
03730
03731
03732
03733
03734
03735
03736
03737 if (TN_is_constant(new_trip_count)) {
03738 OPS copy_ops = OPS_EMPTY;
03739 trip_counter = Gen_Register_TN (ISA_REGISTER_CLASS_integer, trip_size);
03740 Exp_COPY(trip_counter, new_trip_count, ©_ops);
03741 BB_Append_Ops(orig_prolog, ©_ops);
03742 } else {
03743 trip_counter = new_trip_count;
03744 }
03745 Exp_OP2(trip_size == 4 ? OPC_I4ADD : OPC_I8ADD,
03746 trip_counter,
03747 trip_counter,
03748 Gen_Literal_TN(-1, trip_size),
03749 &ops);
03750 #ifdef TARG_X8664 //bug 12956: x8664/exp_branch.cxx does not accept Zero_TN
03751 Exp_OP3v(OPC_TRUEBR,
03752 NULL,
03753 Gen_Label_TN(Gen_Label_For_BB(head), 0),
03754 trip_counter,
03755 Gen_Literal_TN(0,4),
03756 trip_size == 4 ? V_BR_I4NE : V_BR_I8NE,
03757 &ops);
03758 #else
03759 Exp_OP3v(OPC_TRUEBR,
03760 NULL,
03761 Gen_Label_TN(Gen_Label_For_BB(head), 0),
03762 trip_counter,
03763 Zero_TN,
03764 V_BR_I8NE,
03765 &ops);
03766 #endif
03767 BB_Append_Ops(tail, &ops);
03768 if (dbnum > 0) {
03769 FOR_ALL_OPS_OPs(&ops, op) DB_Initialize_OP(op, dbnum);
03770 }
03771 Link_Pred_Succ_with_Prob(tail, head, backedge_prob);
03772 Link_Pred_Succ_with_Prob(tail, BB_next(tail), 1.0 - backedge_prob);
03773 }
03774
03775
03776
03777
03778
03779 if (remainder_epilog) {
03780 append_to_prolog(remainder_epilog);
03781 BB_freq(remainder_epilog) = orig_prolog_freq;
03782 }
03783 BB_freq(BB_next(CG_LOOP_prolog)) = orig_post_prolog_freq;
03784
03785
03786
03787 note_remainder_head(head, ntimes, TN_is_constant(new_trip_count) ?
03788 TN_value(new_trip_count) : 0);
03789
03790 return TRUE;
03791 }
03792
03793
03794 static BOOL unroll_multi_bb(LOOP_DESCR *loop, UINT8 ntimes)
03795
03796
03797
03798
03799
03800
03801 {
03802 BB *head = LOOP_DESCR_loophead(loop);
03803 UINT32 num_bbs = BB_SET_Size(LOOP_DESCR_bbset(loop));
03804 BB *replicas;
03805 BB_SET *new_bbs;
03806 BB **orig_bbs, **orig_br_targ_bbs, **orig_fall_thru_bbs, *bb, *replica;
03807 float *orig_br_probs;
03808 float orig_head_freq = BB_freq(head);
03809 BB_MAP orig_bb_index_map;
03810 UINT32 bbi, i, unrolling;
03811 BOOL unrolling_fully = FALSE;
03812 ANNOTATION *annot = ANNOT_Get(BB_annotations(head), ANNOT_LOOPINFO);
03813 LOOPINFO *info = annot ? ANNOT_loopinfo(annot) : NULL;
03814 TN *trip_count_tn = info ? LOOPINFO_trip_count_tn(info) : NULL;
03815 LOOPINFO *unrolled_info = NULL;
03816 BOOL freqs = FREQ_Frequencies_Computed();
03817 BOOL gen_remainder_loop = trip_count_tn && is_power_of_two(ntimes) &&
03818 CG_LOOP_unroll_multi_make_remainder_loop;
03819 UINT32 removed_exits = 0;
03820
03821 MEM_POOL_Push(&MEM_local_nz_pool);
03822 orig_bbs = TYPE_MEM_POOL_ALLOC_N(BB *, &MEM_local_nz_pool, num_bbs);
03823 if (!sort_topologically(loop, orig_bbs)) {
03824 const char *reason = "loop has irreducible flow graph";
03825 MEM_POOL_Pop(&MEM_local_nz_pool);
03826 if (Get_Trace(TP_CGLOOP, 2))
03827 fprintf(TFile, "<unroll> aborting; %s\n", reason);
03828 note_not_unrolled(head, reason);
03829 return FALSE;
03830 }
03831
03832 replicas = Gen_BB_N(num_bbs * ntimes);
03833 if (info) {
03834 WN *wn = WN_COPY_Tree(LOOPINFO_wn(info));
03835 unrolled_info = TYPE_P_ALLOC(LOOPINFO);
03836 LOOPINFO_wn(unrolled_info) = wn;
03837 WN_loop_trip_est(wn) /= ntimes;
03838 LOOPINFO_srcpos(unrolled_info) = LOOPINFO_srcpos(info);
03839 if (trip_count_tn) {
03840 INT16 new_trip_count_val;
03841 TYPE_ID ttype = WN_rtype(WN_loop_trip(wn));
03842 OPCODE opc_intconst = OPCODE_make_op(OPR_INTCONST, ttype, MTYPE_V);
03843 WN *ntimes_wn = WN_CreateIntconst(opc_intconst, ntimes);
03844 OPCODE opc_div = OPCODE_make_op(OPR_DIV, ttype, MTYPE_V);
03845 if (TN_is_constant(trip_count_tn))
03846 new_trip_count_val = TN_value(trip_count_tn) / ntimes;
03847 unrolling_fully = CG_LOOP_unroll_fully &&
03848 TN_is_constant(trip_count_tn) && TN_value(trip_count_tn) == ntimes;
03849 WN_set_loop_trip(wn, WN_CreateExp2(opc_div, WN_loop_trip(wn),
03850 ntimes_wn));
03851 if (TN_is_constant(trip_count_tn))
03852 LOOPINFO_trip_count_tn(unrolled_info) =
03853 Gen_Literal_TN(new_trip_count_val, TN_size(trip_count_tn));
03854 }
03855 }
03856
03857
03858 if (unrolled_info)
03859 BB_Add_Annotation(&replicas[0], ANNOT_LOOPINFO, unrolled_info);
03860 Set_BB_unrollings(&replicas[0], ntimes);
03861
03862
03863
03864
03865
03866
03867
03868
03869
03870
03871 orig_bb_index_map = BB_MAP32_Create();
03872 orig_br_targ_bbs = TYPE_MEM_POOL_ALLOC_N(BB *, &MEM_local_nz_pool, num_bbs);
03873 orig_br_probs = TYPE_MEM_POOL_ALLOC_N(float, &MEM_local_nz_pool, num_bbs);
03874 orig_fall_thru_bbs = TYPE_MEM_POOL_ALLOC_N(BB *, &MEM_local_nz_pool,num_bbs);
03875 for (bbi = 0; bbi < num_bbs; bbi++) {
03876 BB *orig_bb = orig_bbs[bbi];
03877 BBLIST *fts_list = BBlist_Fall_Thru_Succ(orig_bb);
03878 BB *fall_thru = fts_list ? BBLIST_item(fts_list) : NULL;
03879 float fall_thru_prob = fts_list ? BBLIST_prob(fts_list) : 0.0;
03880 OP *br_op = BB_branch_op(orig_bb);
03881 BB *br_targ = NULL;
03882 float br_prob = 0.0;
03883 if (br_op) {
03884 INT br_targ_opnd = Branch_Target_Operand(br_op);
03885 if (TN_is_label(OP_opnd(br_op, br_targ_opnd))) {
03886 BBLIST *tlist = (fts_list == BB_succs(orig_bb)) ?
03887 BBLIST_next(BB_succs(orig_bb)) : BB_succs(orig_bb);
03888 LABEL_IDX label = TN_label(OP_opnd(br_op, br_targ_opnd));
03889 Is_True(tlist, ("BB_succs(BB:%d) missing succ labelled %s",
03890 BB_id(orig_bb), LABEL_name(label)));
03891 br_targ = BBLIST_item(tlist);
03892 Is_True(Is_Label_For_BB(label, br_targ),
03893 ("BB_succs(BB:%d) has succ BB:%d, expected one labelled %s",
03894 BB_id(orig_bb), BB_id(br_targ), ST_name(label)));
03895 br_prob = freqs ? BBLIST_prob(tlist) : (fall_thru ? 0.5 : 1.0);
03896 }
03897 }
03898 if (CG_warn_bad_freqs && freqs && br_targ &&
03899 !FREQ_Match(1.0, br_prob + fall_thru_prob))
03900 DevWarn("for BB:%d, fall_thru_prob + br_prob != 1.0", BB_id(orig_bb));
03901 BB_MAP32_Set(orig_bb_index_map, orig_bb, bbi+1);
03902 orig_br_targ_bbs[bbi] = br_targ;
03903 orig_br_probs[bbi] = br_prob;
03904 orig_fall_thru_bbs[bbi] = fall_thru;
03905 }
03906
03907
03908
03909
03910
03911 new_bbs = BB_SET_Create_Empty(PU_BB_Count, &MEM_local_nz_pool);
03912 Chain_BBs(BB_prev(head), &replicas[0]);
03913 new_bbs = BB_SET_Union1D(new_bbs, &replicas[0], &MEM_local_nz_pool);
03914 for (i = 1; i < num_bbs * ntimes; i++) {
03915 new_bbs = BB_SET_Union1D(new_bbs, &replicas[i], &MEM_local_nz_pool);
03916 Chain_BBs(&replicas[i-1], &replicas[i]);
03917 }
03918 Chain_BBs(&replicas[i-1], head);
03919
03920
03921 LOOP_DESCR_Retarget_Loop_Entrances(loop, &replicas[0]);
03922
03923 if (freqs || BB_freq_fb_based(head)) {
03924
03925
03926
03927
03928
03929
03930
03931
03932
03933
03934 if (info) {
03935 INT64 orig_trip_est = WN_loop_trip_est(LOOPINFO_wn(info));
03936 if (CG_warn_bad_freqs &&
03937 !FREQ_Match(orig_head_freq,
03938 orig_trip_est * BB_freq(CG_LOOP_prolog)))
03939 DevWarn("BB_freq(orig head BB:%d) != BB_freq(prolog BB:%d) * trip_est",
03940 BB_id(head), BB_id(CG_LOOP_prolog));
03941 }
03942 BB_freq(&replicas[0]) = gen_remainder_loop ?
03943 BB_freq(CG_LOOP_prolog) * WN_loop_trip_est(LOOPINFO_wn(unrolled_info)) :
03944 orig_head_freq / ntimes;
03945 }
03946
03947
03948
03949 replica = &replicas[0];
03950 for (unrolling = 0; unrolling < ntimes; unrolling++) {
03951 for (bbi = 0; bbi < num_bbs; bbi++, replica++) {
03952 BB *orig_bb = orig_bbs[bbi];
03953 BB *br_targ = orig_br_targ_bbs[bbi];
03954 float br_prob = orig_br_probs[bbi];
03955 BB *fall_thru_dest = orig_fall_thru_bbs[bbi];
03956 OP *op, *replica_br_op = NULL;
03957 BOOL fall_thru_dest_in_loop;
03958
03959
03960 Set_BB_loop_head_bb(replica, &replicas[0]);
03961 Set_BB_unrollings(replica, ntimes);
03962 if (unrolling_fully) Set_BB_unrolled_fully(replica);
03963 BB_rid(replica) = BB_rid(head);
03964 if (BB_freq_fb_based(orig_bb)) Set_BB_freq_fb_based(replica);
03965
03966
03967
03968 FOR_ALL_BB_OPs(orig_bb, op) {
03969 OP *rop;
03970 UINT8 opi;
03971 UINT8 resi;
03972 rop = Dup_OP(op);
03973 Set_OP_unrolling(rop, unrolling);
03974 Set_OP_orig_idx(rop, OP_map_idx(op));
03975 Set_OP_unroll_bb(rop, replica);
03976 Copy_WN_For_Memory_OP(rop, op);
03977 for (resi = 0; resi < OP_results(rop); resi++) {
03978 TN *res = OP_result(rop,resi);
03979 if (!TN_is_global_reg(res)) {
03980 TN *new_res = unroll_names_get(res, unrolling);
03981 Set_OP_result(rop, resi, new_res);
03982 }
03983 }
03984 for (opi = 0; opi < OP_opnds(rop); opi++) {
03985 TN *opnd = OP_opnd(rop, opi);
03986 if (TN_is_register(opnd)) {
03987 if (!TN_is_global_reg(opnd)) {
03988 Set_OP_opnd(rop, opi, unroll_names_get(opnd, unrolling));
03989 }
03990 }
03991 }
03992 BB_Append_Op(replica, rop);
03993 if (OP_br(rop)) replica_br_op = rop;
03994 }
03995 Is_True(br_targ == NULL || replica_br_op, ("no replica branch op"));
03996 unroll_xfer_annotations(replica, orig_bb);
03997
03998
03999 if (fall_thru_dest) {
04000 INT32 ftd_bbi = BB_MAP32_Get(orig_bb_index_map, fall_thru_dest) - 1;
04001 fall_thru_dest_in_loop = ftd_bbi >= 0;
04002 if (fall_thru_dest_in_loop) {
04003
04004 UINT32 iter = ftd_bbi > 0 ? unrolling : (unrolling + 1) % ntimes;
04005 fall_thru_dest = &replicas[iter * num_bbs + ftd_bbi];
04006 }
04007 }
04008
04009
04010 if (replica_br_op) {
04011 INT replica_targ_opnd = Branch_Target_Operand(replica_br_op);
04012 if (br_targ == NULL) {
04013 BBLIST *succs;
04014 Is_True(!TN_is_label(OP_opnd(replica_br_op, replica_targ_opnd)),
04015 ("expected indirect branch"));
04016 FOR_ALL_BB_SUCCS(orig_bb, succs) {
04017 BB *succ = BBLIST_item(succs);
04018 Is_True(!BB_SET_MemberP(LOOP_DESCR_bbset(loop), succ),
04019
04020 ("not yet retargeting intra-loop indirect branches"));
04021 Link_Pred_Succ_with_Prob(replica, succ, BBLIST_prob(succs));
04022 }
04023 } else {
04024 INT32 bt_bbi = BB_MAP32_Get(orig_bb_index_map, br_targ) - 1;
04025 BOOL br_targ_in_loop = bt_bbi >= 0;
04026 if (br_targ_in_loop) {
04027
04028 UINT32 iter = bt_bbi > 0 ? unrolling : (unrolling + 1) % ntimes;
04029 br_targ = &replicas[iter * num_bbs + bt_bbi];
04030 }
04031 if (unrolling_fully && br_targ == &replicas[0]) {
04032
04033 BB_Remove_Op(replica, replica_br_op);
04034 br_prob = 0.0;
04035 } else if (br_targ == BB_next(replica)) {
04036
04037
04038
04039
04040
04041
04042 if (gen_remainder_loop && bt_bbi == 0) {
04043
04044 BB_Remove_Op(replica, replica_br_op);
04045 br_prob = 0.0;
04046 } else if (fall_thru_dest && fall_thru_dest != br_targ) {
04047 if (Negate_Branch(replica_br_op)) {
04048 Set_OP_opnd(replica_br_op, replica_targ_opnd,
04049 Gen_Label_TN(Gen_Label_For_BB(fall_thru_dest), 0));
04050 br_prob = 1.0 - br_prob;
04051 Link_Pred_Succ_with_Prob(replica, fall_thru_dest, br_prob);
04052 if (fall_thru_dest_in_loop && fall_thru_dest != &replicas[0] &&
04053 (freqs || BB_freq_fb_based(fall_thru_dest)))
04054 BB_freq(fall_thru_dest) += br_prob * BB_freq(replica);
04055 } else {
04056 #pragma mips_frequency_hint NEVER
04057 DevWarn("unable to negate branch %s", TOP_Name(OP_code(replica_br_op)));
04058 }
04059 } else {
04060 BB_Remove_Op(replica, replica_br_op);
04061 br_prob = 0.0;
04062 }
04063 fall_thru_dest = br_targ;
04064 fall_thru_dest_in_loop = br_targ_in_loop;
04065 } else if (br_targ_in_loop) {
04066
04067
04068
04069 Set_OP_opnd(replica_br_op, replica_targ_opnd,
04070 Gen_Label_TN(Gen_Label_For_BB(br_targ), 0));
04071 Link_Pred_Succ_with_Prob(replica, br_targ, br_prob);
04072 if (br_targ != &replicas[0] &&
04073 (freqs || BB_freq_fb_based(br_targ)))
04074 BB_freq(br_targ) += br_prob * BB_freq(replica);
04075 } else if (br_targ) {
04076
04077
04078
04079 Link_Pred_Succ_with_Prob(replica, br_targ, br_prob);
04080 }
04081 }
04082 }
04083
04084
04085
04086
04087 if (fall_thru_dest) {
04088 float ft_prob = 1.0 - br_prob;
04089 if (BB_next(replica) != fall_thru_dest) {
04090 BB *new_bb = Gen_And_Insert_BB_After(replica);
04091 BB_rid(new_bb) = BB_rid(head);
04092 if (BB_freq_fb_based(replica)) Set_BB_freq_fb_based(new_bb);
04093 if (freqs || BB_freq_fb_based(new_bb))
04094 BB_freq(new_bb) = ft_prob * BB_freq(replica);
04095 Add_Goto(new_bb, fall_thru_dest);
04096 if (fall_thru_dest_in_loop) {
04097 new_bbs = BB_SET_Union1D(new_bbs, new_bb, &MEM_local_nz_pool);
04098 Set_BB_loop_head_bb(new_bb, &replicas[0]);
04099 Set_BB_unrollings(new_bb, ntimes);
04100 }
04101 }
04102 Link_Pred_Succ_with_Prob(replica, BB_next(replica), ft_prob);
04103 if (fall_thru_dest_in_loop && fall_thru_dest != &replicas[0] &&
04104 (freqs || BB_freq_fb_based(fall_thru_dest)))
04105 BB_freq(fall_thru_dest) += ft_prob * BB_freq(replica);
04106 }
04107 }
04108 }
04109
04110
04111
04112
04113 BB_freq(head) = orig_head_freq;
04114
04115
04116
04117
04118
04119
04120
04121 if (!gen_remainder_loop ||
04122 !unroll_multi_make_remainder_loop(loop, ntimes, orig_bbs, num_bbs)) {
04123
04124
04125
04126
04127 BB *post_prolog = BB_next(CG_LOOP_prolog);
04128 gen_remainder_loop = FALSE;
04129 for (bbi = 0; bbi < num_bbs; bbi++) {
04130 BB *loop_bb = orig_bbs[bbi];
04131 BB *prev = BB_prev(loop_bb);
04132 BB *next = BB_next(loop_bb);
04133 RID *rid = BB_rid(loop_bb);
04134 while (BB_succs(loop_bb))
04135 Unlink_Pred_Succ(loop_bb, BBLIST_item(BB_succs(loop_bb)));
04136 while (BB_preds(loop_bb))
04137 Unlink_Pred_Succ(BBLIST_item(BB_preds(loop_bb)), loop_bb);
04138 if (prev && BB_next(prev) == loop_bb) BB_next(prev) = next;
04139 if (next && BB_prev(next) == loop_bb) BB_prev(next) = prev;
04140 BB_prev(loop_bb) = BB_next(loop_bb) = NULL;
04141 if (REGION_First_BB == loop_bb) REGION_First_BB = next;
04142 if (rid && RID_cginfo(rid)) {
04143
04144 CGRIN *cgrin = RID_cginfo(rid);
04145 if (CGRIN_first_bb(cgrin) == loop_bb) CGRIN_first_bb(cgrin) = next;
04146 if (CGRIN_last_bb(cgrin) == loop_bb) CGRIN_last_bb(cgrin) = prev;
04147 }
04148 }
04149 BB_next(CG_LOOP_prolog) = post_prolog;
04150 }
04151
04152
04153 LOOP_DESCR_loophead(loop) = &replicas[0];
04154 BB_SET* del = BS_Difference (LOOP_DESCR_bbset(loop), new_bbs,
04155 &MEM_local_nz_pool);
04156 FOR_ALL_BB_SET_members(new_bbs, bb)
04157 LOOP_DESCR_Add_BB(loop, bb);
04158 FOR_ALL_BB_SET_members(del, bb)
04159 LOOP_DESCR_Delete_BB(loop, bb);
04160
04161 LOOP_DESCR_loopinfo(loop) = unrolled_info;
04162 LOOP_DESCR_num_exits(loop) =
04163 LOOP_DESCR_num_exits(loop) * ntimes - removed_exits;
04164
04165 if (gen_remainder_loop && !TN_is_constant(trip_count_tn)) {
04166
04167
04168
04169
04170
04171 unroll_guard_unrolled_body(loop, unrolled_info, trip_count_tn, ntimes);
04172 }
04173
04174 BB_MAP_Delete(orig_bb_index_map);
04175 MEM_POOL_Pop(&MEM_local_nz_pool);
04176 return TRUE;
04177 }
04178
04179
04180 void CG_LOOP_Finish(void)
04181
04182
04183
04184
04185 {
04186
04187
04188
04189
04190
04191
04192
04193
04194 }
04195
04196 BB*
04197 CG_LOOP_Gen_And_Prepend_To_Prolog(BB *loop_head, LOOP_DESCR* loop)
04198
04199
04200
04201
04202 {
04203 BB *ftp = BB_Fall_Thru_Predecessor(loop_head);
04204 LOOP_DESCR *enclosing = LOOP_DESCR_Next_Enclosing_Loop(loop);
04205 BB *new_prolog = Gen_BB_Like(loop_head);
04206 BOOL freqs = FREQ_Frequencies_Computed();
04207
04208 if (!CG_localize_tns) Set_BB_gra_spill(new_prolog);
04209 if (BB_freq_fb_based(loop_head)) Set_BB_freq_fb_based(new_prolog);
04210 LOOP_DESCR_Retarget_Loop_Entrances(loop, new_prolog);
04211
04212 if (ftp && BB_SET_MemberP(LOOP_DESCR_bbset(loop), ftp)) {
04213
04214
04215
04216
04217
04218
04219
04220
04221
04222 BB *last_bb = REGION_First_BB;
04223 while (BB_next(last_bb)) last_bb = BB_next(last_bb);
04224 Insert_BB(new_prolog, last_bb);
04225 Add_Goto(new_prolog, loop_head);
04226 } else {
04227 Insert_BB(new_prolog, BB_prev(loop_head));
04228 Link_Pred_Succ_with_Prob(new_prolog, loop_head, 1.0);
04229 }
04230
04231 if (freqs || BB_freq_fb_based(new_prolog))
04232 BB_freq(loop_head) += BB_freq(new_prolog);
04233
04234
04235
04236
04237
04238
04239 if (enclosing &&
04240 all_bbs_in(BB_preds(new_prolog), LOOP_DESCR_bbset(enclosing))) {
04241 LOOP_DESCR_Add_BB(enclosing, new_prolog);
04242 BB_nest_level(new_prolog) = LOOP_DESCR_nestlevel(enclosing);
04243 }
04244
04245 return new_prolog;
04246 }
04247
04248 BB*
04249 CG_LOOP_Append_BB_To_Prolog(BB *loop_prolog, BB *loop_head)
04250
04251
04252
04253
04254 {
04255 OP *br_op = BB_branch_op(loop_prolog);
04256 BB *new_bb = Gen_And_Insert_BB_After(loop_prolog);
04257 BOOL freqs = FREQ_Frequencies_Computed();
04258
04259 LOOP_DESCR *prolog_loop = LOOP_DESCR_Find_Loop(loop_prolog);
04260 if (BB_freq_fb_based(loop_prolog)) Set_BB_freq_fb_based(new_bb);
04261 BB_rid(new_bb) = BB_rid(loop_prolog);
04262 Is_True(!TN_is_label(OP_opnd(br_op, Branch_Target_Operand(br_op))) ||
04263 Is_Label_For_BB(TN_label(OP_opnd(br_op, Branch_Target_Operand(br_op))), loop_head),
04264 ("explicit branch target should be <loop_head>"));
04265
04266 BB_Remove_Op(loop_prolog, br_op);
04267 Change_Succ(loop_prolog, loop_head, new_bb);
04268 Add_Goto(new_bb, loop_head);
04269
04270 if (freqs || BB_freq_fb_based(loop_prolog))
04271 BB_freq(loop_head) += BB_freq(new_bb);
04272
04273 if (prolog_loop) LOOP_DESCR_Add_BB(prolog_loop, new_bb);
04274
04275 return new_bb;
04276 }
04277
04278
04279
04280
04281
04282
04283
04284
04285
04286
04287 void
04288 CG_LOOP_Coalesce_Backedges(LOOP_DESCR* loop)
04289 {
04290 BB* bb_back;
04291 BB* head = LOOP_DESCR_loophead(loop);
04292
04293
04294
04295
04296
04297
04298 bb_back = Gen_And_Insert_BB_Before(head);
04299 BB_freq(bb_back) = 0.0;
04300 LOOP_DESCR_Add_BB(loop, bb_back);
04301
04302
04303
04304
04305 BBLIST* preds = BB_preds(head);
04306 while (preds) {
04307 BB* pred = BBLIST_item(preds);
04308 preds = BBLIST_next(preds);
04309 if (BB_SET_MemberP(LOOP_DESCR_bbset(loop), pred)) {
04310 if (!BB_Retarget_Branch(pred, head, bb_back)) {
04311
04312 Change_Succ(pred, head, bb_back);
04313 }
04314 }
04315 }
04316
04317
04318
04319
04320
04321 Add_Goto(bb_back, head);
04322 GRA_LIVE_Compute_Liveness_For_BB(bb_back);
04323 }
04324
04325
04326 void trace_loop(LOOP_DESCR *loop)
04327 {
04328 CG_LOOP_Trace_Loop(loop, "");
04329 }
04330
04331
04332 #ifndef KEY
04333 static
04334 #endif
04335 void Unroll_Do_Loop_guard(LOOP_DESCR *loop,
04336 LOOPINFO *unrolled_info,
04337 TN *unrolled_trip_count)
04338 {
04339 INT64 trip_est = WN_loop_trip_est(LOOPINFO_wn(unrolled_info));
04340 float ztrip_prob = 1.0 / MAX(trip_est, 1);
04341 float orig_post_prolog_freq = BB_freq(BB_next(CG_LOOP_prolog));
04342 OPS ops = OPS_EMPTY;
04343 BB *continuation_bb;
04344 LABEL_IDX continuation_lbl;
04345
04346 LOOPINFO_trip_count_tn(unrolled_info) = unrolled_trip_count;
04347
04348 extend_epilog(loop);
04349 continuation_bb = CG_LOOP_epilog;
04350 continuation_lbl = Gen_Label_For_BB(continuation_bb);
04351
04352 #ifdef TARG_X8664
04353 Exp_OP3v(OPC_FALSEBR,
04354 NULL,
04355 Gen_Label_TN(continuation_lbl,0),
04356 unrolled_trip_count,
04357 Gen_Literal_TN(0,4),
04358 TN_size(unrolled_trip_count) == 4 ? V_BR_I4EQ : V_BR_I8EQ,
04359 &ops);
04360 #else
04361 Exp_OP3v(OPC_FALSEBR,
04362 NULL,
04363 Gen_Label_TN(continuation_lbl,0),
04364 unrolled_trip_count,
04365 Zero_TN,
04366 V_BR_I8EQ,
04367 &ops);
04368 #endif
04369 BB_Append_Ops(CG_LOOP_prolog, &ops);
04370 Link_Pred_Succ_with_Prob(CG_LOOP_prolog, continuation_bb, ztrip_prob);
04371 Change_Succ_Prob(CG_LOOP_prolog, BB_next(CG_LOOP_prolog), 1.0 - ztrip_prob);
04372 BB_freq(BB_next(CG_LOOP_prolog)) = orig_post_prolog_freq;
04373
04374
04375
04376
04377 extend_prolog();
04378 extend_epilog(loop);
04379 }
04380
04381
04382
04383
04384 void Unroll_Do_Loop(CG_LOOP& cl, UINT32 ntimes)
04385 {
04386 LOOP_DESCR *loop = cl.Loop();
04387 if (Get_Trace(TP_CGLOOP, 2))
04388 CG_LOOP_Trace_Loop(loop,
04389 "Unroll_Do_Loop: Before unrolling BB:%d %d times:",
04390 BB_id(LOOP_DESCR_loophead(loop)), ntimes);
04391
04392 BB *head = LOOP_DESCR_loophead(loop);
04393 TN *trip_count_tn = CG_LOOP_Trip_Count(loop);
04394 BB *unrolled_body;
04395 ANNOTATION *annot;
04396 LOOPINFO *unrolled_info;
04397 TN *unrolled_trip_count = NULL;
04398
04399 OPS ops = OPS_EMPTY;
04400 BOOL gen_remainder_loop = TRUE;
04401 BOOL gen_unrolled_loop_guard = TRUE;
04402 if (TN_is_constant(trip_count_tn)) {
04403
04404
04405
04406
04407 if (TN_value(trip_count_tn) < 2 * ntimes) {
04408 if (Get_Trace(TP_CGLOOP, 2))
04409 CG_LOOP_Trace_Loop(loop, "disable unrolling because"
04410 " trip_count(%d) < 2 * ntimes(%d).",
04411 TN_value(trip_count_tn), ntimes);
04412 note_not_unrolled(head, "trip_count(%d) < 2 * unroll_factor(%d)",
04413 TN_value(trip_count_tn), ntimes);
04414 return;
04415 }
04416 gen_unrolled_loop_guard = FALSE;
04417 if (TN_value(trip_count_tn) % ntimes == 0)
04418 gen_remainder_loop = FALSE;
04419 } else {
04420 INT32 trip_size = TN_size(trip_count_tn);
04421 unrolled_trip_count = Build_TN_Like(trip_count_tn);
04422 if (is_power_of_two(ntimes))
04423 Exp_OP2(trip_size == 4 ? OPC_I4ASHR : OPC_I8ASHR,
04424 unrolled_trip_count,
04425 trip_count_tn,
04426 Gen_Literal_TN(log2(ntimes), trip_size),
04427 &ops);
04428 else
04429 Exp_OP2(trip_size == 4 ? OPC_U4DIV : OPC_U8DIV,
04430 unrolled_trip_count,
04431 trip_count_tn,
04432 Gen_Literal_TN(ntimes, trip_size),
04433 &ops);
04434 }
04435
04436 #ifdef TARG_IA64 // only IA64 has a counted loop (cloop) instruction
04437
04438
04439 {
04440 OPS body_ops = OPS_EMPTY;
04441 OP *br_op = BB_branch_op(head);
04442 TN *label_tn = OP_opnd(br_op, Branch_Target_Operand(br_op));
04443
04444 CGTARG_Generate_Branch_Cloop(br_op, unrolled_trip_count, trip_count_tn,
04445 ntimes, label_tn, &ops, &body_ops);
04446 if (OPS_length(&body_ops) > 0) {
04447 BB_Remove_Op(head, br_op);
04448 BB_Append_Ops(head, &body_ops);
04449 CGPREP_Init_Op(BB_branch_op(head));
04450 CG_LOOP_Init_Op(BB_branch_op(head));
04451 }
04452 }
04453 #endif
04454
04455
04456 unroll_names_init(loop, ntimes, &MEM_phase_nz_pool);
04457
04458
04459
04460 unrolled_body = Unroll_Replicate_Body(loop, ntimes, FALSE);
04461 annot = ANNOT_Get(BB_annotations(unrolled_body), ANNOT_LOOPINFO);
04462 FmtAssert(annot, ("unrolled body has no LOOPINFO annotation"));
04463 unrolled_info = ANNOT_loopinfo(annot);
04464
04465
04466
04467
04468
04469
04470 if (gen_remainder_loop) {
04471
04472 Unroll_Make_Remainder_Loop(cl, ntimes);
04473 }
04474
04475
04476 LOOP_DESCR_loophead(loop) = unrolled_body;
04477 #ifdef TARG_IA64
04478 LOOP_DESCR_Add_BB(loop, unrolled_body);
04479 LOOP_DESCR_Delete_BB(loop, head);
04480 #else
04481 LOOP_DESCR_Delete_BB(loop, head);
04482 LOOP_DESCR_Add_BB(loop, unrolled_body);
04483 #endif
04484 LOOP_DESCR_loopinfo(loop) = unrolled_info;
04485
04486
04487
04488
04489 BB_Append_Ops(CG_LOOP_prolog, &ops);
04490
04491
04492
04493
04494
04495 if (gen_unrolled_loop_guard)
04496 Unroll_Do_Loop_guard(loop, unrolled_info, unrolled_trip_count);
04497
04498
04499
04500 unroll_rename_backpatches(CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL),
04501 0, ntimes);
04502
04503
04504
04505
04506 unroll_rename_backpatches(CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL),
04507 ntimes-1, ntimes);
04508
04509 unroll_names_finish();
04510
04511 if (Get_Trace(TP_CGLOOP, 2))
04512 CG_LOOP_Trace_Loop(loop, "Unroll_Do_Loop: After unrolling BB:%d %d times:",
04513 BB_id(head), ntimes);
04514 }
04515
04516
04517
04518 void Unroll_Do_Loop_Fully(LOOP_DESCR *loop, UINT32 ntimes)
04519 {
04520 if (Get_Trace(TP_CGLOOP, 2))
04521 CG_LOOP_Trace_Loop(loop, "Unroll_Do_Loop_Fully:"
04522 " Before unrolling BB:%d %d times:",
04523 BB_id(LOOP_DESCR_loophead(loop)), ntimes);
04524
04525 BB *head = LOOP_DESCR_loophead(loop);
04526 TN *trip_count_tn = CG_LOOP_Trip_Count(loop);
04527 BB *unrolled_body;
04528 ANNOTATION *annot;
04529 LOOPINFO *unrolled_info;
04530
04531 Is_True(TN_is_constant(trip_count_tn) &&
04532 TN_value(trip_count_tn) == ntimes,
04533 ("Unroll_Do_Loop_Fully: unable to fully unroll do_loop."));
04534
04535
04536 unroll_names_init(loop, ntimes, &MEM_phase_nz_pool);
04537
04538
04539
04540 unrolled_body = Unroll_Replicate_Body(loop, ntimes, TRUE);
04541 annot = ANNOT_Get(BB_annotations(unrolled_body), ANNOT_LOOPINFO);
04542 FmtAssert(annot, ("unrolled body has no LOOPINFO annotation"));
04543 unrolled_info = ANNOT_loopinfo(annot);
04544
04545
04546 LOOP_DESCR_loophead(loop) = unrolled_body;
04547 #ifdef TARG_IA64
04548 LOOP_DESCR_Delete_BB(loop, head);
04549 LOOP_DESCR_Add_BB(loop, unrolled_body);
04550 #else
04551 LOOP_DESCR_Add_BB(loop, unrolled_body);
04552 LOOP_DESCR_Delete_BB(loop, head);
04553 #endif
04554 LOOP_DESCR_loopinfo(loop) = unrolled_info;
04555
04556
04557
04558 unroll_rename_backpatches(CG_LOOP_Backpatch_First(CG_LOOP_prolog, NULL),
04559 0, ntimes);
04560
04561
04562
04563
04564 unroll_rename_backpatches(CG_LOOP_Backpatch_First(CG_LOOP_epilog, NULL),
04565 ntimes-1, ntimes);
04566
04567
04568 unroll_remove_notations(unrolled_body);
04569
04570 unroll_names_finish();
04571
04572 if (Get_Trace(TP_CGLOOP, 2))
04573 CG_LOOP_Trace_Loop(loop, "Unroll_Do_Loop_Fully:"
04574 " After unrolling BB:%d %d times:",
04575 BB_id(head), ntimes);
04576 }
04577
04578
04579
04580
04581
04582
04583
04584 void Unroll_Dowhile_Loop(LOOP_DESCR *loop, UINT32 ntimes)
04585 {
04586 if (ntimes <= 1)
04587 return;
04588
04589 #ifdef TARG_IA64
04590
04591
04592
04593
04594
04595 if (CG_Enable_Ipfec_Phases)
04596 {
04597 BB *bb;
04598 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb)
04599 {
04600 OP *br = BB_branch_op(bb);
04601 if ( BB_succs_len(bb)>=2 && br ) {
04602 TN *cond_tn1 = OP_opnd(br,OP_PREDICATE_OPND);
04603 if ( cond_tn1 && cond_tn1 != True_TN)
04604 {
04605 DEF_KIND kind;
04606 OP *def_op = TN_Reaching_Value_At_Op(cond_tn1, br, &kind, TRUE);
04607 if (!def_op) return;
04608 }
04609 }
04610 }
04611 }
04612 #endif
04613
04614 MEM_POOL_Push(&MEM_local_nz_pool);
04615
04616 if (Get_Trace(TP_CGLOOP, 2))
04617 CG_LOOP_Trace_Loop(loop, "Unroll_Dowhile_Loop:"
04618 " Before unrolling BB:%d %d times:",
04619 BB_id(LOOP_DESCR_loophead(loop)), ntimes);
04620
04621 BB *head = LOOP_DESCR_loophead(loop);
04622 BB_SET *new_bbs;
04623 ANNOTATION *annot = ANNOT_Get(BB_annotations(head), ANNOT_LOOPINFO);
04624 LOOPINFO *info = annot ? ANNOT_loopinfo(annot) : NULL;
04625 LOOPINFO *unrolled_info = NULL;
04626 BOOL freqs = FREQ_Frequencies_Computed() || BB_freq_fb_based(head) ;
04627
04628 BB *replicas = Gen_BB_N(ntimes-1);
04629 Set_BB_unrollings(&replicas[0], ntimes);
04630
04631 if (info) {
04632 WN *wn = WN_COPY_Tree(LOOPINFO_wn(info));
04633 unrolled_info = TYPE_P_ALLOC(LOOPINFO);
04634 LOOPINFO_wn(unrolled_info) = wn;
04635 WN_loop_trip_est(wn) /= ntimes;
04636 WN_loop_trip_est(wn) += 1;
04637 LOOPINFO_srcpos(unrolled_info) = LOOPINFO_srcpos(info);
04638 BB_Add_Annotation(&replicas[0], ANNOT_LOOPINFO, unrolled_info);
04639 }
04640
04641
04642 unroll_names_init(loop, ntimes, &MEM_phase_nz_pool);
04643
04644 if (freqs || BB_freq_fb_based(head)) {
04645
04646
04647
04648
04649
04650
04651
04652
04653
04654
04655 if (info) {
04656 INT64 orig_trip_est = WN_loop_trip_est(LOOPINFO_wn(info));
04657 float orig_head_freq = BB_freq(head);
04658 if (CG_warn_bad_freqs &&
04659 !FREQ_Match(orig_head_freq,
04660 orig_trip_est * BB_freq(CG_LOOP_prolog)))
04661 DevWarn("BB_freq(orig head BB:%d) != BB_freq(prolog BB:%d) * trip_est",
04662 BB_id(head), BB_id(CG_LOOP_prolog));
04663 }
04664 }
04665
04666 OP *br_op = BB_branch_op(head);
04667 INT br_targ_opnd = Branch_Target_Operand(br_op);
04668 BBLIST *fts_list = BBlist_Fall_Thru_Succ(head);
04669 BB *loop_merge_bb = BBLIST_item(fts_list);
04670 TN *loop_merge_label = Gen_Label_TN(Gen_Label_For_BB(loop_merge_bb), 0);
04671 float br_prob = freqs ? (1.0 - BBLIST_prob(fts_list)) : 0.5;
04672 float replica_prob = (freqs || BB_freq_fb_based(head)) ?
04673 (BB_freq(head) + ntimes - 1)/ ntimes : 1.0;
04674
04675
04676
04677
04678
04679 new_bbs = BB_SET_Create_Empty(PU_BB_Count, &MEM_local_nz_pool);
04680 Chain_BBs(BB_prev(head), &replicas[0]);
04681 for (INT i = 0; i < ntimes - 1; i++) {
04682 new_bbs = BB_SET_Union1D(new_bbs, &replicas[i], &MEM_local_nz_pool);
04683 Chain_BBs(&replicas[i], (i == ntimes - 2) ? head : &replicas[i+1]);
04684 }
04685
04686 while (BB_preds(head)) {
04687 BB *pred = BBLIST_item(BB_preds(head));
04688 Change_Succ(pred, head, &replicas[0]);
04689 }
04690
04691
04692
04693 for (INT unrolling = 0; unrolling < ntimes - 1; unrolling++) {
04694
04695 BB *replica = &replicas[unrolling];
04696 Set_BB_loop_head_bb(replica, &replicas[0]);
04697 Set_BB_unrollings(replica, ntimes);
04698 BB_rid(replica) = BB_rid(head);
04699 if (BB_freq_fb_based(head)) Set_BB_freq_fb_based(replica);
04700
04701
04702
04703 OP *op;
04704 FOR_ALL_BB_OPs(head, op) {
04705 OP *rop = Dup_OP(op);
04706 Set_OP_unrolling(rop, unrolling);
04707 Set_OP_orig_idx(rop, OP_map_idx(op));
04708 Set_OP_unroll_bb(rop, replica);
04709 Copy_WN_For_Memory_OP(rop, op);
04710 for (INT resi = 0; resi < OP_results(rop); resi++) {
04711 TN *res = OP_result(rop,resi);
04712 if (!TN_is_global_reg(res)) {
04713 TN *new_res = unroll_names_get(res, unrolling);
04714 Set_OP_result(rop, resi, new_res);
04715 }
04716 }
04717 for (INT opi = 0; opi < OP_opnds(rop); opi++) {
04718 TN *opnd = OP_opnd(rop, opi);
04719 if (TN_is_register(opnd)) {
04720 if (!TN_is_global_reg(opnd)) {
04721 Set_OP_opnd(rop, opi, unroll_names_get(opnd, unrolling));
04722 }
04723 }
04724 }
04725 BB_Append_Op(replica, rop);
04726 }
04727
04728 OP *replica_br_op = BB_branch_op(replica);
04729 BOOL ok = Negate_Branch(replica_br_op);
04730 Is_True(ok, ("unable to negate branch %s", TOP_Name(OP_code(replica_br_op))));
04731 Set_OP_opnd(replica_br_op, Branch_Target_Operand(replica_br_op),
04732 loop_merge_label);
04733 Link_Pred_Succ_with_Prob(replica, loop_merge_bb, 1.0 - br_prob);
04734 Link_Pred_Succ_with_Prob(replica, BB_next(replica), br_prob);
04735 if (freqs)
04736 BB_freq(replica) = replica_prob;
04737 replica_prob *= br_prob;
04738 }
04739 if (freqs)
04740 BB_freq(head) = replica_prob;
04741
04742 {
04743
04744 unroll_xfer_annotations(&replicas[0], head);
04745 OP *br = BB_branch_op(head);
04746 Set_OP_opnd(br,
04747 Branch_Target_Operand(br),
04748 Gen_Label_TN(Gen_Label_For_BB(&replicas[0]),0));
04749 }
04750
04751 {
04752
04753 LOOP_DESCR_loophead(loop) = &replicas[0];
04754 BB *bb;
04755 FOR_ALL_BB_SET_members(new_bbs, bb)
04756 LOOP_DESCR_Add_BB(loop, bb);
04757 LOOP_DESCR_loopinfo(loop) = unrolled_info;
04758 }
04759
04760 unroll_names_finish();
04761
04762 if (Get_Trace(TP_CGLOOP, 2))
04763 CG_LOOP_Trace_Loop(loop, "Unroll_Dowhile_Loop:"
04764 " After unrolling BB:%d %d times:",
04765 BB_id(head), ntimes);
04766
04767 MEM_POOL_Pop(&MEM_local_nz_pool);
04768 }
04769
04770
04771
04772
04773
04774
04775
04776
04777 bool CG_LOOP::Determine_Unroll_Fully()
04778 {
04779 if (!CG_LOOP_unroll_fully)
04780 return false;
04781
04782 LOOPINFO *info = LOOP_DESCR_loopinfo(Loop());
04783 TN *trip_count_tn = info ? LOOPINFO_trip_count_tn(info) : NULL;
04784 BB *head = LOOP_DESCR_loophead(loop);
04785
04786 if (BB_Has_Exc_Label(head))
04787 return false;
04788
04789 if (CG_LOOP_unroll_times_max < 2)
04790 return false;
04791
04792 if (trip_count_tn == NULL)
04793 return false;
04794
04795 if (!TN_is_constant(trip_count_tn))
04796 return false;
04797
04798 INT32 const_trip_count = TN_value(trip_count_tn);
04799 INT32 body_len = BB_length(head);
04800
04801 if (body_len * const_trip_count <= CG_LOOP_unrolled_size_max ||
04802 CG_LOOP_unrolled_size_max == 0 &&
04803 CG_LOOP_unroll_times_max >= const_trip_count) {
04804
04805 if (Get_Trace(TP_CGLOOP, 2))
04806 fprintf(TFile, "<unroll> unrolling fully (%d times)\n", const_trip_count);
04807
04808 Set_unroll_fully();
04809 Set_unroll_factor(const_trip_count);
04810 return true;
04811 }
04812
04813 return false;
04814 }
04815
04816
04817 void CG_LOOP::Determine_Unroll_Factor()
04818 {
04819 LOOPINFO *info = LOOP_DESCR_loopinfo(Loop());
04820 TN *trip_count_tn = info ? LOOPINFO_trip_count_tn(info) : NULL;
04821 BB *head = LOOP_DESCR_loophead(loop);
04822 BOOL trace = Get_Trace(TP_CGLOOP, 2);
04823
04824 Set_unroll_factor(1);
04825
04826 if (BB_Has_Exc_Label(head)) {
04827 const char *reason = "in exception region or handler";
04828 note_not_unrolled(head, reason);
04829 if (trace) fprintf(TFile, "<unroll> not unrolling; %s\n", reason);
04830 return;
04831 }
04832
04833 #ifdef TARG_X8664
04834 if( Is_Target_32bit() ){
04835 for( OP* op = BB_first_op(head); op != NULL; op = OP_next(op) ){
04836 if( TOP_is_change_x87_cw(OP_code(op)) ){
04837 const char* reason = "fldcw/fnstcw slows down the performance";
04838 note_not_unrolled( head, reason );
04839 if( trace )
04840 fprintf(TFile, "<unroll> not unrolling; %s\n", reason);
04841
04842 return;
04843 }
04844 }
04845 }
04846 #endif
04847
04848 if (CG_LOOP_unroll_times_max < 2) {
04849 const char * const reason = "OPT:unroll_times_max=%d";
04850 note_not_unrolled(head, reason, CG_LOOP_unroll_times_max);
04851 if (trace) {
04852 fprintf(TFile, "<unroll> not unrolling; ");
04853 fprintf(TFile, reason, CG_LOOP_unroll_times_max);
04854 fprintf(TFile, "\n");
04855 }
04856 return;
04857 }
04858
04859 if (trip_count_tn == NULL) {
04860
04861 UINT32 ntimes = MAX(1, OPT_unroll_times-1);
04862 INT32 body_len = BB_length(head);
04863 while (ntimes > 1 && ntimes * body_len > CG_LOOP_unrolled_size_max)
04864 ntimes--;
04865 Set_unroll_factor(ntimes);
04866
04867 } else {
04868
04869 BOOL const_trip = TN_is_constant(trip_count_tn);
04870 INT32 const_trip_count = const_trip ? TN_value(trip_count_tn) : 0;
04871 INT32 body_len = BB_length(head);
04872
04873 CG_LOOP_unroll_min_trip = MAX(CG_LOOP_unroll_min_trip, 1);
04874 if (const_trip && CG_LOOP_unroll_fully &&
04875
04876
04877
04878
04879
04880
04881 (
04882 #ifdef KEY
04883
04884 body_len * (UINT64)const_trip_count <= CG_LOOP_unrolled_size_max ||
04885 #else
04886 body_len * const_trip_count <= CG_LOOP_unrolled_size_max ||
04887 #endif
04888 CG_LOOP_unrolled_size_max == 0 &&
04889 CG_LOOP_unroll_times_max >= const_trip_count)) {
04890
04891 if (trace)
04892 fprintf(TFile, "<unroll> unrolling fully (%d times)\n", const_trip_count);
04893
04894 Set_unroll_fully();
04895 Set_unroll_factor(const_trip_count);
04896 } else {
04897 #if defined(TARG_SL)
04898 if(FREQ_Frequencies_Computed() && BB_freq_fb_based(head)) {
04899 float loop_entry_freq = BB_freq(head);
04900 if (CG_LOOP_unroll_min_trip >= loop_entry_freq) {
04901 return;
04902 }
04903 }
04904 #endif
04905 UINT32 ntimes = OPT_unroll_times;
04906 ntimes = MIN(ntimes, CG_LOOP_unroll_times_max);
04907 if (!is_power_of_two(ntimes)) {
04908 ntimes = 1 << log2(ntimes);
04909 if (trace)
04910 fprintf(TFile, "<unroll> rounding down to power of two = %d times\n", ntimes);
04911 }
04912 while (ntimes > 1 && ntimes * body_len > CG_LOOP_unrolled_size_max)
04913 ntimes /= 2;
04914 if (const_trip) {
04915 while (ntimes > 1 && const_trip_count < 2 * ntimes)
04916 ntimes /= 2;
04917 }
04918 Set_unroll_factor(ntimes);
04919 }
04920 }
04921
04922 #ifdef KEY
04923 if( Unroll_factor() > 1 ){
04924 for( OP* op = BB_first_op(head); op != NULL; op = OP_next(op) ){
04925 if( !OP_store(op) )
04926 continue;
04927
04928 TN* tn = OP_opnd( op, OP_find_opnd_use(op,OU_storeval) );
04929 if( TN_is_dedicated(tn) ||
04930 !TN_is_global_reg(tn) ||
04931 !TN_is_gra_homeable(tn) )
04932 continue;
04933
04934 WN* wn = Get_WN_From_Memory_OP(op);
04935 if( wn == NULL )
04936 continue;
04937
04938
04939
04940
04941 if( Aliased( Alias_Manager, TN_home(tn), wn ) == SAME_LOCATION ){
04942 Reset_TN_is_gra_homeable( tn );
04943 Set_TN_home( tn, NULL );
04944 }
04945 }
04946 }
04947 #endif
04948
04949
04950 #ifdef KEY
04951 ANNOTATION *info_ant = ANNOT_Get(BB_annotations(head), ANNOT_LOOPINFO);
04952 info = info_ant ? ANNOT_loopinfo(info_ant) : NULL;
04953 BOOL unroll_pragma = FALSE;
04954 ANNOTATION *unroll_ant = ANNOT_Get(BB_annotations(head), ANNOT_PRAGMA);
04955 while (unroll_ant && WN_pragma(ANNOT_pragma(unroll_ant)) != WN_PRAGMA_UNROLL)
04956 unroll_ant = ANNOT_Get(ANNOT_next(unroll_ant), ANNOT_PRAGMA);
04957 if (unroll_ant) {
04958 WN *wn = ANNOT_pragma(unroll_ant);
04959 if (WN_pragma_arg1(wn) > 1)
04960 Set_unroll_factor(WN_pragma_arg1(wn));
04961 }
04962 #endif
04963 }
04964
04965
04966
04967
04968 inline bool CG_LOOP_OP_is_live(OP *op, TN_SET *live_set, bool keep_prefetch)
04969 {
04970 #ifdef KEY
04971 if( OP_br(op) ){
04972 return true;
04973 }
04974 #endif
04975 if (OP_store(op))
04976 return true;
04977 if (OP_has_implicit_interactions(op))
04978 return true;
04979 if (keep_prefetch && OP_prefetch(op))
04980 return true;
04981 for (INT i = 0; i < OP_results(op); i++) {
04982 TN *res = OP_result(op, i);
04983 if (TN_is_register(res) && !TN_is_const_reg(res))
04984 if (TN_SET_MemberP(live_set, res) || TN_is_dedicated(res))
04985 return true;
04986 }
04987 return false;
04988 }
04989
04990
04991
04992 void Induction_Variables_Removal(CG_LOOP& cl,
04993 bool remove,
04994 bool keep_prefetch,
04995 bool trace)
04996 {
04997 BB *body = cl.Loop_header();
04998 BB *epilog = CG_LOOP_epilog;
04999 OP *op;
05000
05001 if (!remove) {
05002 FOR_ALL_BB_OPs(body, op) {
05003 Reset_OP_loh(op);
05004 }
05005 }
05006
05007 CXX_MEM_POOL pool("Temp TN_SET", FALSE);
05008 TN_SET *tnset = TN_SET_Create_Empty(Last_TN + 1, pool());
05009
05010
05011 TN *tn;
05012 FOR_ALL_GTN_SET_members(BB_live_in(epilog),tn) {
05013 tnset = TN_SET_Union1D(tnset, tn, pool());
05014 }
05015
05016
05017
05018 bool changed = true;
05019 while (changed) {
05020 changed = false;
05021 FOR_ALL_BB_OPs_REV(body, op) {
05022 if (CG_LOOP_OP_is_live(op, tnset, keep_prefetch)) {
05023
05024 for (INT i = 0; i < OP_opnds(op); i++) {
05025 TN *opnd = OP_opnd(op, i);
05026 if (TN_is_register(opnd) &&
05027 !TN_is_const_reg(opnd) &&
05028 !TN_SET_MemberP(tnset, opnd)) {
05029 changed = true;
05030 tnset = TN_SET_Union1D(tnset, opnd, pool());
05031 }
05032 }
05033 }
05034 }
05035 }
05036
05037
05038
05039 if (!remove) {
05040 FOR_ALL_BB_OPs(body, op) {
05041 if (!CG_LOOP_OP_is_live(op, tnset, keep_prefetch)) {
05042 Set_OP_loh(op);
05043 if (trace) {
05044 fprintf(TFile, "<remove ind var> mark loh OP: ");
05045 Print_OP_No_SrcLine(op);
05046 }
05047 }
05048 }
05049 } else {
05050 OP *next_op;
05051 for (op = BB_first_op(body); op != NULL; op = next_op) {
05052 next_op = OP_next(op);
05053 if (!CG_LOOP_OP_is_live(op, tnset, keep_prefetch)) {
05054 if (trace) {
05055 fprintf(TFile, "<remove ind var> delete OP: ");
05056 Print_OP_No_SrcLine(op);
05057 }
05058 for (INT i = 0; i < OP_results(op); i++) {
05059 TN *tn = OP_result(op, i);
05060 if (TN_is_register(tn) && !TN_is_const_reg(tn)) {
05061 GTN_SET_Difference1D(BB_defreach_out(body), tn);
05062 GTN_SET_Difference1D(BB_live_in(body), tn);
05063 }
05064 }
05065 BB_Remove_Op(body, op);
05066 } else {
05067
05068 for (INT i = 0; i < OP_results(op); i++) {
05069 TN *tn = OP_result(op, i);
05070 if (TN_is_register(tn) &&
05071 !TN_is_const_reg(tn) &&
05072 !TN_SET_MemberP(tnset, tn)) {
05073 if (TN_register_class(tn) == TN_register_class(True_TN)) {
05074 Set_OP_result(op, i, True_TN);
05075 if (trace) {
05076 fprintf(TFile, "<remove ind var> modify result TN: ");
05077 Print_OP_No_SrcLine(op);
05078 }
05079 }
05080 }
05081 }
05082 }
05083 }
05084 }
05085 }
05086
05087 #ifdef TARG_IA64
05088
05089
05090
05091
05092
05093
05094 static INT32
05095 Estimate_Acyclic_Critical_Path_Len (BB* blk, BOOL allow_data_spec,
05096 INT32& len, INT32& len_wo_dspec, MEM_POOL* mp) {
05097
05098 len = len_wo_dspec = 0;
05099
05100 INT32 op_num = BB_length(blk);
05101 BB_OP_MAP dep_height_map = BB_OP_MAP32_Create (blk, mp);
05102
05103 OP* op;
05104 FOR_ALL_BB_OPs (blk, op) { BB_OP_MAP32_Set (dep_height_map, op, 0); }
05105
05106
05107 FOR_ALL_BB_OPs_REV(blk, op) {
05108
05109 INT32 dep_height = BB_OP_MAP32_Get (dep_height_map, op);
05110 BOOL is_leaf = TRUE;
05111 for (ARC_LIST* arcs = OP_succs(op); arcs != NULL; arcs = ARC_LIST_rest(arcs)) {
05112
05113 ARC* arc = ARC_LIST_first(arcs);
05114
05115 if (ARC_omega (arc) != 0) continue;
05116 if (ARC_kind(arc) == CG_DEP_MEMIN && allow_data_spec && ARC_is_dotted(arc)) {
05117
05118 continue;
05119 }
05120
05121 OP* succ = ARC_succ (arc);
05122 INT32 t = BB_OP_MAP32_Get (dep_height_map, succ) + ARC_latency(arc);
05123 dep_height = MAX(dep_height, t);
05124 is_leaf = FALSE;
05125 }
05126
05127
05128 if (is_leaf) {
05129 for (INT i = 0; i < OP_results(op); i++) {
05130 INT32 t = TI_LATENCY_Result_Available_Cycle(OP_code(op), i);
05131 dep_height = MAX(dep_height, t);
05132 }
05133 }
05134
05135 BB_OP_MAP32_Set (dep_height_map, op, dep_height);
05136 len = MAX(len, dep_height);
05137 }
05138
05139
05140
05141 FOR_ALL_BB_OPs (blk, op) { BB_OP_MAP32_Set (dep_height_map, op, 0); }
05142 FOR_ALL_BB_OPs_REV(blk, op) {
05143
05144 INT32 dep_height = BB_OP_MAP32_Get (dep_height_map, op);
05145 BOOL is_leaf = TRUE;
05146 for (ARC_LIST* arcs = OP_succs(op); arcs != NULL; arcs = ARC_LIST_rest(arcs)) {
05147
05148 ARC* arc = ARC_LIST_first(arcs);
05149
05150 if (ARC_omega (arc) != 0) continue;
05151
05152 OP* succ = ARC_succ (arc);
05153 INT32 t = BB_OP_MAP32_Get (dep_height_map, succ) + ARC_latency(arc);
05154 dep_height = MAX(dep_height, t);
05155 is_leaf = FALSE;
05156 }
05157
05158
05159 if (is_leaf) {
05160 for (INT i = 0; i < OP_results(op); i++) {
05161 INT32 t = TI_LATENCY_Result_Available_Cycle(OP_code(op), i);
05162 dep_height = MAX(dep_height, t);
05163 }
05164 }
05165
05166 BB_OP_MAP32_Set (dep_height_map, op, dep_height);
05167 len_wo_dspec = MAX(len, dep_height);
05168 }
05169 }
05170
05171
05172 static void
05173 Prune_Violable_Mem_Deps (BB* body) {
05174
05175 std::vector<ARC*> arcs_to_delete;
05176 OP *op;
05177 FOR_ALL_BB_OPs(body, op) {
05178 if (_CG_DEP_op_info(op)) {
05179 for (ARC_LIST *arcs = OP_succs(op); arcs; arcs = ARC_LIST_rest(arcs)) {
05180 ARC *arc = ARC_LIST_first(arcs);
05181 if (ARC_kind(arc) == CG_DEP_MEMIN && ARC_is_dotted(arc)) {
05182 arcs_to_delete.push_back(arc);
05183 }
05184 }
05185 }
05186 }
05187 for (size_t i = 0; i < arcs_to_delete.size(); i++) {
05188 CG_DEP_Detach_Arc(arcs_to_delete[i]);
05189 }
05190 }
05191
05192
05193
05194 static void
05195 Compute_Rec_Res_Min_II(CG_LOOP &cl, BOOL take_into_account_dspec=FALSE)
05196 {
05197 BB *body = cl.Loop_header();
05198 LOOP_DESCR *loop = cl.Loop();
05199 OP *op;
05200
05201 CXX_MEM_POOL limit_local_pool("kick pool", FALSE);
05202
05203 TN_SET *tn_Invariants = TN_SET_Create_Empty(Last_TN + 1, limit_local_pool());
05204 TN_SET *tn_non_rotating = TN_SET_Create_Empty(Last_TN + 1, limit_local_pool());
05205 TN_SET *tn_Defs = TN_SET_Create_Empty(Last_TN + 1, limit_local_pool());
05206 TN_SET *tn_Uses = TN_SET_Create_Empty(Last_TN + 1, limit_local_pool());
05207 FOR_ALL_BB_OPs(body, op) {
05208 for (INT i = 0; i < OP_results(op); i++) {
05209 TN *tn = OP_result(op, i);
05210 if (TN_is_register(tn) && !TN_is_dedicated(tn))
05211 tn_Defs = TN_SET_Union1D(tn_Defs, tn, limit_local_pool());
05212 if (TN_is_dedicated(tn))
05213 tn_non_rotating = TN_SET_Union1D(tn_non_rotating, tn, limit_local_pool());
05214 }
05215 for (INT j = 0; j < OP_opnds(op); j++) {
05216 TN *tn = OP_opnd(op, j);
05217 if (TN_is_register(tn) && !TN_is_dedicated(tn))
05218 tn_Uses = TN_SET_Union1D(tn_Uses, tn, limit_local_pool());
05219 if (TN_is_dedicated(tn))
05220 tn_non_rotating = TN_SET_Union1D(tn_non_rotating, tn, limit_local_pool());
05221 }
05222 }
05223 tn_Invariants = TN_SET_Difference(tn_Uses, tn_Defs, limit_local_pool());
05224 tn_non_rotating = TN_SET_UnionD(tn_non_rotating, tn_Invariants, limit_local_pool());
05225
05226 CYCLIC_DEP_GRAPH cyclic_graph(body, limit_local_pool());
05227 {
05228 std::vector<ARC*> arcs_to_delete;
05229 FOR_ALL_BB_OPs(body, op) {
05230 if (_CG_DEP_op_info(op)) {
05231 for (ARC_LIST *arcs = OP_succs(op); arcs; arcs = ARC_LIST_rest(arcs)) {
05232 ARC *arc = ARC_LIST_first(arcs);
05233
05234 if (ARC_kind(arc) == CG_DEP_REGOUT && ARC_omega(arc) > 0) {
05235
05236 bool redundant = true;
05237 for (INT i = 0; i < OP_results(op); i++) {
05238 TN *tn = OP_result(op,i);
05239 if (!TN_is_register(tn) ||
05240 TN_is_dedicated(tn) ||
05241 TN_SET_MemberP(tn_non_rotating, tn)) {
05242 redundant = false;
05243 break;
05244 }
05245 }
05246 if (redundant) {
05247 arcs_to_delete.push_back(arc);
05248 }
05249 }
05250 }
05251 }
05252 }
05253 for (size_t i = 0; i < arcs_to_delete.size(); i++) {
05254 CG_DEP_Detach_Arc(arcs_to_delete[i]);
05255 }
05256 }
05257
05258 {
05259 CG_LOOP_rec_min_ii = CG_LOOP_res_min_ii = CG_LOOP_min_ii = 0;
05260 CG_LOOP_rec_min_ii_with_dspec = -1;
05261
05262
05263 MEM_POOL_Push(&MEM_local_pool);
05264 BOOL ignore_non_def_mem_deps = FALSE;
05265 CG_LOOP_Make_Strongly_Connected_Components(body, &MEM_local_pool, ignore_non_def_mem_deps);
05266 CG_LOOP_Calculate_Min_Resource_II(body, NULL, TRUE , TRUE );
05267 CG_LOOP_Calculate_Min_Recurrence_II(body, ignore_non_def_mem_deps);
05268 if (take_into_account_dspec) {
05269 INT32 save_rec_mii = CG_LOOP_rec_min_ii;
05270 CG_LOOP_rec_min_ii = 0;
05271
05272 Prune_Violable_Mem_Deps (body);
05273 CG_LOOP_Calculate_Min_Recurrence_II(body, ignore_non_def_mem_deps);
05274 CG_LOOP_rec_min_ii_with_dspec = CG_LOOP_rec_min_ii;
05275 CG_LOOP_rec_min_ii = save_rec_mii ;
05276 } else {
05277 CG_LOOP_rec_min_ii_with_dspec = -1;
05278 }
05279
05280 CG_LOOP_Clear_SCCs(loop);
05281 if (take_into_account_dspec) {
05282 INT32 len, len_wo_dspec;
05283 Estimate_Acyclic_Critical_Path_Len
05284 (body, IPFEC_Enable_Data_Speculation, len, len_wo_dspec, &MEM_local_pool);
05285 len_wo_dspec = MAX(len_wo_dspec, CG_LOOP_res_min_ii);
05286 len_wo_dspec = MAX(len_wo_dspec, 1);
05287 len_wo_dspec = MAX(len_wo_dspec, CG_LOOP_min_ii);
05288 len = MAX(len, CG_LOOP_res_min_ii);
05289 len = MAX(len, 1);
05290 if (len + 2 > len_wo_dspec) {
05291
05292
05293 len = len_wo_dspec;
05294 }
05295 cl.Set_acyclic_len (len);
05296 cl.Set_acyclic_len_wo_dspec (len_wo_dspec);
05297 } else {
05298 cl.Set_acyclic_len (0);
05299 cl.Set_acyclic_len_wo_dspec (0);
05300 }
05301 MEM_POOL_Pop(&MEM_local_pool);
05302 }
05303 }
05304 #endif
05305
05306 void CG_LOOP::Determine_SWP_Unroll_Factor()
05307 {
05308 MEM_POOL_Push(&MEM_local_nz_pool);
05309
05310 Is_True(SWP_Options.Min_Unroll_Times <= SWP_Options.Max_Unroll_Times,
05311 ("CG_LOOP: -SWP:min_unroll_times > -SWP:max_unroll_times"));
05312
05313 Is_True(!Unroll_fully(),
05314 ("CG_LOOP: loop will be fully unrolled."));
05315
05316 BB *head = Loop_header();
05317
05318 #ifdef TARG_IA64
05319
05320 ANNOTATION *unroll_ant = ANNOT_Get(BB_annotations(head), ANNOT_PRAGMA);
05321 while (unroll_ant && WN_pragma(ANNOT_pragma(unroll_ant)) != WN_PRAGMA_UNROLL)
05322 unroll_ant = ANNOT_Get(ANNOT_next(unroll_ant), ANNOT_PRAGMA);
05323
05324 if (unroll_ant) {
05325 WN *wn = ANNOT_pragma(unroll_ant);
05326 UINT32 utimes = WN_pragma_arg1(wn);
05327 Is_True(utimes>0 && utimes<1024, ("Pragma 'UNROLL' has bad value"));
05328 Set_unroll_factor(utimes);
05329 return;
05330 }
05331 #endif
05332
05333 INT loop_size = 0;
05334 OP *op;
05335 INT num_prefetches = 0;
05336 FOR_ALL_BB_OPs(head, op) {
05337 loop_size++;
05338 if (OP_prefetch(op))
05339 num_prefetches++;
05340 }
05341
05342
05343 Induction_Variables_Removal(*this,
05344 false,
05345 false,
05346 Get_Trace(TP_CGLOOP, 2) );
05347
05348 CG_SCHED_EST *loop_se = CG_SCHED_EST_Create(head, &MEM_local_nz_pool,
05349 SCHED_EST_FOR_UNROLL |
05350 SCHED_EST_IGNORE_LOH_OPS |
05351 SCHED_EST_IGNORE_PREFETCH);
05352
05353 #ifdef TARG_IA64
05354
05355 for (INT i = 0; i < num_prefetches; i++)
05356 CG_SCHED_EST_Add_Op_Resources(loop_se,
05357 SWP_Options.Implicit_Prefetch ?
05358 TOP_adds : TOP_lfetch);
05359 #endif
05360
05361 #ifdef TARG_IA64
05362 CG_LOOP_res_min_ii = CG_SCHED_EST_Resource_Cycles(loop_se);
05363 #endif
05364 CG_SCHED_EST *additional_se = CG_SCHED_EST_Create(head, &MEM_local_nz_pool,
05365 SCHED_EST_FOR_UNROLL |
05366 SCHED_EST_IGNORE_LOH_OPS |
05367 SCHED_EST_IGNORE_BRANCH |
05368 SCHED_EST_IGNORE_PREFETCH);
05369
05370 INT min_unr = SWP_Options.Min_Unroll_Times;
05371 INT max_unr = SWP_Options.Max_Unroll_Times;
05372 INT min_recurrence = num_prefetches > 0 && SWP_Options.Implicit_Prefetch ?
05373 4 : 0;
05374
05375 const bool swp_trace = Get_Trace(TP_SWPIPE, 2);
05376 std::vector<double> swp_cycles(SWP_Options.Max_Unroll_Times+1, 0.0);
05377 INT i;
05378 for (i = min_unr; i <= max_unr; i++) {
05379 swp_cycles[i] = CG_SCHED_EST_Resource_Cycles(loop_se) * (1.0 / i);
05380 if (swp_trace) {
05381 fprintf(TFile, "<ti resource count> %d: ", i);
05382 TI_RES_COUNT_Print(TFile, loop_se->res_count);
05383 fprintf(TFile, "\n");
05384 }
05385 CG_SCHED_EST_Append_Scheds(loop_se, additional_se);
05386 }
05387
05388 if (swp_trace)
05389 for (i = min_unr; i <= max_unr; i++) {
05390 fprintf(TFile, "<swp unroll resource> swp_cycles[%d] = %g\n", i, swp_cycles[i]);
05391 }
05392
05393
05394 if (min_recurrence != 0)
05395 for (i = min_unr; i <= max_unr; i++) {
05396 if (swp_cycles[i] < (double) min_recurrence / i)
05397 swp_cycles[i] = (double) min_recurrence / i;
05398 }
05399
05400 if (swp_trace)
05401 for (i = min_unr; i <= max_unr; i++) {
05402 fprintf(TFile, "<swp unroll resource + recur> swp_cycles[%d] = %g\n", i, swp_cycles[i]);
05403 }
05404
05405 INT unroll_times = SWP_Options.Min_Unroll_Times;
05406 INT loop_size_limit = MIN(SWP_Options.OPS_Limit, CG_LOOP_unrolled_size_max);
05407 for (i = min_unr; i <= max_unr; i++) {
05408 if (i * loop_size < loop_size_limit) {
05409 if (swp_cycles[i] < (swp_cycles[unroll_times] * (1.0 - (i - unroll_times) * 0.01)))
05410 unroll_times = i;
05411 }
05412 }
05413
05414 #if defined(TARG_IA64)
05415 INT old_computed;
05416
05417 {
05418 CG_SCHED_EST *loop_se2 = CG_SCHED_EST_Create(head, &MEM_local_nz_pool,
05419 SCHED_EST_FOR_UNROLL |
05420 SCHED_EST_IGNORE_LOH_OPS |
05421 SCHED_EST_IGNORE_PREFETCH);
05422
05423 for (INT i = 0; i < num_prefetches; i++)
05424 CG_SCHED_EST_Add_Op_Resources(loop_se2, TOP_lfetch);
05425
05426 for (INT i = 0; i < num_prefetches; i++)
05427 CG_SCHED_EST_Subtract_Op_Resources(loop_se2, TOP_adds);
05428
05429 std::vector<double> swp_cycles(SWP_Options.Max_Unroll_Times+1, 0.0);
05430 INT i;
05431 for (i = min_unr; i <= max_unr; i++) {
05432 swp_cycles[i] = CG_SCHED_EST_Resource_Cycles(loop_se2) * (1.0 / i);
05433 CG_SCHED_EST_Append_Scheds(loop_se2, additional_se);
05434 }
05435
05436 INT unroll_times2 = SWP_Options.Min_Unroll_Times;
05437 for (i = min_unr; i <= max_unr; i++) {
05438 if (i * loop_size < loop_size_limit) {
05439 if (swp_cycles[i] < (swp_cycles[unroll_times2] * (1.0 - (i - unroll_times2) * 0.01)))
05440 unroll_times2 = i;
05441 }
05442 }
05443
05444 old_computed = unroll_times;
05445 if (CG_LOOP_res_min_ii >= 3 )
05446 unroll_times = MIN(unroll_times, unroll_times2);
05447 }
05448
05449
05450 TN *trip_count_tn = CG_LOOP_Trip_Count(loop);
05451 if (TN_is_constant(trip_count_tn)) {
05452 if (swp_trace)
05453 fprintf(TFile, "trip_count is constant, so need not change unroll_times to 2^^n\n");
05454 }
05455 else{
05456 int computed;
05457 computed = unroll_times;
05458 unroll_times=1;
05459 while (unroll_times < computed || unroll_times < min_unr){
05460 unroll_times *= 2;
05461 }
05462 if ((unroll_times-computed > computed-unroll_times/2) || unroll_times>max_unr ){
05463 unroll_times /= 2;
05464 }
05465 if (unroll_times < min_unr){
05466 unroll_times = min_unr;
05467 }
05468 if (swp_trace ){
05469 if (computed != unroll_times) {
05470 fprintf(TFile, "<swp unroll factor> : old heuristic computed(%d) => changeto(%d)\n", computed, unroll_times);
05471 }
05472 }
05473 }
05474
05475 INT old_unroll_times = unroll_times;
05476 if (swp_trace ) {
05477 fprintf(TFile, "<swp unroll factor> : RecMII(%d) ResMII(%d)\n", CG_LOOP_rec_min_ii, CG_LOOP_res_min_ii);
05478 }
05479
05480 if (CG_LOOP_rec_min_ii >= CG_LOOP_res_min_ii && CG_LOOP_rec_min_ii >= 3) {
05481 unroll_times = 1;
05482 }
05483
05484 {
05485
05486 INT computed = old_unroll_times;
05487
05488 if (CG_LOOP_res_min_ii >= 15)
05489 computed = old_unroll_times / 4;
05490 else if (CG_LOOP_res_min_ii >= 10)
05491 computed = old_unroll_times / 2;
05492
05493 if (CG_LOOP_res_min_ii >= 10)
05494 unroll_times = MIN( unroll_times, MAX(computed, 1));
05495 }
05496
05497 LOOP_DESCR *loop = Loop();
05498 BB *body = LOOP_DESCR_loophead(loop);
05499 BOOL contain_b = FALSE;
05500
05501 OP *last_op = BB_last_op(body);
05502 TN *ref_tn = True_TN;
05503
05504 FOR_ALL_BB_OPs(body, op) {
05505 TN *pred_tn = OP_opnd(op, OP_PREDICATE_OPND);
05506 if ( pred_tn != ref_tn) {
05507 if (op == last_op)
05508 break;
05509 contain_b = TRUE;
05510 }
05511 }
05512
05513 if (contain_b && CG_LOOP_rec_min_ii >= CG_LOOP_res_min_ii &&
05514 2 * loop_size < loop_size_limit && CG_LOOP_res_min_ii < 20)
05515 unroll_times = MIN(old_unroll_times, MAX(unroll_times, 2));
05516 #endif
05517
05518 if (swp_trace)
05519 fprintf(TFile, "<swp unroll factor> swp_cycles[%d] = %g\n", unroll_times, swp_cycles[unroll_times]);
05520
05521 #ifdef TARG_IA64
05522
05523 if(PROCESSOR_Version == 2 && swp_cycles[unroll_times]*unroll_times < 1.2)
05524 unroll_times *= 2;
05525 #endif
05526 Set_unroll_factor(unroll_times);
05527
05528 MEM_POOL_Pop(&MEM_local_nz_pool);
05529 }
05530
05531
05532 void CG_LOOP::EBO_Before_Unrolling()
05533 {
05534 #ifdef KEY
05535 if( !Enable_CG_Peephole )
05536 return;
05537 #endif
05538
05539 MEM_POOL_Push(&MEM_local_pool);
05540
05541 {
05542 BB_REGION bb_region(&MEM_local_pool);
05543 bb_region.entries.push_back(Loop_header());
05544 bb_region.exits.push_back(Epilog_start());
05545 bb_region.Set_has_omega();
05546
05547
05548 bb_region.Verify();
05549
05550
05551 EBO_before_unrolling(&bb_region);
05552 }
05553
05554 MEM_POOL_Pop(&MEM_local_pool);
05555 }
05556
05557 void CG_LOOP::EBO_After_Unrolling()
05558 {
05559 #ifdef KEY
05560 if( !Enable_CG_Peephole )
05561 return;
05562 #endif
05563
05564 MEM_POOL_Push(&MEM_local_pool);
05565 {
05566 BB_REGION bb_region(&MEM_local_pool);
05567 bb_region.entries.push_back(Prolog_start());
05568 BBLIST *succs;
05569 FOR_ALL_BB_SUCCS(Epilog_end(), succs) {
05570 BB *succ = BBLIST_item(succs);
05571 bb_region.exits.push_back(succ);
05572 }
05573
05574
05575 EBO_after_unrolling(&bb_region);
05576 }
05577 MEM_POOL_Pop(&MEM_local_pool);
05578 }
05579
05580
05581 void CG_LOOP::Print(FILE *fp)
05582 {
05583 for (BB *bb = epilog_start; ; bb = BB_next(bb)) {
05584 Print_BB_No_Srclines(bb);
05585 if (bb == epilog_end)
05586 break;
05587 }
05588 }
05589
05590
05591 static BOOL Skip_Loop_For_Reason(LOOP_DESCR *loop)
05592 {
05593 const char *reason = NULL;
05594 BB *bb;
05595 BOOL trace_general = Get_Trace(TP_CGLOOP, 1);
05596
05597 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb)
05598 {
05599 if (BB_compile(bb) == FALSE)
05600 {
05601 reason = "contains BB from already-scheduled region";
05602 break;
05603 }
05604 }
05605
05606 if (!reason)
05607 {
05608 BB *head = LOOP_DESCR_loophead(loop);
05609 TN *trip_count = CG_LOOP_Trip_Count(loop);
05610 LOOPINFO *info = LOOP_DESCR_loopinfo(loop);
05611
05612 if (info && WN_Loop_Unimportant_Misc(LOOPINFO_wn(info)))
05613 {
05614 reason = "marked UNIMPORTANT_MISC";
05615 }
05616 else if (info &&
05617 !CG_LOOP_optimize_lno_winddown_cache &&
05618 WN_Loop_Winddown_Cache(LOOPINFO_wn(info)))
05619 {
05620 reason = "marked WINDDOWN_CACHE; see -CG:opt_lno_winddown_cache";
05621 }
05622 else if (info &&
05623 !CG_LOOP_optimize_lno_winddown_reg &&
05624 WN_Loop_Winddown_Reg(LOOPINFO_wn(info)))
05625 {
05626 reason = "marked WINDDOWN_REG; see -CG:opt_lno_winddown_reg";
05627 }
05628 else if (!CG_LOOP_optimize_non_trip_countable &&
05629 trip_count == NULL)
05630 {
05631 reason = "not trip-countable; see -CG:opt_non_trip_countable";
05632 }
05633 else if (!CG_LOOP_optimize_non_innermost &&
05634 !BB_innermost(head))
05635 {
05636 reason = "not innermost loop; see -CG:opt_non_innermost";
05637 }
05638 #ifdef MIPS_UNROLL
05639 else if (!CG_LOOP_Attach_Prolog_And_Epilog(loop))
05640 {
05641 reason = "loop has multiple back edges";
05642 }
05643 #endif
05644 else if (!BB_innermost(head))
05645 {
05646 if (trace_general) {
05647 #pragma mips_frequency_hint NEVER
05648 fprintf(TFile, "<cgprep> no further optimizations done for non-innermost loops\n");
05649 }
05650 return TRUE;
05651 }
05652 #ifdef MIPS_UNROLL
05653 else if (!CG_LOOP_epilog && !CG_LOOP_optimize_multi_targ)
05654 {
05655 reason = "loop exits to multiple targets; see -CG:opt_multi_targ";
05656 }
05657 #endif
05658 else if ( BBlist_Fall_Thru_Succ(head) == NULL)
05659 {
05660 reason = "loop never exits";
05661 }
05662 #ifdef TARG_X8664
05663 else if( BB_freq_fb_based(head) && BB_freq(head) < 0.01 ){
05664 reason = "loop is barely executed";
05665 }
05666 #endif
05667 }
05668
05669 if (reason)
05670 {
05671 if (trace_general) {
05672 #pragma mips_frequency_hint NEVER
05673 fprintf(TFile, "<cgprep> rejecting: %s\n", reason);
05674 }
05675 return TRUE;
05676 }
05677 return FALSE;
05678 }
05679
05680
05681
05682
05683
05684
05685
05686
05687 static BOOL Loop_Amenable_For_SWP(LOOP_DESCR *loop, BOOL trace)
05688 {
05689 BB *bb;
05690 UINT32 bb_ctnt = 0;
05691 UINT32 insts_ctnt = 0;
05692
05693 #ifdef KEY
05694 if( !Enable_SWP ){
05695 return FALSE;
05696 }
05697 #endif
05698
05699 FOR_ALL_BB_SET_members(LOOP_DESCR_bbset(loop), bb) {
05700 bb_ctnt++;
05701 insts_ctnt += BB_length(bb);
05702 if (bb_ctnt > CG_maxblocks ||
05703 insts_ctnt > CG_maxinss) {
05704 if (trace)
05705 fprintf(TFile,"<loop> swp disabled: Loop body size too big");
05706 return FALSE;
05707 }
05708 }
05709
05710
05711
05712
05713
05714
05715
05716
05717
05718 double cycles_before_if_cvt = LOOP_DESCR_Estimate_Cycles (loop);
05719 INT32 old_num_loop_bbs = BB_SET_Size(LOOP_DESCR_bbset(loop));
05720
05721
05722
05723
05724 MEM_POOL_Push(&MEM_local_nz_pool);
05725
05726 BB *head = LOOP_DESCR_loophead(loop);
05727 BB_SET *bbs = LOOP_DESCR_bbset(loop);
05728 CG_SCHED_EST *loop_se = CG_SCHED_EST_Create(head, &MEM_local_nz_pool,
05729 SCHED_EST_FOR_UNROLL);
05730 FOR_ALL_BB_SET_members(bbs, bb) {
05731 if (bb != head) {
05732 CG_SCHED_EST *se = CG_SCHED_EST_Create(bb, &MEM_local_nz_pool,
05733 SCHED_EST_FOR_UNROLL);
05734 CG_SCHED_EST_Append_Scheds(loop_se, se);
05735 }
05736 }
05737
05738 double estimate_swp_cycles = CG_SCHED_EST_Resource_Cycles(loop_se);
05739
05740 MEM_POOL_Pop(&MEM_local_nz_pool);
05741
05742 if (estimate_swp_cycles <= cycles_before_if_cvt) {
05743 if (trace) {
05744 #pragma mips_frequency_hint NEVER
05745 fprintf(TFile,
05746 "<loop> swp enabled: estimate_swp_cycles=%g <= cycles_before_if_cvt=%g",
05747 estimate_swp_cycles, cycles_before_if_cvt);
05748 }
05749 return TRUE;
05750 } else {
05751 if (trace) {
05752 #pragma mips_frequency_hint NEVER
05753 fprintf(TFile,
05754 "<loop> swp disabled: estimate_swp_cycles=%g > cycles_before_if_cvt=%g",
05755 estimate_swp_cycles, cycles_before_if_cvt);
05756 }
05757 return FALSE;
05758 }
05759 }
05760
05761
05762
05763
05764
05765 void Gen_Counted_Loop_Branch(CG_LOOP& cl)
05766 {
05767 TN *trip_count_tn = cl.Trip_count_tn();
05768 BB *prolog = cl.Prolog_end();
05769 BB *head = cl.Loop_header();
05770 BB *tail = BB_Other_Predecessor(head, prolog);
05771
05772 OPS ops = OPS_EMPTY;
05773 OPS body_ops = OPS_EMPTY;
05774 OP *br_op = BB_branch_op(tail);
05775
05776
05777 if (CGTARG_OP_is_counted_loop(br_op)) return;
05778
05779 #ifdef TARG_IA64 // only IA64 has a counted loop (cloop) instruction
05780 TN *label_tn = OP_opnd(br_op, Branch_Target_Operand(br_op));
05781 CGTARG_Generate_Branch_Cloop(br_op, trip_count_tn, trip_count_tn, 1,
05782 label_tn, &ops, &body_ops);
05783 if (OPS_length(&body_ops) > 0) {
05784 BB_Remove_Op(tail, br_op);
05785 BB_Append_Ops(tail, &body_ops);
05786
05787 BB_Append_Ops(prolog, &ops);
05788 }
05789 #endif
05790 #ifdef TARG_X8664
05791 LOOP_DESCR *loop = cl.Loop();
05792 CGTARG_Generate_Countdown_Loop(trip_count_tn, tail,
05793 &ops, &body_ops, head == tail, loop);
05794 if (OPS_length(&body_ops) > 0) {
05795 BB_Append_Ops(tail, &body_ops);
05796
05797 BB_Append_Ops(prolog, &ops);
05798 }
05799 #endif
05800 }
05801
05802
05803
05804
05805
05806 static inline pair<BB*, CG_LOOP_BACKPATCH *>
05807 make_pair(BB* a, CG_LOOP_BACKPATCH* b)
05808 {
05809 return pair<BB*, CG_LOOP_BACKPATCH *>(a, b);
05810 }
05811
05812 void Fix_Backpatches(CG_LOOP& cl, bool trace)
05813 {
05814 vector<pair<BB*, CG_LOOP_BACKPATCH *> > dead_bp;
05815 std::set<TN*> epilog_tns;
05816 BB *body = cl.Loop_header();
05817 BB *prolog = CG_LOOP_prolog;
05818 BB *epilog = CG_LOOP_epilog;
05819 CG_LOOP_BACKPATCH *bp;
05820 for (bp = CG_LOOP_Backpatch_First(epilog, NULL); bp; bp = CG_LOOP_Backpatch_Next(bp)) {
05821 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
05822 if (!GTN_SET_MemberP(BB_defreach_out(body), body_tn))
05823 dead_bp.push_back(make_pair(epilog,bp));
05824 else
05825 epilog_tns.insert(body_tn);
05826 }
05827 for (bp = CG_LOOP_Backpatch_First(prolog, NULL); bp; bp = CG_LOOP_Backpatch_Next(bp)) {
05828 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
05829 if (!GTN_SET_MemberP(BB_live_in(body), body_tn) &&
05830 !(GTN_SET_MemberP(BB_defreach_out(body), body_tn) &&
05831 epilog_tns.find(body_tn) != epilog_tns.end()))
05832 dead_bp.push_back(make_pair(prolog,bp));
05833 }
05834 while (!dead_bp.empty()) {
05835 BB *bb = (*(dead_bp.end()-1)).first;
05836 bp = (*(dead_bp.end()-1)).second;
05837 if (trace) {
05838 TN *body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
05839 fprintf(TFile, "Fix_Backpatches: delete backpatches with body TN%d\n",
05840 TN_number(body_tn));
05841 }
05842 CG_LOOP_Backpatch_Delete(bb, bp);
05843 dead_bp.pop_back();
05844 }
05845 }
05846
05847 #ifdef TARG_IA64
05848
05849
05850
05851
05852
05853
05854 static BOOL
05855 Do_Loop_Determine_SWP_Or_Unroll (CG_LOOP& cg_loop, BOOL trace) {
05856
05857 if (!CG_tune_do_loop) {
05858
05859 return TRUE;
05860 }
05861
05862
05863
05864 if (PU_src_lang(Get_Current_PU()) != PU_C_LANG &&
05865 PU_src_lang(Get_Current_PU()) != PU_CXX_LANG) {
05866
05867
05868
05869 return TRUE;
05870 }
05871
05872 LOOP_DESCR* loop = cg_loop.Loop ();
05873
05874 TN* trip_cnt = CG_LOOP_Trip_Count(loop);
05875 Is_True (trip_cnt, ("<loop> should be a DO-loop"));
05876
05877
05878 if (!Enable_SWP) return FALSE;
05879
05880
05881 if (TN_is_constant(trip_cnt) && TN_value(trip_cnt) < 40) {
05882 if (trace)
05883 fprintf (TFile, "small trip count, SWP is not profitable");
05884 return FALSE;
05885 }
05886
05887 BB* body = LOOP_DESCR_loophead (loop);
05888 if (BB_length(body) == 0) {
05889 if (trace)
05890 fprintf (TFile, "small trip count, SWP is not profitable");
05891 return TRUE;
05892 }
05893
05894
05895 if (Detect_SWP_Constraints(cg_loop, trace) != SWP_OK) {
05896 return FALSE;
05897 }
05898
05899
05900
05901 if (BB_length(body) >= 130) {
05902 if (trace) {
05903 fprintf (TFile,
05904 "Loop body has %d instructions, too big for SWP", BB_length(body));
05905 }
05906 return FALSE;
05907 }
05908
05909
05910
05911
05912
05913
05914
05915
05916
05917
05918
05919
05920
05921
05922
05923
05924
05925
05926 if (!Current_Dep_Graph || CG_DEP_Ignore_LNO || CG_DEP_Ignore_WOPT) {
05927 return TRUE;
05928 }
05929
05930
05931 Compute_Rec_Res_Min_II (cg_loop, TRUE);
05932 if (CG_LOOP_min_ii <= 5) {
05933
05934 return TRUE;
05935 }
05936
05937 INT32 alen_wo_dspec = cg_loop.Acyclic_len_wo_dspec ();
05938 INT32 alen = cg_loop.Acyclic_len ();
05939
05940
05941 if (alen * 2 < alen_wo_dspec) {
05942
05943
05944
05945 alen = (INT32)(alen * 1.5f);
05946 }
05947 if (alen < (INT32)(1.2f * CG_LOOP_min_ii)) {
05948 alen = (INT32)(alen * 1.2f);
05949 }
05950 alen = MIN(alen,alen_wo_dspec);
05951
05952 if (alen > 0 && alen < CG_LOOP_min_ii) {
05953
05954
05955 if (trace) {
05956 fprintf (TFile,
05957 "Acyclic schedule with data speculation (%d cycle) is shorter than MII(%d)",
05958 alen, CG_LOOP_min_ii);
05959 }
05960 return FALSE;
05961 }
05962
05963
05964 Is_True (CG_LOOP_rec_min_ii_with_dspec > 0,
05965 ("CG_LOOP_res_min_ii_with_dspec is not calculated properly"));
05966
05967 INT32 mii_with_dspec = MAX(CG_LOOP_rec_min_ii_with_dspec, CG_LOOP_res_min_ii);
05968 mii_with_dspec = MAX(mii_with_dspec, 1);
05969
05970 mii_with_dspec = MAX(mii_with_dspec, CG_LOOP_min_ii/2);
05971
05972 INT32 unroll_factor = CG_LOOP_unrolled_size_max/BB_length(body);
05973 unroll_factor = MAX(unroll_factor, 1);
05974 unroll_factor = MIN(CG_LOOP_unroll_times_max, unroll_factor);
05975 INT32 unroll_ii = alen + (unroll_factor - 1) * mii_with_dspec;
05976 unroll_ii /= unroll_factor;
05977
05978 if (CG_LOOP_min_ii > (INT32)(1.2f * unroll_ii)) {
05979 if (trace) {
05980 fprintf (TFile, "The estimated average II (%d) due to unrolling and"
05981 " data speculation is shorter than MII(%d)",
05982 unroll_ii, CG_LOOP_min_ii);
05983 }
05984 return FALSE;
05985 }
05986
05987 return TRUE;
05988 }
05989
05990
05991
05992
05993
05994 static BOOL
05995 Do_Loop_Perform_SWP_or_Unroll (BOOL perform_swp, CG_LOOP& cg_loop,
05996 vector<SWP_FIXUP>& fixup, BOOL trace_loop_opt) {
05997
05998 LOOP_DESCR* loop = cg_loop.Loop ();
05999
06000 if (perform_swp) {
06001 if (!cg_loop.Has_prolog_epilog())
06002 return FALSE;
06003
06004 if (trace_loop_opt)
06005 CG_LOOP_Trace_Loop(loop, "*** Before loop canonicalization ***");
06006
06007 if (!Prepare_Loop_For_SWP_1(cg_loop, trace_loop_opt))
06008 return FALSE;
06009
06010
06011
06012 Gen_Counted_Loop_Branch(cg_loop);
06013 Gen_SWP_Branch(cg_loop, true );
06014
06015 cg_loop.Recompute_Liveness();
06016 Rename_TNs_For_BB(cg_loop.Loop_header(), NULL);
06017 cg_loop.Recompute_Liveness();
06018
06019 cg_loop.Build_CG_LOOP_Info();
06020 cg_loop.Recompute_Liveness();
06021
06022 if (cg_loop.Determine_Unroll_Fully()) {
06023 Unroll_Do_Loop_Fully(loop, cg_loop.Unroll_factor());
06024 cg_loop.Recompute_Liveness();
06025 return TRUE;
06026 }
06027
06028 Perform_Read_Write_Removal(loop);
06029 cg_loop.Recompute_Liveness();
06030
06031 if (trace_loop_opt)
06032 CG_LOOP_Trace_Loop(loop, "*** Before Postincr generation / After RW removal ***");
06033
06034
06035 if (!Prepare_Loop_For_SWP_2(cg_loop, trace_loop_opt))
06036 return FALSE;
06037
06038 if (trace_loop_opt)
06039 CG_LOOP_Trace_Loop(loop, "*** Before Fix_Recurrences / After Postincr ***");
06040
06041
06042 Induction_Variables_Removal(cg_loop,
06043 true,
06044 true,
06045 trace_loop_opt);
06046 Fix_Recurrences_Before_Unrolling(cg_loop);
06047
06048 if (trace_loop_opt)
06049 CG_LOOP_Trace_Loop(loop, "*** before ebo 1 and unrolling / after fix recurrences ***");
06050
06051 cg_loop.Recompute_Liveness();
06052 cg_loop.EBO_Before_Unrolling();
06053
06054 if (SWP_Options.Predicate_Promotion) {
06055 std::list<BB*> bbl;
06056 bbl.push_front(cg_loop.Loop_header());
06057 CG_DEP_Prune_Dependence_Arcs(bbl, TRUE, trace_loop_opt);
06058 if (trace_loop_opt)
06059 CG_LOOP_Trace_Loop(loop, "*** after ebo 1 and prune predicate / before unrolling ***");
06060 }
06061
06062 Compute_Rec_Res_Min_II(cg_loop);
06063 cg_loop.Determine_SWP_Unroll_Factor();
06064
06065 if (cg_loop.Unroll_factor() > 1) {
06066 Unroll_Do_Loop(cg_loop, cg_loop.Unroll_factor());
06067 cg_loop.Recompute_Liveness();
06068 }
06069
06070 if (trace_loop_opt)
06071 CG_LOOP_Trace_Loop(loop, "*** Before ebo 2/ after unrolling ***");
06072
06073 cg_loop.EBO_Before_Unrolling();
06074
06075 BB_Verify_OP_Order(cg_loop.Loop_header());
06076
06077
06078 Fix_Recurrences_After_Unrolling(cg_loop);
06079
06080 if (trace_loop_opt)
06081 CG_LOOP_Trace_Loop(loop, "*** Before implicit prefetch / after fix recurrences 2 ***");
06082
06083 Gen_Implicit_Prefetches(cg_loop, trace_loop_opt);
06084
06085 if (trace_loop_opt)
06086 CG_LOOP_Trace_Loop(loop, "*** Before Ind. Var. Removal / after implicit prefetch ***");
06087
06088
06089 cg_loop.Recompute_Liveness();
06090 Induction_Variables_Removal(cg_loop,
06091 true,
06092 true,
06093 trace_loop_opt);
06094 cg_loop.Recompute_Liveness();
06095 Fix_Backpatches(cg_loop, trace_loop_opt);
06096
06097 if (trace_loop_opt)
06098 CG_LOOP_Trace_Loop(loop, "*** Before swp / after Ind. Var. Removal ***");
06099
06100 if (!Perform_SWP(cg_loop, fixup, true )) {
06101 Undo_SWP_Branch(cg_loop, true );
06102 CG_LOOP_Remove_Notations(cg_loop, CG_LOOP_prolog, CG_LOOP_epilog);
06103 cg_loop.Recompute_Liveness();
06104 }
06105 } else {
06106 if (!cg_loop.Has_prolog_epilog())
06107 return FALSE;
06108
06109 if (trace_loop_opt)
06110 CG_LOOP_Trace_Loop(loop, "*** Before LOOP CANONICALIZATION ***");
06111
06112
06113
06114 if (!Remove_Non_Definite_Dependence(cg_loop, false, trace_loop_opt))
06115 return FALSE;
06116
06117 cg_loop.Build_CG_LOOP_Info();
06118
06119 if (trace_loop_opt)
06120 CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_DOLOOP_UNROLL ***");
06121
06122 cg_loop.Recompute_Liveness();
06123 cg_loop.Determine_Unroll_Factor();
06124
06125 if (cg_loop.Unroll_fully()) {
06126
06127
06128 Unroll_Do_Loop_Fully(loop, cg_loop.Unroll_factor());
06129
06130 } else {
06131 Perform_Read_Write_Removal(loop);
06132
06133
06134 Fix_Recurrences_Before_Unrolling(cg_loop);
06135
06136 if (cg_loop.Unroll_factor() > 1) {
06137 Unroll_Do_Loop(cg_loop, cg_loop.Unroll_factor());
06138 }
06139 cg_loop.Recompute_Liveness();
06140 CG_LOOP_Remove_Notations(cg_loop, CG_LOOP_prolog, CG_LOOP_epilog);
06141 }
06142
06143 cg_loop.Recompute_Liveness();
06144 cg_loop.EBO_After_Unrolling();
06145 }
06146
06147 return TRUE;
06148 }
06149 #endif
06150
06151
06152
06153 #if defined(TARG_IA64) || defined(TARG_SL) || defined(TARG_MIPS)
06154 BOOL CG_LOOP_Optimize(LOOP_DESCR *loop, vector<SWP_FIXUP>& fixup,
06155 void **par_rgn=NULL,
06156 void *rgn_loop_update=NULL)
06157 #else
06158 BOOL CG_LOOP_Optimize(LOOP_DESCR *loop, vector<SWP_FIXUP>& fixup)
06159 #endif
06160 {
06161 enum LOOP_OPT_ACTION {
06162 NO_LOOP_OPT,
06163 #ifdef TARG_IA64
06164 SINGLE_BB_DOLOOP_SWP_OR_UNROLL,
06165 #else
06166 SINGLE_BB_DOLOOP_SWP,
06167 SINGLE_BB_DOLOOP_UNROLL,
06168 #endif
06169 SINGLE_BB_WHILELOOP_SWP,
06170 SINGLE_BB_WHILELOOP_UNROLL,
06171 MULTI_BB_DOLOOP
06172 };
06173
06174 #ifdef TARG_IA64
06175 if(IPFEC_Enable_Region_Formation){
06176 if(Home_Region(LOOP_DESCR_loophead(loop))->Is_No_Further_Opt())
06177 return FALSE;
06178 }
06179 #endif
06180
06181
06182 if (!BB_innermost(LOOP_DESCR_loophead(loop)))
06183 return FALSE;
06184
06185 if (Skip_Loop_For_Reason(loop))
06186 return FALSE;
06187
06188 BOOL trace_loop_opt = Get_Trace(TP_CGLOOP, 0x4);
06189
06190
06191
06192 LOOP_OPT_ACTION action = NO_LOOP_OPT;
06193 BOOL has_trip_count = CG_LOOP_Trip_Count(loop) != NULL;
06194 BOOL single_bb = (BB_SET_Size(LOOP_DESCR_bbset(loop)) == 1);
06195
06196 if (trace_loop_opt) {
06197 if (!single_bb) {
06198 fprintf(TFile, "loop is not a single BB loop.\n");
06199 BB_SET_Print(LOOP_DESCR_bbset(loop), TFile);
06200 fprintf(TFile, "\n");
06201 }
06202 }
06203
06204 if (!single_bb &&
06205 CG_LOOP_force_ifc > 0 &&
06206 Loop_Amenable_For_SWP (loop, trace_loop_opt)) {
06207
06208 BB *new_single_bb;
06209 #ifdef TARG_IA64
06210 if (IPFEC_Enable_If_Conversion) {
06211 IF_CONVERTOR convertor;
06212 new_single_bb = convertor.Force_If_Convert(loop, CG_LOOP_force_ifc >= 2);
06213 } else {
06214 new_single_bb = Force_If_Convert(loop, CG_LOOP_force_ifc >=2);
06215 }
06216 #else
06217 new_single_bb = Force_If_Convert(loop, CG_LOOP_force_ifc >=2);
06218 #endif
06219 if (new_single_bb) {
06220 single_bb = TRUE;
06221 }
06222 }
06223
06224 if (single_bb) {
06225 if (has_trip_count) {
06226 #ifdef TARG_IA64
06227 action = SINGLE_BB_DOLOOP_SWP_OR_UNROLL;
06228 #else
06229 if (Enable_SWP)
06230 action = SINGLE_BB_DOLOOP_SWP;
06231 else
06232 action = SINGLE_BB_DOLOOP_UNROLL;
06233 #endif
06234 } else {
06235 if (Enable_SWP && SWP_Options.Enable_While_Loop)
06236 action = SINGLE_BB_WHILELOOP_SWP;
06237 else if (CG_LOOP_unroll_non_trip_countable)
06238 action = SINGLE_BB_WHILELOOP_UNROLL;
06239 }
06240 } else if (has_trip_count) {
06241 action = MULTI_BB_DOLOOP;
06242 } else {
06243 action = NO_LOOP_OPT;
06244 }
06245
06246 #ifdef TARG_IA64
06247 if(IPFEC_Enable_Region_Formation && action!=NO_LOOP_OPT){
06248 extern void *Record_And_Del_Loop_Region(LOOP_DESCR *loop, void *tmp);
06249 (*par_rgn) = Record_And_Del_Loop_Region(loop, rgn_loop_update);
06250 if((*par_rgn) == NULL)
06251 return FALSE;
06252 }
06253 #endif
06254 switch (action) {
06255 #ifdef TARG_IA64
06256 case SINGLE_BB_DOLOOP_SWP_OR_UNROLL:
06257 {
06258 CG_LOOP cg_loop(loop);
06259 cg_loop.Build_CG_LOOP_Info ();
06260
06261 BOOL perform_swp = Do_Loop_Determine_SWP_Or_Unroll
06262 (cg_loop, trace_loop_opt);
06263 return Do_Loop_Perform_SWP_or_Unroll
06264 (perform_swp, cg_loop, fixup, trace_loop_opt);
06265 }
06266 #else
06267 case SINGLE_BB_DOLOOP_SWP:
06268 {
06269 CG_LOOP cg_loop(loop);
06270 if (!cg_loop.Has_prolog_epilog())
06271 return FALSE;
06272
06273 if (trace_loop_opt)
06274 CG_LOOP_Trace_Loop(loop, "*** Before loop canonicalization ***");
06275
06276 if (!Prepare_Loop_For_SWP_1(cg_loop, trace_loop_opt))
06277 return FALSE;
06278
06279
06280
06281 Gen_Counted_Loop_Branch(cg_loop);
06282 Gen_SWP_Branch(cg_loop, true );
06283
06284 cg_loop.Recompute_Liveness();
06285 Rename_TNs_For_BB(cg_loop.Loop_header(), NULL);
06286 cg_loop.Recompute_Liveness();
06287
06288 cg_loop.Build_CG_LOOP_Info();
06289 cg_loop.Recompute_Liveness();
06290
06291 if (cg_loop.Determine_Unroll_Fully()) {
06292 Unroll_Do_Loop_Fully(loop, cg_loop.Unroll_factor());
06293 cg_loop.Recompute_Liveness();
06294 return TRUE;
06295 }
06296
06297 Perform_Read_Write_Removal(loop);
06298 cg_loop.Recompute_Liveness();
06299
06300 if (trace_loop_opt)
06301 CG_LOOP_Trace_Loop(loop, "*** Before Postincr generation / After RW removal ***");
06302
06303
06304 if (!Prepare_Loop_For_SWP_2(cg_loop, trace_loop_opt))
06305 return FALSE;
06306
06307 if (trace_loop_opt)
06308 CG_LOOP_Trace_Loop(loop, "*** Before Fix_Recurrences / After Postincr ***");
06309
06310
06311 Induction_Variables_Removal(cg_loop,
06312 true,
06313 true,
06314 trace_loop_opt);
06315 Fix_Recurrences_Before_Unrolling(cg_loop);
06316
06317 if (trace_loop_opt)
06318 CG_LOOP_Trace_Loop(loop, "*** before ebo 1 and unrolling / after fix recurrences ***");
06319
06320 cg_loop.Recompute_Liveness();
06321 cg_loop.EBO_Before_Unrolling();
06322
06323 if (SWP_Options.Predicate_Promotion) {
06324 std::list<BB*> bbl;
06325 bbl.push_front(cg_loop.Loop_header());
06326 CG_DEP_Prune_Dependence_Arcs(bbl, TRUE, trace_loop_opt);
06327 if (trace_loop_opt)
06328 CG_LOOP_Trace_Loop(loop, "*** after ebo 1 and prune predicate / before unrolling ***");
06329 }
06330
06331 cg_loop.Determine_SWP_Unroll_Factor();
06332
06333 if (cg_loop.Unroll_factor() > 1) {
06334 Unroll_Do_Loop(cg_loop, cg_loop.Unroll_factor());
06335 cg_loop.Recompute_Liveness();
06336 }
06337
06338 if (trace_loop_opt)
06339 CG_LOOP_Trace_Loop(loop, "*** Before ebo 2/ after unrolling ***");
06340
06341 cg_loop.EBO_Before_Unrolling();
06342
06343 BB_Verify_OP_Order(cg_loop.Loop_header());
06344
06345
06346 Fix_Recurrences_After_Unrolling(cg_loop);
06347
06348 if (trace_loop_opt)
06349 CG_LOOP_Trace_Loop(loop, "*** Before implicit prefetch / after fix recurrences 2 ***");
06350
06351 Gen_Implicit_Prefetches(cg_loop, trace_loop_opt);
06352
06353 if (trace_loop_opt)
06354 CG_LOOP_Trace_Loop(loop, "*** Before Ind. Var. Removal / after implicit prefetch ***");
06355
06356
06357 cg_loop.Recompute_Liveness();
06358 Induction_Variables_Removal(cg_loop,
06359 true,
06360 true,
06361 trace_loop_opt);
06362 cg_loop.Recompute_Liveness();
06363 Fix_Backpatches(cg_loop, trace_loop_opt);
06364
06365 if (trace_loop_opt)
06366 CG_LOOP_Trace_Loop(loop, "*** Before swp / after Ind. Var. Removal ***");
06367
06368 if (!Perform_SWP(cg_loop, fixup, true )) {
06369 Undo_SWP_Branch(cg_loop, true );
06370 CG_LOOP_Remove_Notations(loop, CG_LOOP_prolog, CG_LOOP_epilog);
06371 cg_loop.Recompute_Liveness();
06372 }
06373 break;
06374 }
06375
06376 case SINGLE_BB_DOLOOP_UNROLL:
06377 {
06378 CG_LOOP cg_loop(loop);
06379 if (!cg_loop.Has_prolog_epilog())
06380 return FALSE;
06381
06382 if (trace_loop_opt)
06383 CG_LOOP_Trace_Loop(loop, "*** Before LOOP CANONICALIZATION ***");
06384
06385
06386
06387 if (!Remove_Non_Definite_Dependence(cg_loop, false, trace_loop_opt))
06388 return FALSE;
06389
06390 cg_loop.Build_CG_LOOP_Info();
06391
06392 if (trace_loop_opt)
06393 CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_DOLOOP_UNROLL ***");
06394
06395 cg_loop.Recompute_Liveness();
06396 cg_loop.Determine_Unroll_Factor();
06397
06398 if (cg_loop.Unroll_fully()) {
06399
06400
06401 Unroll_Do_Loop_Fully(loop, cg_loop.Unroll_factor());
06402
06403 } else {
06404 Perform_Read_Write_Removal(loop);
06405
06406
06407 Fix_Recurrences_Before_Unrolling(cg_loop);
06408
06409 if (cg_loop.Unroll_factor() > 1) {
06410 Unroll_Do_Loop(cg_loop, cg_loop.Unroll_factor());
06411 }
06412 cg_loop.Recompute_Liveness();
06413 CG_LOOP_Remove_Notations(loop, CG_LOOP_prolog, CG_LOOP_epilog);
06414 }
06415
06416 #ifdef TARG_X8664
06417
06418 CGTARG_LOOP_Optimize( loop );
06419 #endif
06420 cg_loop.Recompute_Liveness();
06421 cg_loop.EBO_After_Unrolling();
06422 break;
06423 }
06424
06425 #endif
06426 case SINGLE_BB_WHILELOOP_SWP:
06427 {
06428 CG_LOOP cg_loop(loop);
06429
06430
06431 if (!cg_loop.Has_prolog_epilog()) return FALSE;
06432
06433 if (trace_loop_opt)
06434 CG_LOOP_Trace_Loop(loop, "*** Before LOOP CANONICALIZATION ***");
06435
06436 if (Prepare_Loop_For_SWP_1(cg_loop, trace_loop_opt)) {
06437
06438 Gen_SWP_Branch(cg_loop, false);
06439
06440 cg_loop.Recompute_Liveness();
06441 cg_loop.Build_CG_LOOP_Info();
06442
06443
06444 cg_loop.Recompute_Liveness();
06445 Induction_Variables_Removal(cg_loop,
06446 true,
06447 true,
06448 trace_loop_opt);
06449 cg_loop.Recompute_Liveness();
06450
06451 Prepare_Loop_For_SWP_2(cg_loop, trace_loop_opt);
06452
06453 Convert_While_Loop_to_Fully_Predicated_Form(cg_loop);
06454
06455 #if 0
06456 if (SWP_Options.Predicate_Promotion) {
06457 list<BB*> bbl;
06458 bbl.push_front(cg_loop.Loop_header());
06459 CG_DEP_Prune_Dependence_Arcs(bbl, TRUE, trace_loop_opt);
06460 }
06461 #endif
06462
06463 if (trace_loop_opt)
06464 CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_SWP ***");
06465
06466 cg_loop.Recompute_Liveness();
06467 Fix_Backpatches(cg_loop, trace_loop_opt);
06468
06469 if (!Perform_SWP(cg_loop, fixup, false )) {
06470
06471
06472 Undo_SWP_Branch(cg_loop, false);
06473 #ifdef TARG_IA64
06474 CG_LOOP_Remove_Notations(cg_loop, CG_LOOP_prolog, CG_LOOP_epilog);
06475 #else
06476 CG_LOOP_Remove_Notations(loop, CG_LOOP_prolog, CG_LOOP_epilog);
06477 #endif
06478 cg_loop.Recompute_Liveness();
06479 }
06480 }
06481 }
06482 break;
06483
06484 case SINGLE_BB_WHILELOOP_UNROLL:
06485 {
06486 CG_LOOP cg_loop(loop);
06487
06488 if (trace_loop_opt)
06489 CG_LOOP_Trace_Loop(loop, "*** Before SINGLE_BB_WHILELOOP_UNROLL ***");
06490
06491 cg_loop.Build_CG_LOOP_Info();
06492 cg_loop.Determine_Unroll_Factor();
06493 Unroll_Dowhile_Loop(loop, cg_loop.Unroll_factor());
06494 cg_loop.Recompute_Liveness();
06495 cg_loop.EBO_After_Unrolling();
06496 }
06497 break;
06498
06499 case MULTI_BB_DOLOOP:
06500 {
06501 CG_LOOP cg_loop(loop);
06502
06503
06504 if (!cg_loop.Has_prolog_epilog()) return FALSE;
06505
06506 if (trace_loop_opt) {
06507 CG_LOOP_Trace_Loop(loop, "*** Before MULTI_BB_DOLOOP ***");
06508 }
06509
06510 Gen_Counted_Loop_Branch(cg_loop);
06511
06512 if (trace_loop_opt) {
06513 CG_LOOP_Trace_Loop(loop, "*** After MULTI_BB_DOLOOP ***");
06514 }
06515 }
06516 break;
06517
06518 case NO_LOOP_OPT:
06519 return FALSE;
06520
06521 default:
06522 Is_True(FALSE, ("unknown loop opt action."));
06523 }
06524
06525 return TRUE;
06526 }
06527
06528
06529
06530
06531
06532 BOOL CG_LOOP_Skip(BB *bb)
06533 {
06534 BOOL skip = FALSE;
06535 if (CG_skip_local_loop) {
06536 skip = (BB_id(bb) < CG_local_skip_before ||
06537 BB_id(bb) > CG_local_skip_after ||
06538 BB_id(bb) == CG_local_skip_equal);
06539 if (skip) DevWarn("skip CG loop at BB %d", BB_id(bb));
06540 else DevWarn("process CG loop at BB %d", BB_id(bb));
06541 }
06542 return skip;
06543 }
06544
06545
06546 void CG_LOOP_Statistics(LOOP_DESCR *loop)
06547 {
06548 BB *head = LOOP_DESCR_loophead(loop);
06549 BB_SET *bbs = LOOP_DESCR_bbset(loop);
06550 TN *trip_count = CG_LOOP_Trip_Count(loop);
06551 bool inner = BB_innermost(head);
06552 bool has_call = false;
06553 bool early_exit = false;
06554 bool multi_exit = false;
06555 bool has_return = false;
06556 INT32 nbb= BB_SET_Size(bbs);
06557 INT32 nops = 0;
06558 BB *bb;
06559 FOR_ALL_BB_SET_members(bbs, bb) {
06560 if (BB_call(bb))
06561 has_call = true;
06562 nops += BB_length(bb);
06563 }
06564
06565 BBLIST *succs;
06566 BB *loop_merge = NULL;
06567 FOR_ALL_BB_SUCCS(head, succs) {
06568 BB *succ = BBLIST_item(succs);
06569 if (!BB_SET_MemberP(bbs, succ)) {
06570 loop_merge = succ;
06571 }
06572 }
06573 FOR_ALL_BB_SET_members(bbs,bb) {
06574 if (bb != head) {
06575 FOR_ALL_BB_SUCCS(bb, succs) {
06576 BB *succ = BBLIST_item(succs);
06577 if (!BB_SET_MemberP(bbs, succ)) {
06578 early_exit = true;
06579 if (succ != loop_merge)
06580 if (BB_exit(succ))
06581 has_return = true;
06582 else
06583 multi_exit = true;
06584 }
06585 }
06586 }
06587 }
06588
06589 fprintf(TFile, "<loopstat> %s %s\n",
06590 inner ? "inner" : "no_inner",
06591 trip_count ? "has_loop_count" : "no_loop_count");
06592 if (inner) {
06593 fprintf(TFile, "<loopstat_inner> nbb=%d nops=%d %s %s %s %s\n",
06594 nbb, nops,
06595 has_call ? "has_call" : "no_call",
06596 has_return ? "has_return" : "no_return",
06597 early_exit ? "has_early_exit" : "no_early_exit",
06598 multi_exit ? "has_multi_exit" : "no_multi_exit");
06599 }
06600 }
06601
06602
06603 #if defined(TARG_SL)
06604 static BOOL enable_zdl_with_branch = FALSE;
06605 static int zdl_seq_no = 0;
06606
06607
06608
06609
06610 static OP*
06611 CG_LOOP_Zdl_Internal_Gen_Code( CG_LOOP& cl)
06612 {
06613 TN* trip_count_tn = cl.Trip_count_tn();
06614 BB* prolog = cl.Prolog_end();
06615 BB* epilog = cl.Epilog_start();
06616 BB* head = cl.Loop_header();
06617 BB* tail = LOOP_DESCR_Find_Unique_Tail(cl.Loop());
06618 BOOL using_mvtci = FALSE;
06619 Is_True( prolog, ("No prolog for the loop"));
06620 Is_True( epilog, ("No epilog for the loop"));
06621
06622
06623
06624
06625
06626 Is_True(BB_length(tail)>0, ("empty tail bb"));
06627 OP* br_op = BB_branch_op( tail );
06628 Is_True(OP_code(br_op) != TOP_loop, ("branch op is top_loop"));
06629 Is_True(br_op!=NULL || BB_has_tag(tail), ("tail bb has neither br nor tag "));
06630
06631
06632
06633
06634 TN* gpr_tn;
06635 OP* set_gpr_op = NULL;
06636 OP* set_gpr_op_1 = NULL;
06637 gpr_tn = Gen_Register_TN( ISA_REGISTER_CLASS_integer, 4 );
06638
06639 if (TN_is_constant(trip_count_tn) && TN_has_value(trip_count_tn)){
06640
06641
06642
06643 INT32 val = TN_value(trip_count_tn);
06644
06645 if (ISA_LC_Value_In_Class(val, LC_uimm10)) {
06646 set_gpr_op = NULL;
06647 using_mvtci = TRUE;
06648 } else if (ISA_LC_Value_In_Class (val, LC_simm16)){
06649 set_gpr_op = Mk_OP(TOP_addiu, gpr_tn, Zero_TN, trip_count_tn);
06650 } else if (ISA_LC_Value_In_Class (val, LC_uimm16)){
06651 set_gpr_op = Mk_OP(TOP_ori, gpr_tn, Zero_TN, trip_count_tn);
06652 } else if(val >= 32768 && val <= INT32_MAX ) {
06653
06654 if((val & 0xffff) == 0) {
06655 set_gpr_op = Mk_OP(TOP_lui, gpr_tn, Gen_Literal_TN((val >> 16) & 0xffff, 4));
06656 } else {
06657 set_gpr_op = Mk_OP(TOP_lui, gpr_tn, Gen_Literal_TN((val >> 16) & 0xffff,4));
06658 set_gpr_op_1 = Mk_OP(TOP_ori, gpr_tn, gpr_tn,
06659 Gen_Literal_TN(val & 0xffff, 4));
06660 }
06661 }
06662 } else {
06663 gpr_tn = trip_count_tn;
06664 set_gpr_op = NULL;
06665 set_gpr_op_1 = NULL;
06666 }
06667
06668
06669 TN* lc_tn;
06670 INT lc_idx = LOOP_DESCR_lcidx(cl.Loop());
06671 switch(lc_idx) {
06672 case 1: lc_tn = LC0_TN; break;
06673 case 2: lc_tn = LC1_TN; break;
06674 case 3: lc_tn = LC2_TN; break;
06675 case 4: lc_tn = LC3_TN; break;
06676 default:
06677 Is_True(0, ("invalide loop-counter-index"));
06678 }
06679 OP* mvtc_op;
06680 if (using_mvtci) {
06681 mvtc_op = Mk_OP(TOP_mvtc_i, lc_tn, trip_count_tn);
06682 } else {
06683 mvtc_op = Mk_OP(TOP_mvtc, lc_tn, gpr_tn);
06684 }
06685
06686 LABEL_IDX loop_targ_label = br_op == NULL? Get_BB_Tag( tail ) : Gen_Tag();
06687 TN* label_tn = Gen_Label_TN( loop_targ_label, 0 );
06688 TN* lc_index_tn = Gen_Literal_TN( lc_idx-1, 4 );
06689 OP* loop_op = Mk_OP( TOP_loop, lc_index_tn, label_tn, lc_tn);
06690 Set_BB_Tag(tail, loop_targ_label);
06691 Set_BB_has_tag(tail);
06692
06693 if(set_gpr_op) {
06694 BB_Append_Op( prolog, set_gpr_op );
06695 }
06696 if(set_gpr_op_1) {
06697 BB_Append_Op( prolog, set_gpr_op_1 );
06698 }
06699 BB_Append_Op( prolog, mvtc_op );
06700 BB_Append_Op( prolog, loop_op );
06701
06702
06703 if (br_op)
06704 BB_Remove_Op( tail, br_op );
06705
06706
06707
06708
06709
06710
06711
06712
06713
06714
06715 Set_BB_zdl_prolog( prolog );
06716 Set_BB_zdl_body( tail );
06717
06718 return br_op;
06719 }
06720
06721
06722
06723
06724 static BOOL TN_Used_In_Following_OPs( TN *tn, OP *cur_op )
06725 {
06726 BOOL used = FALSE;
06727 OP* op = cur_op;
06728 while( op=OP_next(op) ) {
06729 if( OP_Refs_TN( op, tn ) )
06730 return TRUE;
06731 }
06732 return FALSE;
06733 }
06734 #endif
06735
06736
06737 #if defined(TARG_SL)
06738 static INT Use_OPs_Num_Of_TN(TN* tn, BB* bb)
06739 {
06740 OP* op;
06741 INT num = 0;
06742 FOR_ALL_BB_OPs_FWD(bb,op) {
06743 if( OP_Refs_TN(op, tn) )
06744 num++;
06745 }
06746 return num;
06747 }
06748
06749
06750
06751
06752 static void
06753 CG_LOOP_ZDL_Remove_Idx_GTN( LOOP_DESCR * loop, CG_LOOP &cl, OP* br_op )
06754 {
06755
06756 if( !br_op )
06757 return;
06758
06759 BB_SET* bbset = loop->bbset;
06760 BB *bb;
06761 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_GEN);
06762 BB* prolog = cl.Prolog_end();
06763 BB* epilog = cl.Epilog_start();
06764 BB* head = cl.Loop_header();
06765 BB* tail = BB_Other_Predecessor(head, prolog);
06766 int bb_num = 0;
06767
06768 Is_True( epilog && prolog, ("epilog or prolog not exist") );
06769
06770 if( trace ) {
06771 fprintf( TFile, "===== Dead Code Elimination after EBO \n");
06772 fprintf( TFile, "loop contains BBs: " );
06773 }
06774
06775 for (bb = BB_SET_Choose(bbset); bb != BB_SET_CHOOSE_FAILURE;
06776 bb = BB_SET_Choose_Next(bbset, bb) ) {
06777
06778 OP *br = BB_branch_op(bb);
06779 if(br){
06780 INT targ_opnd = Branch_Target_Operand(br_op);
06781 if( TN_is_label(OP_opnd(br_op, targ_opnd)) ){
06782 LABEL_IDX label = TN_label(OP_opnd(br_op, targ_opnd));
06783 BB *succ = Get_Label_BB(label);
06784 Is_True( BB_SET_MemberP(bbset, succ) , ("zdl loop body bb contains branch, targeting outside of loop") );
06785 }
06786 }
06787 if( trace ) {
06788 fprintf( TFile, "%i ", BB_id(bb) );
06789 }
06790 bb_num++;
06791 }
06792
06793 if( trace ) {
06794 fprintf( TFile, "\n prolog:%i, epilog:%i, head:%i, tail:%i\n", BB_id(prolog), BB_id(epilog), BB_id(head), BB_id(tail) );
06795 }
06796
06797 extern void Print_OP_No_SrcLine(const OP* op);
06798 if(trace)
06799 fprintf( TFile, "Remove OPs:\n" );
06800
06801
06802
06803
06804
06805
06806
06807 TN* ind_var_gtn = NULL;
06808 for( int i=0; i < OP_opnds(br_op); i++ ){
06809 TN* tn = OP_opnd(br_op, i);
06810 if( TN_is_global_reg(tn) && !TN_is_constant(tn) ) {
06811 ind_var_gtn = tn;
06812 break;
06813 }
06814 }
06815
06816
06817
06818
06819 if( !ind_var_gtn )
06820 return;
06821
06822
06823
06824
06825 if( GTN_SET_MemberP(BB_live_in(epilog), ind_var_gtn) )
06826 return;
06827
06828
06829
06830
06831 bb = tail;
06832 int live_use_bbs = 0;
06833 BB* live_use_bb = NULL;
06834 for(; bb && BB_SET_MemberP(bbset, bb); bb = BB_prev(bb) ) {
06835 if( GTN_SET_MemberP( BB_live_use(bb), ind_var_gtn ) ) {
06836 live_use_bb = bb;
06837 live_use_bbs++;
06838 }
06839 }
06840
06841 if( live_use_bbs > 1 ){
06842 return;
06843 }
06844
06845
06846
06847
06848
06849 if( live_use_bbs == 0 ){
06850 DevWarn( ("induction var GTN not used in any BB") );
06851 return;
06852 }
06853
06854 Is_True( live_use_bbs == 1 && ind_var_gtn, ("induction var GTN not used in any BB") );
06855
06856
06857
06858
06859
06860
06861
06862
06863
06864
06865
06866
06867
06868
06869 std::vector<OP *> def_ops;
06870 std::vector<OP *> use_ops;
06871 OP *op;
06872 FOR_ALL_BB_OPs_FWD( live_use_bb, op ){
06873 if( OP_Defs_TN(op, ind_var_gtn) )
06874 def_ops.push_back(op);
06875 if( OP_Refs_TN(op, ind_var_gtn) )
06876 use_ops.push_back(op);
06877 }
06878
06879
06880
06881
06882
06883
06884
06885
06886 BOOL cannot_dce = FALSE;
06887 for( std::vector<OP*>::iterator it1 = use_ops.begin();
06888 it1 != use_ops.end();
06889 ++it1 ) {
06890 OP * op = *it1;
06891 for( int i=0; i < OP_results( op ); i++ ){
06892 TN* tn = OP_result(op, i);
06893 INT usenum = 0;
06894 for( std::vector<OP*>::iterator it2 = def_ops.begin();
06895 it2 != def_ops.end();
06896 ++it2 ) {
06897 if( OP_Refs_TN( *it2, tn ) )
06898 usenum++;
06899 }
06900
06901 if( usenum != Use_OPs_Num_Of_TN(tn, live_use_bb) ) {
06902 cannot_dce = TRUE;
06903 break;
06904 }
06905 }
06906 if( cannot_dce )
06907 break;
06908 }
06909
06910 if( cannot_dce ) {
06911 return;
06912 }
06913
06914
06915 for( std::vector<OP*>::iterator it1 = use_ops.begin();
06916 it1 != use_ops.end();
06917 ++it1 ){
06918 if( !OP_has_tag(*it1) ){
06919 BB_Remove_Op( live_use_bb, *it1 );
06920 if( trace ) {
06921 fprintf( TFile, "in BB:%i , delete:", BB_id(live_use_bb) );
06922 Print_OP_No_SrcLine( *it1 );
06923 }
06924 }
06925 }
06926 for( std::vector<OP*>::iterator it1 = def_ops.begin();
06927 it1 != def_ops.end();
06928 ++it1 ) {
06929 if( (*it1)->bb == live_use_bb ) {
06930 if( !OP_has_tag(*it1) ){
06931 BB_Remove_Op( live_use_bb, *it1 );
06932 if( trace ){
06933 fprintf( TFile, "in BB:%i , delete:", BB_id(live_use_bb) );
06934 Print_OP_No_SrcLine( *it1 );
06935 }
06936 }
06937 }
06938 }
06939
06940 return;
06941 }
06942 #endif
06943
06944 #ifdef TARG_SL
06945
06946
06947
06948 static void CG_LOOP_ZDL_Remove_LTN( LOOP_DESCR * loop, CG_LOOP &cl, OP* br_op )
06949 {
06950
06951 if( !br_op )
06952 return;
06953
06954 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_GEN);
06955 BB_SET* bbset = loop->bbset;
06956 BB *bb;
06957 OP *op;
06958 for( bb = BB_SET_Choose(bbset); bb != BB_SET_CHOOSE_FAILURE;
06959 bb = BB_SET_Choose_Next(bbset, bb) ) {
06960 BOOL changed = TRUE;
06961 while( changed ){
06962 changed = FALSE;
06963 FOR_ALL_BB_OPs_REV( bb, op ){
06964 if( OP_has_result(op)
06965 && !OP_has_implicit_interactions(op)
06966 && !OP_store(op)
06967 && !CGTARG_Is_OP_Intrinsic(op) ){
06968 BOOL op_used = FALSE;
06969 for( int i = 0; i < OP_results(op) ; i++ ){
06970 TN * tn = OP_result(op, i);
06971 if( TN_Used_In_Following_OPs(tn, op)
06972 || TN_is_dedicated(tn)
06973 || TN_is_global_reg(tn) ){
06974 op_used = TRUE;
06975 break;
06976 }
06977 }
06978
06979 if( !op_used ){
06980 if( trace ){
06981 fprintf( TFile, "Remove_LTN remove OP:" );
06982 Print_OP_No_SrcLine( op );
06983 fprintf( TFile, "in BB:%i\n", BB_id(bb) );
06984 if( BB_length(bb)==1 && OP_has_tag(op) )
06985 fprintf(TFile, "it should NOT be removed!\n" );
06986 }
06987 BB_Remove_Op( bb, op );
06988 if( !( BB_length(bb)==1 && OP_has_tag(op) ) )
06989 changed = TRUE;
06990 }
06991 }
06992 }
06993 }
06994 }
06995 }
06996
06997
06998
06999
07000
07001
07002
07003
07004 void Check_Branch( LOOP_DESCR* loop, BOOL& br_out, BOOL& br_in )
07005 {
07006 BB *loop_head = LOOP_DESCR_loophead(loop);
07007 BB* loop_bb = NULL;
07008 FOR_ALL_BB_SET_members( LOOP_DESCR_bbset(loop) , loop_bb) {
07009 if (BB_zdl_body(loop_bb)) {
07010 continue;
07011 }
07012
07013 if( BB_call(loop_bb) || BB_asm(loop_bb) ){
07014 br_out = TRUE;
07015 return;
07016 }
07017 OP *br_op = BB_branch_op(loop_bb);
07018 if( br_op && ( OP_fork(br_op) || OP_ijump(br_op) ) ){
07019 br_out = TRUE;
07020 return;
07021 }
07022
07023
07024 BB *succ = NULL;
07025 if( br_op ){
07026 INT targ_opnd = Branch_Target_Operand(br_op);
07027 if( TN_is_label(OP_opnd(br_op, targ_opnd)) ){
07028 LABEL_IDX label = TN_label(OP_opnd(br_op, targ_opnd));
07029 succ = Get_Label_BB(label);
07030 if( !BB_SET_MemberP( LOOP_DESCR_bbset(loop), succ ) ){
07031 br_out = TRUE;
07032 return;
07033 } else if (succ != loop_head) {
07034 br_in = TRUE;
07035 }
07036 }
07037 }
07038 }
07039
07040 }
07041
07042 void ZDL_Loop_Num_Incr()
07043 {
07044 zdl_seq_no++;
07045 }
07046
07047 BOOL ZDL_Skip_Loop ()
07048 {
07049 if( zdl_seq_no == CG_zdl_skip_e || zdl_seq_no < CG_zdl_skip_b ||
07050 zdl_seq_no > CG_zdl_skip_a )
07051 return TRUE;
07052 else
07053 return FALSE;
07054 }
07055
07056
07057
07058
07059
07060
07061
07062
07063
07064
07065 static void
07066 CG_LOOP_Zdl_Ident_Rec( LOOP_DESCR* loop )
07067 {
07068 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_GEN);
07069 BOOL trace_seq_no = Get_Trace(TP_CGLOOP, TRACE_ZDL_SEQ_NO);
07070 Is_True( loop, ("loop is NULL") );
07071
07072 int curr_rnl = 1;
07073
07074
07075
07076
07077
07078 for (INT i = 0; i < VECTOR_count( loop->children ); i++) {
07079 LOOP_DESCR * child_loop = (LOOP_DESCR*) VECTOR_element( loop->children, i );
07080 Is_True( child_loop, ("child_loop is NULL") );
07081 CG_LOOP_Zdl_Ident_Rec( child_loop );
07082
07083 if (Can_Zero_Delay(child_loop) && curr_rnl > 0) {
07084 curr_rnl = curr_rnl > LOOP_DESCR_lcidx(child_loop) + 1 ?
07085 curr_rnl : LOOP_DESCR_lcidx(child_loop) + 1;
07086 } else
07087 curr_rnl = 0;
07088 }
07089
07090 if( trace ) {
07091 fprintf( TFile, "%s", DBar );
07092 fprintf( TFile, " Begin ZDL for one loop \n");
07093 }
07094
07095
07096
07097
07098 if( curr_rnl <= 0 ) {
07099 if( trace ){
07100 LOOP_DESCR_Dump_Loop_Brief( loop );
07101 fprintf( TFile, " --- can NOT be zdl\n");
07102 fprintf( TFile, " --- because one child failed\n");
07103 }
07104 return;
07105 }
07106
07107
07108
07109
07110
07111
07112
07113
07114 TN * trip_count_tn = CG_LOOP_Trip_Count( loop );
07115 if(!trip_count_tn) {
07116 if( trace ){
07117 LOOP_DESCR_Dump_Loop_Brief( loop );
07118 fprintf( TFile, " --- can NOT be zdl,\n");
07119 fprintf( TFile, " --- because no trip count.\n");
07120 }
07121 return;
07122 }
07123
07124
07125
07126
07127 if( curr_rnl > CG_zdl_enabled_level ) {
07128 if( trace ){
07129 LOOP_DESCR_Dump_Loop_Brief( loop );
07130 fprintf( TFile, " --- can NOT be zdl,\n");
07131 fprintf( TFile, " --- because rnl=%i\n", curr_rnl );
07132 fprintf( TFile, " --- zdl_enabled_level=%i\n", CG_zdl_enabled_level );
07133 }
07134 return;
07135 }
07136
07137
07138
07139 BOOL single_bb = (BB_SET_Size(LOOP_DESCR_bbset(loop)) == 1);
07140 if (!single_bb) {
07141 if (trace) {
07142 fprintf(TFile, " --- can NOT be zdl,\n");
07143 fprintf(TFile, " --- because loop body is not single");
07144 BB_SET_Print(LOOP_DESCR_bbset(loop), TFile);
07145 fprintf(TFile, "\n");
07146 }
07147 return;
07148 }
07149
07150 BOOL has_outside_br = FALSE;
07151 BOOL has_inside_br = FALSE;
07152 Check_Branch(loop, has_outside_br, has_inside_br);
07153
07154
07155
07156 if( has_outside_br) {
07157 if( trace ){
07158 LOOP_DESCR_Dump_Loop_Brief( loop );
07159 fprintf( TFile, " --- can NOT be zdl,\n");
07160 fprintf( TFile, " --- because branch to outside.\n");
07161 }
07162 return;
07163 }
07164
07165 if (!enable_zdl_with_branch && has_inside_br) {
07166 if( trace ){
07167 LOOP_DESCR_Dump_Loop_Brief( loop );
07168 fprintf( TFile, " --- can NOT be zdl\n");
07169 fprintf( TFile, " --- because having branch inside\n");
07170 }
07171 return;
07172 }
07173
07174 if( trace ){
07175 LOOP_DESCR_Dump_Loop_Brief( loop );
07176 fprintf( TFile, " --- can be zdl, rnl=%i\n", curr_rnl );
07177 }
07178
07179 if( trace_seq_no ){
07180 fprintf( TFile, " the sequential number of this loop : %i\n", zdl_seq_no );
07181 ST* cur_pu = Get_Current_PU_ST();
07182 char* pu_name = ST_name( cur_pu );
07183 fprintf( TFile, " in the PU : %s\n", pu_name );
07184 }
07185
07186 ZDL_Loop_Num_Incr();
07187
07188 if (ZDL_Skip_Loop()) {
07189 if( trace_seq_no ) {
07190 fprintf( TFile, " it's skipped by the zdl_skip_xxx option\n" );
07191 }
07192 return;
07193 }
07194
07195 Set_Can_Zero_Delay( loop );
07196 LOOP_DESCR_lcidx(loop) = curr_rnl;
07197 BB* tail = LOOP_DESCR_Find_Unique_Tail(loop);
07198 FmtAssert(BB_length(tail)>0, ("empty tail bb of loop"));
07199 Set_BB_zdl_body(tail);
07200 return;
07201 }
07202
07203 static void
07204 CG_LOOP_Adjust_Zdl_Body( CG_LOOP& cg_loop )
07205 {
07206 BB *tail=LOOP_DESCR_Find_Unique_Tail(cg_loop.Loop());
07207 if(BB_length(tail) != 0)
07208 return;
07209
07210 BB *new_tail=tail;
07211 while( new_tail && BB_length(new_tail)==0 ) {
07212 DevWarn( ("One bb in the loop is empty, dangerous") );
07213 new_tail = BB_prev(new_tail);
07214 }
07215 FmtAssert(new_tail && new_tail != tail &&
07216 BB_SET_MemberP( LOOP_DESCR_bbset(cg_loop.Loop()), new_tail ),
07217 ("CG_LOOP_Adjust_Zdl_Body::wrong new tail found"));
07218
07219 if(BB_zdl_body(new_tail)) {
07220 FmtAssert(BB_has_tag(new_tail), ("CG_LOOP_Adjust_Zdl_Body::zdl body has no tag"));
07221 LABEL_IDX tag = Get_BB_Tag(new_tail);
07222 TN* tag_tn = Gen_Label_TN( tag, 0 );
07223
07224 BB *prolog = cg_loop.Prolog_end();
07225 OP *loop_op = BB_last_op(prolog);
07226 FmtAssert(OP_code(loop_op) == TOP_loop, ("CG_LOOP_Adjust_Zdl_Body::cannot find loop op"));
07227 Set_OP_opnd( loop_op, 1, tag_tn );
07228
07229 } else {
07230 LABEL_IDX tag = Get_BB_Tag(tail);
07231 Set_BB_Tag(new_tail, tag);
07232 Set_BB_has_tag(new_tail);
07233 Set_BB_zdl_body(new_tail);
07234 }
07235
07236 Reset_BB_has_tag(tail);
07237 Reset_BB_zdl_body(tail);
07238
07239 return;
07240 }
07241
07242 static void
07243 CG_LOOP_Zdl_Gen_Rec( LOOP_DESCR* loop )
07244 {
07245 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_GEN);
07246 BOOL trace_seq_no = Get_Trace(TP_CGLOOP, TRACE_ZDL_SEQ_NO);
07247
07248 Is_True( loop, ("loop is NULL") );
07249
07250
07251
07252
07253
07254 for( INT i = 0; i < VECTOR_count( loop->children ); i++ ){
07255 LOOP_DESCR * child_loop = (LOOP_DESCR*) VECTOR_element( loop->children, i );
07256 Is_True( child_loop, ("child_loop is NULL") );
07257 CG_LOOP_Zdl_Gen_Rec( child_loop );
07258 }
07259
07260 if( trace ) {
07261 fprintf( TFile, "%s", DBar );
07262 fprintf( TFile, " Begin ZDL for one loop \n");
07263 }
07264
07265
07266
07267
07268
07269 if(!Can_Zero_Delay( loop ))
07270 return;
07271
07272 CG_LOOP cg_loop( loop );
07273 OP* br_op = CG_LOOP_Zdl_Internal_Gen_Code( cg_loop );
07274
07275
07276
07277
07278 cg_loop.Recompute_Liveness();
07279
07280
07281
07282
07283
07284
07285
07286
07287 INT32 oldvalue = EBO_Opt_Level;
07288 EBO_Opt_Level = 2;
07289 EBO_Process_Region(NULL);
07290 EBO_Opt_Level = oldvalue;
07291
07292 CG_LOOP_ZDL_Remove_Idx_GTN( loop, cg_loop, br_op );
07293
07294
07295 cg_loop.Recompute_Liveness();
07296
07297
07298
07299
07300 CG_LOOP_ZDL_Remove_LTN( loop, cg_loop, br_op );
07301
07302
07303
07304
07305 oldvalue = EBO_Opt_Level;
07306 EBO_Opt_Level = 2;
07307 EBO_Process_Region(NULL);
07308 EBO_Opt_Level = oldvalue;
07309
07310
07311 if(BB_length(LOOP_DESCR_Find_Unique_Tail(loop))==0) {
07312 CG_LOOP_Adjust_Zdl_Body(cg_loop);
07313 }
07314
07315 return;
07316 }
07317
07318
07319
07320 void CG_LOOP_Zdl_Ident()
07321 {
07322 int i;
07323 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_IR);
07324 if( trace ) {
07325 fprintf( TFile, "%s\n", DBar );
07326 fprintf( TFile, " Before zero delay loop identification\n" );
07327 fprintf( TFile, "%s\n", DBar );
07328 CG_Dump_Cur_Region();
07329 }
07330
07331 for( i = 0; i < VECTOR_count(loop_tree_roots); i++ ){
07332 CG_LOOP_Zdl_Ident_Rec( (LOOP_DESCR*)VECTOR_element(loop_tree_roots,i) );
07333 }
07334
07335 if( trace ) {
07336 fprintf( TFile, "%s\n", DBar );
07337 fprintf( TFile, " After zero delay loop identification\n" );
07338 fprintf( TFile, "%s\n", DBar );
07339 CG_Dump_Cur_Region();
07340 }
07341 }
07342
07343 void CG_LOOP_Zdl_Gen()
07344 {
07345 int i;
07346 BOOL trace = Get_Trace(TP_CGLOOP, TRACE_ZDL_IR);
07347 if( trace ) {
07348 fprintf( TFile, "%s\n", DBar );
07349 fprintf( TFile, " Before zero delay loop generation\n" );
07350 fprintf( TFile, "%s\n", DBar );
07351 CG_Dump_Cur_Region();
07352 }
07353
07354 for( i = 0; i < VECTOR_count(loop_tree_roots); i++ ){
07355 CG_LOOP_Zdl_Gen_Rec( (LOOP_DESCR*)VECTOR_element(loop_tree_roots,i) );
07356 }
07357
07358 if( trace ) {
07359 fprintf( TFile, "%s\n", DBar );
07360 fprintf( TFile, " After zero delay loop generation\n" );
07361 fprintf( TFile, "%s\n", DBar );
07362 CG_Dump_Cur_Region();
07363 }
07364 }
07365 #endif
07366
07367
07368
07369
07370
07371 #if defined(TARG_IA64) || defined(TARG_SL) || defined(TARG_MIPS)
07372 void Perform_Loop_Optimizations(void *rgn_loop_update)
07373 #else
07374 void Perform_Loop_Optimizations()
07375 #endif
07376 {
07377 MEM_POOL loop_descr_pool;
07378 MEM_POOL_Initialize(&loop_descr_pool, "loop_descriptors", TRUE);
07379 MEM_POOL_Push (&loop_descr_pool);
07380 BOOL trace_general = Get_Trace(TP_CGLOOP, 1);
07381
07382 SWP_Options.PU_Configure();
07383
07384 Calculate_Dominators();
07385
07386 SWP_FIXUP_VECTOR fixup;
07387 #ifdef KEY
07388 #ifdef Is_True_On
07389 INT32 cur_loop_idx = 0;
07390 #endif
07391 #endif
07392
07393 for (LOOP_DESCR *loop = LOOP_DESCR_Detect_Loops(&loop_descr_pool);
07394 loop;
07395 loop = LOOP_DESCR_next(loop)) {
07396 BB *head = LOOP_DESCR_loophead(loop);
07397
07398 if (CG_LOOP_Skip(head)) {
07399 DevWarn("CG_LOOP skip BB%d.", BB_id(head));
07400 continue;
07401 }
07402
07403 if (trace_general)
07404 {
07405 #pragma mips_frequency_hint NEVER
07406 fprintf(TFile, "%s<cgprep> %sloop line %d, BBs ",
07407 DBar, BB_innermost(head) ? "innermost " : "",
07408 BB_Loop_Lineno(head));
07409 BB_SET_Print(LOOP_DESCR_bbset(loop), TFile);
07410 fprintf(TFile, ", head BB:%d, nest level %d\n",
07411 BB_id(head), LOOP_DESCR_nestlevel(loop));
07412 }
07413
07414 if (trace_general)
07415 CG_LOOP_Statistics(loop);
07416
07417 #ifdef TARG_IA64
07418 if(IPFEC_Enable_Region_Formation){
07419 extern void Rebuild_Loop_Region(void *, void *, BOOL);
07420 void *par_rgn=NULL;
07421 int succ = CG_LOOP_Optimize(loop, fixup, &par_rgn,rgn_loop_update);
07422 if(par_rgn != NULL)
07423 Rebuild_Loop_Region(rgn_loop_update, par_rgn, succ);
07424 }else
07425 CG_LOOP_Optimize(loop, fixup);
07426 #else
07427
07428 #ifdef KEY
07429 #ifdef Is_True_On
07430 cur_loop_idx ++;
07431 if (CG_Enable_Loop_Opt_Limit != -1 &&
07432 cur_loop_idx > CG_Enable_Loop_Opt_Limit)
07433 break;
07434 #endif
07435 #endif
07436 CG_LOOP_Optimize(loop, fixup);
07437 #endif // TARG_IA64
07438 }
07439
07440
07441 for (INT i = 0; i < fixup.size(); i++)
07442 SWP_Fixup(fixup[i]);
07443
07444 #ifdef HAS_ROTATING_REGISTERS
07445 if (Enable_SWP) {
07446
07447 REGISTER_Request_Stacked_Rotating_Register();
07448 }
07449 #endif
07450
07451 #if defined(TARG_SL)
07452 if (CG_opt_level > 1 && CG_enable_zero_delay_loop) {
07453 Free_Dominators_Memory ();
07454 Calculate_Dominators();
07455 LOOP_DESCR_Detect_Loops(&loop_descr_pool);
07456 LOOP_DESCR_Create_Loop_Tree(&loop_descr_pool);
07457 if(trace_general){
07458 LOOP_DESCR_Dump_Loop_Tree();
07459 }
07460
07461
07462 CG_LOOP_Zdl_Ident();
07463
07464 CG_LOOP_Zdl_Gen();
07465 }
07466
07467 #endif
07468
07469 MEM_POOL_Pop (&loop_descr_pool);
07470 MEM_POOL_Delete(&loop_descr_pool);
07471
07472 Free_Dominators_Memory ();
07473 }
07474
07475
07476