00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "config.h"
00022 #include "system.h"
00023 #include "coretypes.h"
00024 #include "tm.h"
00025 #include "rtl.h"
00026 #include "hard-reg-set.h"
00027 #include "obstack.h"
00028 #include "basic-block.h"
00029 #include "cfgloop.h"
00030 #include "cfglayout.h"
00031 #include "params.h"
00032 #include "output.h"
00033 #include "expr.h"
00034 #include "hashtab.h"
00035 #include "recog.h"
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074 struct iv_to_split
00075 {
00076 rtx insn;
00077 rtx base_var;
00078
00079 rtx step;
00080 unsigned n_loc;
00081 unsigned loc[3];
00082
00083
00084
00085 };
00086
00087
00088
00089 struct var_to_expand
00090 {
00091 rtx insn;
00092 rtx reg;
00093 VEC(rtx,heap) *var_expansions;
00094 enum rtx_code op;
00095
00096 int expansion_count;
00097 int reuse_expansion;
00098
00099
00100
00101 };
00102
00103
00104
00105
00106 struct opt_info
00107 {
00108 htab_t insns_to_split;
00109 htab_t insns_with_var_to_expand;
00110
00111 unsigned first_new_block;
00112
00113 basic_block loop_exit;
00114 basic_block loop_preheader;
00115 };
00116
00117 static void decide_unrolling_and_peeling (struct loops *, int);
00118 static void peel_loops_completely (struct loops *, int);
00119 static void decide_peel_simple (struct loop *, int);
00120 static void decide_peel_once_rolling (struct loop *, int);
00121 static void decide_peel_completely (struct loop *, int);
00122 static void decide_unroll_stupid (struct loop *, int);
00123 static void decide_unroll_constant_iterations (struct loop *, int);
00124 static void decide_unroll_runtime_iterations (struct loop *, int);
00125 static void peel_loop_simple (struct loops *, struct loop *);
00126 static void peel_loop_completely (struct loops *, struct loop *);
00127 static void unroll_loop_stupid (struct loops *, struct loop *);
00128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
00129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
00130 static struct opt_info *analyze_insns_in_loop (struct loop *);
00131 static void opt_info_start_duplication (struct opt_info *);
00132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
00133 static void free_opt_info (struct opt_info *);
00134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
00135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
00136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
00137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
00138 static int insert_var_expansion_initialization (void **, void *);
00139 static int combine_var_copies_in_loop_exit (void **, void *);
00140 static int release_var_copies (void **, void *);
00141 static rtx get_expansion (struct var_to_expand *);
00142
00143
00144 void
00145 unroll_and_peel_loops (struct loops *loops, int flags)
00146 {
00147 struct loop *loop, *next;
00148 bool check;
00149
00150
00151
00152 peel_loops_completely (loops, flags);
00153
00154
00155 decide_unrolling_and_peeling (loops, flags);
00156
00157 loop = loops->tree_root;
00158 while (loop->inner)
00159 loop = loop->inner;
00160
00161
00162 while (loop != loops->tree_root)
00163 {
00164 if (loop->next)
00165 {
00166 next = loop->next;
00167 while (next->inner)
00168 next = next->inner;
00169 }
00170 else
00171 next = loop->outer;
00172
00173 check = true;
00174
00175 switch (loop->lpt_decision.decision)
00176 {
00177 case LPT_PEEL_COMPLETELY:
00178
00179 gcc_unreachable ();
00180 case LPT_PEEL_SIMPLE:
00181 peel_loop_simple (loops, loop);
00182 break;
00183 case LPT_UNROLL_CONSTANT:
00184 unroll_loop_constant_iterations (loops, loop);
00185 break;
00186 case LPT_UNROLL_RUNTIME:
00187 unroll_loop_runtime_iterations (loops, loop);
00188 break;
00189 case LPT_UNROLL_STUPID:
00190 unroll_loop_stupid (loops, loop);
00191 break;
00192 case LPT_NONE:
00193 check = false;
00194 break;
00195 default:
00196 gcc_unreachable ();
00197 }
00198 if (check)
00199 {
00200 #ifdef ENABLE_CHECKING
00201 verify_dominators (CDI_DOMINATORS);
00202 verify_loop_structure (loops);
00203 #endif
00204 }
00205 loop = next;
00206 }
00207
00208 iv_analysis_done ();
00209 }
00210
00211
00212
00213 static bool
00214 loop_exit_at_end_p (struct loop *loop)
00215 {
00216 struct niter_desc *desc = get_simple_loop_desc (loop);
00217 rtx insn;
00218
00219 if (desc->in_edge->dest != loop->latch)
00220 return false;
00221
00222
00223 FOR_BB_INSNS (loop->latch, insn)
00224 {
00225 if (INSN_P (insn))
00226 return false;
00227 }
00228
00229 return true;
00230 }
00231
00232
00233 static void
00234 peel_loops_completely (struct loops *loops, int flags)
00235 {
00236 struct loop *loop;
00237 unsigned i;
00238
00239
00240 for (i = loops->num - 1; i > 0; i--)
00241 {
00242 loop = loops->parray[i];
00243 if (!loop)
00244 continue;
00245
00246 loop->lpt_decision.decision = LPT_NONE;
00247
00248 if (dump_file)
00249 fprintf (dump_file,
00250 "\n;; *** Considering loop %d for complete peeling ***\n",
00251 loop->num);
00252
00253 loop->ninsns = num_loop_insns (loop);
00254
00255 decide_peel_once_rolling (loop, flags);
00256 if (loop->lpt_decision.decision == LPT_NONE)
00257 decide_peel_completely (loop, flags);
00258
00259 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
00260 {
00261 peel_loop_completely (loops, loop);
00262 #ifdef ENABLE_CHECKING
00263 verify_dominators (CDI_DOMINATORS);
00264 verify_loop_structure (loops);
00265 #endif
00266 }
00267 }
00268 }
00269
00270
00271 static void
00272 decide_unrolling_and_peeling (struct loops *loops, int flags)
00273 {
00274 struct loop *loop = loops->tree_root, *next;
00275
00276 while (loop->inner)
00277 loop = loop->inner;
00278
00279
00280 while (loop != loops->tree_root)
00281 {
00282 if (loop->next)
00283 {
00284 next = loop->next;
00285 while (next->inner)
00286 next = next->inner;
00287 }
00288 else
00289 next = loop->outer;
00290
00291 loop->lpt_decision.decision = LPT_NONE;
00292
00293 if (dump_file)
00294 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
00295
00296
00297 if (!maybe_hot_bb_p (loop->header))
00298 {
00299 if (dump_file)
00300 fprintf (dump_file, ";; Not considering loop, cold area\n");
00301 loop = next;
00302 continue;
00303 }
00304
00305
00306 if (!can_duplicate_loop_p (loop))
00307 {
00308 if (dump_file)
00309 fprintf (dump_file,
00310 ";; Not considering loop, cannot duplicate\n");
00311 loop = next;
00312 continue;
00313 }
00314
00315
00316 if (loop->inner)
00317 {
00318 if (dump_file)
00319 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
00320 loop = next;
00321 continue;
00322 }
00323
00324 loop->ninsns = num_loop_insns (loop);
00325 loop->av_ninsns = average_num_loop_insns (loop);
00326
00327
00328
00329
00330 decide_unroll_constant_iterations (loop, flags);
00331 if (loop->lpt_decision.decision == LPT_NONE)
00332 decide_unroll_runtime_iterations (loop, flags);
00333 if (loop->lpt_decision.decision == LPT_NONE)
00334 decide_unroll_stupid (loop, flags);
00335 if (loop->lpt_decision.decision == LPT_NONE)
00336 decide_peel_simple (loop, flags);
00337
00338 loop = next;
00339 }
00340 }
00341
00342
00343
00344 static void
00345 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
00346 {
00347 struct niter_desc *desc;
00348
00349 if (dump_file)
00350 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
00351
00352
00353 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
00354 {
00355 if (dump_file)
00356 fprintf (dump_file, ";; Not considering loop, is too big\n");
00357 return;
00358 }
00359
00360
00361 desc = get_simple_loop_desc (loop);
00362
00363
00364 if (!desc->simple_p
00365 || desc->assumptions
00366 || desc->infinite
00367 || !desc->const_iter
00368 || desc->niter != 0)
00369 {
00370 if (dump_file)
00371 fprintf (dump_file,
00372 ";; Unable to prove that the loop rolls exactly once\n");
00373 return;
00374 }
00375
00376
00377 if (dump_file)
00378 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
00379 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
00380 }
00381
00382
00383 static void
00384 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
00385 {
00386 unsigned npeel;
00387 struct niter_desc *desc;
00388
00389 if (dump_file)
00390 fprintf (dump_file, "\n;; Considering peeling completely\n");
00391
00392
00393 if (loop->inner)
00394 {
00395 if (dump_file)
00396 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
00397 return;
00398 }
00399
00400
00401 if (!maybe_hot_bb_p (loop->header))
00402 {
00403 if (dump_file)
00404 fprintf (dump_file, ";; Not considering loop, cold area\n");
00405 return;
00406 }
00407
00408
00409 if (!can_duplicate_loop_p (loop))
00410 {
00411 if (dump_file)
00412 fprintf (dump_file,
00413 ";; Not considering loop, cannot duplicate\n");
00414 return;
00415 }
00416
00417
00418 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
00419 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
00420 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
00421
00422
00423 if (!npeel)
00424 {
00425 if (dump_file)
00426 fprintf (dump_file, ";; Not considering loop, is too big\n");
00427 return;
00428 }
00429
00430
00431 desc = get_simple_loop_desc (loop);
00432
00433
00434 if (!desc->simple_p
00435 || desc->assumptions
00436 || !desc->const_iter
00437 || desc->infinite)
00438 {
00439 if (dump_file)
00440 fprintf (dump_file,
00441 ";; Unable to prove that the loop iterates constant times\n");
00442 return;
00443 }
00444
00445 if (desc->niter > npeel - 1)
00446 {
00447 if (dump_file)
00448 {
00449 fprintf (dump_file,
00450 ";; Not peeling loop completely, rolls too much (");
00451 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
00452 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
00453 }
00454 return;
00455 }
00456
00457
00458 if (dump_file)
00459 fprintf (dump_file, ";; Decided to peel loop completely\n");
00460 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
00461 }
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477 static void
00478 peel_loop_completely (struct loops *loops, struct loop *loop)
00479 {
00480 sbitmap wont_exit;
00481 unsigned HOST_WIDE_INT npeel;
00482 unsigned n_remove_edges, i;
00483 edge *remove_edges, ein;
00484 struct niter_desc *desc = get_simple_loop_desc (loop);
00485 struct opt_info *opt_info = NULL;
00486
00487 npeel = desc->niter;
00488
00489 if (npeel)
00490 {
00491 bool ok;
00492
00493 wont_exit = sbitmap_alloc (npeel + 1);
00494 sbitmap_ones (wont_exit);
00495 RESET_BIT (wont_exit, 0);
00496 if (desc->noloop_assumptions)
00497 RESET_BIT (wont_exit, 1);
00498
00499 remove_edges = XCNEWVEC (edge, npeel);
00500 n_remove_edges = 0;
00501
00502 if (flag_split_ivs_in_unroller)
00503 opt_info = analyze_insns_in_loop (loop);
00504
00505 opt_info_start_duplication (opt_info);
00506 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00507 loops, npeel,
00508 wont_exit, desc->out_edge,
00509 remove_edges, &n_remove_edges,
00510 DLTHE_FLAG_UPDATE_FREQ
00511 | DLTHE_FLAG_COMPLETTE_PEEL
00512 | (opt_info
00513 ? DLTHE_RECORD_COPY_NUMBER : 0));
00514 gcc_assert (ok);
00515
00516 free (wont_exit);
00517
00518 if (opt_info)
00519 {
00520 apply_opt_in_copies (opt_info, npeel, false, true);
00521 free_opt_info (opt_info);
00522 }
00523
00524
00525 for (i = 0; i < n_remove_edges; i++)
00526 remove_path (loops, remove_edges[i]);
00527 free (remove_edges);
00528 }
00529
00530 ein = desc->in_edge;
00531 free_simple_loop_desc (loop);
00532
00533
00534
00535 remove_path (loops, ein);
00536
00537 if (dump_file)
00538 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
00539 }
00540
00541
00542
00543
00544 static void
00545 decide_unroll_constant_iterations (struct loop *loop, int flags)
00546 {
00547 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
00548 struct niter_desc *desc;
00549
00550 if (!(flags & UAP_UNROLL))
00551 {
00552
00553 return;
00554 }
00555
00556 if (dump_file)
00557 fprintf (dump_file,
00558 "\n;; Considering unrolling loop with constant "
00559 "number of iterations\n");
00560
00561
00562
00563 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
00564 nunroll_by_av
00565 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
00566 if (nunroll > nunroll_by_av)
00567 nunroll = nunroll_by_av;
00568 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
00569 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
00570
00571
00572 if (nunroll <= 1)
00573 {
00574 if (dump_file)
00575 fprintf (dump_file, ";; Not considering loop, is too big\n");
00576 return;
00577 }
00578
00579
00580 desc = get_simple_loop_desc (loop);
00581
00582
00583 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
00584 {
00585 if (dump_file)
00586 fprintf (dump_file,
00587 ";; Unable to prove that the loop iterates constant times\n");
00588 return;
00589 }
00590
00591
00592 if (desc->niter < 2 * nunroll)
00593 {
00594 if (dump_file)
00595 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
00596 return;
00597 }
00598
00599
00600
00601
00602
00603 best_copies = 2 * nunroll + 10;
00604
00605 i = 2 * nunroll + 2;
00606 if (i - 1 >= desc->niter)
00607 i = desc->niter - 2;
00608
00609 for (; i >= nunroll - 1; i--)
00610 {
00611 unsigned exit_mod = desc->niter % (i + 1);
00612
00613 if (!loop_exit_at_end_p (loop))
00614 n_copies = exit_mod + i + 1;
00615 else if (exit_mod != (unsigned) i
00616 || desc->noloop_assumptions != NULL_RTX)
00617 n_copies = exit_mod + i + 2;
00618 else
00619 n_copies = i + 1;
00620
00621 if (n_copies < best_copies)
00622 {
00623 best_copies = n_copies;
00624 best_unroll = i;
00625 }
00626 }
00627
00628 if (dump_file)
00629 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
00630 best_unroll + 1, best_copies, nunroll);
00631
00632 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
00633 loop->lpt_decision.times = best_unroll;
00634
00635 if (dump_file)
00636 fprintf (dump_file,
00637 ";; Decided to unroll the constant times rolling loop, %d times.\n",
00638 loop->lpt_decision.times);
00639 }
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660 static void
00661 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
00662 {
00663 unsigned HOST_WIDE_INT niter;
00664 unsigned exit_mod;
00665 sbitmap wont_exit;
00666 unsigned n_remove_edges, i;
00667 edge *remove_edges;
00668 unsigned max_unroll = loop->lpt_decision.times;
00669 struct niter_desc *desc = get_simple_loop_desc (loop);
00670 bool exit_at_end = loop_exit_at_end_p (loop);
00671 struct opt_info *opt_info = NULL;
00672 bool ok;
00673
00674 niter = desc->niter;
00675
00676
00677 gcc_assert (niter > max_unroll + 1);
00678
00679 exit_mod = niter % (max_unroll + 1);
00680
00681 wont_exit = sbitmap_alloc (max_unroll + 1);
00682 sbitmap_ones (wont_exit);
00683
00684 remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
00685 n_remove_edges = 0;
00686 if (flag_split_ivs_in_unroller
00687 || flag_variable_expansion_in_unroller)
00688 opt_info = analyze_insns_in_loop (loop);
00689
00690 if (!exit_at_end)
00691 {
00692
00693
00694
00695
00696 if (dump_file)
00697 fprintf (dump_file, ";; Condition on beginning of loop.\n");
00698
00699
00700 RESET_BIT (wont_exit, 0);
00701 if (desc->noloop_assumptions)
00702 RESET_BIT (wont_exit, 1);
00703
00704 if (exit_mod)
00705 {
00706 opt_info_start_duplication (opt_info);
00707 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00708 loops, exit_mod,
00709 wont_exit, desc->out_edge,
00710 remove_edges, &n_remove_edges,
00711 DLTHE_FLAG_UPDATE_FREQ
00712 | (opt_info && exit_mod > 1
00713 ? DLTHE_RECORD_COPY_NUMBER
00714 : 0));
00715 gcc_assert (ok);
00716
00717 if (opt_info && exit_mod > 1)
00718 apply_opt_in_copies (opt_info, exit_mod, false, false);
00719
00720 desc->noloop_assumptions = NULL_RTX;
00721 desc->niter -= exit_mod;
00722 desc->niter_max -= exit_mod;
00723 }
00724
00725 SET_BIT (wont_exit, 1);
00726 }
00727 else
00728 {
00729
00730
00731
00732 if (dump_file)
00733 fprintf (dump_file, ";; Condition on end of loop.\n");
00734
00735
00736
00737
00738 if (exit_mod != max_unroll
00739 || desc->noloop_assumptions)
00740 {
00741 RESET_BIT (wont_exit, 0);
00742 if (desc->noloop_assumptions)
00743 RESET_BIT (wont_exit, 1);
00744
00745 opt_info_start_duplication (opt_info);
00746 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00747 loops, exit_mod + 1,
00748 wont_exit, desc->out_edge,
00749 remove_edges, &n_remove_edges,
00750 DLTHE_FLAG_UPDATE_FREQ
00751 | (opt_info && exit_mod > 0
00752 ? DLTHE_RECORD_COPY_NUMBER
00753 : 0));
00754 gcc_assert (ok);
00755
00756 if (opt_info && exit_mod > 0)
00757 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
00758
00759 desc->niter -= exit_mod + 1;
00760 desc->niter_max -= exit_mod + 1;
00761 desc->noloop_assumptions = NULL_RTX;
00762
00763 SET_BIT (wont_exit, 0);
00764 SET_BIT (wont_exit, 1);
00765 }
00766
00767 RESET_BIT (wont_exit, max_unroll);
00768 }
00769
00770
00771
00772 opt_info_start_duplication (opt_info);
00773 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
00774 loops, max_unroll,
00775 wont_exit, desc->out_edge,
00776 remove_edges, &n_remove_edges,
00777 DLTHE_FLAG_UPDATE_FREQ
00778 | (opt_info
00779 ? DLTHE_RECORD_COPY_NUMBER
00780 : 0));
00781 gcc_assert (ok);
00782
00783 if (opt_info)
00784 {
00785 apply_opt_in_copies (opt_info, max_unroll, true, true);
00786 free_opt_info (opt_info);
00787 }
00788
00789 free (wont_exit);
00790
00791 if (exit_at_end)
00792 {
00793 basic_block exit_block = get_bb_copy (desc->in_edge->src);
00794
00795
00796 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
00797 {
00798 desc->out_edge = EDGE_SUCC (exit_block, 0);
00799 desc->in_edge = EDGE_SUCC (exit_block, 1);
00800 }
00801 else
00802 {
00803 desc->out_edge = EDGE_SUCC (exit_block, 1);
00804 desc->in_edge = EDGE_SUCC (exit_block, 0);
00805 }
00806 }
00807
00808 desc->niter /= max_unroll + 1;
00809 desc->niter_max /= max_unroll + 1;
00810 desc->niter_expr = GEN_INT (desc->niter);
00811
00812
00813 for (i = 0; i < n_remove_edges; i++)
00814 remove_path (loops, remove_edges[i]);
00815 free (remove_edges);
00816
00817 if (dump_file)
00818 fprintf (dump_file,
00819 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
00820 max_unroll, num_loop_insns (loop));
00821 }
00822
00823
00824
00825 static void
00826 decide_unroll_runtime_iterations (struct loop *loop, int flags)
00827 {
00828 unsigned nunroll, nunroll_by_av, i;
00829 struct niter_desc *desc;
00830
00831 if (!(flags & UAP_UNROLL))
00832 {
00833
00834 return;
00835 }
00836
00837 if (dump_file)
00838 fprintf (dump_file,
00839 "\n;; Considering unrolling loop with runtime "
00840 "computable number of iterations\n");
00841
00842
00843
00844 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
00845 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
00846 if (nunroll > nunroll_by_av)
00847 nunroll = nunroll_by_av;
00848 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
00849 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
00850
00851
00852 if (nunroll <= 1)
00853 {
00854 if (dump_file)
00855 fprintf (dump_file, ";; Not considering loop, is too big\n");
00856 return;
00857 }
00858
00859
00860 desc = get_simple_loop_desc (loop);
00861
00862
00863 if (!desc->simple_p || desc->assumptions)
00864 {
00865 if (dump_file)
00866 fprintf (dump_file,
00867 ";; Unable to prove that the number of iterations "
00868 "can be counted in runtime\n");
00869 return;
00870 }
00871
00872 if (desc->const_iter)
00873 {
00874 if (dump_file)
00875 fprintf (dump_file, ";; Loop iterates constant times\n");
00876 return;
00877 }
00878
00879
00880 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
00881 {
00882 if (dump_file)
00883 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
00884 return;
00885 }
00886
00887
00888
00889 for (i = 1; 2 * i <= nunroll; i *= 2)
00890 continue;
00891
00892 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
00893 loop->lpt_decision.times = i - 1;
00894
00895 if (dump_file)
00896 fprintf (dump_file,
00897 ";; Decided to unroll the runtime computable "
00898 "times rolling loop, %d times.\n",
00899 loop->lpt_decision.times);
00900 }
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933 static void
00934 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
00935 {
00936 rtx old_niter, niter, init_code, branch_code, tmp;
00937 unsigned i, j, p;
00938 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
00939 unsigned n_dom_bbs;
00940 sbitmap wont_exit;
00941 int may_exit_copy;
00942 unsigned n_peel, n_remove_edges;
00943 edge *remove_edges, e;
00944 bool extra_zero_check, last_may_exit;
00945 unsigned max_unroll = loop->lpt_decision.times;
00946 struct niter_desc *desc = get_simple_loop_desc (loop);
00947 bool exit_at_end = loop_exit_at_end_p (loop);
00948 struct opt_info *opt_info = NULL;
00949 bool ok;
00950
00951 if (flag_split_ivs_in_unroller
00952 || flag_variable_expansion_in_unroller)
00953 opt_info = analyze_insns_in_loop (loop);
00954
00955
00956 dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
00957 n_dom_bbs = 0;
00958
00959 body = get_loop_body (loop);
00960 for (i = 0; i < loop->num_nodes; i++)
00961 {
00962 unsigned nldom;
00963 basic_block *ldom;
00964
00965 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
00966 for (j = 0; j < nldom; j++)
00967 if (!flow_bb_inside_loop_p (loop, ldom[j]))
00968 dom_bbs[n_dom_bbs++] = ldom[j];
00969
00970 free (ldom);
00971 }
00972 free (body);
00973
00974 if (!exit_at_end)
00975 {
00976
00977
00978 may_exit_copy = 0;
00979 n_peel = max_unroll - 1;
00980 extra_zero_check = true;
00981 last_may_exit = false;
00982 }
00983 else
00984 {
00985
00986
00987 may_exit_copy = max_unroll;
00988 n_peel = max_unroll;
00989 extra_zero_check = false;
00990 last_may_exit = true;
00991 }
00992
00993
00994 start_sequence ();
00995 old_niter = niter = gen_reg_rtx (desc->mode);
00996 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
00997 if (tmp != niter)
00998 emit_move_insn (niter, tmp);
00999
01000
01001
01002
01003 niter = expand_simple_binop (desc->mode, AND,
01004 niter,
01005 GEN_INT (max_unroll),
01006 NULL_RTX, 0, OPTAB_LIB_WIDEN);
01007
01008 init_code = get_insns ();
01009 end_sequence ();
01010
01011
01012 loop_split_edge_with (loop_preheader_edge (loop), init_code);
01013
01014 remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
01015 n_remove_edges = 0;
01016
01017 wont_exit = sbitmap_alloc (max_unroll + 2);
01018
01019
01020
01021
01022
01023 sbitmap_zero (wont_exit);
01024 if (extra_zero_check
01025 && !desc->noloop_assumptions)
01026 SET_BIT (wont_exit, 1);
01027 ezc_swtch = loop_preheader_edge (loop)->src;
01028 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01029 loops, 1,
01030 wont_exit, desc->out_edge,
01031 remove_edges, &n_remove_edges,
01032 DLTHE_FLAG_UPDATE_FREQ);
01033 gcc_assert (ok);
01034
01035
01036 swtch = loop_split_edge_with (loop_preheader_edge (loop),
01037 NULL_RTX);
01038
01039 for (i = 0; i < n_peel; i++)
01040 {
01041
01042 sbitmap_zero (wont_exit);
01043 if (i != n_peel - 1 || !last_may_exit)
01044 SET_BIT (wont_exit, 1);
01045 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01046 loops, 1,
01047 wont_exit, desc->out_edge,
01048 remove_edges, &n_remove_edges,
01049 DLTHE_FLAG_UPDATE_FREQ);
01050 gcc_assert (ok);
01051
01052
01053 j = n_peel - i - (extra_zero_check ? 0 : 1);
01054 p = REG_BR_PROB_BASE / (i + 2);
01055
01056 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01057 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
01058 block_label (preheader), p,
01059 NULL_RTX);
01060
01061 swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
01062 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
01063 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
01064 e = make_edge (swtch, preheader,
01065 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
01066 e->probability = p;
01067 }
01068
01069 if (extra_zero_check)
01070 {
01071
01072 p = REG_BR_PROB_BASE / (max_unroll + 1);
01073 swtch = ezc_swtch;
01074 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01075 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
01076 block_label (preheader), p,
01077 NULL_RTX);
01078
01079 swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
01080 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
01081 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
01082 e = make_edge (swtch, preheader,
01083 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
01084 e->probability = p;
01085 }
01086
01087
01088 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
01089
01090
01091
01092 sbitmap_ones (wont_exit);
01093 RESET_BIT (wont_exit, may_exit_copy);
01094 opt_info_start_duplication (opt_info);
01095
01096 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
01097 loops, max_unroll,
01098 wont_exit, desc->out_edge,
01099 remove_edges, &n_remove_edges,
01100 DLTHE_FLAG_UPDATE_FREQ
01101 | (opt_info
01102 ? DLTHE_RECORD_COPY_NUMBER
01103 : 0));
01104 gcc_assert (ok);
01105
01106 if (opt_info)
01107 {
01108 apply_opt_in_copies (opt_info, max_unroll, true, true);
01109 free_opt_info (opt_info);
01110 }
01111
01112 free (wont_exit);
01113
01114 if (exit_at_end)
01115 {
01116 basic_block exit_block = get_bb_copy (desc->in_edge->src);
01117
01118
01119
01120 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
01121 {
01122 desc->out_edge = EDGE_SUCC (exit_block, 0);
01123 desc->in_edge = EDGE_SUCC (exit_block, 1);
01124 }
01125 else
01126 {
01127 desc->out_edge = EDGE_SUCC (exit_block, 1);
01128 desc->in_edge = EDGE_SUCC (exit_block, 0);
01129 }
01130 }
01131
01132
01133 for (i = 0; i < n_remove_edges; i++)
01134 remove_path (loops, remove_edges[i]);
01135 free (remove_edges);
01136
01137
01138
01139
01140
01141 gcc_assert (!desc->const_iter);
01142 desc->niter_expr =
01143 simplify_gen_binary (UDIV, desc->mode, old_niter,
01144 GEN_INT (max_unroll + 1));
01145 desc->niter_max /= max_unroll + 1;
01146 if (exit_at_end)
01147 {
01148 desc->niter_expr =
01149 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
01150 desc->noloop_assumptions = NULL_RTX;
01151 desc->niter_max--;
01152 }
01153
01154 if (dump_file)
01155 fprintf (dump_file,
01156 ";; Unrolled loop %d times, counting # of iterations "
01157 "in runtime, %i insns\n",
01158 max_unroll, num_loop_insns (loop));
01159
01160 if (dom_bbs)
01161 free (dom_bbs);
01162 }
01163
01164
01165 static void
01166 decide_peel_simple (struct loop *loop, int flags)
01167 {
01168 unsigned npeel;
01169 struct niter_desc *desc;
01170
01171 if (!(flags & UAP_PEEL))
01172 {
01173
01174 return;
01175 }
01176
01177 if (dump_file)
01178 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
01179
01180
01181 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
01182 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
01183 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
01184
01185
01186 if (!npeel)
01187 {
01188 if (dump_file)
01189 fprintf (dump_file, ";; Not considering loop, is too big\n");
01190 return;
01191 }
01192
01193
01194 desc = get_simple_loop_desc (loop);
01195
01196
01197 if (desc->simple_p && !desc->assumptions && desc->const_iter)
01198 {
01199 if (dump_file)
01200 fprintf (dump_file, ";; Loop iterates constant times\n");
01201 return;
01202 }
01203
01204
01205
01206 if (num_loop_branches (loop) > 1)
01207 {
01208 if (dump_file)
01209 fprintf (dump_file, ";; Not peeling, contains branches\n");
01210 return;
01211 }
01212
01213 if (loop->header->count)
01214 {
01215 unsigned niter = expected_loop_iterations (loop);
01216 if (niter + 1 > npeel)
01217 {
01218 if (dump_file)
01219 {
01220 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
01221 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
01222 (HOST_WIDEST_INT) (niter + 1));
01223 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
01224 npeel);
01225 }
01226 return;
01227 }
01228 npeel = niter + 1;
01229 }
01230 else
01231 {
01232
01233
01234 if (dump_file)
01235 fprintf (dump_file,
01236 ";; Not peeling loop, no evidence it will be profitable\n");
01237 return;
01238 }
01239
01240
01241 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
01242 loop->lpt_decision.times = npeel;
01243
01244 if (dump_file)
01245 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
01246 loop->lpt_decision.times);
01247 }
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263 static void
01264 peel_loop_simple (struct loops *loops, struct loop *loop)
01265 {
01266 sbitmap wont_exit;
01267 unsigned npeel = loop->lpt_decision.times;
01268 struct niter_desc *desc = get_simple_loop_desc (loop);
01269 struct opt_info *opt_info = NULL;
01270 bool ok;
01271
01272 if (flag_split_ivs_in_unroller && npeel > 1)
01273 opt_info = analyze_insns_in_loop (loop);
01274
01275 wont_exit = sbitmap_alloc (npeel + 1);
01276 sbitmap_zero (wont_exit);
01277
01278 opt_info_start_duplication (opt_info);
01279
01280 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01281 loops, npeel, wont_exit,
01282 NULL, NULL,
01283 NULL, DLTHE_FLAG_UPDATE_FREQ
01284 | (opt_info
01285 ? DLTHE_RECORD_COPY_NUMBER
01286 : 0));
01287 gcc_assert (ok);
01288
01289 free (wont_exit);
01290
01291 if (opt_info)
01292 {
01293 apply_opt_in_copies (opt_info, npeel, false, false);
01294 free_opt_info (opt_info);
01295 }
01296
01297 if (desc->simple_p)
01298 {
01299 if (desc->const_iter)
01300 {
01301 desc->niter -= npeel;
01302 desc->niter_expr = GEN_INT (desc->niter);
01303 desc->noloop_assumptions = NULL_RTX;
01304 }
01305 else
01306 {
01307
01308
01309
01310
01311 free_simple_loop_desc (loop);
01312 }
01313 }
01314 if (dump_file)
01315 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
01316 }
01317
01318
01319 static void
01320 decide_unroll_stupid (struct loop *loop, int flags)
01321 {
01322 unsigned nunroll, nunroll_by_av, i;
01323 struct niter_desc *desc;
01324
01325 if (!(flags & UAP_UNROLL_ALL))
01326 {
01327
01328 return;
01329 }
01330
01331 if (dump_file)
01332 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
01333
01334
01335
01336 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
01337 nunroll_by_av
01338 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
01339 if (nunroll > nunroll_by_av)
01340 nunroll = nunroll_by_av;
01341 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
01342 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
01343
01344
01345 if (nunroll <= 1)
01346 {
01347 if (dump_file)
01348 fprintf (dump_file, ";; Not considering loop, is too big\n");
01349 return;
01350 }
01351
01352
01353 desc = get_simple_loop_desc (loop);
01354
01355
01356 if (desc->simple_p && !desc->assumptions)
01357 {
01358 if (dump_file)
01359 fprintf (dump_file, ";; The loop is simple\n");
01360 return;
01361 }
01362
01363
01364
01365 if (num_loop_branches (loop) > 1)
01366 {
01367 if (dump_file)
01368 fprintf (dump_file, ";; Not unrolling, contains branches\n");
01369 return;
01370 }
01371
01372
01373 if (loop->header->count
01374 && expected_loop_iterations (loop) < 2 * nunroll)
01375 {
01376 if (dump_file)
01377 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
01378 return;
01379 }
01380
01381
01382
01383
01384 for (i = 1; 2 * i <= nunroll; i *= 2)
01385 continue;
01386
01387 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
01388 loop->lpt_decision.times = i - 1;
01389
01390 if (dump_file)
01391 fprintf (dump_file,
01392 ";; Decided to unroll the loop stupidly, %d times.\n",
01393 loop->lpt_decision.times);
01394 }
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413 static void
01414 unroll_loop_stupid (struct loops *loops, struct loop *loop)
01415 {
01416 sbitmap wont_exit;
01417 unsigned nunroll = loop->lpt_decision.times;
01418 struct niter_desc *desc = get_simple_loop_desc (loop);
01419 struct opt_info *opt_info = NULL;
01420 bool ok;
01421
01422 if (flag_split_ivs_in_unroller
01423 || flag_variable_expansion_in_unroller)
01424 opt_info = analyze_insns_in_loop (loop);
01425
01426
01427 wont_exit = sbitmap_alloc (nunroll + 1);
01428 sbitmap_zero (wont_exit);
01429 opt_info_start_duplication (opt_info);
01430
01431 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
01432 loops, nunroll, wont_exit,
01433 NULL, NULL, NULL,
01434 DLTHE_FLAG_UPDATE_FREQ
01435 | (opt_info
01436 ? DLTHE_RECORD_COPY_NUMBER
01437 : 0));
01438 gcc_assert (ok);
01439
01440 if (opt_info)
01441 {
01442 apply_opt_in_copies (opt_info, nunroll, true, true);
01443 free_opt_info (opt_info);
01444 }
01445
01446 free (wont_exit);
01447
01448 if (desc->simple_p)
01449 {
01450
01451
01452
01453
01454
01455
01456 desc->simple_p = false;
01457 }
01458
01459 if (dump_file)
01460 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
01461 nunroll, num_loop_insns (loop));
01462 }
01463
01464
01465
01466 static hashval_t
01467 si_info_hash (const void *ivts)
01468 {
01469 return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
01470 }
01471
01472
01473
01474 static int
01475 si_info_eq (const void *ivts1, const void *ivts2)
01476 {
01477 const struct iv_to_split *i1 = ivts1;
01478 const struct iv_to_split *i2 = ivts2;
01479
01480 return i1->insn == i2->insn;
01481 }
01482
01483
01484
01485 static hashval_t
01486 ve_info_hash (const void *ves)
01487 {
01488 return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
01489 }
01490
01491
01492
01493
01494 static int
01495 ve_info_eq (const void *ivts1, const void *ivts2)
01496 {
01497 const struct var_to_expand *i1 = ivts1;
01498 const struct var_to_expand *i2 = ivts2;
01499
01500 return i1->insn == i2->insn;
01501 }
01502
01503
01504
01505 bool
01506 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
01507 {
01508 basic_block *body, bb;
01509 unsigned i;
01510 int count_ref = 0;
01511 rtx insn;
01512
01513 body = get_loop_body (loop);
01514 for (i = 0; i < loop->num_nodes; i++)
01515 {
01516 bb = body[i];
01517
01518 FOR_BB_INSNS (bb, insn)
01519 {
01520 if (rtx_referenced_p (reg, insn))
01521 count_ref++;
01522 }
01523 }
01524 return (count_ref == 1);
01525 }
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550 static struct var_to_expand *
01551 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
01552 {
01553 rtx set, dest, src, op1;
01554 struct var_to_expand *ves;
01555 enum machine_mode mode1, mode2;
01556
01557 set = single_set (insn);
01558 if (!set)
01559 return NULL;
01560
01561 dest = SET_DEST (set);
01562 src = SET_SRC (set);
01563
01564 if (GET_CODE (src) != PLUS
01565 && GET_CODE (src) != MINUS
01566 && GET_CODE (src) != MULT)
01567 return NULL;
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578 if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
01579 return NULL;
01580
01581 if (!XEXP (src, 0))
01582 return NULL;
01583
01584 op1 = XEXP (src, 0);
01585
01586 if (!REG_P (dest)
01587 && !(GET_CODE (dest) == SUBREG
01588 && REG_P (SUBREG_REG (dest))))
01589 return NULL;
01590
01591 if (!rtx_equal_p (dest, op1))
01592 return NULL;
01593
01594 if (!referenced_in_one_insn_in_loop_p (loop, dest))
01595 return NULL;
01596
01597 if (rtx_referenced_p (dest, XEXP (src, 1)))
01598 return NULL;
01599
01600 mode1 = GET_MODE (dest);
01601 mode2 = GET_MODE (XEXP (src, 1));
01602 if ((FLOAT_MODE_P (mode1)
01603 || FLOAT_MODE_P (mode2))
01604 && !flag_unsafe_math_optimizations)
01605 return NULL;
01606
01607
01608 ves = XNEW (struct var_to_expand);
01609 ves->insn = insn;
01610 ves->var_expansions = VEC_alloc (rtx, heap, 1);
01611 ves->reg = copy_rtx (dest);
01612 ves->op = GET_CODE (src);
01613 ves->expansion_count = 0;
01614 ves->reuse_expansion = 0;
01615 return ves;
01616 }
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635
01636
01637
01638
01639
01640
01641
01642
01643 static struct iv_to_split *
01644 analyze_iv_to_split_insn (rtx insn)
01645 {
01646 rtx set, dest;
01647 struct rtx_iv iv;
01648 struct iv_to_split *ivts;
01649 bool ok;
01650
01651
01652
01653 set = single_set (insn);
01654 if (!set)
01655 return NULL;
01656
01657 dest = SET_DEST (set);
01658 if (!REG_P (dest))
01659 return NULL;
01660
01661 if (!biv_p (insn, dest))
01662 return NULL;
01663
01664 ok = iv_analyze_result (insn, dest, &iv);
01665
01666
01667
01668
01669
01670
01671
01672
01673
01674 if (! ok)
01675 return NULL;
01676
01677 if (iv.step == const0_rtx
01678 || iv.mode != iv.extend_mode)
01679 return NULL;
01680
01681
01682 ivts = XNEW (struct iv_to_split);
01683 ivts->insn = insn;
01684 ivts->base_var = NULL_RTX;
01685 ivts->step = iv.step;
01686 ivts->n_loc = 1;
01687 ivts->loc[0] = 1;
01688
01689 return ivts;
01690 }
01691
01692
01693
01694
01695
01696
01697 static struct opt_info *
01698 analyze_insns_in_loop (struct loop *loop)
01699 {
01700 basic_block *body, bb;
01701 unsigned i, num_edges = 0;
01702 struct opt_info *opt_info = XCNEW (struct opt_info);
01703 rtx insn;
01704 struct iv_to_split *ivts = NULL;
01705 struct var_to_expand *ves = NULL;
01706 PTR *slot1;
01707 PTR *slot2;
01708 edge *edges = get_loop_exit_edges (loop, &num_edges);
01709 bool can_apply = false;
01710
01711 iv_analysis_loop_init (loop);
01712
01713 body = get_loop_body (loop);
01714
01715 if (flag_split_ivs_in_unroller)
01716 opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
01717 si_info_hash, si_info_eq, free);
01718
01719
01720 if (!loop_preheader_edge (loop)->src)
01721 {
01722 loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01723 opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01724 }
01725 else
01726 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
01727
01728 if (num_edges == 1
01729 && !(edges[0]->flags & EDGE_COMPLEX))
01730 {
01731 opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
01732 can_apply = true;
01733 }
01734
01735 if (flag_variable_expansion_in_unroller
01736 && can_apply)
01737 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
01738 ve_info_hash, ve_info_eq, free);
01739
01740 for (i = 0; i < loop->num_nodes; i++)
01741 {
01742 bb = body[i];
01743 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
01744 continue;
01745
01746 FOR_BB_INSNS (bb, insn)
01747 {
01748 if (!INSN_P (insn))
01749 continue;
01750
01751 if (opt_info->insns_to_split)
01752 ivts = analyze_iv_to_split_insn (insn);
01753
01754 if (ivts)
01755 {
01756 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
01757 *slot1 = ivts;
01758 continue;
01759 }
01760
01761 if (opt_info->insns_with_var_to_expand)
01762 ves = analyze_insn_to_expand_var (loop, insn);
01763
01764 if (ves)
01765 {
01766 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
01767 *slot2 = ves;
01768 }
01769 }
01770 }
01771
01772 free (edges);
01773 free (body);
01774 return opt_info;
01775 }
01776
01777
01778
01779
01780 static void
01781 opt_info_start_duplication (struct opt_info *opt_info)
01782 {
01783 if (opt_info)
01784 opt_info->first_new_block = last_basic_block;
01785 }
01786
01787
01788
01789
01790
01791
01792 static unsigned
01793 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
01794 {
01795 if (unrolling)
01796 {
01797
01798
01799 return n_copy;
01800 }
01801 else
01802 {
01803
01804
01805 if (n_copy)
01806 return n_copy - 1;
01807 else
01808 return n_copies;
01809 }
01810 }
01811
01812
01813
01814
01815 static rtx *
01816 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
01817 {
01818 unsigned i;
01819 rtx *ret = &expr;
01820
01821 for (i = 0; i < ivts->n_loc; i++)
01822 ret = &XEXP (*ret, ivts->loc[i]);
01823
01824 return ret;
01825 }
01826
01827
01828
01829
01830 static int
01831 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
01832 {
01833 struct iv_to_split *ivts = *slot;
01834 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
01835
01836 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
01837
01838 return 1;
01839 }
01840
01841
01842
01843
01844 static void
01845 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
01846 {
01847 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
01848 rtx seq;
01849
01850 start_sequence ();
01851 expr = force_operand (expr, ivts->base_var);
01852 if (expr != ivts->base_var)
01853 emit_move_insn (ivts->base_var, expr);
01854 seq = get_insns ();
01855 end_sequence ();
01856
01857 emit_insn_before (seq, insn);
01858 }
01859
01860
01861
01862
01863 static void
01864 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
01865 {
01866 rtx expr, *loc, seq, incr, var;
01867 enum machine_mode mode = GET_MODE (ivts->base_var);
01868 rtx src, dest, set;
01869
01870
01871 if (!delta)
01872 expr = ivts->base_var;
01873 else
01874 {
01875 incr = simplify_gen_binary (MULT, mode,
01876 ivts->step, gen_int_mode (delta, mode));
01877 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
01878 ivts->base_var, incr);
01879 }
01880
01881
01882 loc = get_ivts_expr (single_set (insn), ivts);
01883
01884
01885 if (validate_change (insn, loc, expr, 0))
01886 return;
01887
01888
01889 start_sequence ();
01890 var = gen_reg_rtx (mode);
01891 expr = force_operand (expr, var);
01892 if (expr != var)
01893 emit_move_insn (var, expr);
01894 seq = get_insns ();
01895 end_sequence ();
01896 emit_insn_before (seq, insn);
01897
01898 if (validate_change (insn, loc, var, 0))
01899 return;
01900
01901
01902
01903 set = single_set (insn);
01904 gcc_assert (set);
01905
01906 start_sequence ();
01907 *loc = var;
01908 src = copy_rtx (SET_SRC (set));
01909 dest = copy_rtx (SET_DEST (set));
01910 src = force_operand (src, dest);
01911 if (src != dest)
01912 emit_move_insn (dest, src);
01913 seq = get_insns ();
01914 end_sequence ();
01915
01916 emit_insn_before (seq, insn);
01917 delete_insn (insn);
01918 }
01919
01920
01921
01922
01923 static rtx
01924 get_expansion (struct var_to_expand *ve)
01925 {
01926 rtx reg;
01927
01928 if (ve->reuse_expansion == 0)
01929 reg = ve->reg;
01930 else
01931 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
01932
01933 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
01934 ve->reuse_expansion = 0;
01935 else
01936 ve->reuse_expansion++;
01937
01938 return reg;
01939 }
01940
01941
01942
01943
01944
01945 static void
01946 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
01947 {
01948 rtx new_reg, set;
01949 bool really_new_expansion = false;
01950
01951 set = single_set (insn);
01952 gcc_assert (set);
01953
01954
01955
01956 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
01957 {
01958 really_new_expansion = true;
01959 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
01960 }
01961 else
01962 new_reg = get_expansion (ve);
01963
01964 validate_change (insn, &SET_DEST (set), new_reg, 1);
01965 validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
01966
01967 if (apply_change_group ())
01968 if (really_new_expansion)
01969 {
01970 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
01971 ve->expansion_count++;
01972 }
01973 }
01974
01975
01976
01977
01978
01979
01980 static int
01981 insert_var_expansion_initialization (void **slot, void *place_p)
01982 {
01983 struct var_to_expand *ve = *slot;
01984 basic_block place = (basic_block)place_p;
01985 rtx seq, var, zero_init, insn;
01986 unsigned i;
01987
01988 if (VEC_length (rtx, ve->var_expansions) == 0)
01989 return 1;
01990
01991 start_sequence ();
01992 if (ve->op == PLUS || ve->op == MINUS)
01993 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
01994 {
01995 zero_init = CONST0_RTX (GET_MODE (var));
01996 emit_move_insn (var, zero_init);
01997 }
01998 else if (ve->op == MULT)
01999 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
02000 {
02001 zero_init = CONST1_RTX (GET_MODE (var));
02002 emit_move_insn (var, zero_init);
02003 }
02004
02005 seq = get_insns ();
02006 end_sequence ();
02007
02008 insn = BB_HEAD (place);
02009 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
02010 insn = NEXT_INSN (insn);
02011
02012 emit_insn_after (seq, insn);
02013
02014 return 1;
02015 }
02016
02017
02018
02019
02020
02021
02022 static int
02023 combine_var_copies_in_loop_exit (void **slot, void *place_p)
02024 {
02025 struct var_to_expand *ve = *slot;
02026 basic_block place = (basic_block)place_p;
02027 rtx sum = ve->reg;
02028 rtx expr, seq, var, insn;
02029 unsigned i;
02030
02031 if (VEC_length (rtx, ve->var_expansions) == 0)
02032 return 1;
02033
02034 start_sequence ();
02035 if (ve->op == PLUS || ve->op == MINUS)
02036 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
02037 {
02038 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
02039 var, sum);
02040 }
02041 else if (ve->op == MULT)
02042 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
02043 {
02044 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
02045 var, sum);
02046 }
02047
02048 expr = force_operand (sum, ve->reg);
02049 if (expr != ve->reg)
02050 emit_move_insn (ve->reg, expr);
02051 seq = get_insns ();
02052 end_sequence ();
02053
02054 insn = BB_HEAD (place);
02055 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
02056 insn = NEXT_INSN (insn);
02057
02058 emit_insn_after (seq, insn);
02059
02060
02061 return 1;
02062 }
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073 static void
02074 apply_opt_in_copies (struct opt_info *opt_info,
02075 unsigned n_copies, bool unrolling,
02076 bool rewrite_original_loop)
02077 {
02078 unsigned i, delta;
02079 basic_block bb, orig_bb;
02080 rtx insn, orig_insn, next;
02081 struct iv_to_split ivts_templ, *ivts;
02082 struct var_to_expand ve_templ, *ves;
02083
02084
02085
02086 gcc_assert (!unrolling || rewrite_original_loop);
02087
02088
02089 if (opt_info->insns_to_split)
02090 htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
02091
02092 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
02093 {
02094 bb = BASIC_BLOCK (i);
02095 orig_bb = get_bb_original (bb);
02096
02097
02098
02099 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
02100 unrolling);
02101 bb->aux = 0;
02102 orig_insn = BB_HEAD (orig_bb);
02103 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
02104 {
02105 next = NEXT_INSN (insn);
02106 if (!INSN_P (insn))
02107 continue;
02108
02109 while (!INSN_P (orig_insn))
02110 orig_insn = NEXT_INSN (orig_insn);
02111
02112 ivts_templ.insn = orig_insn;
02113 ve_templ.insn = orig_insn;
02114
02115
02116 if (opt_info->insns_to_split)
02117 {
02118 ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
02119
02120 if (ivts)
02121 {
02122 gcc_assert (GET_CODE (PATTERN (insn))
02123 == GET_CODE (PATTERN (orig_insn)));
02124
02125 if (!delta)
02126 insert_base_initialization (ivts, insn);
02127 split_iv (ivts, insn, delta);
02128 }
02129 }
02130
02131 if (unrolling && opt_info->insns_with_var_to_expand)
02132 {
02133 ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
02134 if (ves)
02135 {
02136 gcc_assert (GET_CODE (PATTERN (insn))
02137 == GET_CODE (PATTERN (orig_insn)));
02138 expand_var_during_unrolling (ves, insn);
02139 }
02140 }
02141 orig_insn = NEXT_INSN (orig_insn);
02142 }
02143 }
02144
02145 if (!rewrite_original_loop)
02146 return;
02147
02148
02149
02150 if (opt_info->insns_with_var_to_expand)
02151 {
02152 htab_traverse (opt_info->insns_with_var_to_expand,
02153 insert_var_expansion_initialization,
02154 opt_info->loop_preheader);
02155 htab_traverse (opt_info->insns_with_var_to_expand,
02156 combine_var_copies_in_loop_exit,
02157 opt_info->loop_exit);
02158 }
02159
02160
02161
02162
02163 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
02164 {
02165 bb = BASIC_BLOCK (i);
02166 orig_bb = get_bb_original (bb);
02167 if (get_bb_copy (orig_bb) != bb)
02168 continue;
02169
02170 delta = determine_split_iv_delta (0, n_copies, unrolling);
02171 for (orig_insn = BB_HEAD (orig_bb);
02172 orig_insn != NEXT_INSN (BB_END (bb));
02173 orig_insn = next)
02174 {
02175 next = NEXT_INSN (orig_insn);
02176
02177 if (!INSN_P (orig_insn))
02178 continue;
02179
02180 ivts_templ.insn = orig_insn;
02181 if (opt_info->insns_to_split)
02182 {
02183 ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
02184 if (ivts)
02185 {
02186 if (!delta)
02187 insert_base_initialization (ivts, orig_insn);
02188 split_iv (ivts, orig_insn, delta);
02189 continue;
02190 }
02191 }
02192
02193 }
02194 }
02195 }
02196
02197
02198
02199
02200 static int
02201 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
02202 {
02203 struct var_to_expand *ve = *slot;
02204
02205 VEC_free (rtx, heap, ve->var_expansions);
02206
02207
02208 return 1;
02209 }
02210
02211
02212
02213 static void
02214 free_opt_info (struct opt_info *opt_info)
02215 {
02216 if (opt_info->insns_to_split)
02217 htab_delete (opt_info->insns_to_split);
02218 if (opt_info->insns_with_var_to_expand)
02219 {
02220 htab_traverse (opt_info->insns_with_var_to_expand,
02221 release_var_copies, NULL);
02222 htab_delete (opt_info->insns_with_var_to_expand);
02223 }
02224 free (opt_info);
02225 }