00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "config.h"
00022 #include "system.h"
00023 #include "coretypes.h"
00024 #include "tm.h"
00025 #include "rtl.h"
00026 #include "hard-reg-set.h"
00027 #include "obstack.h"
00028 #include "basic-block.h"
00029 #include "cfgloop.h"
00030 #include "cfglayout.h"
00031 #include "params.h"
00032 #include "output.h"
00033 #include "expr.h"
00034 #include "hashtab.h"
00035 #include "recog.h"
00036 #include "varray.h"
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 struct iv_to_split
00076 {
00077 rtx insn;
00078 rtx base_var;
00079
00080 rtx step;
00081 unsigned n_loc;
00082 unsigned loc[3];
00083
00084
00085
00086 };
00087
00088
00089
00090 struct var_to_expand
00091 {
00092 rtx insn;
00093 rtx reg;
00094 varray_type var_expansions;
00095 enum rtx_code op;
00096
00097 int expansion_count;
00098 int reuse_expansion;
00099
00100
00101
00102 };
00103
00104
00105
00106
00107 struct opt_info
00108 {
00109 htab_t insns_to_split;
00110 htab_t insns_with_var_to_expand;
00111
00112 unsigned first_new_block;
00113
00114 basic_block loop_exit;
00115 basic_block loop_preheader;
00116 };
00117
00118 static void decide_unrolling_and_peeling (struct loops *, int);
00119 static void peel_loops_completely (struct loops *, int);
00120 static void decide_peel_simple (struct loop *, int);
00121 static void decide_peel_once_rolling (struct loop *, int);
00122 static void decide_peel_completely (struct loop *, int);
00123 static void decide_unroll_stupid (struct loop *, int);
00124 static void decide_unroll_constant_iterations (struct loop *, int);
00125 static void decide_unroll_runtime_iterations (struct loop *, int);
00126 static void peel_loop_simple (struct loops *, struct loop *);
00127 static void peel_loop_completely (struct loops *, struct loop *);
00128 static void unroll_loop_stupid (struct loops *, struct loop *);
00129 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
00130 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
00131 static struct opt_info *analyze_insns_in_loop (struct loop *);
00132 static void opt_info_start_duplication (struct opt_info *);
00133 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
00134 static void free_opt_info (struct opt_info *);
00135 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
00136 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
00137 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
00138 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
00139 static int insert_var_expansion_initialization (void **, void *);
00140 static int combine_var_copies_in_loop_exit (void **, void *);
00141 static int release_var_copies (void **, void *);
00142 static rtx get_expansion (struct var_to_expand *);
00143
00144
00145 void
00146 unroll_and_peel_loops (struct loops *loops, int flags)
00147 {
00148 struct loop *loop, *next;
00149 bool check;
00150
00151
00152
00153 peel_loops_completely (loops, flags);
00154
00155
00156 decide_unrolling_and_peeling (loops, flags);
00157
00158 loop = loops->tree_root;
00159 while (loop->inner)
00160 loop = loop->inner;
00161
00162
00163 while (loop != loops->tree_root)
00164 {
00165 if (loop->next)
00166 {
00167 next = loop->next;
00168 while (next->inner)
00169 next = next->inner;
00170 }
00171 else
00172 next = loop->outer;
00173
00174 check = true;
00175
00176 switch (loop->lpt_decision.decision)
00177 {
00178 case LPT_PEEL_COMPLETELY:
00179
00180 gcc_unreachable ();
00181 case LPT_PEEL_SIMPLE:
00182 peel_loop_simple (loops, loop);
00183 break;
00184 case LPT_UNROLL_CONSTANT:
00185 unroll_loop_constant_iterations (loops, loop);
00186 break;
00187 case LPT_UNROLL_RUNTIME:
00188 unroll_loop_runtime_iterations (loops, loop);
00189 break;
00190 case LPT_UNROLL_STUPID:
00191 unroll_loop_stupid (loops, loop);
00192 break;
00193 case LPT_NONE:
00194 check = false;
00195 break;
00196 default:
00197 gcc_unreachable ();
00198 }
00199 if (check)
00200 {
00201 #ifdef ENABLE_CHECKING
00202 verify_dominators (CDI_DOMINATORS);
00203 verify_loop_structure (loops);
00204 #endif
00205 }
00206 loop = next;
00207 }
00208
00209 iv_analysis_done ();
00210 }
00211
00212
00213
00214 static bool
00215 loop_exit_at_end_p (struct loop *loop)
00216 {
00217 struct niter_desc *desc = get_simple_loop_desc (loop);
00218 rtx insn;
00219
00220 if (desc->in_edge->dest != loop->latch)
00221 return false;
00222
00223
00224 FOR_BB_INSNS (loop->latch, insn)
00225 {
00226 if (INSN_P (insn))
00227 return false;
00228 }
00229
00230 return true;
00231 }
00232
00233
00234 static void
00235 peel_loops_completely (struct loops *loops, int flags)
00236 {
00237 struct loop *loop, *next;
00238
00239 loop = loops->tree_root;
00240 while (loop->inner)
00241 loop = loop->inner;
00242
00243 while (loop != loops->tree_root)
00244 {
00245 if (loop->next)
00246 {
00247 next = loop->next;
00248 while (next->inner)
00249 next = next->inner;
00250 }
00251 else
00252 next = loop->outer;
00253
00254 loop->lpt_decision.decision = LPT_NONE;
00255
00256 if (dump_file)
00257 fprintf (dump_file,
00258 "\n;; *** Considering loop %d for complete peeling ***\n",
00259 loop->num);
00260
00261 loop->ninsns = num_loop_insns (loop);
00262
00263 decide_peel_once_rolling (loop, flags);
00264 if (loop->lpt_decision.decision == LPT_NONE)
00265 decide_peel_completely (loop, flags);
00266
00267 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
00268 {
00269 peel_loop_completely (loops, loop);
00270 #ifdef ENABLE_CHECKING
00271 verify_dominators (CDI_DOMINATORS);
00272 verify_loop_structure (loops);
00273 #endif
00274 }
00275 loop = next;
00276 }
00277 }
00278
00279
00280 static void
00281 decide_unrolling_and_peeling (struct loops *loops, int flags)
00282 {
00283 struct loop *loop = loops->tree_root, *next;
00284
00285 while (loop->inner)
00286 loop = loop->inner;
00287
00288
00289 while (loop != loops->tree_root)
00290 {
00291 if (loop->next)
00292 {
00293 next = loop->next;
00294 while (next->inner)
00295 next = next->inner;
00296 }
00297 else
00298 next = loop->outer;
00299
00300 loop->lpt_decision.decision = LPT_NONE;
00301
00302 if (dump_file)
00303 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
00304
00305
00306 if (!maybe_hot_bb_p (loop->header))
00307 {
00308 if (dump_file)
00309 fprintf (dump_file, ";; Not considering loop, cold area\n");
00310 loop = next;
00311 continue;
00312 }
00313
00314
00315 if (!can_duplicate_loop_p (loop))
00316 {
00317 if (dump_file)
00318 fprintf (dump_file,
00319 ";; Not considering loop, cannot duplicate\n");
00320 loop = next;
00321 continue;
00322 }
00323
00324
00325 if (loop->inner)
00326 {
00327 if (dump_file)
00328 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
00329 loop = next;
00330 continue;
00331 }
00332
00333 loop->ninsns = num_loop_insns (loop);
00334 loop->av_ninsns = average_num_loop_insns (loop);
00335
00336
00337
00338
00339 decide_unroll_constant_iterations (loop, flags);
00340 if (loop->lpt_decision.decision == LPT_NONE)
00341 decide_unroll_runtime_iterations (loop, flags);
00342 if (loop->lpt_decision.decision == LPT_NONE)
00343 decide_unroll_stupid (loop, flags);
00344 if (loop->lpt_decision.decision == LPT_NONE)
00345 decide_peel_simple (loop, flags);
00346
00347 loop = next;
00348 }
00349 }
00350
00351
00352
00353 static void
00354 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
00355 {
00356 struct niter_desc *desc;
00357
00358 if (dump_file)
00359 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
00360
00361
00362 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
00363 {
00364 if (dump_file)
00365 fprintf (dump_file, ";; Not considering loop, is too big\n");
00366 return;
00367 }
00368
00369
00370 desc = get_simple_loop_desc (loop);
00371
00372
00373 if (!desc->simple_p
00374 || desc->assumptions
00375 || desc->infinite
00376 || !desc->const_iter
00377 || desc->niter != 0)
00378 {
00379 if (dump_file)
00380 fprintf (dump_file,
00381 ";; Unable to prove that the loop rolls exactly once\n");
00382 return;
00383 }
00384
00385
00386 if (dump_file)
00387 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
00388 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
00389 }
00390
00391
00392 static void
00393 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
00394 {
00395 unsigned npeel;
00396 struct niter_desc *desc;
00397
00398 if (dump_file)
00399 fprintf (dump_file, "\n;; Considering peeling completely\n");
00400
00401
00402 if (loop->inner)
00403 {
00404 if (dump_file)
00405 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
00406 return;
00407 }
00408
00409
00410 if (!maybe_hot_bb_p (loop->header))
00411 {
00412 if (dump_file)
00413 fprintf (dump_file, ";; Not considering loop, cold area\n");
00414 return;
00415 }
00416
00417
00418 if (!can_duplicate_loop_p (loop))
00419 {
00420 if (dump_file)
00421 fprintf (dump_file,
00422 ";; Not considering loop, cannot duplicate\n");
00423 return;
00424 }
00425
00426
00427 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
00428 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
00429 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
00430
00431
00432 if (!npeel)
00433 {
00434 if (dump_file)
00435 fprintf (dump_file, ";; Not considering loop, is too big\n");
00436 return;
00437 }
00438
00439
00440 desc = get_simple_loop_desc (loop);
00441
00442
00443 if (!desc->simple_p
00444 || desc->assumptions
00445 || !desc->const_iter
00446 || desc->infinite)
00447 {
00448 if (dump_file)
00449 fprintf (dump_file,
00450 ";; Unable to prove that the loop iterates constant times\n");
00451 return;
00452 }
00453
00454 if (desc->niter > npeel - 1)
00455 {
00456 if (dump_file)
00457 {
00458 fprintf (dump_file,
00459 ";; Not peeling loop completely, rolls too much (");
00460 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
00461 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
00462 }
00463 return;
00464 }
00465
00466
00467 if (dump_file)
00468 fprintf (dump_file, ";; Decided to peel loop completely\n");
00469 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
00470 }
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486 static void
00487 peel_loop_completely (struct loops *loops, struct loop *loop)
00488 {
00489 sbitmap wont_exit;
00490 unsigned HOST_WIDE_INT npeel;
00491 unsigned n_remove_edges, i;
00492 edge *remove_edges, ein;
00493 struct niter_desc *desc = get_simple_loop_desc (loop);
00494 struct opt_info *opt_info = NULL;
00495
00496 npeel = desc->niter;
00497
00498 if (npeel)
00499 {
00500 wont_exit = sbitmap_alloc (npeel + 1);
00501 sbitmap_ones (wont_exit);
00502 RESET_BIT (wont_exit, 0);
00503 if (desc->noloop_assumptions)
00504 RESET_BIT (wont_exit, 1);
00505
00506 remove_edges = xcalloc (npeel, sizeof (edge));
00507 n_remove_edges = 0;
00508
00509 if (flag_split_ivs_in_unroller)
00510 opt_info = analyze_insns_in_loop (loop);
00511
00512 opt_info_start_duplication (opt_info);
00513 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00514 loops, npeel,
00515 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
00516 DLTHE_FLAG_UPDATE_FREQ))
00517 abort ();
00518
00519 free (wont_exit);
00520
00521 if (opt_info)
00522 {
00523 apply_opt_in_copies (opt_info, npeel, false, true);
00524 free_opt_info (opt_info);
00525 }
00526
00527
00528 for (i = 0; i < n_remove_edges; i++)
00529 remove_path (loops, remove_edges[i]);
00530 free (remove_edges);
00531 }
00532
00533 ein = desc->in_edge;
00534 free_simple_loop_desc (loop);
00535
00536
00537
00538 remove_path (loops, ein);
00539
00540 if (dump_file)
00541 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
00542 }
00543
00544
00545
00546
00547 static void
00548 decide_unroll_constant_iterations (struct loop *loop, int flags)
00549 {
00550 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
00551 struct niter_desc *desc;
00552
00553 if (!(flags & UAP_UNROLL))
00554 {
00555
00556 return;
00557 }
00558
00559 if (dump_file)
00560 fprintf (dump_file,
00561 "\n;; Considering unrolling loop with constant "
00562 "number of iterations\n");
00563
00564
00565
00566 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
00567 nunroll_by_av
00568 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
00569 if (nunroll > nunroll_by_av)
00570 nunroll = nunroll_by_av;
00571 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
00572 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
00573
00574
00575 if (nunroll <= 1)
00576 {
00577 if (dump_file)
00578 fprintf (dump_file, ";; Not considering loop, is too big\n");
00579 return;
00580 }
00581
00582
00583 desc = get_simple_loop_desc (loop);
00584
00585
00586 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
00587 {
00588 if (dump_file)
00589 fprintf (dump_file,
00590 ";; Unable to prove that the loop iterates constant times\n");
00591 return;
00592 }
00593
00594
00595 if (desc->niter < 2 * nunroll)
00596 {
00597 if (dump_file)
00598 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
00599 return;
00600 }
00601
00602
00603
00604
00605
00606 best_copies = 2 * nunroll + 10;
00607
00608 i = 2 * nunroll + 2;
00609 if (i - 1 >= desc->niter)
00610 i = desc->niter - 2;
00611
00612 for (; i >= nunroll - 1; i--)
00613 {
00614 unsigned exit_mod = desc->niter % (i + 1);
00615
00616 if (!loop_exit_at_end_p (loop))
00617 n_copies = exit_mod + i + 1;
00618 else if (exit_mod != (unsigned) i
00619 || desc->noloop_assumptions != NULL_RTX)
00620 n_copies = exit_mod + i + 2;
00621 else
00622 n_copies = i + 1;
00623
00624 if (n_copies < best_copies)
00625 {
00626 best_copies = n_copies;
00627 best_unroll = i;
00628 }
00629 }
00630
00631 if (dump_file)
00632 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
00633 best_unroll + 1, best_copies, nunroll);
00634
00635 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
00636 loop->lpt_decision.times = best_unroll;
00637
00638 if (dump_file)
00639 fprintf (dump_file,
00640 ";; Decided to unroll the constant times rolling loop, %d times.\n",
00641 loop->lpt_decision.times);
00642 }
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663 static void
00664 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
00665 {
00666 unsigned HOST_WIDE_INT niter;
00667 unsigned exit_mod;
00668 sbitmap wont_exit;
00669 unsigned n_remove_edges, i;
00670 edge *remove_edges;
00671 unsigned max_unroll = loop->lpt_decision.times;
00672 struct niter_desc *desc = get_simple_loop_desc (loop);
00673 bool exit_at_end = loop_exit_at_end_p (loop);
00674 struct opt_info *opt_info = NULL;
00675
00676 niter = desc->niter;
00677
00678
00679 gcc_assert (niter > max_unroll + 1);
00680
00681 exit_mod = niter % (max_unroll + 1);
00682
00683 wont_exit = sbitmap_alloc (max_unroll + 1);
00684 sbitmap_ones (wont_exit);
00685
00686 remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
00687 n_remove_edges = 0;
00688 if (flag_split_ivs_in_unroller
00689 || flag_variable_expansion_in_unroller)
00690 opt_info = analyze_insns_in_loop (loop);
00691
00692 if (!exit_at_end)
00693 {
00694
00695
00696
00697
00698 if (dump_file)
00699 fprintf (dump_file, ";; Condition on beginning of loop.\n");
00700
00701
00702 RESET_BIT (wont_exit, 0);
00703 if (desc->noloop_assumptions)
00704 RESET_BIT (wont_exit, 1);
00705
00706 if (exit_mod)
00707 {
00708 opt_info_start_duplication (opt_info);
00709 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00710 loops, exit_mod,
00711 wont_exit, desc->out_edge,
00712 remove_edges, &n_remove_edges,
00713 DLTHE_FLAG_UPDATE_FREQ))
00714 abort ();
00715
00716 if (opt_info && exit_mod > 1)
00717 apply_opt_in_copies (opt_info, exit_mod, false, false);
00718
00719 desc->noloop_assumptions = NULL_RTX;
00720 desc->niter -= exit_mod;
00721 desc->niter_max -= exit_mod;
00722 }
00723
00724 SET_BIT (wont_exit, 1);
00725 }
00726 else
00727 {
00728
00729
00730
00731 if (dump_file)
00732 fprintf (dump_file, ";; Condition on end of loop.\n");
00733
00734
00735
00736
00737 if (exit_mod != max_unroll
00738 || desc->noloop_assumptions)
00739 {
00740 RESET_BIT (wont_exit, 0);
00741 if (desc->noloop_assumptions)
00742 RESET_BIT (wont_exit, 1);
00743
00744 opt_info_start_duplication (opt_info);
00745 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
00746 loops, exit_mod + 1,
00747 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
00748 DLTHE_FLAG_UPDATE_FREQ))
00749 abort ();
00750
00751 if (opt_info && exit_mod > 0)
00752 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
00753
00754 desc->niter -= exit_mod + 1;
00755 desc->niter_max -= exit_mod + 1;
00756 desc->noloop_assumptions = NULL_RTX;
00757
00758 SET_BIT (wont_exit, 0);
00759 SET_BIT (wont_exit, 1);
00760 }
00761
00762 RESET_BIT (wont_exit, max_unroll);
00763 }
00764
00765
00766
00767 opt_info_start_duplication (opt_info);
00768 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
00769 loops, max_unroll,
00770 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
00771 DLTHE_FLAG_UPDATE_FREQ))
00772 abort ();
00773
00774 if (opt_info)
00775 {
00776 apply_opt_in_copies (opt_info, max_unroll, true, true);
00777 free_opt_info (opt_info);
00778 }
00779
00780 free (wont_exit);
00781
00782 if (exit_at_end)
00783 {
00784 basic_block exit_block = desc->in_edge->src->rbi->copy;
00785
00786
00787 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
00788 {
00789 desc->out_edge = EDGE_SUCC (exit_block, 0);
00790 desc->in_edge = EDGE_SUCC (exit_block, 1);
00791 }
00792 else
00793 {
00794 desc->out_edge = EDGE_SUCC (exit_block, 1);
00795 desc->in_edge = EDGE_SUCC (exit_block, 0);
00796 }
00797 }
00798
00799 desc->niter /= max_unroll + 1;
00800 desc->niter_max /= max_unroll + 1;
00801 desc->niter_expr = GEN_INT (desc->niter);
00802
00803
00804 for (i = 0; i < n_remove_edges; i++)
00805 remove_path (loops, remove_edges[i]);
00806 free (remove_edges);
00807
00808 if (dump_file)
00809 fprintf (dump_file,
00810 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
00811 max_unroll, num_loop_insns (loop));
00812 }
00813
00814
00815
00816 static void
00817 decide_unroll_runtime_iterations (struct loop *loop, int flags)
00818 {
00819 unsigned nunroll, nunroll_by_av, i;
00820 struct niter_desc *desc;
00821
00822 if (!(flags & UAP_UNROLL))
00823 {
00824
00825 return;
00826 }
00827
00828 if (dump_file)
00829 fprintf (dump_file,
00830 "\n;; Considering unrolling loop with runtime "
00831 "computable number of iterations\n");
00832
00833
00834
00835 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
00836 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
00837 if (nunroll > nunroll_by_av)
00838 nunroll = nunroll_by_av;
00839 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
00840 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
00841
00842
00843 if (nunroll <= 1)
00844 {
00845 if (dump_file)
00846 fprintf (dump_file, ";; Not considering loop, is too big\n");
00847 return;
00848 }
00849
00850
00851 desc = get_simple_loop_desc (loop);
00852
00853
00854 if (!desc->simple_p || desc->assumptions)
00855 {
00856 if (dump_file)
00857 fprintf (dump_file,
00858 ";; Unable to prove that the number of iterations "
00859 "can be counted in runtime\n");
00860 return;
00861 }
00862
00863 if (desc->const_iter)
00864 {
00865 if (dump_file)
00866 fprintf (dump_file, ";; Loop iterates constant times\n");
00867 return;
00868 }
00869
00870
00871 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
00872 {
00873 if (dump_file)
00874 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
00875 return;
00876 }
00877
00878
00879
00880 for (i = 1; 2 * i <= nunroll; i *= 2)
00881 continue;
00882
00883 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
00884 loop->lpt_decision.times = i - 1;
00885
00886 if (dump_file)
00887 fprintf (dump_file,
00888 ";; Decided to unroll the runtime computable "
00889 "times rolling loop, %d times.\n",
00890 loop->lpt_decision.times);
00891 }
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924 static void
00925 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
00926 {
00927 rtx old_niter, niter, init_code, branch_code, tmp;
00928 unsigned i, j, p;
00929 basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
00930 unsigned n_dom_bbs;
00931 sbitmap wont_exit;
00932 int may_exit_copy;
00933 unsigned n_peel, n_remove_edges;
00934 edge *remove_edges, e;
00935 bool extra_zero_check, last_may_exit;
00936 unsigned max_unroll = loop->lpt_decision.times;
00937 struct niter_desc *desc = get_simple_loop_desc (loop);
00938 bool exit_at_end = loop_exit_at_end_p (loop);
00939 struct opt_info *opt_info = NULL;
00940
00941 if (flag_split_ivs_in_unroller
00942 || flag_variable_expansion_in_unroller)
00943 opt_info = analyze_insns_in_loop (loop);
00944
00945
00946 dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
00947 n_dom_bbs = 0;
00948
00949 body = get_loop_body (loop);
00950 for (i = 0; i < loop->num_nodes; i++)
00951 {
00952 unsigned nldom;
00953 basic_block *ldom;
00954
00955 nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
00956 for (j = 0; j < nldom; j++)
00957 if (!flow_bb_inside_loop_p (loop, ldom[j]))
00958 dom_bbs[n_dom_bbs++] = ldom[j];
00959
00960 free (ldom);
00961 }
00962 free (body);
00963
00964 if (!exit_at_end)
00965 {
00966
00967
00968 may_exit_copy = 0;
00969 n_peel = max_unroll - 1;
00970 extra_zero_check = true;
00971 last_may_exit = false;
00972 }
00973 else
00974 {
00975
00976
00977 may_exit_copy = max_unroll;
00978 n_peel = max_unroll;
00979 extra_zero_check = false;
00980 last_may_exit = true;
00981 }
00982
00983
00984 start_sequence ();
00985 old_niter = niter = gen_reg_rtx (desc->mode);
00986 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
00987 if (tmp != niter)
00988 emit_move_insn (niter, tmp);
00989
00990
00991
00992
00993 niter = expand_simple_binop (desc->mode, AND,
00994 niter,
00995 GEN_INT (max_unroll),
00996 NULL_RTX, 0, OPTAB_LIB_WIDEN);
00997
00998 init_code = get_insns ();
00999 end_sequence ();
01000
01001
01002 loop_split_edge_with (loop_preheader_edge (loop), init_code);
01003
01004 remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
01005 n_remove_edges = 0;
01006
01007 wont_exit = sbitmap_alloc (max_unroll + 2);
01008
01009
01010
01011
01012
01013 sbitmap_zero (wont_exit);
01014 if (extra_zero_check
01015 && !desc->noloop_assumptions)
01016 SET_BIT (wont_exit, 1);
01017 ezc_swtch = loop_preheader_edge (loop)->src;
01018 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01019 loops, 1,
01020 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
01021 DLTHE_FLAG_UPDATE_FREQ))
01022 abort ();
01023
01024
01025 swtch = loop_split_edge_with (loop_preheader_edge (loop),
01026 NULL_RTX);
01027
01028 for (i = 0; i < n_peel; i++)
01029 {
01030
01031 sbitmap_zero (wont_exit);
01032 if (i != n_peel - 1 || !last_may_exit)
01033 SET_BIT (wont_exit, 1);
01034 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01035 loops, 1,
01036 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
01037 DLTHE_FLAG_UPDATE_FREQ))
01038 abort ();
01039
01040
01041 j = n_peel - i - (extra_zero_check ? 0 : 1);
01042 p = REG_BR_PROB_BASE / (i + 2);
01043
01044 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01045 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
01046 block_label (preheader), p, NULL_RTX);
01047
01048 swtch = loop_split_edge_with (EDGE_PRED (swtch, 0), branch_code);
01049 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
01050 EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
01051 e = make_edge (swtch, preheader,
01052 EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
01053 e->probability = p;
01054 }
01055
01056 if (extra_zero_check)
01057 {
01058
01059 p = REG_BR_PROB_BASE / (max_unroll + 1);
01060 swtch = ezc_swtch;
01061 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01062 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
01063 block_label (preheader), p, NULL_RTX);
01064
01065 swtch = loop_split_edge_with (EDGE_SUCC (swtch, 0), branch_code);
01066 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
01067 EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
01068 e = make_edge (swtch, preheader,
01069 EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
01070 e->probability = p;
01071 }
01072
01073
01074 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
01075
01076
01077
01078 sbitmap_ones (wont_exit);
01079 RESET_BIT (wont_exit, may_exit_copy);
01080 opt_info_start_duplication (opt_info);
01081
01082 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
01083 loops, max_unroll,
01084 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
01085 DLTHE_FLAG_UPDATE_FREQ))
01086 abort ();
01087
01088 if (opt_info)
01089 {
01090 apply_opt_in_copies (opt_info, max_unroll, true, true);
01091 free_opt_info (opt_info);
01092 }
01093
01094 free (wont_exit);
01095
01096 if (exit_at_end)
01097 {
01098 basic_block exit_block = desc->in_edge->src->rbi->copy;
01099
01100
01101 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
01102 {
01103 desc->out_edge = EDGE_SUCC (exit_block, 0);
01104 desc->in_edge = EDGE_SUCC (exit_block, 1);
01105 }
01106 else
01107 {
01108 desc->out_edge = EDGE_SUCC (exit_block, 1);
01109 desc->in_edge = EDGE_SUCC (exit_block, 0);
01110 }
01111 }
01112
01113
01114 for (i = 0; i < n_remove_edges; i++)
01115 remove_path (loops, remove_edges[i]);
01116 free (remove_edges);
01117
01118
01119
01120
01121
01122 gcc_assert (!desc->const_iter);
01123 desc->niter_expr =
01124 simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
01125 desc->niter_max /= max_unroll + 1;
01126 if (exit_at_end)
01127 {
01128 desc->niter_expr =
01129 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
01130 desc->noloop_assumptions = NULL_RTX;
01131 desc->niter_max--;
01132 }
01133
01134 if (dump_file)
01135 fprintf (dump_file,
01136 ";; Unrolled loop %d times, counting # of iterations "
01137 "in runtime, %i insns\n",
01138 max_unroll, num_loop_insns (loop));
01139 }
01140
01141
01142 static void
01143 decide_peel_simple (struct loop *loop, int flags)
01144 {
01145 unsigned npeel;
01146 struct niter_desc *desc;
01147
01148 if (!(flags & UAP_PEEL))
01149 {
01150
01151 return;
01152 }
01153
01154 if (dump_file)
01155 fprintf (dump_file, "\n;; Considering simply peeling loop\n");
01156
01157
01158 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
01159 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
01160 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
01161
01162
01163 if (!npeel)
01164 {
01165 if (dump_file)
01166 fprintf (dump_file, ";; Not considering loop, is too big\n");
01167 return;
01168 }
01169
01170
01171 desc = get_simple_loop_desc (loop);
01172
01173
01174 if (desc->simple_p && !desc->assumptions && desc->const_iter)
01175 {
01176 if (dump_file)
01177 fprintf (dump_file, ";; Loop iterates constant times\n");
01178 return;
01179 }
01180
01181
01182
01183 if (num_loop_branches (loop) > 1)
01184 {
01185 if (dump_file)
01186 fprintf (dump_file, ";; Not peeling, contains branches\n");
01187 return;
01188 }
01189
01190 if (loop->header->count)
01191 {
01192 unsigned niter = expected_loop_iterations (loop);
01193 if (niter + 1 > npeel)
01194 {
01195 if (dump_file)
01196 {
01197 fprintf (dump_file, ";; Not peeling loop, rolls too much (");
01198 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
01199 (HOST_WIDEST_INT) (niter + 1));
01200 fprintf (dump_file, " iterations > %d [maximum peelings])\n",
01201 npeel);
01202 }
01203 return;
01204 }
01205 npeel = niter + 1;
01206 }
01207 else
01208 {
01209
01210
01211 if (dump_file)
01212 fprintf (dump_file,
01213 ";; Not peeling loop, no evidence it will be profitable\n");
01214 return;
01215 }
01216
01217
01218 loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
01219 loop->lpt_decision.times = npeel;
01220
01221 if (dump_file)
01222 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
01223 loop->lpt_decision.times);
01224 }
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240 static void
01241 peel_loop_simple (struct loops *loops, struct loop *loop)
01242 {
01243 sbitmap wont_exit;
01244 unsigned npeel = loop->lpt_decision.times;
01245 struct niter_desc *desc = get_simple_loop_desc (loop);
01246 struct opt_info *opt_info = NULL;
01247
01248 if (flag_split_ivs_in_unroller && npeel > 1)
01249 opt_info = analyze_insns_in_loop (loop);
01250
01251 wont_exit = sbitmap_alloc (npeel + 1);
01252 sbitmap_zero (wont_exit);
01253
01254 opt_info_start_duplication (opt_info);
01255
01256 if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
01257 loops, npeel, wont_exit, NULL, NULL, NULL,
01258 DLTHE_FLAG_UPDATE_FREQ))
01259 abort ();
01260
01261 free (wont_exit);
01262
01263 if (opt_info)
01264 {
01265 apply_opt_in_copies (opt_info, npeel, false, false);
01266 free_opt_info (opt_info);
01267 }
01268
01269 if (desc->simple_p)
01270 {
01271 if (desc->const_iter)
01272 {
01273 desc->niter -= npeel;
01274 desc->niter_expr = GEN_INT (desc->niter);
01275 desc->noloop_assumptions = NULL_RTX;
01276 }
01277 else
01278 {
01279
01280
01281
01282
01283 free_simple_loop_desc (loop);
01284 }
01285 }
01286 if (dump_file)
01287 fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
01288 }
01289
01290
01291 static void
01292 decide_unroll_stupid (struct loop *loop, int flags)
01293 {
01294 unsigned nunroll, nunroll_by_av, i;
01295 struct niter_desc *desc;
01296
01297 if (!(flags & UAP_UNROLL_ALL))
01298 {
01299
01300 return;
01301 }
01302
01303 if (dump_file)
01304 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
01305
01306
01307
01308 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
01309 nunroll_by_av
01310 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
01311 if (nunroll > nunroll_by_av)
01312 nunroll = nunroll_by_av;
01313 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
01314 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
01315
01316
01317 if (nunroll <= 1)
01318 {
01319 if (dump_file)
01320 fprintf (dump_file, ";; Not considering loop, is too big\n");
01321 return;
01322 }
01323
01324
01325 desc = get_simple_loop_desc (loop);
01326
01327
01328 if (desc->simple_p && !desc->assumptions)
01329 {
01330 if (dump_file)
01331 fprintf (dump_file, ";; The loop is simple\n");
01332 return;
01333 }
01334
01335
01336
01337 if (num_loop_branches (loop) > 1)
01338 {
01339 if (dump_file)
01340 fprintf (dump_file, ";; Not unrolling, contains branches\n");
01341 return;
01342 }
01343
01344
01345 if (loop->header->count
01346 && expected_loop_iterations (loop) < 2 * nunroll)
01347 {
01348 if (dump_file)
01349 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
01350 return;
01351 }
01352
01353
01354
01355
01356 for (i = 1; 2 * i <= nunroll; i *= 2)
01357 continue;
01358
01359 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
01360 loop->lpt_decision.times = i - 1;
01361
01362 if (dump_file)
01363 fprintf (dump_file,
01364 ";; Decided to unroll the loop stupidly, %d times.\n",
01365 loop->lpt_decision.times);
01366 }
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385 static void
01386 unroll_loop_stupid (struct loops *loops, struct loop *loop)
01387 {
01388 sbitmap wont_exit;
01389 unsigned nunroll = loop->lpt_decision.times;
01390 struct niter_desc *desc = get_simple_loop_desc (loop);
01391 struct opt_info *opt_info = NULL;
01392
01393 if (flag_split_ivs_in_unroller
01394 || flag_variable_expansion_in_unroller)
01395 opt_info = analyze_insns_in_loop (loop);
01396
01397
01398 wont_exit = sbitmap_alloc (nunroll + 1);
01399 sbitmap_zero (wont_exit);
01400 opt_info_start_duplication (opt_info);
01401
01402 if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
01403 loops, nunroll, wont_exit, NULL, NULL, NULL,
01404 DLTHE_FLAG_UPDATE_FREQ))
01405 abort ();
01406
01407 if (opt_info)
01408 {
01409 apply_opt_in_copies (opt_info, nunroll, true, true);
01410 free_opt_info (opt_info);
01411 }
01412
01413 free (wont_exit);
01414
01415 if (desc->simple_p)
01416 {
01417
01418
01419
01420
01421
01422
01423 desc->simple_p = false;
01424 }
01425
01426 if (dump_file)
01427 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
01428 nunroll, num_loop_insns (loop));
01429 }
01430
01431
01432
01433 static hashval_t
01434 si_info_hash (const void *ivts)
01435 {
01436 return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
01437 }
01438
01439
01440
01441 static int
01442 si_info_eq (const void *ivts1, const void *ivts2)
01443 {
01444 const struct iv_to_split *i1 = ivts1;
01445 const struct iv_to_split *i2 = ivts2;
01446
01447 return i1->insn == i2->insn;
01448 }
01449
01450
01451
01452 static hashval_t
01453 ve_info_hash (const void *ves)
01454 {
01455 return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
01456 }
01457
01458
01459
01460
01461 static int
01462 ve_info_eq (const void *ivts1, const void *ivts2)
01463 {
01464 const struct var_to_expand *i1 = ivts1;
01465 const struct var_to_expand *i2 = ivts2;
01466
01467 return i1->insn == i2->insn;
01468 }
01469
01470
01471
01472 bool
01473 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
01474 {
01475 basic_block *body, bb;
01476 unsigned i;
01477 int count_ref = 0;
01478 rtx insn;
01479
01480 body = get_loop_body (loop);
01481 for (i = 0; i < loop->num_nodes; i++)
01482 {
01483 bb = body[i];
01484
01485 FOR_BB_INSNS (bb, insn)
01486 {
01487 if (rtx_referenced_p (reg, insn))
01488 count_ref++;
01489 }
01490 }
01491 return (count_ref == 1);
01492 }
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517 static struct var_to_expand *
01518 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
01519 {
01520 rtx set, dest, src, op1;
01521 struct var_to_expand *ves;
01522 enum machine_mode mode1, mode2;
01523
01524 set = single_set (insn);
01525 if (!set)
01526 return NULL;
01527
01528 dest = SET_DEST (set);
01529 src = SET_SRC (set);
01530
01531 if (GET_CODE (src) != PLUS
01532 && GET_CODE (src) != MINUS
01533 && GET_CODE (src) != MULT)
01534 return NULL;
01535
01536 if (!XEXP (src, 0))
01537 return NULL;
01538
01539 op1 = XEXP (src, 0);
01540
01541 if (!REG_P (dest)
01542 && !(GET_CODE (dest) == SUBREG
01543 && REG_P (SUBREG_REG (dest))))
01544 return NULL;
01545
01546 if (!rtx_equal_p (dest, op1))
01547 return NULL;
01548
01549 if (!referenced_in_one_insn_in_loop_p (loop, dest))
01550 return NULL;
01551
01552 if (rtx_referenced_p (dest, XEXP (src, 1)))
01553 return NULL;
01554
01555 mode1 = GET_MODE (dest);
01556 mode2 = GET_MODE (XEXP (src, 1));
01557 if ((FLOAT_MODE_P (mode1)
01558 || FLOAT_MODE_P (mode2))
01559 && !flag_unsafe_math_optimizations)
01560 return NULL;
01561
01562
01563 ves = xmalloc (sizeof (struct var_to_expand));
01564 ves->insn = insn;
01565 VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
01566 ves->reg = copy_rtx (dest);
01567 ves->op = GET_CODE (src);
01568 ves->expansion_count = 0;
01569 ves->reuse_expansion = 0;
01570 return ves;
01571 }
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598 static struct iv_to_split *
01599 analyze_iv_to_split_insn (rtx insn)
01600 {
01601 rtx set, dest;
01602 struct rtx_iv iv;
01603 struct iv_to_split *ivts;
01604
01605
01606
01607 set = single_set (insn);
01608 if (!set)
01609 return NULL;
01610
01611 dest = SET_DEST (set);
01612 if (!REG_P (dest))
01613 return NULL;
01614
01615 if (!biv_p (insn, dest))
01616 return NULL;
01617
01618 if (!iv_analyze (insn, dest, &iv))
01619 abort ();
01620
01621 if (iv.step == const0_rtx
01622 || iv.mode != iv.extend_mode)
01623 return NULL;
01624
01625
01626 ivts = xmalloc (sizeof (struct iv_to_split));
01627 ivts->insn = insn;
01628 ivts->base_var = NULL_RTX;
01629 ivts->step = iv.step;
01630 ivts->n_loc = 1;
01631 ivts->loc[0] = 1;
01632
01633 return ivts;
01634 }
01635
01636
01637
01638
01639
01640
01641 static struct opt_info *
01642 analyze_insns_in_loop (struct loop *loop)
01643 {
01644 basic_block *body, bb;
01645 unsigned i, n_edges = 0;
01646 struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
01647 rtx insn;
01648 struct iv_to_split *ivts = NULL;
01649 struct var_to_expand *ves = NULL;
01650 PTR *slot1;
01651 PTR *slot2;
01652 edge *edges = get_loop_exit_edges (loop, &n_edges);
01653 basic_block preheader;
01654 bool can_apply = false;
01655
01656 iv_analysis_loop_init (loop);
01657
01658 body = get_loop_body (loop);
01659
01660 if (flag_split_ivs_in_unroller)
01661 opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
01662 si_info_hash, si_info_eq, free);
01663
01664
01665 if (!loop_preheader_edge (loop)->src)
01666 {
01667 preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01668 opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
01669 }
01670 else
01671 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
01672
01673 if (n_edges == 1
01674 && !(edges[0]->flags & EDGE_COMPLEX)
01675 && (edges[0]->flags & EDGE_LOOP_EXIT))
01676 {
01677 opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
01678 can_apply = true;
01679 }
01680
01681 if (flag_variable_expansion_in_unroller
01682 && can_apply)
01683 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
01684 ve_info_hash, ve_info_eq, free);
01685
01686 for (i = 0; i < loop->num_nodes; i++)
01687 {
01688 bb = body[i];
01689 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
01690 continue;
01691
01692 FOR_BB_INSNS (bb, insn)
01693 {
01694 if (!INSN_P (insn))
01695 continue;
01696
01697 if (opt_info->insns_to_split)
01698 ivts = analyze_iv_to_split_insn (insn);
01699
01700 if (ivts)
01701 {
01702 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
01703 *slot1 = ivts;
01704 continue;
01705 }
01706
01707 if (opt_info->insns_with_var_to_expand)
01708 ves = analyze_insn_to_expand_var (loop, insn);
01709
01710 if (ves)
01711 {
01712 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
01713 *slot2 = ves;
01714 }
01715 }
01716 }
01717
01718 free (edges);
01719 free (body);
01720 return opt_info;
01721 }
01722
01723
01724
01725
01726 static void
01727 opt_info_start_duplication (struct opt_info *opt_info)
01728 {
01729 if (opt_info)
01730 opt_info->first_new_block = last_basic_block;
01731 }
01732
01733
01734
01735
01736
01737
01738 static unsigned
01739 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
01740 {
01741 if (unrolling)
01742 {
01743
01744
01745 return n_copy;
01746 }
01747 else
01748 {
01749
01750
01751 if (n_copy)
01752 return n_copy - 1;
01753 else
01754 return n_copies;
01755 }
01756 }
01757
01758
01759
01760
01761 static rtx *
01762 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
01763 {
01764 unsigned i;
01765 rtx *ret = &expr;
01766
01767 for (i = 0; i < ivts->n_loc; i++)
01768 ret = &XEXP (*ret, ivts->loc[i]);
01769
01770 return ret;
01771 }
01772
01773
01774
01775
01776 static int
01777 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
01778 {
01779 struct iv_to_split *ivts = *slot;
01780 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
01781
01782 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
01783
01784 return 1;
01785 }
01786
01787
01788
01789
01790 static void
01791 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
01792 {
01793 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
01794 rtx seq;
01795
01796 start_sequence ();
01797 expr = force_operand (expr, ivts->base_var);
01798 if (expr != ivts->base_var)
01799 emit_move_insn (ivts->base_var, expr);
01800 seq = get_insns ();
01801 end_sequence ();
01802
01803 emit_insn_before (seq, insn);
01804 }
01805
01806
01807
01808
01809 static void
01810 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
01811 {
01812 rtx expr, *loc, seq, incr, var;
01813 enum machine_mode mode = GET_MODE (ivts->base_var);
01814 rtx src, dest, set;
01815
01816
01817 if (!delta)
01818 expr = ivts->base_var;
01819 else
01820 {
01821 incr = simplify_gen_binary (MULT, mode,
01822 ivts->step, gen_int_mode (delta, mode));
01823 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
01824 ivts->base_var, incr);
01825 }
01826
01827
01828 loc = get_ivts_expr (single_set (insn), ivts);
01829
01830
01831 if (validate_change (insn, loc, expr, 0))
01832 return;
01833
01834
01835 start_sequence ();
01836 var = gen_reg_rtx (mode);
01837 expr = force_operand (expr, var);
01838 if (expr != var)
01839 emit_move_insn (var, expr);
01840 seq = get_insns ();
01841 end_sequence ();
01842 emit_insn_before (seq, insn);
01843
01844 if (validate_change (insn, loc, var, 0))
01845 return;
01846
01847
01848
01849 set = single_set (insn);
01850 gcc_assert (set);
01851
01852 start_sequence ();
01853 *loc = var;
01854 src = copy_rtx (SET_SRC (set));
01855 dest = copy_rtx (SET_DEST (set));
01856 src = force_operand (src, dest);
01857 if (src != dest)
01858 emit_move_insn (dest, src);
01859 seq = get_insns ();
01860 end_sequence ();
01861
01862 emit_insn_before (seq, insn);
01863 delete_insn (insn);
01864 }
01865
01866
01867
01868
01869 static rtx
01870 get_expansion (struct var_to_expand *ve)
01871 {
01872 rtx reg;
01873
01874 if (ve->reuse_expansion == 0)
01875 reg = ve->reg;
01876 else
01877 reg = VARRAY_RTX (ve->var_expansions, ve->reuse_expansion - 1);
01878
01879 if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
01880 ve->reuse_expansion = 0;
01881 else
01882 ve->reuse_expansion++;
01883
01884 return reg;
01885 }
01886
01887
01888
01889
01890
01891 static void
01892 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
01893 {
01894 rtx new_reg, set;
01895 bool really_new_expansion = false;
01896
01897 set = single_set (insn);
01898 if (!set)
01899 abort ();
01900
01901
01902
01903 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
01904 {
01905 really_new_expansion = true;
01906 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
01907 }
01908 else
01909 new_reg = get_expansion (ve);
01910
01911 validate_change (insn, &SET_DEST (set), new_reg, 1);
01912 validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
01913
01914 if (apply_change_group ())
01915 if (really_new_expansion)
01916 {
01917 VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
01918 ve->expansion_count++;
01919 }
01920 }
01921
01922
01923
01924
01925
01926
01927 static int
01928 insert_var_expansion_initialization (void **slot, void *place_p)
01929 {
01930 struct var_to_expand *ve = *slot;
01931 basic_block place = (basic_block)place_p;
01932 rtx seq, var, zero_init, insn;
01933 unsigned i;
01934
01935 if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
01936 return 1;
01937
01938 start_sequence ();
01939 if (ve->op == PLUS || ve->op == MINUS)
01940 for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
01941 {
01942 var = VARRAY_RTX (ve->var_expansions, i);
01943 zero_init = CONST0_RTX (GET_MODE (var));
01944 emit_move_insn (var, zero_init);
01945 }
01946 else if (ve->op == MULT)
01947 for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
01948 {
01949 var = VARRAY_RTX (ve->var_expansions, i);
01950 zero_init = CONST1_RTX (GET_MODE (var));
01951 emit_move_insn (var, zero_init);
01952 }
01953
01954 seq = get_insns ();
01955 end_sequence ();
01956
01957 insn = BB_HEAD (place);
01958 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
01959 insn = NEXT_INSN (insn);
01960
01961 emit_insn_after (seq, insn);
01962
01963 return 1;
01964 }
01965
01966
01967
01968
01969
01970
01971 static int
01972 combine_var_copies_in_loop_exit (void **slot, void *place_p)
01973 {
01974 struct var_to_expand *ve = *slot;
01975 basic_block place = (basic_block)place_p;
01976 rtx sum = ve->reg;
01977 rtx expr, seq, var, insn;
01978 unsigned i;
01979
01980 if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
01981 return 1;
01982
01983 start_sequence ();
01984 if (ve->op == PLUS || ve->op == MINUS)
01985 for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
01986 {
01987 var = VARRAY_RTX (ve->var_expansions, i);
01988 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
01989 var, sum);
01990 }
01991 else if (ve->op == MULT)
01992 for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
01993 {
01994 var = VARRAY_RTX (ve->var_expansions, i);
01995 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
01996 var, sum);
01997 }
01998
01999 expr = force_operand (sum, ve->reg);
02000 if (expr != ve->reg)
02001 emit_move_insn (ve->reg, expr);
02002 seq = get_insns ();
02003 end_sequence ();
02004
02005 insn = BB_HEAD (place);
02006 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
02007 insn = NEXT_INSN (insn);
02008
02009 emit_insn_after (seq, insn);
02010
02011
02012 return 1;
02013 }
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024 static void
02025 apply_opt_in_copies (struct opt_info *opt_info,
02026 unsigned n_copies, bool unrolling,
02027 bool rewrite_original_loop)
02028 {
02029 unsigned i, delta;
02030 basic_block bb, orig_bb;
02031 rtx insn, orig_insn, next;
02032 struct iv_to_split ivts_templ, *ivts;
02033 struct var_to_expand ve_templ, *ves;
02034
02035
02036
02037 gcc_assert (!unrolling || rewrite_original_loop);
02038
02039
02040 if (opt_info->insns_to_split)
02041 htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
02042
02043 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
02044 {
02045 bb = BASIC_BLOCK (i);
02046 orig_bb = bb->rbi->original;
02047
02048 delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
02049 unrolling);
02050 orig_insn = BB_HEAD (orig_bb);
02051 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
02052 {
02053 next = NEXT_INSN (insn);
02054 if (!INSN_P (insn))
02055 continue;
02056
02057 while (!INSN_P (orig_insn))
02058 orig_insn = NEXT_INSN (orig_insn);
02059
02060 ivts_templ.insn = orig_insn;
02061 ve_templ.insn = orig_insn;
02062
02063
02064 if (opt_info->insns_to_split)
02065 {
02066 ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
02067
02068 if (ivts)
02069 {
02070 #ifdef ENABLE_CHECKING
02071 gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
02072 #endif
02073
02074 if (!delta)
02075 insert_base_initialization (ivts, insn);
02076 split_iv (ivts, insn, delta);
02077 }
02078 }
02079
02080 if (unrolling && opt_info->insns_with_var_to_expand)
02081 {
02082 ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
02083 if (ves)
02084 {
02085 #ifdef ENABLE_CHECKING
02086 gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
02087 #endif
02088 expand_var_during_unrolling (ves, insn);
02089 }
02090 }
02091 orig_insn = NEXT_INSN (orig_insn);
02092 }
02093 }
02094
02095 if (!rewrite_original_loop)
02096 return;
02097
02098
02099
02100 if (opt_info->insns_with_var_to_expand)
02101 {
02102 htab_traverse (opt_info->insns_with_var_to_expand,
02103 insert_var_expansion_initialization,
02104 opt_info->loop_preheader);
02105 htab_traverse (opt_info->insns_with_var_to_expand,
02106 combine_var_copies_in_loop_exit,
02107 opt_info->loop_exit);
02108 }
02109
02110
02111
02112
02113 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
02114 {
02115 bb = BASIC_BLOCK (i);
02116 orig_bb = bb->rbi->original;
02117 if (orig_bb->rbi->copy != bb)
02118 continue;
02119
02120 delta = determine_split_iv_delta (0, n_copies, unrolling);
02121 for (orig_insn = BB_HEAD (orig_bb);
02122 orig_insn != NEXT_INSN (BB_END (bb));
02123 orig_insn = next)
02124 {
02125 next = NEXT_INSN (orig_insn);
02126
02127 if (!INSN_P (orig_insn))
02128 continue;
02129
02130 ivts_templ.insn = orig_insn;
02131 if (opt_info->insns_to_split)
02132 {
02133 ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
02134 if (ivts)
02135 {
02136 if (!delta)
02137 insert_base_initialization (ivts, orig_insn);
02138 split_iv (ivts, orig_insn, delta);
02139 continue;
02140 }
02141 }
02142
02143 }
02144 }
02145 }
02146
02147
02148
02149
02150 static int
02151 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
02152 {
02153 struct var_to_expand *ve = *slot;
02154
02155 VARRAY_CLEAR (ve->var_expansions);
02156
02157
02158 return 1;
02159 }
02160
02161
02162
02163 static void
02164 free_opt_info (struct opt_info *opt_info)
02165 {
02166 if (opt_info->insns_to_split)
02167 htab_delete (opt_info->insns_to_split);
02168 if (opt_info->insns_with_var_to_expand)
02169 {
02170 htab_traverse (opt_info->insns_with_var_to_expand,
02171 release_var_copies, NULL);
02172 htab_delete (opt_info->insns_with_var_to_expand);
02173 }
02174 free (opt_info);
02175 }