00001 /* Thread edges through blocks and update the control flow and SSA graphs. 00002 Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc. 00003 00004 This file is part of GCC. 00005 00006 GCC is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2, or (at your option) 00009 any later version. 00010 00011 GCC is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with GCC; see the file COPYING. If not, write to 00018 the Free Software Foundation, 51 Franklin Street, Fifth Floor, 00019 Boston, MA 02110-1301, USA. */ 00020 00021 #include "config.h" 00022 #include "system.h" 00023 #include "coretypes.h" 00024 #include "tm.h" 00025 #include "tree.h" 00026 #include "flags.h" 00027 #include "rtl.h" 00028 #include "tm_p.h" 00029 #include "ggc.h" 00030 #include "basic-block.h" 00031 #include "output.h" 00032 #include "expr.h" 00033 #include "function.h" 00034 #include "diagnostic.h" 00035 #include "tree-flow.h" 00036 #include "tree-dump.h" 00037 #include "tree-pass.h" 00038 #include "cfgloop.h" 00039 00040 /* Given a block B, update the CFG and SSA graph to reflect redirecting 00041 one or more in-edges to B to instead reach the destination of an 00042 out-edge from B while preserving any side effects in B. 00043 00044 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the 00045 side effects of executing B. 00046 00047 1. Make a copy of B (including its outgoing edges and statements). Call 00048 the copy B'. Note B' has no incoming edges or PHIs at this time. 00049 00050 2. Remove the control statement at the end of B' and all outgoing edges 00051 except B'->C. 00052 00053 3. Add a new argument to each PHI in C with the same value as the existing 00054 argument associated with edge B->C. Associate the new PHI arguments 00055 with the edge B'->C. 00056 00057 4. For each PHI in B, find or create a PHI in B' with an identical 00058 PHI_RESULT. Add an argument to the PHI in B' which has the same 00059 value as the PHI in B associated with the edge A->B. Associate 00060 the new argument in the PHI in B' with the edge A->B. 00061 00062 5. Change the edge A->B to A->B'. 00063 00064 5a. This automatically deletes any PHI arguments associated with the 00065 edge A->B in B. 00066 00067 5b. This automatically associates each new argument added in step 4 00068 with the edge A->B'. 00069 00070 6. Repeat for other incoming edges into B. 00071 00072 7. Put the duplicated resources in B and all the B' blocks into SSA form. 00073 00074 Note that block duplication can be minimized by first collecting the 00075 the set of unique destination blocks that the incoming edges should 00076 be threaded to. Block duplication can be further minimized by using 00077 B instead of creating B' for one destination if all edges into B are 00078 going to be threaded to a successor of B. 00079 00080 We further reduce the number of edges and statements we create by 00081 not copying all the outgoing edges and the control statement in 00082 step #1. We instead create a template block without the outgoing 00083 edges and duplicate the template. */ 00084 00085 00086 /* Steps #5 and #6 of the above algorithm are best implemented by walking 00087 all the incoming edges which thread to the same destination edge at 00088 the same time. That avoids lots of table lookups to get information 00089 for the destination edge. 00090 00091 To realize that implementation we create a list of incoming edges 00092 which thread to the same outgoing edge. Thus to implement steps 00093 #5 and #6 we traverse our hash table of outgoing edge information. 00094 For each entry we walk the list of incoming edges which thread to 00095 the current outgoing edge. */ 00096 00097 struct el 00098 { 00099 edge e; 00100 struct el *next; 00101 }; 00102 00103 /* Main data structure recording information regarding B's duplicate 00104 blocks. */ 00105 00106 /* We need to efficiently record the unique thread destinations of this 00107 block and specific information associated with those destinations. We 00108 may have many incoming edges threaded to the same outgoing edge. This 00109 can be naturally implemented with a hash table. */ 00110 00111 struct redirection_data 00112 { 00113 /* A duplicate of B with the trailing control statement removed and which 00114 targets a single successor of B. */ 00115 basic_block dup_block; 00116 00117 /* An outgoing edge from B. DUP_BLOCK will have OUTGOING_EDGE->dest as 00118 its single successor. */ 00119 edge outgoing_edge; 00120 00121 /* A list of incoming edges which we want to thread to 00122 OUTGOING_EDGE->dest. */ 00123 struct el *incoming_edges; 00124 00125 /* Flag indicating whether or not we should create a duplicate block 00126 for this thread destination. This is only true if we are threading 00127 all incoming edges and thus are using BB itself as a duplicate block. */ 00128 bool do_not_duplicate; 00129 }; 00130 00131 /* Main data structure to hold information for duplicates of BB. */ 00132 static htab_t redirection_data; 00133 00134 /* Data structure of information to pass to hash table traversal routines. */ 00135 struct local_info 00136 { 00137 /* The current block we are working on. */ 00138 basic_block bb; 00139 00140 /* A template copy of BB with no outgoing edges or control statement that 00141 we use for creating copies. */ 00142 basic_block template_block; 00143 00144 /* TRUE if we thread one or more jumps, FALSE otherwise. */ 00145 bool jumps_threaded; 00146 }; 00147 00148 /* Passes which use the jump threading code register jump threading 00149 opportunities as they are discovered. We keep the registered 00150 jump threading opportunities in this vector as edge pairs 00151 (original_edge, target_edge). */ 00152 DEF_VEC_ALLOC_P(edge,heap); 00153 static VEC(edge,heap) *threaded_edges; 00154 00155 00156 /* Jump threading statistics. */ 00157 00158 struct thread_stats_d 00159 { 00160 unsigned long num_threaded_edges; 00161 }; 00162 00163 struct thread_stats_d thread_stats; 00164 00165 00166 /* Remove the last statement in block BB if it is a control statement 00167 Also remove all outgoing edges except the edge which reaches DEST_BB. 00168 If DEST_BB is NULL, then remove all outgoing edges. */ 00169 00170 static void 00171 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) 00172 { 00173 block_stmt_iterator bsi; 00174 edge e; 00175 edge_iterator ei; 00176 00177 bsi = bsi_last (bb); 00178 00179 /* If the duplicate ends with a control statement, then remove it. 00180 00181 Note that if we are duplicating the template block rather than the 00182 original basic block, then the duplicate might not have any real 00183 statements in it. */ 00184 if (!bsi_end_p (bsi) 00185 && bsi_stmt (bsi) 00186 && (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR 00187 || TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR 00188 || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR)) 00189 bsi_remove (&bsi, true); 00190 00191 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ) 00192 { 00193 if (e->dest != dest_bb) 00194 remove_edge (e); 00195 else 00196 ei_next (&ei); 00197 } 00198 } 00199 00200 /* Create a duplicate of BB which only reaches the destination of the edge 00201 stored in RD. Record the duplicate block in RD. */ 00202 00203 static void 00204 create_block_for_threading (basic_block bb, struct redirection_data *rd) 00205 { 00206 /* We can use the generic block duplication code and simply remove 00207 the stuff we do not need. */ 00208 rd->dup_block = duplicate_block (bb, NULL, NULL); 00209 00210 /* Zero out the profile, since the block is unreachable for now. */ 00211 rd->dup_block->frequency = 0; 00212 rd->dup_block->count = 0; 00213 00214 /* The call to duplicate_block will copy everything, including the 00215 useless COND_EXPR or SWITCH_EXPR at the end of BB. We just remove 00216 the useless COND_EXPR or SWITCH_EXPR here rather than having a 00217 specialized block copier. We also remove all outgoing edges 00218 from the duplicate block. The appropriate edge will be created 00219 later. */ 00220 remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL); 00221 } 00222 00223 /* Hashing and equality routines for our hash table. */ 00224 static hashval_t 00225 redirection_data_hash (const void *p) 00226 { 00227 edge e = ((struct redirection_data *)p)->outgoing_edge; 00228 return e->dest->index; 00229 } 00230 00231 static int 00232 redirection_data_eq (const void *p1, const void *p2) 00233 { 00234 edge e1 = ((struct redirection_data *)p1)->outgoing_edge; 00235 edge e2 = ((struct redirection_data *)p2)->outgoing_edge; 00236 00237 return e1 == e2; 00238 } 00239 00240 /* Given an outgoing edge E lookup and return its entry in our hash table. 00241 00242 If INSERT is true, then we insert the entry into the hash table if 00243 it is not already present. INCOMING_EDGE is added to the list of incoming 00244 edges associated with E in the hash table. */ 00245 00246 static struct redirection_data * 00247 lookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert) 00248 { 00249 void **slot; 00250 struct redirection_data *elt; 00251 00252 /* Build a hash table element so we can see if E is already 00253 in the table. */ 00254 elt = XNEW (struct redirection_data); 00255 elt->outgoing_edge = e; 00256 elt->dup_block = NULL; 00257 elt->do_not_duplicate = false; 00258 elt->incoming_edges = NULL; 00259 00260 slot = htab_find_slot (redirection_data, elt, insert); 00261 00262 /* This will only happen if INSERT is false and the entry is not 00263 in the hash table. */ 00264 if (slot == NULL) 00265 { 00266 free (elt); 00267 return NULL; 00268 } 00269 00270 /* This will only happen if E was not in the hash table and 00271 INSERT is true. */ 00272 if (*slot == NULL) 00273 { 00274 *slot = (void *)elt; 00275 elt->incoming_edges = XNEW (struct el); 00276 elt->incoming_edges->e = incoming_edge; 00277 elt->incoming_edges->next = NULL; 00278 return elt; 00279 } 00280 /* E was in the hash table. */ 00281 else 00282 { 00283 /* Free ELT as we do not need it anymore, we will extract the 00284 relevant entry from the hash table itself. */ 00285 free (elt); 00286 00287 /* Get the entry stored in the hash table. */ 00288 elt = (struct redirection_data *) *slot; 00289 00290 /* If insertion was requested, then we need to add INCOMING_EDGE 00291 to the list of incoming edges associated with E. */ 00292 if (insert) 00293 { 00294 struct el *el = XNEW (struct el); 00295 el->next = elt->incoming_edges; 00296 el->e = incoming_edge; 00297 elt->incoming_edges = el; 00298 } 00299 00300 return elt; 00301 } 00302 } 00303 00304 /* Given a duplicate block and its single destination (both stored 00305 in RD). Create an edge between the duplicate and its single 00306 destination. 00307 00308 Add an additional argument to any PHI nodes at the single 00309 destination. */ 00310 00311 static void 00312 create_edge_and_update_destination_phis (struct redirection_data *rd) 00313 { 00314 edge e = make_edge (rd->dup_block, rd->outgoing_edge->dest, EDGE_FALLTHRU); 00315 tree phi; 00316 00317 e->probability = REG_BR_PROB_BASE; 00318 e->count = rd->dup_block->count; 00319 00320 /* If there are any PHI nodes at the destination of the outgoing edge 00321 from the duplicate block, then we will need to add a new argument 00322 to them. The argument should have the same value as the argument 00323 associated with the outgoing edge stored in RD. */ 00324 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) 00325 { 00326 int indx = rd->outgoing_edge->dest_idx; 00327 add_phi_arg (phi, PHI_ARG_DEF (phi, indx), e); 00328 } 00329 } 00330 00331 /* Hash table traversal callback routine to create duplicate blocks. */ 00332 00333 static int 00334 create_duplicates (void **slot, void *data) 00335 { 00336 struct redirection_data *rd = (struct redirection_data *) *slot; 00337 struct local_info *local_info = (struct local_info *)data; 00338 00339 /* If this entry should not have a duplicate created, then there's 00340 nothing to do. */ 00341 if (rd->do_not_duplicate) 00342 return 1; 00343 00344 /* Create a template block if we have not done so already. Otherwise 00345 use the template to create a new block. */ 00346 if (local_info->template_block == NULL) 00347 { 00348 create_block_for_threading (local_info->bb, rd); 00349 local_info->template_block = rd->dup_block; 00350 00351 /* We do not create any outgoing edges for the template. We will 00352 take care of that in a later traversal. That way we do not 00353 create edges that are going to just be deleted. */ 00354 } 00355 else 00356 { 00357 create_block_for_threading (local_info->template_block, rd); 00358 00359 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate 00360 block. */ 00361 create_edge_and_update_destination_phis (rd); 00362 } 00363 00364 /* Keep walking the hash table. */ 00365 return 1; 00366 } 00367 00368 /* We did not create any outgoing edges for the template block during 00369 block creation. This hash table traversal callback creates the 00370 outgoing edge for the template block. */ 00371 00372 static int 00373 fixup_template_block (void **slot, void *data) 00374 { 00375 struct redirection_data *rd = (struct redirection_data *) *slot; 00376 struct local_info *local_info = (struct local_info *)data; 00377 00378 /* If this is the template block, then create its outgoing edges 00379 and halt the hash table traversal. */ 00380 if (rd->dup_block && rd->dup_block == local_info->template_block) 00381 { 00382 create_edge_and_update_destination_phis (rd); 00383 return 0; 00384 } 00385 00386 return 1; 00387 } 00388 00389 /* Not all jump threading requests are useful. In particular some 00390 jump threading requests can create irreducible regions which are 00391 undesirable. 00392 00393 This routine will examine the BB's incoming edges for jump threading 00394 requests which, if acted upon, would create irreducible regions. Any 00395 such jump threading requests found will be pruned away. */ 00396 00397 static void 00398 prune_undesirable_thread_requests (basic_block bb) 00399 { 00400 edge e; 00401 edge_iterator ei; 00402 bool may_create_irreducible_region = false; 00403 unsigned int num_outgoing_edges_into_loop = 0; 00404 00405 /* For the heuristics below, we need to know if BB has more than 00406 one outgoing edge into a loop. */ 00407 FOR_EACH_EDGE (e, ei, bb->succs) 00408 num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0); 00409 00410 if (num_outgoing_edges_into_loop > 1) 00411 { 00412 edge backedge = NULL; 00413 00414 /* Consider the effect of threading the edge (0, 1) to 2 on the left 00415 CFG to produce the right CFG: 00416 00417 00418 0 0 00419 | | 00420 1<--+ 2<--------+ 00421 / \ | | | 00422 2 3 | 4<----+ | 00423 \ / | / \ | | 00424 4---+ E 1-- | --+ 00425 | | | 00426 E 3---+ 00427 00428 00429 Threading the (0, 1) edge to 2 effectively creates two loops 00430 (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested. 00431 This is not good. 00432 00433 However, we do need to be able to thread (0, 1) to 2 or 3 00434 in the left CFG below (which creates the middle and right 00435 CFGs with nested loops). 00436 00437 0 0 0 00438 | | | 00439 1<--+ 2<----+ 3<-+<-+ 00440 /| | | | | | | 00441 2 | | 3<-+ | 1--+ | 00442 \| | | | | | | 00443 3---+ 1--+--+ 2-----+ 00444 00445 00446 A safe heuristic appears to be to only allow threading if BB 00447 has a single incoming backedge from one of its direct successors. */ 00448 00449 FOR_EACH_EDGE (e, ei, bb->preds) 00450 { 00451 if (e->flags & EDGE_DFS_BACK) 00452 { 00453 if (backedge) 00454 { 00455 backedge = NULL; 00456 break; 00457 } 00458 else 00459 { 00460 backedge = e; 00461 } 00462 } 00463 } 00464 00465 if (backedge && find_edge (bb, backedge->src)) 00466 ; 00467 else 00468 may_create_irreducible_region = true; 00469 } 00470 else 00471 { 00472 edge dest = NULL; 00473 00474 /* If we thread across the loop entry block (BB) into the 00475 loop and BB is still reached from outside the loop, then 00476 we would create an irreducible CFG. Consider the effect 00477 of threading the edge (1, 4) to 5 on the left CFG to produce 00478 the right CFG 00479 00480 0 0 00481 / \ / \ 00482 1 2 1 2 00483 \ / | | 00484 4<----+ 5<->4 00485 / \ | | 00486 E 5---+ E 00487 00488 00489 Threading the (1, 4) edge to 5 creates two entry points 00490 into the loop (4, 5) (one from block 1, the other from 00491 block 2). A classic irreducible region. 00492 00493 So look at all of BB's incoming edges which are not 00494 backedges and which are not threaded to the loop exit. 00495 If that subset of incoming edges do not all thread 00496 to the same block, then threading any of them will create 00497 an irreducible region. */ 00498 00499 FOR_EACH_EDGE (e, ei, bb->preds) 00500 { 00501 edge e2; 00502 00503 /* We ignore back edges for now. This may need refinement 00504 as threading a backedge creates an inner loop which 00505 we would need to verify has a single entry point. 00506 00507 If all backedges thread to new locations, then this 00508 block will no longer have incoming backedges and we 00509 need not worry about creating irreducible regions 00510 by threading through BB. I don't think this happens 00511 enough in practice to worry about it. */ 00512 if (e->flags & EDGE_DFS_BACK) 00513 continue; 00514 00515 /* If the incoming edge threads to the loop exit, then it 00516 is clearly safe. */ 00517 e2 = e->aux; 00518 if (e2 && (e2->flags & EDGE_LOOP_EXIT)) 00519 continue; 00520 00521 /* E enters the loop header and is not threaded. We can 00522 not allow any other incoming edges to thread into 00523 the loop as that would create an irreducible region. */ 00524 if (!e2) 00525 { 00526 may_create_irreducible_region = true; 00527 break; 00528 } 00529 00530 /* We know that this incoming edge threads to a block inside 00531 the loop. This edge must thread to the same target in 00532 the loop as any previously seen threaded edges. Otherwise 00533 we will create an irreducible region. */ 00534 if (!dest) 00535 dest = e2; 00536 else if (e2 != dest) 00537 { 00538 may_create_irreducible_region = true; 00539 break; 00540 } 00541 } 00542 } 00543 00544 /* If we might create an irreducible region, then cancel any of 00545 the jump threading requests for incoming edges which are 00546 not backedges and which do not thread to the exit block. */ 00547 if (may_create_irreducible_region) 00548 { 00549 FOR_EACH_EDGE (e, ei, bb->preds) 00550 { 00551 edge e2; 00552 00553 /* Ignore back edges. */ 00554 if (e->flags & EDGE_DFS_BACK) 00555 continue; 00556 00557 e2 = e->aux; 00558 00559 /* If this incoming edge was not threaded, then there is 00560 nothing to do. */ 00561 if (!e2) 00562 continue; 00563 00564 /* If this incoming edge threaded to the loop exit, 00565 then it can be ignored as it is safe. */ 00566 if (e2->flags & EDGE_LOOP_EXIT) 00567 continue; 00568 00569 if (e2) 00570 { 00571 /* This edge threaded into the loop and the jump thread 00572 request must be cancelled. */ 00573 if (dump_file && (dump_flags & TDF_DETAILS)) 00574 fprintf (dump_file, " Not threading jump %d --> %d to %d\n", 00575 e->src->index, e->dest->index, e2->dest->index); 00576 e->aux = NULL; 00577 } 00578 } 00579 } 00580 } 00581 00582 /* Hash table traversal callback to redirect each incoming edge 00583 associated with this hash table element to its new destination. */ 00584 00585 static int 00586 redirect_edges (void **slot, void *data) 00587 { 00588 struct redirection_data *rd = (struct redirection_data *) *slot; 00589 struct local_info *local_info = (struct local_info *)data; 00590 struct el *next, *el; 00591 00592 /* Walk over all the incoming edges associated associated with this 00593 hash table entry. */ 00594 for (el = rd->incoming_edges; el; el = next) 00595 { 00596 edge e = el->e; 00597 00598 /* Go ahead and free this element from the list. Doing this now 00599 avoids the need for another list walk when we destroy the hash 00600 table. */ 00601 next = el->next; 00602 free (el); 00603 00604 /* Go ahead and clear E->aux. It's not needed anymore and failure 00605 to clear it will cause all kinds of unpleasant problems later. */ 00606 e->aux = NULL; 00607 00608 thread_stats.num_threaded_edges++; 00609 00610 if (rd->dup_block) 00611 { 00612 edge e2; 00613 00614 if (dump_file && (dump_flags & TDF_DETAILS)) 00615 fprintf (dump_file, " Threaded jump %d --> %d to %d\n", 00616 e->src->index, e->dest->index, rd->dup_block->index); 00617 00618 rd->dup_block->count += e->count; 00619 rd->dup_block->frequency += EDGE_FREQUENCY (e); 00620 EDGE_SUCC (rd->dup_block, 0)->count += e->count; 00621 /* Redirect the incoming edge to the appropriate duplicate 00622 block. */ 00623 e2 = redirect_edge_and_branch (e, rd->dup_block); 00624 flush_pending_stmts (e2); 00625 00626 if ((dump_file && (dump_flags & TDF_DETAILS)) 00627 && e->src != e2->src) 00628 fprintf (dump_file, " basic block %d created\n", e2->src->index); 00629 } 00630 else 00631 { 00632 if (dump_file && (dump_flags & TDF_DETAILS)) 00633 fprintf (dump_file, " Threaded jump %d --> %d to %d\n", 00634 e->src->index, e->dest->index, local_info->bb->index); 00635 00636 /* We are using BB as the duplicate. Remove the unnecessary 00637 outgoing edges and statements from BB. */ 00638 remove_ctrl_stmt_and_useless_edges (local_info->bb, 00639 rd->outgoing_edge->dest); 00640 00641 /* And fixup the flags on the single remaining edge. */ 00642 single_succ_edge (local_info->bb)->flags 00643 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL); 00644 single_succ_edge (local_info->bb)->flags |= EDGE_FALLTHRU; 00645 } 00646 } 00647 00648 /* Indicate that we actually threaded one or more jumps. */ 00649 if (rd->incoming_edges) 00650 local_info->jumps_threaded = true; 00651 00652 return 1; 00653 } 00654 00655 /* Return true if this block has no executable statements other than 00656 a simple ctrl flow instruction. When the number of outgoing edges 00657 is one, this is equivalent to a "forwarder" block. */ 00658 00659 static bool 00660 redirection_block_p (basic_block bb) 00661 { 00662 block_stmt_iterator bsi; 00663 00664 /* Advance to the first executable statement. */ 00665 bsi = bsi_start (bb); 00666 while (!bsi_end_p (bsi) 00667 && (TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR 00668 || IS_EMPTY_STMT (bsi_stmt (bsi)))) 00669 bsi_next (&bsi); 00670 00671 /* Check if this is an empty block. */ 00672 if (bsi_end_p (bsi)) 00673 return true; 00674 00675 /* Test that we've reached the terminating control statement. */ 00676 return bsi_stmt (bsi) 00677 && (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR 00678 || TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR 00679 || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR); 00680 } 00681 00682 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB 00683 is reached via one or more specific incoming edges, we know which 00684 outgoing edge from BB will be traversed. 00685 00686 We want to redirect those incoming edges to the target of the 00687 appropriate outgoing edge. Doing so avoids a conditional branch 00688 and may expose new optimization opportunities. Note that we have 00689 to update dominator tree and SSA graph after such changes. 00690 00691 The key to keeping the SSA graph update manageable is to duplicate 00692 the side effects occurring in BB so that those side effects still 00693 occur on the paths which bypass BB after redirecting edges. 00694 00695 We accomplish this by creating duplicates of BB and arranging for 00696 the duplicates to unconditionally pass control to one specific 00697 successor of BB. We then revector the incoming edges into BB to 00698 the appropriate duplicate of BB. 00699 00700 BB and its duplicates will have assignments to the same set of 00701 SSA_NAMEs. Right now, we just call into update_ssa to update the 00702 SSA graph for those names. 00703 00704 We are also going to experiment with a true incremental update 00705 scheme for the duplicated resources. One of the interesting 00706 properties we can exploit here is that all the resources set 00707 in BB will have the same IDFS, so we have one IDFS computation 00708 per block with incoming threaded edges, which can lower the 00709 cost of the true incremental update algorithm. */ 00710 00711 static bool 00712 thread_block (basic_block bb) 00713 { 00714 /* E is an incoming edge into BB that we may or may not want to 00715 redirect to a duplicate of BB. */ 00716 edge e; 00717 edge_iterator ei; 00718 struct local_info local_info; 00719 00720 /* FOUND_BACKEDGE indicates that we found an incoming backedge 00721 into BB, in which case we may ignore certain jump threads 00722 to avoid creating irreducible regions. */ 00723 bool found_backedge = false; 00724 00725 /* ALL indicates whether or not all incoming edges into BB should 00726 be threaded to a duplicate of BB. */ 00727 bool all = true; 00728 00729 /* If optimizing for size, only thread this block if we don't have 00730 to duplicate it or it's an otherwise empty redirection block. */ 00731 if (optimize_size 00732 && EDGE_COUNT (bb->preds) > 1 00733 && !redirection_block_p (bb)) 00734 { 00735 FOR_EACH_EDGE (e, ei, bb->preds) 00736 e->aux = NULL; 00737 return false; 00738 } 00739 00740 /* To avoid scanning a linear array for the element we need we instead 00741 use a hash table. For normal code there should be no noticeable 00742 difference. However, if we have a block with a large number of 00743 incoming and outgoing edges such linear searches can get expensive. */ 00744 redirection_data = htab_create (EDGE_COUNT (bb->succs), 00745 redirection_data_hash, 00746 redirection_data_eq, 00747 free); 00748 00749 FOR_EACH_EDGE (e, ei, bb->preds) 00750 found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0); 00751 00752 /* If BB has incoming backedges, then threading across BB might 00753 introduce an irreducible region, which would be undesirable 00754 as that inhibits various optimizations later. Prune away 00755 any jump threading requests which we know will result in 00756 an irreducible region. */ 00757 if (found_backedge) 00758 prune_undesirable_thread_requests (bb); 00759 00760 /* Record each unique threaded destination into a hash table for 00761 efficient lookups. */ 00762 FOR_EACH_EDGE (e, ei, bb->preds) 00763 { 00764 if (!e->aux) 00765 { 00766 all = false; 00767 } 00768 else 00769 { 00770 edge e2 = e->aux; 00771 update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), 00772 e->count, e->aux); 00773 00774 /* Insert the outgoing edge into the hash table if it is not 00775 already in the hash table. */ 00776 lookup_redirection_data (e2, e, INSERT); 00777 } 00778 } 00779 00780 /* If we are going to thread all incoming edges to an outgoing edge, then 00781 BB will become unreachable. Rather than just throwing it away, use 00782 it for one of the duplicates. Mark the first incoming edge with the 00783 DO_NOT_DUPLICATE attribute. */ 00784 if (all) 00785 { 00786 edge e = EDGE_PRED (bb, 0)->aux; 00787 lookup_redirection_data (e, NULL, NO_INSERT)->do_not_duplicate = true; 00788 } 00789 00790 /* Now create duplicates of BB. 00791 00792 Note that for a block with a high outgoing degree we can waste 00793 a lot of time and memory creating and destroying useless edges. 00794 00795 So we first duplicate BB and remove the control structure at the 00796 tail of the duplicate as well as all outgoing edges from the 00797 duplicate. We then use that duplicate block as a template for 00798 the rest of the duplicates. */ 00799 local_info.template_block = NULL; 00800 local_info.bb = bb; 00801 local_info.jumps_threaded = false; 00802 htab_traverse (redirection_data, create_duplicates, &local_info); 00803 00804 /* The template does not have an outgoing edge. Create that outgoing 00805 edge and update PHI nodes as the edge's target as necessary. 00806 00807 We do this after creating all the duplicates to avoid creating 00808 unnecessary edges. */ 00809 htab_traverse (redirection_data, fixup_template_block, &local_info); 00810 00811 /* The hash table traversals above created the duplicate blocks (and the 00812 statements within the duplicate blocks). This loop creates PHI nodes for 00813 the duplicated blocks and redirects the incoming edges into BB to reach 00814 the duplicates of BB. */ 00815 htab_traverse (redirection_data, redirect_edges, &local_info); 00816 00817 /* Done with this block. Clear REDIRECTION_DATA. */ 00818 htab_delete (redirection_data); 00819 redirection_data = NULL; 00820 00821 /* Indicate to our caller whether or not any jumps were threaded. */ 00822 return local_info.jumps_threaded; 00823 } 00824 00825 /* Walk through the registered jump threads and convert them into a 00826 form convenient for this pass. 00827 00828 Any block which has incoming edges threaded to outgoing edges 00829 will have its entry in THREADED_BLOCK set. 00830 00831 Any threaded edge will have its new outgoing edge stored in the 00832 original edge's AUX field. 00833 00834 This form avoids the need to walk all the edges in the CFG to 00835 discover blocks which need processing and avoids unnecessary 00836 hash table lookups to map from threaded edge to new target. */ 00837 00838 static void 00839 mark_threaded_blocks (bitmap threaded_blocks) 00840 { 00841 unsigned int i; 00842 00843 for (i = 0; i < VEC_length (edge, threaded_edges); i += 2) 00844 { 00845 edge e = VEC_index (edge, threaded_edges, i); 00846 edge e2 = VEC_index (edge, threaded_edges, i + 1); 00847 00848 e->aux = e2; 00849 bitmap_set_bit (threaded_blocks, e->dest->index); 00850 } 00851 } 00852 00853 00854 /* Walk through all blocks and thread incoming edges to the appropriate 00855 outgoing edge for each edge pair recorded in THREADED_EDGES. 00856 00857 It is the caller's responsibility to fix the dominance information 00858 and rewrite duplicated SSA_NAMEs back into SSA form. 00859 00860 Returns true if one or more edges were threaded, false otherwise. */ 00861 00862 bool 00863 thread_through_all_blocks (void) 00864 { 00865 bool retval = false; 00866 unsigned int i; 00867 bitmap_iterator bi; 00868 bitmap threaded_blocks; 00869 00870 if (threaded_edges == NULL) 00871 return false; 00872 00873 threaded_blocks = BITMAP_ALLOC (NULL); 00874 memset (&thread_stats, 0, sizeof (thread_stats)); 00875 00876 mark_threaded_blocks (threaded_blocks); 00877 00878 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi) 00879 { 00880 basic_block bb = BASIC_BLOCK (i); 00881 00882 if (EDGE_COUNT (bb->preds) > 0) 00883 retval |= thread_block (bb); 00884 } 00885 00886 if (dump_file && (dump_flags & TDF_STATS)) 00887 fprintf (dump_file, "\nJumps threaded: %lu\n", 00888 thread_stats.num_threaded_edges); 00889 00890 BITMAP_FREE (threaded_blocks); 00891 threaded_blocks = NULL; 00892 VEC_free (edge, heap, threaded_edges); 00893 threaded_edges = NULL; 00894 return retval; 00895 } 00896 00897 /* Register a jump threading opportunity. We queue up all the jump 00898 threading opportunities discovered by a pass and update the CFG 00899 and SSA form all at once. 00900 00901 E is the edge we can thread, E2 is the new target edge. ie, we 00902 are effectively recording that E->dest can be changed to E2->dest 00903 after fixing the SSA graph. */ 00904 00905 void 00906 register_jump_thread (edge e, edge e2) 00907 { 00908 if (threaded_edges == NULL) 00909 threaded_edges = VEC_alloc (edge, heap, 10); 00910 00911 VEC_safe_push (edge, heap, threaded_edges, e); 00912 VEC_safe_push (edge, heap, threaded_edges, e2); 00913 }
1.5.6