• Main Page
  • Modules
  • Data Types
  • Files

osprey/kg++fe/gnu/config/ia64/ia64.c

Go to the documentation of this file.
00001 /* Definitions of target machine for GNU compiler.
00002    Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
00003    Contributed by James E. Wilson <wilson@cygnus.com> and
00004         David Mosberger <davidm@hpl.hp.com>.
00005 
00006 This file is part of GNU CC.
00007 
00008 GNU CC is free software; you can redistribute it and/or modify
00009 it under the terms of the GNU General Public License as published by
00010 the Free Software Foundation; either version 2, or (at your option)
00011 any later version.
00012 
00013 GNU CC is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016 GNU General Public License for more details.
00017 
00018 You should have received a copy of the GNU General Public License
00019 along with GNU CC; see the file COPYING.  If not, write to
00020 the Free Software Foundation, 59 Temple Place - Suite 330,
00021 Boston, MA 02111-1307, USA.  */
00022 
00023 #include "config.h"
00024 #include "system.h"
00025 #include "rtl.h"
00026 #include "tree.h"
00027 #include "regs.h"
00028 #include "hard-reg-set.h"
00029 #include "real.h"
00030 #include "insn-config.h"
00031 #include "conditions.h"
00032 #include "output.h"
00033 #include "insn-attr.h"
00034 #include "flags.h"
00035 #include "recog.h"
00036 #include "expr.h"
00037 #include "optabs.h"
00038 #include "except.h"
00039 #include "function.h"
00040 #include "ggc.h"
00041 #include "basic-block.h"
00042 #include "toplev.h"
00043 #include "sched-int.h"
00044 #include "timevar.h"
00045 #include "target.h"
00046 #include "target-def.h"
00047 #include "tm_p.h"
00048 #include "langhooks.h"
00049 
/* Flag used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  It starts out as 0.  Neither macro is defined
   in this file; see their definitions for how the flag is used.  */
int ia64_asm_output_label = 0;
00053 
/* The two operands of a compare operation.  They are stored from the
   compare and supply the information needed to generate the branch and
   scc insns that follow it.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;
00058 
/* Register names for ia64_expand_prologue: the numeric names "r32"
   through "r127", so that entry I is the name of register r(32 + I).  */
static const char * const ia64_reg_numbers[96] =
{
  /*  0 */ "r32",  "r33",  "r34",  "r35",  "r36",  "r37",  "r38",  "r39",
  /*  8 */ "r40",  "r41",  "r42",  "r43",  "r44",  "r45",  "r46",  "r47",
  /* 16 */ "r48",  "r49",  "r50",  "r51",  "r52",  "r53",  "r54",  "r55",
  /* 24 */ "r56",  "r57",  "r58",  "r59",  "r60",  "r61",  "r62",  "r63",
  /* 32 */ "r64",  "r65",  "r66",  "r67",  "r68",  "r69",  "r70",  "r71",
  /* 40 */ "r72",  "r73",  "r74",  "r75",  "r76",  "r77",  "r78",  "r79",
  /* 48 */ "r80",  "r81",  "r82",  "r83",  "r84",  "r85",  "r86",  "r87",
  /* 56 */ "r88",  "r89",  "r90",  "r91",  "r92",  "r93",  "r94",  "r95",
  /* 64 */ "r96",  "r97",  "r98",  "r99",  "r100", "r101", "r102", "r103",
  /* 72 */ "r104", "r105", "r106", "r107", "r108", "r109", "r110", "r111",
  /* 80 */ "r112", "r113", "r114", "r115", "r116", "r117", "r118", "r119",
  /* 88 */ "r120", "r121", "r122", "r123", "r124", "r125", "r126", "r127"
};
00073 
/* Symbolic names "in0" .. "in7", indexed by input register number.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{
  "in0", "in1", "in2", "in3",
  "in4", "in5", "in6", "in7"
};
00077 
/* Symbolic names "loc0" .. "loc79", indexed by local register number.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{
  "loc0",  "loc1",  "loc2",  "loc3",  "loc4",
  "loc5",  "loc6",  "loc7",  "loc8",  "loc9",
  "loc10", "loc11", "loc12", "loc13", "loc14",
  "loc15", "loc16", "loc17", "loc18", "loc19",
  "loc20", "loc21", "loc22", "loc23", "loc24",
  "loc25", "loc26", "loc27", "loc28", "loc29",
  "loc30", "loc31", "loc32", "loc33", "loc34",
  "loc35", "loc36", "loc37", "loc38", "loc39",
  "loc40", "loc41", "loc42", "loc43", "loc44",
  "loc45", "loc46", "loc47", "loc48", "loc49",
  "loc50", "loc51", "loc52", "loc53", "loc54",
  "loc55", "loc56", "loc57", "loc58", "loc59",
  "loc60", "loc61", "loc62", "loc63", "loc64",
  "loc65", "loc66", "loc67", "loc68", "loc69",
  "loc70", "loc71", "loc72", "loc73", "loc74",
  "loc75", "loc76", "loc77", "loc78", "loc79"
};
00090 
/* Symbolic names "out0" .. "out7", indexed by output register number.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{
  "out0", "out1", "out2", "out3",
  "out4", "out5", "out6", "out7"
};
00094 
/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  The initial value is 22.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Size threshold for the small data sections: variables which are this
   size or smaller are put in the sdata/sbss sections.  */

unsigned int ia64_section_threshold;
00113 
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* Size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* Top of the reg spill area from the
                                     cfa.  */
  HOST_WIDE_INT spill_size;       /* Size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* Size of spill area for others.  */
  HARD_REG_SET mask;              /* Mask of saved registers.  */
  unsigned int gr_used_mask;      /* Mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* Number of spilled registers.  */
  int reg_fp;                     /* Register for fp.  */
  int reg_save_b0;                /* Save register for b0.  */
  int reg_save_pr;                /* Save register for prs.  */
  int reg_save_ar_pfs;            /* Save register for ar.pfs.  */
  int reg_save_ar_unat;           /* Save register for ar.unat.  */
  int reg_save_ar_lc;             /* Save register for ar.lc.  */
  int reg_save_gp;                /* Save register for gp.  */
  int n_input_regs;               /* Number of input registers used.  */
  int n_local_regs;               /* Number of local registers used.  */
  int n_output_regs;              /* Number of output registers used.  */
  int n_rotate_regs;              /* Number of rotating registers used.  */

  char need_regstk;               /* True if a .regstk directive needed.  */
  char initialized;               /* True if the data is finalized.  */
};
00143 
/* Current frame information calculated by ia64_compute_frame_size.
   This is the file's single, file-local instance of
   struct ia64_frame_info.  */
static struct ia64_frame_info current_frame_info;
00146 
/* Forward declarations of the file-local functions.  The groups below
   follow the names only; the definitions are elsewhere in this file.  */

/* TLS helpers, frame size computation and register spill/restore.  */
static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

/* Miscellaneous helpers.  ia64_in_small_data_p, ia64_encode_section_info
   and ia64_strip_name_encoding are installed as target hooks further
   down in this file.  */
static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

/* Expanders, plus the assembler output routines that are installed as
   TARGET_ASM_INTEGER and TARGET_ASM_FUNCTION_* further down.  */
static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
                                             tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
                                                 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

/* Scheduler routines.  All except ia64_internal_sched_reorder are
   installed through the TARGET_SCHED_* macros further down.  */
static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
                                                int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

/* Installed as TARGET_ASM_OUTPUT_MI_THUNK further down.  */
static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
                                          HOST_WIDE_INT, tree));

/* Section selection routines.  Those marked ATTRIBUTE_UNUSED are not
   necessarily referenced in every configuration.  */
static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                             unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
                                                 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
                                                     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;
00217 
/* Table of valid machine attributes, installed as TARGET_ATTRIBUTE_TABLE
   further down.  It holds one real entry, "syscall_linkage", which takes
   no arguments (min_len == max_len == 0) and has no handler, followed by
   the all-NULL terminator entry.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};
00225 
/* Initialize the GCC target structure.  Each hook is first #undef'd and
   then redefined to the ia64 implementation; TARGET_INITIALIZER picks the
   definitions up when targetm is defined at the end of this block.  */

/* Machine attributes and builtins.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

/* Assembler directives for emitting integers: data1/2/4/8 for aligned
   data and the ".ua" forms for unaligned data.  */
#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

/* Function prologue/epilogue output.  */
#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

/* Small data and symbol name encoding.  */
#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

/* Instruction scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

/* TARGET_HAVE_TLS is overridden to true only when HAVE_AS_TLS is
   defined.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

/* Thunk output: ia64_output_mi_thunk emits them, and the "can output"
   query is answered by hook_bool_tree_hwi_hwi_tree_true.  */
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;
00293 
00294 /* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
00295 
00296 int
00297 call_operand (op, mode)
00298      rtx op;
00299      enum machine_mode mode;
00300 {
00301   if (mode != GET_MODE (op) && mode != VOIDmode)
00302     return 0;
00303 
00304   return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
00305     || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
00306 }
00307 
00308 /* Return 1 if OP refers to a symbol in the sdata section.  */
00309 
00310 int
00311 sdata_symbolic_operand (op, mode)
00312      rtx op;
00313      enum machine_mode mode ATTRIBUTE_UNUSED;
00314 {
00315   switch (GET_CODE (op))
00316     {
00317     case CONST:
00318       if (GET_CODE (XEXP (op, 0)) != PLUS
00319     || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
00320   break;
00321       op = XEXP (XEXP (op, 0), 0);
00322       /* FALLTHRU */
00323 
00324     case SYMBOL_REF:
00325       if (CONSTANT_POOL_ADDRESS_P (op))
00326   return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
00327       else
00328   {
00329     const char *str = XSTR (op, 0);
00330           return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
00331   }
00332 
00333     default:
00334       break;
00335     }
00336 
00337   return 0;
00338 }
00339 
00340 /* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
00341 
00342 int
00343 got_symbolic_operand (op, mode)
00344      rtx op;
00345      enum machine_mode mode ATTRIBUTE_UNUSED;
00346 {
00347   switch (GET_CODE (op))
00348     {
00349     case CONST:
00350       op = XEXP (op, 0);
00351       if (GET_CODE (op) != PLUS)
00352   return 0;
00353       if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
00354   return 0;
00355       op = XEXP (op, 1);
00356       if (GET_CODE (op) != CONST_INT)
00357   return 0;
00358 
00359   return 1;
00360 
00361       /* Ok if we're not using GOT entries at all.  */
00362       if (TARGET_NO_PIC || TARGET_AUTO_PIC)
00363   return 1;
00364 
00365       /* "Ok" while emitting rtl, since otherwise we won't be provided
00366    with the entire offset during emission, which makes it very
00367    hard to split the offset into high and low parts.  */
00368       if (rtx_equal_function_value_matters)
00369   return 1;
00370 
00371       /* Force the low 14 bits of the constant to zero so that we do not
00372    use up so many GOT entries.  */
00373       return (INTVAL (op) & 0x3fff) == 0;
00374 
00375     case SYMBOL_REF:
00376     case LABEL_REF:
00377       return 1;
00378 
00379     default:
00380       break;
00381     }
00382   return 0;
00383 }
00384 
00385 /* Return 1 if OP refers to a symbol.  */
00386 
00387 int
00388 symbolic_operand (op, mode)
00389      rtx op;
00390      enum machine_mode mode ATTRIBUTE_UNUSED;
00391 {
00392   switch (GET_CODE (op))
00393     {
00394     case CONST:
00395     case SYMBOL_REF:
00396     case LABEL_REF:
00397       return 1;
00398 
00399     default:
00400       break;
00401     }
00402   return 0;
00403 }
00404 
00405 /* Return tls_model if OP refers to a TLS symbol.  */
00406 
00407 int
00408 tls_symbolic_operand (op, mode)
00409      rtx op;
00410      enum machine_mode mode ATTRIBUTE_UNUSED;
00411 {
00412   const char *str;
00413 
00414   if (GET_CODE (op) != SYMBOL_REF)
00415     return 0;
00416   str = XSTR (op, 0);
00417   if (str[0] != ENCODE_SECTION_INFO_CHAR)
00418     return 0;
00419   switch (str[1])
00420     {
00421     case 'G':
00422       return TLS_MODEL_GLOBAL_DYNAMIC;
00423     case 'L':
00424       return TLS_MODEL_LOCAL_DYNAMIC;
00425     case 'i':
00426       return TLS_MODEL_INITIAL_EXEC;
00427     case 'l':
00428       return TLS_MODEL_LOCAL_EXEC;
00429     }
00430   return 0;
00431 }
00432 
00433 
00434 /* Return 1 if OP refers to a function.  */
00435 
00436 int
00437 function_operand (op, mode)
00438      rtx op;
00439      enum machine_mode mode ATTRIBUTE_UNUSED;
00440 {
00441   if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
00442     return 1;
00443   else
00444     return 0;
00445 }
00446 
00447 /* Return 1 if OP is setjmp or a similar function.  */
00448 
00449 /* ??? This is an unsatisfying solution.  Should rethink.  */
00450 
00451 int
00452 setjmp_operand (op, mode)
00453      rtx op;
00454      enum machine_mode mode ATTRIBUTE_UNUSED;
00455 {
00456   const char *name;
00457   int retval = 0;
00458 
00459   if (GET_CODE (op) != SYMBOL_REF)
00460     return 0;
00461 
00462   name = XSTR (op, 0);
00463 
00464   /* The following code is borrowed from special_function_p in calls.c.  */
00465 
00466   /* Disregard prefix _, __ or __x.  */
00467   if (name[0] == '_')
00468     {
00469       if (name[1] == '_' && name[2] == 'x')
00470   name += 3;
00471       else if (name[1] == '_')
00472   name += 2;
00473       else
00474   name += 1;
00475     }
00476 
00477   if (name[0] == 's')
00478     {
00479       retval
00480   = ((name[1] == 'e'
00481       && (! strcmp (name, "setjmp")
00482     || ! strcmp (name, "setjmp_syscall")))
00483      || (name[1] == 'i'
00484          && ! strcmp (name, "sigsetjmp"))
00485      || (name[1] == 'a'
00486          && ! strcmp (name, "savectx")));
00487     }
00488   else if ((name[0] == 'q' && name[1] == 's'
00489       && ! strcmp (name, "qsetjmp"))
00490      || (name[0] == 'v' && name[1] == 'f'
00491          && ! strcmp (name, "vfork")))
00492     retval = 1;
00493 
00494   return retval;
00495 }
00496 
00497 /* Return 1 if OP is a general operand, but when pic exclude symbolic
00498    operands.  */
00499 
00500 /* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
00501    from PREDICATE_CODES.  */
00502 
00503 int
00504 move_operand (op, mode)
00505      rtx op;
00506      enum machine_mode mode;
00507 {
00508   if (! TARGET_NO_PIC && symbolic_operand (op, mode))
00509     return 0;
00510 
00511   return general_operand (op, mode);
00512 }
00513 
00514 /* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
00515 
00516 int
00517 gr_register_operand (op, mode)
00518      rtx op;
00519      enum machine_mode mode;
00520 {
00521   if (! register_operand (op, mode))
00522     return 0;
00523   if (GET_CODE (op) == SUBREG)
00524     op = SUBREG_REG (op);
00525   if (GET_CODE (op) == REG)
00526     {
00527       unsigned int regno = REGNO (op);
00528       if (regno < FIRST_PSEUDO_REGISTER)
00529   return GENERAL_REGNO_P (regno);
00530     }
00531   return 1;
00532 }
00533 
00534 /* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
00535 
00536 int
00537 fr_register_operand (op, mode)
00538      rtx op;
00539      enum machine_mode mode;
00540 {
00541   if (! register_operand (op, mode))
00542     return 0;
00543   if (GET_CODE (op) == SUBREG)
00544     op = SUBREG_REG (op);
00545   if (GET_CODE (op) == REG)
00546     {
00547       unsigned int regno = REGNO (op);
00548       if (regno < FIRST_PSEUDO_REGISTER)
00549   return FR_REGNO_P (regno);
00550     }
00551   return 1;
00552 }
00553 
00554 /* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
00555 
00556 int
00557 grfr_register_operand (op, mode)
00558      rtx op;
00559      enum machine_mode mode;
00560 {
00561   if (! register_operand (op, mode))
00562     return 0;
00563   if (GET_CODE (op) == SUBREG)
00564     op = SUBREG_REG (op);
00565   if (GET_CODE (op) == REG)
00566     {
00567       unsigned int regno = REGNO (op);
00568       if (regno < FIRST_PSEUDO_REGISTER)
00569   return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
00570     }
00571   return 1;
00572 }
00573 
00574 /* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
00575 
00576 int
00577 gr_nonimmediate_operand (op, mode)
00578      rtx op;
00579      enum machine_mode mode;
00580 {
00581   if (! nonimmediate_operand (op, mode))
00582     return 0;
00583   if (GET_CODE (op) == SUBREG)
00584     op = SUBREG_REG (op);
00585   if (GET_CODE (op) == REG)
00586     {
00587       unsigned int regno = REGNO (op);
00588       if (regno < FIRST_PSEUDO_REGISTER)
00589   return GENERAL_REGNO_P (regno);
00590     }
00591   return 1;
00592 }
00593 
00594 /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
00595 
00596 int
00597 fr_nonimmediate_operand (op, mode)
00598      rtx op;
00599      enum machine_mode mode;
00600 {
00601   if (! nonimmediate_operand (op, mode))
00602     return 0;
00603   if (GET_CODE (op) == SUBREG)
00604     op = SUBREG_REG (op);
00605   if (GET_CODE (op) == REG)
00606     {
00607       unsigned int regno = REGNO (op);
00608       if (regno < FIRST_PSEUDO_REGISTER)
00609   return FR_REGNO_P (regno);
00610     }
00611   return 1;
00612 }
00613 
00614 /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
00615 
00616 int
00617 grfr_nonimmediate_operand (op, mode)
00618      rtx op;
00619      enum machine_mode mode;
00620 {
00621   if (! nonimmediate_operand (op, mode))
00622     return 0;
00623   if (GET_CODE (op) == SUBREG)
00624     op = SUBREG_REG (op);
00625   if (GET_CODE (op) == REG)
00626     {
00627       unsigned int regno = REGNO (op);
00628       if (regno < FIRST_PSEUDO_REGISTER)
00629   return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
00630     }
00631   return 1;
00632 }
00633 
00634 /* Return 1 if OP is a GR register operand, or zero.  */
00635 
00636 int
00637 gr_reg_or_0_operand (op, mode)
00638      rtx op;
00639      enum machine_mode mode;
00640 {
00641   return (op == const0_rtx || gr_register_operand (op, mode));
00642 }
00643 
00644 /* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
00645 
00646 int
00647 gr_reg_or_5bit_operand (op, mode)
00648      rtx op;
00649      enum machine_mode mode;
00650 {
00651   return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
00652     || GET_CODE (op) == CONSTANT_P_RTX
00653     || gr_register_operand (op, mode));
00654 }
00655 
00656 /* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
00657 
00658 int
00659 gr_reg_or_6bit_operand (op, mode)
00660      rtx op;
00661      enum machine_mode mode;
00662 {
00663   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
00664     || GET_CODE (op) == CONSTANT_P_RTX
00665     || gr_register_operand (op, mode));
00666 }
00667 
00668 /* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
00669 
00670 int
00671 gr_reg_or_8bit_operand (op, mode)
00672      rtx op;
00673      enum machine_mode mode;
00674 {
00675   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
00676     || GET_CODE (op) == CONSTANT_P_RTX
00677     || gr_register_operand (op, mode));
00678 }
00679 
00680 /* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
00681 
00682 int
00683 grfr_reg_or_8bit_operand (op, mode)
00684      rtx op;
00685      enum machine_mode mode;
00686 {
00687   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
00688     || GET_CODE (op) == CONSTANT_P_RTX
00689     || grfr_register_operand (op, mode));
00690 }
00691 
00692 /* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
00693    operand.  */
00694 
00695 int
00696 gr_reg_or_8bit_adjusted_operand (op, mode)
00697      rtx op;
00698      enum machine_mode mode;
00699 {
00700   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
00701     || GET_CODE (op) == CONSTANT_P_RTX
00702     || gr_register_operand (op, mode));
00703 }
00704 
00705 /* Return 1 if OP is a register operand, or is valid for both an 8 bit
00706    immediate and an 8 bit adjusted immediate operand.  This is necessary
00707    because when we emit a compare, we don't know what the condition will be,
00708    so we need the union of the immediates accepted by GT and LT.  */
00709 
00710 int
00711 gr_reg_or_8bit_and_adjusted_operand (op, mode)
00712      rtx op;
00713      enum machine_mode mode;
00714 {
00715   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
00716      && CONST_OK_FOR_L (INTVAL (op)))
00717     || GET_CODE (op) == CONSTANT_P_RTX
00718     || gr_register_operand (op, mode));
00719 }
00720 
00721 /* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
00722 
00723 int
00724 gr_reg_or_14bit_operand (op, mode)
00725      rtx op;
00726      enum machine_mode mode;
00727 {
00728   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
00729     || GET_CODE (op) == CONSTANT_P_RTX
00730     || gr_register_operand (op, mode));
00731 }
00732 
00733 /* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
00734 
00735 int
00736 gr_reg_or_22bit_operand (op, mode)
00737      rtx op;
00738      enum machine_mode mode;
00739 {
00740   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
00741     || GET_CODE (op) == CONSTANT_P_RTX
00742     || gr_register_operand (op, mode));
00743 }
00744 
00745 /* Return 1 if OP is a 6 bit immediate operand.  */
00746 
00747 int
00748 shift_count_operand (op, mode)
00749      rtx op;
00750      enum machine_mode mode ATTRIBUTE_UNUSED;
00751 {
00752   return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
00753     || GET_CODE (op) == CONSTANT_P_RTX);
00754 }
00755 
00756 /* Return 1 if OP is a 5 bit immediate operand.  */
00757 
00758 int
00759 shift_32bit_count_operand (op, mode)
00760      rtx op;
00761      enum machine_mode mode ATTRIBUTE_UNUSED;
00762 {
00763   return ((GET_CODE (op) == CONST_INT
00764      && (INTVAL (op) >= 0 && INTVAL (op) < 32))
00765     || GET_CODE (op) == CONSTANT_P_RTX);
00766 }
00767 
00768 /* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
00769 
00770 int
00771 shladd_operand (op, mode)
00772      rtx op;
00773      enum machine_mode mode ATTRIBUTE_UNUSED;
00774 {
00775   return (GET_CODE (op) == CONST_INT
00776     && (INTVAL (op) == 2 || INTVAL (op) == 4
00777         || INTVAL (op) == 8 || INTVAL (op) == 16));
00778 }
00779 
00780 /* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
00781 
00782 int
00783 fetchadd_operand (op, mode)
00784      rtx op;
00785      enum machine_mode mode ATTRIBUTE_UNUSED;
00786 {
00787   return (GET_CODE (op) == CONST_INT
00788           && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
00789               INTVAL (op) == -4  || INTVAL (op) == -1 ||
00790               INTVAL (op) == 1   || INTVAL (op) == 4  ||
00791               INTVAL (op) == 8   || INTVAL (op) == 16));
00792 }
00793 
00794 /* Return 1 if OP is a floating-point constant zero, one, or a register.  */
00795 
00796 int
00797 fr_reg_or_fp01_operand (op, mode)
00798      rtx op;
00799      enum machine_mode mode;
00800 {
00801   return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
00802     || fr_register_operand (op, mode));
00803 }
00804 
00805 /* Like nonimmediate_operand, but don't allow MEMs that try to use a
00806    POST_MODIFY with a REG as displacement.  */
00807 
00808 int
00809 destination_operand (op, mode)
00810      rtx op;
00811      enum machine_mode mode;
00812 {
00813   if (! nonimmediate_operand (op, mode))
00814     return 0;
00815   if (GET_CODE (op) == MEM
00816       && GET_CODE (XEXP (op, 0)) == POST_MODIFY
00817       && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
00818     return 0;
00819   return 1;
00820 }
00821 
00822 /* Like memory_operand, but don't allow post-increments.  */
00823 
00824 int
00825 not_postinc_memory_operand (op, mode)
00826      rtx op;
00827      enum machine_mode mode;
00828 {
00829   return (memory_operand (op, mode)
00830     && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
00831 }
00832 
00833 /* Return 1 if this is a comparison operator, which accepts an normal 8-bit
00834    signed immediate operand.  */
00835 
00836 int
00837 normal_comparison_operator (op, mode)
00838     register rtx op;
00839     enum machine_mode mode;
00840 {
00841   enum rtx_code code = GET_CODE (op);
00842   return ((mode == VOIDmode || GET_MODE (op) == mode)
00843     && (code == EQ || code == NE
00844         || code == GT || code == LE || code == GTU || code == LEU));
00845 }
00846 
00847 /* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
00848    signed immediate operand.  */
00849 
00850 int
00851 adjusted_comparison_operator (op, mode)
00852     register rtx op;
00853     enum machine_mode mode;
00854 {
00855   enum rtx_code code = GET_CODE (op);
00856   return ((mode == VOIDmode || GET_MODE (op) == mode)
00857     && (code == LT || code == GE || code == LTU || code == GEU));
00858 }
00859 
00860 /* Return 1 if this is a signed inequality operator.  */
00861 
00862 int
00863 signed_inequality_operator (op, mode)
00864     register rtx op;
00865     enum machine_mode mode;
00866 {
00867   enum rtx_code code = GET_CODE (op);
00868   return ((mode == VOIDmode || GET_MODE (op) == mode)
00869     && (code == GE || code == GT
00870         || code == LE || code == LT));
00871 }
00872 
00873 /* Return 1 if this operator is valid for predication.  */
00874 
00875 int
00876 predicate_operator (op, mode)
00877     register rtx op;
00878     enum machine_mode mode;
00879 {
00880   enum rtx_code code = GET_CODE (op);
00881   return ((GET_MODE (op) == mode || mode == VOIDmode)
00882     && (code == EQ || code == NE));
00883 }
00884 
00885 /* Return 1 if this operator can be used in a conditional operation.  */
00886 
00887 int
00888 condop_operator (op, mode)
00889     register rtx op;
00890     enum machine_mode mode;
00891 {
00892   enum rtx_code code = GET_CODE (op);
00893   return ((GET_MODE (op) == mode || mode == VOIDmode)
00894     && (code == PLUS || code == MINUS || code == AND
00895         || code == IOR || code == XOR));
00896 }
00897 
00898 /* Return 1 if this is the ar.lc register.  */
00899 
00900 int
00901 ar_lc_reg_operand (op, mode)
00902      register rtx op;
00903      enum machine_mode mode;
00904 {
00905   return (GET_MODE (op) == DImode
00906     && (mode == DImode || mode == VOIDmode)
00907     && GET_CODE (op) == REG
00908     && REGNO (op) == AR_LC_REGNUM);
00909 }
00910 
00911 /* Return 1 if this is the ar.ccv register.  */
00912 
00913 int
00914 ar_ccv_reg_operand (op, mode)
00915      register rtx op;
00916      enum machine_mode mode;
00917 {
00918   return ((GET_MODE (op) == mode || mode == VOIDmode)
00919     && GET_CODE (op) == REG
00920     && REGNO (op) == AR_CCV_REGNUM);
00921 }
00922 
00923 /* Return 1 if this is the ar.pfs register.  */
00924 
00925 int
00926 ar_pfs_reg_operand (op, mode)
00927      register rtx op;
00928      enum machine_mode mode;
00929 {
00930   return ((GET_MODE (op) == mode || mode == VOIDmode)
00931     && GET_CODE (op) == REG
00932     && REGNO (op) == AR_PFS_REGNUM);
00933 }
00934 
00935 /* Like general_operand, but don't allow (mem (addressof)).  */
00936 
00937 int
00938 general_tfmode_operand (op, mode)
00939      rtx op;
00940      enum machine_mode mode;
00941 {
00942   if (! general_operand (op, mode))
00943     return 0;
00944   if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
00945     return 0;
00946   return 1;
00947 }
00948 
00949 /* Similarly.  */
00950 
00951 int
00952 destination_tfmode_operand (op, mode)
00953      rtx op;
00954      enum machine_mode mode;
00955 {
00956   if (! destination_operand (op, mode))
00957     return 0;
00958   if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
00959     return 0;
00960   return 1;
00961 }
00962 
00963 /* Similarly.  */
00964 
00965 int
00966 tfreg_or_fp01_operand (op, mode)
00967      rtx op;
00968      enum machine_mode mode;
00969 {
00970   if (GET_CODE (op) == SUBREG)
00971     return 0;
00972   return fr_reg_or_fp01_operand (op, mode);
00973 }
00974 
00975 /* Return 1 if OP is valid as a base register in a reg + offset address.  */
00976 
00977 int
00978 basereg_operand (op, mode)
00979      rtx op;
00980      enum machine_mode mode;
00981 {
00982   /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
00983      checks from pa.c basereg_operand as well?  Seems to be OK without them
00984      in test runs.  */
00985 
00986   return (register_operand (op, mode) &&
00987     REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
00988 }
00989 
00990 /* Return 1 if the operands of a move are ok.  */
00991 
00992 int
00993 ia64_move_ok (dst, src)
00994      rtx dst, src;
00995 {
00996   /* If we're under init_recog_no_volatile, we'll not be able to use
00997      memory_operand.  So check the code directly and don't worry about
00998      the validity of the underlying address, which should have been
00999      checked elsewhere anyway.  */
01000   if (GET_CODE (dst) != MEM)
01001     return 1;
01002   if (GET_CODE (src) == MEM)
01003     return 0;
01004   if (register_operand (src, VOIDmode))
01005     return 1;
01006 
01007   /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
01008   if (INTEGRAL_MODE_P (GET_MODE (dst)))
01009     return src == const0_rtx;
01010   else
01011     return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
01012 }
01013 
01014 /* Return 0 if we are doing C++ code.  This optimization fails with
01015    C++ because of GNAT c++/6685.  */
01016 
01017 int
01018 addp4_optimize_ok (op1, op2)
01019      rtx op1, op2;
01020 {
01021 
01022   if (!strcmp (lang_hooks.name, "GNU C++"))
01023     return 0;
01024 
01025   return (basereg_operand (op1, GET_MODE(op1)) !=
01026     basereg_operand (op2, GET_MODE(op2)));
01027 }
01028 
01029 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
01030    Return the length of the field, or <= 0 on failure.  */
01031 
01032 int
01033 ia64_depz_field_mask (rop, rshift)
01034      rtx rop, rshift;
01035 {
01036   unsigned HOST_WIDE_INT op = INTVAL (rop);
01037   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
01038 
01039   /* Get rid of the zero bits we're shifting in.  */
01040   op >>= shift;
01041 
01042   /* We must now have a solid block of 1's at bit 0.  */
01043   return exact_log2 (op + 1);
01044 }
01045 
01046 /* Expand a symbolic constant load.  */
01047 /* ??? Should generalize this, so that we can also support 32 bit pointers.  */
01048 
01049 void
01050 ia64_expand_load_address (dest, src, scratch)
01051       rtx dest, src, scratch;
01052 {
01053   rtx temp;
01054 
01055   /* The destination could be a MEM during initial rtl generation,
01056      which isn't a valid destination for the PIC load address patterns.  */
01057   if (! register_operand (dest, DImode))
01058     if (! scratch || ! register_operand (scratch, DImode))
01059       temp = gen_reg_rtx (DImode);
01060     else
01061       temp = scratch;
01062   else
01063     temp = dest;
01064 
01065   if (tls_symbolic_operand (src, Pmode))
01066     abort ();
01067 
01068   if (TARGET_AUTO_PIC)
01069     emit_insn (gen_load_gprel64 (temp, src));
01070   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
01071     emit_insn (gen_load_fptr (temp, src));
01072   else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
01073            && sdata_symbolic_operand (src, VOIDmode))
01074     emit_insn (gen_load_gprel (temp, src));
01075   else if (GET_CODE (src) == CONST
01076      && GET_CODE (XEXP (src, 0)) == PLUS
01077      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
01078      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
01079     {
01080       rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
01081       rtx sym = XEXP (XEXP (src, 0), 0);
01082       HOST_WIDE_INT ofs, hi, lo;
01083 
01084       /* Split the offset into a sign extended 14-bit low part
01085    and a complementary high part.  */
01086       ofs = INTVAL (XEXP (XEXP (src, 0), 1));
01087       lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
01088       hi = ofs - lo;
01089 
01090       if (! scratch)
01091   scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
01092 
01093       emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
01094           scratch));
01095       emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
01096     }
01097   else
01098     {
01099       rtx insn;
01100       if (! scratch)
01101   scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
01102 
01103       insn = emit_insn (gen_load_symptr (temp, src, scratch));
01104 #ifdef POINTERS_EXTEND_UNSIGNED
01105       if (GET_MODE (temp) != GET_MODE (src))
01106   src = convert_memory_address (GET_MODE (temp), src);
01107 #endif
01108       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
01109     }
01110 
01111   if (temp != dest)
01112     {
01113       if (GET_MODE (dest) != GET_MODE (temp))
01114   temp = convert_to_mode (GET_MODE (dest), temp, 0);
01115       emit_move_insn (dest, temp);
01116     }
01117 }
01118 
01119 static GTY(()) rtx gen_tls_tga;
01120 static rtx
01121 gen_tls_get_addr ()
01122 {
01123   if (!gen_tls_tga)
01124     {
01125       gen_tls_tga = init_one_libfunc ("__tls_get_addr");
01126      }
01127   return gen_tls_tga;
01128 }
01129 
01130 static GTY(()) rtx thread_pointer_rtx;
01131 static rtx
01132 gen_thread_pointer ()
01133 {
01134   if (!thread_pointer_rtx)
01135     {
01136       thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
01137       RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
01138     }
01139   return thread_pointer_rtx;
01140 }
01141 
01142 rtx
01143 ia64_expand_move (op0, op1)
01144      rtx op0, op1;
01145 {
01146   enum machine_mode mode = GET_MODE (op0);
01147 
01148   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
01149     op1 = force_reg (mode, op1);
01150 
01151   if (mode == Pmode || mode == ptr_mode)
01152     {
01153       enum tls_model tls_kind;
01154       if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
01155   {
01156     rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
01157 
01158     switch (tls_kind)
01159       {
01160       case TLS_MODEL_GLOBAL_DYNAMIC:
01161         start_sequence ();
01162 
01163         tga_op1 = gen_reg_rtx (Pmode);
01164         emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
01165         tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
01166         RTX_UNCHANGING_P (tga_op1) = 1;
01167 
01168         tga_op2 = gen_reg_rtx (Pmode);
01169         emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
01170         tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
01171         RTX_UNCHANGING_P (tga_op2) = 1;
01172         
01173         tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
01174              LCT_CONST, Pmode, 2, tga_op1,
01175              Pmode, tga_op2, Pmode);
01176 
01177         insns = get_insns ();
01178         end_sequence ();
01179 
01180         emit_libcall_block (insns, op0, tga_ret, op1);
01181         return NULL_RTX;
01182 
01183       case TLS_MODEL_LOCAL_DYNAMIC:
01184         /* ??? This isn't the completely proper way to do local-dynamic
01185      If the call to __tls_get_addr is used only by a single symbol,
01186      then we should (somehow) move the dtprel to the second arg
01187      to avoid the extra add.  */
01188         start_sequence ();
01189 
01190         tga_op1 = gen_reg_rtx (Pmode);
01191         emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
01192         tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
01193         RTX_UNCHANGING_P (tga_op1) = 1;
01194 
01195         tga_op2 = const0_rtx;
01196 
01197         tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
01198              LCT_CONST, Pmode, 2, tga_op1,
01199              Pmode, tga_op2, Pmode);
01200 
01201         insns = get_insns ();
01202         end_sequence ();
01203 
01204         tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
01205           UNSPEC_LD_BASE);
01206         tmp = gen_reg_rtx (Pmode);
01207         emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
01208 
01209         if (register_operand (op0, Pmode))
01210     tga_ret = op0;
01211         else
01212     tga_ret = gen_reg_rtx (Pmode);
01213         if (TARGET_TLS64)
01214     {
01215       emit_insn (gen_load_dtprel (tga_ret, op1));
01216       emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
01217     }
01218         else
01219     emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
01220         if (tga_ret == op0)
01221     return NULL_RTX;
01222         op1 = tga_ret;
01223         break;
01224 
01225       case TLS_MODEL_INITIAL_EXEC:
01226         tmp = gen_reg_rtx (Pmode);
01227         emit_insn (gen_load_ltoff_tprel (tmp, op1));
01228         tmp = gen_rtx_MEM (Pmode, tmp);
01229         RTX_UNCHANGING_P (tmp) = 1;
01230         tmp = force_reg (Pmode, tmp);
01231 
01232         if (register_operand (op0, Pmode))
01233     op1 = op0;
01234         else
01235     op1 = gen_reg_rtx (Pmode);
01236         emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
01237         if (op1 == op0)
01238     return NULL_RTX;
01239         break;
01240 
01241       case TLS_MODEL_LOCAL_EXEC:
01242         if (register_operand (op0, Pmode))
01243     tmp = op0;
01244         else
01245     tmp = gen_reg_rtx (Pmode);
01246         if (TARGET_TLS64)
01247     {
01248       emit_insn (gen_load_tprel (tmp, op1));
01249       emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
01250     }
01251         else
01252     emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
01253         if (tmp == op0)
01254     return NULL_RTX;
01255         op1 = tmp;
01256         break;
01257 
01258       default:
01259         abort ();
01260       }
01261   }
01262       else if (!TARGET_NO_PIC &&
01263          (symbolic_operand (op1, Pmode) ||
01264     symbolic_operand (op1, ptr_mode)))
01265   {
01266     /* Before optimization starts, delay committing to any particular
01267        type of PIC address load.  If this function gets deferred, we
01268        may acquire information that changes the value of the
01269        sdata_symbolic_operand predicate.
01270 
01271        But don't delay for function pointers.  Loading a function address
01272        actually loads the address of the descriptor not the function.
01273        If we represent these as SYMBOL_REFs, then they get cse'd with
01274        calls, and we end up with calls to the descriptor address instead
01275        of calls to the function address.  Functions are not candidates
01276        for sdata anyways.
01277 
01278        Don't delay for LABEL_REF because the splitter loses REG_LABEL
01279        notes.  Don't delay for pool addresses on general principals;
01280        they'll never become non-local behind our back.  */
01281 
01282     if (rtx_equal_function_value_matters
01283         && GET_CODE (op1) != LABEL_REF
01284         && ! (GET_CODE (op1) == SYMBOL_REF
01285         && (SYMBOL_REF_FLAG (op1)
01286       || CONSTANT_POOL_ADDRESS_P (op1)
01287       || STRING_POOL_ADDRESS_P (op1))))
01288       if (GET_MODE (op1) == DImode)
01289         emit_insn (gen_movdi_symbolic (op0, op1));
01290       else
01291         emit_insn (gen_movsi_symbolic (op0, op1));
01292     else
01293       ia64_expand_load_address (op0, op1, NULL_RTX);
01294     return NULL_RTX;
01295   }
01296     }
01297 
01298   return op1;
01299 }
01300 
01301 /* Split a post-reload TImode reference into two DImode components.  */
01302 
01303 rtx
01304 ia64_split_timode (out, in, scratch)
01305      rtx out[2];
01306      rtx in, scratch;
01307 {
01308   switch (GET_CODE (in))
01309     {
01310     case REG:
01311       out[0] = gen_rtx_REG (DImode, REGNO (in));
01312       out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
01313       return NULL_RTX;
01314 
01315     case MEM:
01316       {
01317   rtx base = XEXP (in, 0);
01318 
01319   switch (GET_CODE (base))
01320     {
01321     case REG:
01322       out[0] = adjust_address (in, DImode, 0);
01323       break;
01324     case POST_MODIFY:
01325       base = XEXP (base, 0);
01326       out[0] = adjust_address (in, DImode, 0);
01327       break;
01328 
01329     /* Since we're changing the mode, we need to change to POST_MODIFY
01330        as well to preserve the size of the increment.  Either that or
01331        do the update in two steps, but we've already got this scratch
01332        register handy so let's use it.  */
01333     case POST_INC:
01334       base = XEXP (base, 0);
01335       out[0]
01336         = change_address (in, DImode,
01337         gen_rtx_POST_MODIFY
01338         (Pmode, base, plus_constant (base, 16)));
01339       break;
01340     case POST_DEC:
01341       base = XEXP (base, 0);
01342       out[0]
01343         = change_address (in, DImode,
01344         gen_rtx_POST_MODIFY
01345         (Pmode, base, plus_constant (base, -16)));
01346       break;
01347     default:
01348       abort ();
01349     }
01350 
01351   if (scratch == NULL_RTX)
01352     abort ();
01353   out[1] = change_address (in, DImode, scratch);
01354   return gen_adddi3 (scratch, base, GEN_INT (8));
01355       }
01356 
01357     case CONST_INT:
01358     case CONST_DOUBLE:
01359       split_double (in, &out[0], &out[1]);
01360       return NULL_RTX;
01361 
01362     default:
01363       abort ();
01364     }
01365 }
01366 
01367 /* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
01368    through memory plus an extra GR scratch register.  Except that you can
01369    either get the first from SECONDARY_MEMORY_NEEDED or the second from
01370    SECONDARY_RELOAD_CLASS, but not both.
01371 
01372    We got into problems in the first place by allowing a construct like
01373    (subreg:TF (reg:TI)), which we got from a union containing a long double.  
01374    This solution attempts to prevent this situation from occurring.  When
01375    we see something like the above, we spill the inner register to memory.  */
01376 
01377 rtx
01378 spill_tfmode_operand (in, force)
01379      rtx in;
01380      int force;
01381 {
01382   if (GET_CODE (in) == SUBREG
01383       && GET_MODE (SUBREG_REG (in)) == TImode
01384       && GET_CODE (SUBREG_REG (in)) == REG)
01385     {
01386       rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
01387       return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
01388     }
01389   else if (force && GET_CODE (in) == REG)
01390     {
01391       rtx mem = gen_mem_addressof (in, NULL_TREE, true);
01392       return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
01393     }
01394   else if (GET_CODE (in) == MEM
01395      && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
01396     return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
01397   else
01398     return in;
01399 }
01400 
01401 /* Emit comparison instruction if necessary, returning the expression
01402    that holds the compare result in the proper mode.  */
01403 
01404 rtx
01405 ia64_expand_compare (code, mode)
01406      enum rtx_code code;
01407      enum machine_mode mode;
01408 {
01409   rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
01410   rtx cmp;
01411 
01412   /* If we have a BImode input, then we already have a compare result, and
01413      do not need to emit another comparison.  */
01414   if (GET_MODE (op0) == BImode)
01415     {
01416       if ((code == NE || code == EQ) && op1 == const0_rtx)
01417   cmp = op0;
01418       else
01419   abort ();
01420     }
01421   else
01422     {
01423       cmp = gen_reg_rtx (BImode);
01424       emit_insn (gen_rtx_SET (VOIDmode, cmp,
01425             gen_rtx_fmt_ee (code, BImode, op0, op1)));
01426       code = NE;
01427     }
01428 
01429   return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
01430 }
01431 
01432 /* Emit the appropriate sequence for a call.  */
01433 void
01434 ia64_expand_call (retval, addr, nextarg, sibcall_p)
01435      rtx retval;
01436      rtx addr;
01437      rtx nextarg ATTRIBUTE_UNUSED;
01438      int sibcall_p;
01439 {
01440   rtx insn, b0;
01441 
01442   addr = XEXP (addr, 0);
01443   b0 = gen_rtx_REG (DImode, R_BR (0));
01444 
01445   /* ??? Should do this for functions known to bind local too.  */
01446   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
01447     {
01448       if (sibcall_p)
01449   insn = gen_sibcall_nogp (addr);
01450       else if (! retval)
01451   insn = gen_call_nogp (addr, b0);
01452       else
01453   insn = gen_call_value_nogp (retval, addr, b0);
01454       insn = emit_call_insn (insn);
01455     }
01456   else
01457     {
01458       if (sibcall_p)
01459   insn = gen_sibcall_gp (addr);
01460       else if (! retval)
01461   insn = gen_call_gp (addr, b0);
01462       else
01463   insn = gen_call_value_gp (retval, addr, b0);
01464       insn = emit_call_insn (insn);
01465 
01466       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
01467     }
01468 
01469   if (sibcall_p)
01470     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
01471 }
01472 void
01473 ia64_reload_gp ()
01474 {
01475   rtx tmp;
01476 
01477   if (current_frame_info.reg_save_gp)
01478     tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
01479   else
01480     {
01481       HOST_WIDE_INT offset;
01482 
01483       offset = (current_frame_info.spill_cfa_off
01484           + current_frame_info.spill_size);
01485       if (frame_pointer_needed)
01486         {
01487           tmp = hard_frame_pointer_rtx;
01488           offset = -offset;
01489         }
01490       else
01491         {
01492           tmp = stack_pointer_rtx;
01493           offset = current_frame_info.total_size - offset;
01494         }
01495 
01496       if (CONST_OK_FOR_I (offset))
01497         emit_insn (gen_adddi3 (pic_offset_table_rtx,
01498              tmp, GEN_INT (offset)));
01499       else
01500         {
01501           emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
01502           emit_insn (gen_adddi3 (pic_offset_table_rtx,
01503                pic_offset_table_rtx, tmp));
01504         }
01505 
01506       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
01507     }
01508 
01509   emit_move_insn (pic_offset_table_rtx, tmp);
01510 }
01511 
01512 void
01513 ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
01514      noreturn_p, sibcall_p)
01515      rtx retval, addr, retaddr, scratch_r, scratch_b;
01516      int noreturn_p, sibcall_p;
01517 {
01518   rtx insn;
01519   bool is_desc = false;
01520 
01521   /* If we find we're calling through a register, then we're actually
01522      calling through a descriptor, so load up the values.  */
01523   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
01524     {
01525       rtx tmp;
01526       bool addr_dead_p;
01527 
01528       /* ??? We are currently constrained to *not* use peep2, because
01529    we can legitimiately change the global lifetime of the GP
01530    (in the form of killing where previously live).  This is 
01531    because a call through a descriptor doesn't use the previous
01532    value of the GP, while a direct call does, and we do not
01533    commit to either form until the split here.
01534 
01535    That said, this means that we lack precise life info for
01536    whether ADDR is dead after this call.  This is not terribly
01537    important, since we can fix things up essentially for free
01538    with the POST_DEC below, but it's nice to not use it when we
01539    can immediately tell it's not necessary.  */
01540       addr_dead_p = ((noreturn_p || sibcall_p
01541           || TEST_HARD_REG_BIT (regs_invalidated_by_call,
01542               REGNO (addr)))
01543          && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
01544 
01545       /* Load the code address into scratch_b.  */
01546       tmp = gen_rtx_POST_INC (Pmode, addr);
01547       tmp = gen_rtx_MEM (Pmode, tmp);
01548       emit_move_insn (scratch_r, tmp);
01549       emit_move_insn (scratch_b, scratch_r);
01550 
01551       /* Load the GP address.  If ADDR is not dead here, then we must
01552    revert the change made above via the POST_INCREMENT.  */
01553       if (!addr_dead_p)
01554   tmp = gen_rtx_POST_DEC (Pmode, addr);
01555       else
01556   tmp = addr;
01557       tmp = gen_rtx_MEM (Pmode, tmp);
01558       emit_move_insn (pic_offset_table_rtx, tmp);
01559 
01560       is_desc = true;
01561       addr = scratch_b;
01562     }
01563 
01564   if (sibcall_p)
01565     insn = gen_sibcall_nogp (addr);
01566   else if (retval)
01567     insn = gen_call_value_nogp (retval, addr, retaddr);
01568   else
01569     insn = gen_call_nogp (addr, retaddr);
01570   emit_call_insn (insn);
01571 
01572   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
01573     ia64_reload_gp ();
01574 }
01575 
01576 /* Begin the assembly file.  */
01577 
01578 void
01579 emit_safe_across_calls (f)
01580      FILE *f;
01581 {
01582   unsigned int rs, re;
01583   int out_state;
01584 
01585   rs = 1;
01586   out_state = 0;
01587   while (1)
01588     {
01589       while (rs < 64 && call_used_regs[PR_REG (rs)])
01590   rs++;
01591       if (rs >= 64)
01592   break;
01593       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
01594   continue;
01595       if (out_state == 0)
01596   {
01597     fputs ("\t.pred.safe_across_calls ", f);
01598     out_state = 1;
01599   }
01600       else
01601   fputc (',', f);
01602       if (re == rs + 1)
01603   fprintf (f, "p%u", rs);
01604       else
01605   fprintf (f, "p%u-p%u", rs, re - 1);
01606       rs = re + 1;
01607     }
01608   if (out_state)
01609     fputc ('\n', f);
01610 }
01611 
01612 /* Helper function for ia64_compute_frame_size: find an appropriate general
01613    register to spill some special register to.  SPECIAL_SPILL_MASK contains
01614    bits in GR0 to GR31 that have already been allocated by this routine.
01615    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
01616 
01617 static int
01618 find_gr_spill (try_locals)
01619      int try_locals;
01620 {
01621   int regno;
01622 
01623   /* If this is a leaf function, first try an otherwise unused
01624      call-clobbered register.  */
01625   if (current_function_is_leaf)
01626     {
01627       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
01628   if (! regs_ever_live[regno]
01629       && call_used_regs[regno]
01630       && ! fixed_regs[regno]
01631       && ! global_regs[regno]
01632       && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
01633     {
01634       current_frame_info.gr_used_mask |= 1 << regno;
01635       return regno;
01636     }
01637     }
01638 
01639   if (try_locals)
01640     {
01641       regno = current_frame_info.n_local_regs;
01642       /* If there is a frame pointer, then we can't use loc79, because
01643    that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
01644    reg_name switching code in ia64_expand_prologue.  */
01645       if (regno < (80 - frame_pointer_needed))
01646   {
01647     current_frame_info.n_local_regs = regno + 1;
01648     return LOC_REG (0) + regno;
01649   }
01650     }
01651 
01652   /* Failed to find a general register to spill to.  Must use stack.  */
01653   return 0;
01654 }
01655 
01656 /* In order to make for nice schedules, we try to allocate every temporary
01657    to a different register.  We must of course stay away from call-saved,
01658    fixed, and global registers.  We must also stay away from registers
01659    allocated in current_frame_info.gr_used_mask, since those include regs
01660    used all through the prologue.
01661 
01662    Any register allocated here must be used immediately.  The idea is to
01663    aid scheduling, not to solve data flow problems.  */
01664 
01665 static int last_scratch_gr_reg;
01666 
01667 static int
01668 next_scratch_gr_reg ()
01669 {
01670   int i, regno;
01671 
01672   for (i = 0; i < 32; ++i)
01673     {
01674       regno = (last_scratch_gr_reg + i + 1) & 31;
01675       if (call_used_regs[regno]
01676     && ! fixed_regs[regno]
01677     && ! global_regs[regno]
01678     && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
01679   {
01680     last_scratch_gr_reg = regno;
01681     return regno;
01682   }
01683     }
01684 
01685   /* There must be _something_ available.  */
01686   abort ();
01687 }
01688 
01689 /* Helper function for ia64_compute_frame_size, called through
01690    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
01691 
01692 static void
01693 mark_reg_gr_used_mask (reg, data)
01694      rtx reg;
01695      void *data ATTRIBUTE_UNUSED;
01696 {
01697   unsigned int regno = REGNO (reg);
01698   if (regno < 32)
01699     {
01700       unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
01701       for (i = 0; i < n; ++i)
01702   current_frame_info.gr_used_mask |= 1 << (regno + i);
01703     }
01704 }
01705 
01706 /* Compute the frame layout of the current function and record it in
01707    current_frame_info; nothing is returned.  SIZE is the number of bytes of
01708    space needed for local variables.  */
01709 
01710 static void
01711 ia64_compute_frame_size (size)
01712      HOST_WIDE_INT size;
01713 {
01714   HOST_WIDE_INT total_size;
01715   HOST_WIDE_INT spill_size = 0;
01716   HOST_WIDE_INT extra_spill_size = 0;
01717   HOST_WIDE_INT pretend_args_size;
01718   HARD_REG_SET mask;
01719   int n_spilled = 0;
01720   int spilled_gr_p = 0;
01721   int spilled_fr_p = 0;
01722   unsigned int regno;
01723   int i;
01724 
01725   if (current_frame_info.initialized)
01726     return;
01727 
01728   memset (&current_frame_info, 0, sizeof current_frame_info);
01729   CLEAR_HARD_REG_SET (mask);
01730 
01731   /* Don't allocate scratches to the return register.  */
01732   diddle_return_value (mark_reg_gr_used_mask, NULL);
01733 
01734   /* Don't allocate scratches to the EH scratch registers.  */
01735   if (cfun->machine->ia64_eh_epilogue_sp)
01736     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
01737   if (cfun->machine->ia64_eh_epilogue_bsp)
01738     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
01739 
01740   /* Find the size of the register stack frame.  We have only 80 local
01741      registers, because we reserve 8 for the inputs and 8 for the
01742      outputs.  */
01743 
01744   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
01745      since we'll be adjusting that down later.  */
01746   regno = LOC_REG (78) + ! frame_pointer_needed;
01747   for (; regno >= LOC_REG (0); regno--)
01748     if (regs_ever_live[regno])
01749       break;
01750   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
01751 
01752   /* For functions marked with the syscall_linkage attribute, we must mark
01753      all eight input registers as in use, so that locals aren't visible to
01754      the caller.  */
01755 
01756   if (cfun->machine->n_varargs > 0
01757       || lookup_attribute ("syscall_linkage",
01758          TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
01759     current_frame_info.n_input_regs = 8;
01760   else
01761     {
01762       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
01763   if (regs_ever_live[regno])
01764     break;
01765       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
01766     }
01767 
01768   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
01769     if (regs_ever_live[regno])
01770       break;
01771   i = regno - OUT_REG (0) + 1;
01772 
01773   /* When -p profiling, we need one output register for the mcount argument.
01774      Likewise for -a profiling for the bb_init_func argument.  For -ax
01775      profiling, we need two output registers for the two bb_init_trace_func
01776      arguments.  */
01777   if (current_function_profile)
01778     i = MAX (i, 1);
01779   current_frame_info.n_output_regs = i;
01780 
01781   /* ??? No rotating register support yet.  */
01782   current_frame_info.n_rotate_regs = 0;
01783 
01784   /* Discover which registers need spilling, and how much room that
01785      will take.  Begin with floating point and general registers, 
01786      which will always wind up on the stack.  */
01787 
01788   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
01789     if (regs_ever_live[regno] && ! call_used_regs[regno])
01790       {
01791   SET_HARD_REG_BIT (mask, regno);
01792   spill_size += 16;
01793   n_spilled += 1;
01794   spilled_fr_p = 1;
01795       }
01796 
01797   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
01798     if (regs_ever_live[regno] && ! call_used_regs[regno])
01799       {
01800   SET_HARD_REG_BIT (mask, regno);
01801   spill_size += 8;
01802   n_spilled += 1;
01803   spilled_gr_p = 1;
01804       }
01805 
01806   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
01807     if (regs_ever_live[regno] && ! call_used_regs[regno])
01808       {
01809   SET_HARD_REG_BIT (mask, regno);
01810   spill_size += 8;
01811   n_spilled += 1;
01812       }
01813 
01814   /* Now come all special registers that might get saved in other
01815      general registers.  */
01816   
01817   if (frame_pointer_needed)
01818     {
01819       current_frame_info.reg_fp = find_gr_spill (1);
01820       /* If we did not get a register, then we take LOC79.  This is guaranteed
01821    to be free, even if regs_ever_live is already set, because this is
01822    HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
01823    as we don't count loc79 above.  */
01824       if (current_frame_info.reg_fp == 0)
01825   {
01826     current_frame_info.reg_fp = LOC_REG (79);
01827     current_frame_info.n_local_regs++;
01828   }
01829     }
01830 
01831   if (! current_function_is_leaf)
01832     {
01833       /* Emit a save of BR0 if we call other functions.  Do this even
01834    if this function doesn't return, as EH depends on this to be
01835    able to unwind the stack.  */
01836       SET_HARD_REG_BIT (mask, BR_REG (0));
01837 
01838       current_frame_info.reg_save_b0 = find_gr_spill (1);
01839       if (current_frame_info.reg_save_b0 == 0)
01840   {
01841     spill_size += 8;
01842     n_spilled += 1;
01843   }
01844 
01845       /* Similarly for ar.pfs.  */
01846       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
01847       current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
01848       if (current_frame_info.reg_save_ar_pfs == 0)
01849   {
01850     extra_spill_size += 8;
01851     n_spilled += 1;
01852   }
01853 
01854       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
01855    registers are clobbered, so we fall back to the stack.  */
01856       current_frame_info.reg_save_gp
01857   = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
01858       if (current_frame_info.reg_save_gp == 0)
01859   {
01860     SET_HARD_REG_BIT (mask, GR_REG (1));
01861     spill_size += 8;
01862     n_spilled += 1;
01863   }
01864     }
01865   else
01866     {
01867       if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
01868   {
01869     SET_HARD_REG_BIT (mask, BR_REG (0));
01870     spill_size += 8;
01871     n_spilled += 1;
01872   }
01873 
01874       if (regs_ever_live[AR_PFS_REGNUM])
01875   {
01876     SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
01877     current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
01878     if (current_frame_info.reg_save_ar_pfs == 0)
01879       {
01880         extra_spill_size += 8;
01881         n_spilled += 1;
01882       }
01883   }
01884     }
01885 
01886   /* Unwind descriptor hackery: things are most efficient if we allocate
01887      consecutive GR save registers for RP, PFS, FP in that order. However,
01888      it is absolutely critical that FP get the only hard register that's
01889      guaranteed to be free, so we allocated it first.  If all three did
01890      happen to be allocated hard regs, and are consecutive, rearrange them
01891      into the preferred order now.  */
01892   if (current_frame_info.reg_fp != 0
01893       && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
01894       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
01895     {
01896       current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
01897       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
01898       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
01899     }
01900 
01901   /* See if we need to store the predicate register block.  */
01902   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
01903     if (regs_ever_live[regno] && ! call_used_regs[regno])
01904       break;
01905   if (regno <= PR_REG (63))
01906     {
01907       SET_HARD_REG_BIT (mask, PR_REG (0));
01908       current_frame_info.reg_save_pr = find_gr_spill (1);
01909       if (current_frame_info.reg_save_pr == 0)
01910   {
01911     extra_spill_size += 8;
01912     n_spilled += 1;
01913   }
01914 
01915       /* ??? Mark them all as used so that register renaming and such
01916    are free to use them.  */
01917       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
01918   regs_ever_live[regno] = 1;
01919     }
01920 
01921   /* If we're forced to use st8.spill, we're forced to save and restore
01922      ar.unat as well.  The check for existing liveness allows inline asm
01923      to touch ar.unat.  */
01924   if (spilled_gr_p || cfun->machine->n_varargs
01925       || regs_ever_live[AR_UNAT_REGNUM])
01926     {
01927       regs_ever_live[AR_UNAT_REGNUM] = 1;
01928       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
01929       current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
01930       if (current_frame_info.reg_save_ar_unat == 0)
01931   {
01932     extra_spill_size += 8;
01933     n_spilled += 1;
01934   }
01935     }
01936 
01937   if (regs_ever_live[AR_LC_REGNUM])
01938     {
01939       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
01940       current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
01941       if (current_frame_info.reg_save_ar_lc == 0)
01942   {
01943     extra_spill_size += 8;
01944     n_spilled += 1;
01945   }
01946     }
01947 
01948   /* If we have an odd number of words of pretend arguments written to
01949      the stack, then the FR save area will be unaligned.  We round the
01950      size of this area up to keep things 16 byte aligned.  */
01951   if (spilled_fr_p)
01952     pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
01953   else
01954     pretend_args_size = current_function_pretend_args_size;
01955 
01956   total_size = (spill_size + extra_spill_size + size + pretend_args_size
01957     + current_function_outgoing_args_size);
01958   total_size = IA64_STACK_ALIGN (total_size);
01959 
01960   /* We always use the 16-byte scratch area provided by the caller, but
01961      if we are a leaf function, there's no one to which we need to provide
01962      a scratch area.  */
01963   if (current_function_is_leaf)
01964     total_size = MAX (0, total_size - 16);
01965 
01966   current_frame_info.total_size = total_size;
01967   current_frame_info.spill_cfa_off = pretend_args_size - 16;
01968   current_frame_info.spill_size = spill_size;
01969   current_frame_info.extra_spill_size = extra_spill_size;
01970   COPY_HARD_REG_SET (current_frame_info.mask, mask);
01971   current_frame_info.n_spilled = n_spilled;
01972   current_frame_info.initialized = reload_completed;
01973 }
01974 
01975 /* Compute the initial difference between the specified pair of registers.  */
01976 
01977 HOST_WIDE_INT
01978 ia64_initial_elimination_offset (from, to)
01979      int from, to;
01980 {
01981   HOST_WIDE_INT offset;
01982 
01983   ia64_compute_frame_size (get_frame_size ());
01984   switch (from)
01985     {
01986     case FRAME_POINTER_REGNUM:
01987       if (to == HARD_FRAME_POINTER_REGNUM)
01988   {
01989     if (current_function_is_leaf)
01990       offset = -current_frame_info.total_size;
01991     else
01992       offset = -(current_frame_info.total_size
01993            - current_function_outgoing_args_size - 16);
01994   }
01995       else if (to == STACK_POINTER_REGNUM)
01996   {
01997     if (current_function_is_leaf)
01998       offset = 0;
01999     else
02000       offset = 16 + current_function_outgoing_args_size;
02001   }
02002       else
02003   abort ();
02004       break;
02005 
02006     case ARG_POINTER_REGNUM:
02007       /* Arguments start above the 16 byte save area, unless stdarg
02008    in which case we store through the 16 byte save area.  */
02009       if (to == HARD_FRAME_POINTER_REGNUM)
02010   offset = 16 - current_function_pretend_args_size;
02011       else if (to == STACK_POINTER_REGNUM)
02012   offset = (current_frame_info.total_size
02013       + 16 - current_function_pretend_args_size);
02014       else
02015   abort ();
02016       break;
02017 
02018     case RETURN_ADDRESS_POINTER_REGNUM:
02019       offset = 0;
02020       break;
02021 
02022     default:
02023       abort ();
02024     }
02025 
02026   return offset;
02027 }
02028 
02029 /* If there are more than a trivial number of register spills, we use
02030    two interleaved iterators so that we can get two memory references
02031    per insn group.
02032 
02033    In order to simplify things in the prologue and epilogue expanders,
02034    we use helper functions to fix up the memory references after the
02035    fact with the appropriate offsets to a POST_MODIFY memory mode.
02036    The following data structure tracks the state of the two iterators
02037    while insns are being emitted.  Arrays of size two are indexed by
   iterator number; only the first n_iter entries are in use.  */
02038 
02039 struct spill_fill_data
02040 {
02041   rtx init_after;   /* insn after which iterator initializations are emitted */
02042   rtx init_reg[2];    /* base register each iterator is initialized from */
02043   rtx iter_reg[2];    /* the iterator registers themselves */
02044   rtx *prev_addr[2];    /* location of the address in the last MEM handed out, or NULL */
02045   rtx prev_insn[2];   /* the insn corresponding to prev_addr */
02046   HOST_WIDE_INT prev_off[2];  /* offset the iterator referred to last */
02047   int n_iter;     /* number of iterators in use (1 or 2) */
02048   int next_iter;    /* index of the iterator to use next */
02049   unsigned int save_gr_used_mask; /* current_frame_info.gr_used_mask as it was when setup_spill_pointers ran; restored by finish_spill_pointers */
02050 };
02051 
/* The single instance shared by the spill/fill helpers below.  */
02052 static struct spill_fill_data spill_fill_data;
02053 
02054 static void
02055 setup_spill_pointers (n_spills, init_reg, cfa_off)
02056      int n_spills;
02057      rtx init_reg;
02058      HOST_WIDE_INT cfa_off;
02059 {
02060   int i;
02061 
02062   spill_fill_data.init_after = get_last_insn ();
02063   spill_fill_data.init_reg[0] = init_reg;
02064   spill_fill_data.init_reg[1] = init_reg;
02065   spill_fill_data.prev_addr[0] = NULL;
02066   spill_fill_data.prev_addr[1] = NULL;
02067   spill_fill_data.prev_insn[0] = NULL;
02068   spill_fill_data.prev_insn[1] = NULL;
02069   spill_fill_data.prev_off[0] = cfa_off;
02070   spill_fill_data.prev_off[1] = cfa_off;
02071   spill_fill_data.next_iter = 0;
02072   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
02073 
02074   spill_fill_data.n_iter = 1 + (n_spills > 2);
02075   for (i = 0; i < spill_fill_data.n_iter; ++i)
02076     {
02077       int regno = next_scratch_gr_reg ();
02078       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
02079       current_frame_info.gr_used_mask |= 1 << regno;
02080     }
02081 }
02082 
/* End a run of spills or fills: put gr_used_mask back to the value that
   setup_spill_pointers saved, which drops the bits set for the iterator
   registers and for anything else marked used since then.  */
02083 static void
02084 finish_spill_pointers ()
02085 {
02086   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
02087 }
02088 
/* Return a MEM, in the mode of REG, that addresses the save slot at
   offset CFA_OFF, using the iterator selected by next_iter.

   If that iterator has already handed out a memory reference, the
   distance from the previous slot to this one is applied either by
   rewriting the previous address into a POST_MODIFY (when the
   displacement satisfies CONST_OK_FOR_N) or by emitting an explicit add.
   Otherwise the iterator is initialized from its init_reg by insns
   placed after init_after.  As a special case, a first reference with
   zero displacement from the stack pointer, when a frame pointer exists,
   is made directly through the hard frame pointer; that path returns
   early and neither records the reference nor advances next_iter.  */
02089 static rtx
02090 spill_restore_mem (reg, cfa_off)
02091      rtx reg;
02092      HOST_WIDE_INT cfa_off;
02093 {
02094   int iter = spill_fill_data.next_iter;
  /* Amount to add to the iterator (or to init_reg) to reach this slot.  */
02095   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
02096   rtx disp_rtx = GEN_INT (disp);
02097   rtx mem;
02098 
02099   if (spill_fill_data.prev_addr[iter])
02100     {
02101       if (CONST_OK_FOR_N (disp))
02102   {
    /* Fold the step into the previous access: overwrite its address
       in place and note the increment on the insn that contains it.  */
02103     *spill_fill_data.prev_addr[iter]
02104       = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
02105            gen_rtx_PLUS (DImode,
02106              spill_fill_data.iter_reg[iter],
02107              disp_rtx));
02108     REG_NOTES (spill_fill_data.prev_insn[iter])
02109       = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
02110          REG_NOTES (spill_fill_data.prev_insn[iter]));
02111   }
02112       else
02113   {
02114     /* ??? Could use register post_modify for loads.  */
02115     if (! CONST_OK_FOR_I (disp))
02116       {
02117         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02118         emit_move_insn (tmp, disp_rtx);
02119         disp_rtx = tmp;
02120       }
02121     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02122          spill_fill_data.iter_reg[iter], disp_rtx));
02123   }
02124     }
02125   /* Micro-optimization: if we've created a frame pointer, it's at
02126      CFA 0, which may allow the real iterator to be initialized lower,
02127      slightly increasing parallelism.  Also, if there are few saves
02128      it may eliminate the iterator entirely.  */
02129   else if (disp == 0
02130      && spill_fill_data.init_reg[iter] == stack_pointer_rtx
02131      && frame_pointer_needed)
02132     {
02133       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
02134       set_mem_alias_set (mem, get_varargs_alias_set ());
02135       return mem;
02136     }
02137   else
02138     {
02139       rtx seq, insn;
02140 
      /* First use of this iterator: build the insn(s) that set it to
         init_reg + disp.  */
02141       if (disp == 0)
02142   seq = gen_movdi (spill_fill_data.iter_reg[iter],
02143        spill_fill_data.init_reg[iter]);
02144       else
02145   {
02146     start_sequence ();
02147 
02148     if (! CONST_OK_FOR_I (disp))
02149       {
02150         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02151         emit_move_insn (tmp, disp_rtx);
02152         disp_rtx = tmp;
02153       }
02154 
02155     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02156          spill_fill_data.init_reg[iter],
02157          disp_rtx));
02158 
02159     seq = get_insns ();
02160     end_sequence ();
02161   }
02162 
02163       /* Careful for being the first insn in a sequence.  */
02164       if (spill_fill_data.init_after)
02165   insn = emit_insn_after (seq, spill_fill_data.init_after);
02166       else
02167   {
02168     rtx first = get_insns ();
02169     if (first)
02170       insn = emit_insn_before (seq, first);
02171     else
02172       insn = emit_insn (seq);
02173   }
      /* Later initializations go after this one.  */
02174       spill_fill_data.init_after = insn;
02175 
02176       /* If DISP is 0, we may or may not have a further adjustment
02177    afterward.  If we do, then the load/store insn may be modified
02178    to be a post-modify.  If we don't, then this copy may be
02179    eliminated by copyprop_hardreg_forward, which makes this
02180    insn garbage, which runs afoul of the sanity check in
02181    propagate_one_insn.  So mark this insn as legal to delete.  */
02182       if (disp == 0)
02183   REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
02184                REG_NOTES (insn));
02185     }
02186 
02187   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
02188 
02189   /* ??? Not all of the spills are for varargs, but some of them are.
02190      The rest of the spills belong in an alias set of their own.  But
02191      it doesn't actually hurt to include them here.  */
02192   set_mem_alias_set (mem, get_varargs_alias_set ());
02193 
  /* Remember where this MEM's address lives so the next call for this
     iterator can turn it into a POST_MODIFY.  The matching prev_insn
     entry is filled in by the caller once the insn has been emitted.  */
02194   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
02195   spill_fill_data.prev_off[iter] = cfa_off;
02196 
  /* Rotate to the next iterator.  */
02197   if (++iter >= spill_fill_data.n_iter)
02198     iter = 0;
02199   spill_fill_data.next_iter = iter;
02200 
02201   return mem;
02202 }
02203 
02204 static void
02205 do_spill (move_fn, reg, cfa_off, frame_reg)
02206      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
02207      rtx reg, frame_reg;
02208      HOST_WIDE_INT cfa_off;
02209 {
02210   int iter = spill_fill_data.next_iter;
02211   rtx mem, insn;
02212 
02213   mem = spill_restore_mem (reg, cfa_off);
02214   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
02215   spill_fill_data.prev_insn[iter] = insn;
02216 
02217   if (frame_reg)
02218     {
02219       rtx base;
02220       HOST_WIDE_INT off;
02221 
02222       RTX_FRAME_RELATED_P (insn) = 1;
02223 
02224       /* Don't even pretend that the unwind code can intuit its way 
02225    through a pair of interleaved post_modify iterators.  Just
02226    provide the correct answer.  */
02227 
02228       if (frame_pointer_needed)
02229   {
02230     base = hard_frame_pointer_rtx;
02231     off = - cfa_off;
02232   }
02233       else
02234   {
02235     base = stack_pointer_rtx;
02236     off = current_frame_info.total_size - cfa_off;
02237   }
02238 
02239       REG_NOTES (insn)
02240   = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02241     gen_rtx_SET (VOIDmode,
02242            gen_rtx_MEM (GET_MODE (reg),
02243             plus_constant (base, off)),
02244            frame_reg),
02245     REG_NOTES (insn));
02246     }
02247 }
02248 
02249 static void
02250 do_restore (move_fn, reg, cfa_off)
02251      rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
02252      rtx reg;
02253      HOST_WIDE_INT cfa_off;
02254 {
02255   int iter = spill_fill_data.next_iter;
02256   rtx insn;
02257 
02258   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
02259         GEN_INT (cfa_off)));
02260   spill_fill_data.prev_insn[iter] = insn;
02261 }
02262 
02263 /* Wrapper functions that discard the CONST_INT spill offset.  These
02264    exist so that we can give gr_spill/gr_fill the offset they need and
02265    use a consistent function interface.  */
02266 
/* Three-argument form of gen_movdi: build a move of SRC to DEST and
   ignore OFFSET.  */
02267 static rtx
02268 gen_movdi_x (dest, src, offset)
02269      rtx dest, src;
02270      rtx offset ATTRIBUTE_UNUSED;
02271 {
02272   return gen_movdi (dest, src);
02273 }
02274 
/* Three-argument form of gen_fr_spill: build the pattern for DEST and
   SRC and ignore OFFSET.  */
02275 static rtx
02276 gen_fr_spill_x (dest, src, offset)
02277      rtx dest, src;
02278      rtx offset ATTRIBUTE_UNUSED;
02279 {
02280   return gen_fr_spill (dest, src);
02281 }
02282 
/* Three-argument form of gen_fr_restore: build the pattern for DEST and
   SRC and ignore OFFSET.  */
02283 static rtx
02284 gen_fr_restore_x (dest, src, offset)
02285      rtx dest, src;
02286      rtx offset ATTRIBUTE_UNUSED;
02287 {
02288   return gen_fr_restore (dest, src);
02289 }
02290 
02291 /* Called after register allocation to add any instructions needed for the
02292    prologue.  Using a prologue insn is favored compared to putting all of the
02293    instructions in output_function_prologue(), since it allows the scheduler
02294    to intermix instructions with the saves of the caller saved registers.  In
02295    some cases, it might be necessary to emit a barrier instruction as the last
02296    insn to prevent such scheduling.
02297 
02298    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
02299    so that the debug info generation code can handle them properly.
02300 
02301    The register save area is laid out like so:
02302    cfa+16
02303   [ varargs spill area ]
02304   [ fr register spill area ]
02305   [ br register spill area ]
02306   [ ar register spill area ]
02307   [ pr register spill area ]
02308   [ gr register spill area ] */
02309 
02310 /* ??? Get inefficient code when the frame size is larger than can fit in an
02311    adds instruction.  */
02312 
02313 void
02314 ia64_expand_prologue ()
02315 {
02316   rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
02317   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
02318   rtx reg, alt_reg;
02319 
02320   ia64_compute_frame_size (get_frame_size ());
  /* NOTE(review): presumably this resets where next_scratch_gr_reg starts
     its search; that function is not visible here -- confirm.  */
02321   last_scratch_gr_reg = 15;
02322 
02323   /* If there is no epilogue, then we don't need some prologue insns.
02324      We need to avoid emitting the dead prologue insns, because flow
02325      will complain about them.  */
02326   if (optimize)
02327     {
02328       edge e;
02329 
      /* An epilogue is assumed exactly when some non-fake fallthru edge
         enters the exit block.  */
02330       for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
02331   if ((e->flags & EDGE_FAKE) == 0
02332       && (e->flags & EDGE_FALLTHRU) != 0)
02333     break;
02334       epilogue_p = (e != NULL);
02335     }
02336   else
02337     epilogue_p = 1;
02338 
02339   /* Set the local, input, and output register names.  We need to do this
02340      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
02341      half.  If we use in/loc/out register names, then we get assembler errors
02342      in crtn.S because there is no alloc insn or regstk directive in there.  */
02343   if (! TARGET_REG_NAMES)
02344     {
02345       int inputs = current_frame_info.n_input_regs;
02346       int locals = current_frame_info.n_local_regs;
02347       int outputs = current_frame_info.n_output_regs;
02348 
02349       for (i = 0; i < inputs; i++)
02350   reg_names[IN_REG (i)] = ia64_reg_numbers[i];
02351       for (i = 0; i < locals; i++)
02352   reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
02353       for (i = 0; i < outputs; i++)
02354   reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
02355     }
02356 
02357   /* Set the frame pointer register name.  The regnum is logically loc79,
02358      but of course we'll not have allocated that many locals.  Rather than
02359      worrying about renumbering the existing rtxs, we adjust the name.  */
02360   /* ??? This code means that we can never use one local register when
02361      there is a frame pointer.  loc79 gets wasted in this case, as it is
02362      renamed to a register that will never be used.  See also the try_locals
02363      code in find_gr_spill.  */
02364   if (current_frame_info.reg_fp)
02365     {
02366       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
02367       reg_names[HARD_FRAME_POINTER_REGNUM]
02368   = reg_names[current_frame_info.reg_fp];
02369       reg_names[current_frame_info.reg_fp] = tmp;
02370     }
02371 
02372   /* Fix up the return address placeholder.  */
02373   /* ??? We can fail if __builtin_return_address is used, and we didn't
02374      allocate a register in which to save b0.  I can't think of a way to
02375      eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
02376      then be sure that I got the right one.  Further, reload doesn't seem
02377      to care if an eliminable register isn't used, and "eliminates" it
02378      anyway.  */
02379   if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
02380       && current_frame_info.reg_save_b0 != 0)
02381     XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
02382 
02383   /* We don't need an alloc instruction if we've used no outputs or locals.  */
02384   if (current_frame_info.n_local_regs == 0
02385       && current_frame_info.n_output_regs == 0
02386       && current_frame_info.n_input_regs <= current_function_args_info.int_regs
02387       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
02388     {
02389       /* If there is no alloc, but there are input registers used, then we
02390    need a .regstk directive.  */
02391       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
02392       ar_pfs_save_reg = NULL_RTX;
02393     }
02394   else
02395     {
02396       current_frame_info.need_regstk = 0;
02397 
      /* The alloc result goes to the register chosen for ar.pfs, or to a
         scratch register when none was assigned.  */
02398       if (current_frame_info.reg_save_ar_pfs)
02399   regno = current_frame_info.reg_save_ar_pfs;
02400       else
02401   regno = next_scratch_gr_reg ();
02402       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
02403 
02404       insn = emit_insn (gen_alloc (ar_pfs_save_reg, 
02405            GEN_INT (current_frame_info.n_input_regs),
02406            GEN_INT (current_frame_info.n_local_regs),
02407            GEN_INT (current_frame_info.n_output_regs),
02408            GEN_INT (current_frame_info.n_rotate_regs)));
02409       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
02410     }
02411 
02412   /* Set up frame pointer, stack pointer, and spill iterators.  */
02413 
  /* setup_spill_pointers records the current last insn; iterator
     initializations are later emitted after that point, i.e. ahead of
     the stack pointer adjustment emitted below.  */
02414   n_varargs = cfun->machine->n_varargs;
02415   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
02416       stack_pointer_rtx, 0);
02417 
02418   if (frame_pointer_needed)
02419     {
02420       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
02421       RTX_FRAME_RELATED_P (insn) = 1;
02422     }
02423 
02424   if (current_frame_info.total_size != 0)
02425     {
02426       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
02427       rtx offset;
02428 
      /* Use the constant directly when it satisfies CONST_OK_FOR_I;
         otherwise load it into a scratch register first.  */
02429       if (CONST_OK_FOR_I (- current_frame_info.total_size))
02430   offset = frame_size_rtx;
02431       else
02432   {
02433     regno = next_scratch_gr_reg ();
02434     offset = gen_rtx_REG (DImode, regno);
02435     emit_move_insn (offset, frame_size_rtx);
02436   }
02437 
02438       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
02439             stack_pointer_rtx, offset));
02440 
02441       if (! frame_pointer_needed)
02442   {
02443     RTX_FRAME_RELATED_P (insn) = 1;
    /* When the adjustment went through a register, attach a note that
       states it with the constant instead.  */
02444     if (GET_CODE (offset) != CONST_INT)
02445       {
02446         REG_NOTES (insn)
02447     = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02448       gen_rtx_SET (VOIDmode,
02449              stack_pointer_rtx,
02450              gen_rtx_PLUS (DImode,
02451                stack_pointer_rtx,
02452                frame_size_rtx)),
02453       REG_NOTES (insn));
02454       }
02455   }
02456 
02457       /* ??? At this point we must generate a magic insn that appears to
02458    modify the stack pointer, the frame pointer, and all spill
02459    iterators.  This would allow the most scheduling freedom.  For
02460    now, just hard stop.  */
02461       emit_insn (gen_blockage ());
02462     }
02463 
02464   /* Must copy out ar.unat before doing any integer spills.  */
02465   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
02466     {
02467       if (current_frame_info.reg_save_ar_unat)
02468   ar_unat_save_reg
02469     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
02470       else
02471   {
02472     alt_regno = next_scratch_gr_reg ();
02473     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
    /* NOTE(review): this is a signed shift; if alt_regno can be 31 it
       shifts into the sign bit.  Consider 1U << alt_regno -- confirm
       the range of next_scratch_gr_reg.  */
02474     current_frame_info.gr_used_mask |= 1 << alt_regno;
02475   }
02476 
02477       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
02478       insn = emit_move_insn (ar_unat_save_reg, reg);
02479       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
02480 
02481       /* Even if we're not going to generate an epilogue, we still
02482    need to save the register so that EH works.  */
02483       if (! epilogue_p && current_frame_info.reg_save_ar_unat)
02484   emit_insn (gen_prologue_use (ar_unat_save_reg));
02485     }
02486   else
02487     ar_unat_save_reg = NULL_RTX;
02488 
02489   /* Spill all varargs registers.  Do this before spilling any GR registers,
02490      since we want the UNAT bits for the GR registers to override the UNAT
02491      bits from varargs, which we don't care about.  */
02492 
  /* cfa_off is bumped by 8 before each use, so the first register
     (GR_ARG_FIRST + 7) is spilled at offset -8 and the walk proceeds
     toward lower register numbers.  */
02493   cfa_off = -16;
02494   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
02495     {
02496       reg = gen_rtx_REG (DImode, regno);
02497       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
02498     }
02499 
02500   /* Locate the bottom of the register save area.  */
02501   cfa_off = (current_frame_info.spill_cfa_off
02502        + current_frame_info.spill_size
02503        + current_frame_info.extra_spill_size);
02504 
02505   /* Save the predicate register block either in a register or in memory.  */
02506   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
02507     {
02508       reg = gen_rtx_REG (DImode, PR_REG (0));
02509       if (current_frame_info.reg_save_pr != 0)
02510   {
02511     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
02512     insn = emit_move_insn (alt_reg, reg);
02513 
02514     /* ??? Denote pr spill/fill by a DImode move that modifies all
02515        64 hard registers.  */
02516     RTX_FRAME_RELATED_P (insn) = 1;
02517     REG_NOTES (insn)
02518       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02519       gen_rtx_SET (VOIDmode, alt_reg, reg),
02520       REG_NOTES (insn));
02521 
02522     /* Even if we're not going to generate an epilogue, we still
02523        need to save the register so that EH works.  */
02524     if (! epilogue_p)
02525       emit_insn (gen_prologue_use (alt_reg));
02526   }
02527       else
02528   {
02529     alt_regno = next_scratch_gr_reg ();
02530     alt_reg = gen_rtx_REG (DImode, alt_regno);
    /* NOTE(review): the value stored in insn here is not read before
       insn is assigned again.  */
02531     insn = emit_move_insn (alt_reg, reg);
02532     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02533     cfa_off -= 8;
02534   }
02535     }
02536 
02537   /* Handle AR regs in numerical order.  All of them get special handling.  */
02538   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
02539       && current_frame_info.reg_save_ar_unat == 0)
02540     {
02541       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
02542       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
02543       cfa_off -= 8;
02544     }
02545 
02546   /* The alloc insn already copied ar.pfs into a general register.  The
02547      only thing we have to do now is copy that register to a stack slot
02548      if we'd not allocated a local register for the job.  */
02549   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
02550       && current_frame_info.reg_save_ar_pfs == 0)
02551     {
02552       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02553       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
02554       cfa_off -= 8;
02555     }
02556 
02557   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
02558     {
02559       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
02560       if (current_frame_info.reg_save_ar_lc != 0)
02561   {
02562     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
02563     insn = emit_move_insn (alt_reg, reg);
02564     RTX_FRAME_RELATED_P (insn) = 1;
02565 
02566     /* Even if we're not going to generate an epilogue, we still
02567        need to save the register so that EH works.  */
02568     if (! epilogue_p)
02569       emit_insn (gen_prologue_use (alt_reg));
02570   }
02571       else
02572   {
02573     alt_regno = next_scratch_gr_reg ();
02574     alt_reg = gen_rtx_REG (DImode, alt_regno);
02575     emit_move_insn (alt_reg, reg);
02576     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02577     cfa_off -= 8;
02578   }
02579     }
02580 
02581   if (current_frame_info.reg_save_gp)
02582     {
02583       insn = emit_move_insn (gen_rtx_REG (DImode,
02584             current_frame_info.reg_save_gp),
02585            pic_offset_table_rtx);
02586       /* We don't know for sure yet if this is actually needed, since
02587    we've not split the PIC call patterns.  If all of the calls
02588    are indirect, and not followed by any uses of the gp, then
02589    this save is dead.  Allow it to go away.  */
02590       REG_NOTES (insn)
02591   = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
02592     }
02593 
02594   /* We should now be at the base of the gr/br/fr spill area.  */
02595   if (cfa_off != (current_frame_info.spill_cfa_off
02596       + current_frame_info.spill_size))
02597     abort ();
02598 
02599   /* Spill all general registers.  */
02600   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
02601     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02602       {
02603   reg = gen_rtx_REG (DImode, regno);
02604   do_spill (gen_gr_spill, reg, cfa_off, reg);
02605   cfa_off -= 8;
02606       }
02607 
02608   /* Handle BR0 specially -- it may be getting stored permanently in
02609      some GR register.  */
02610   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
02611     {
02612       reg = gen_rtx_REG (DImode, BR_REG (0));
02613       if (current_frame_info.reg_save_b0 != 0)
02614   {
02615     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
02616     insn = emit_move_insn (alt_reg, reg);
02617     RTX_FRAME_RELATED_P (insn) = 1;
02618 
02619     /* Even if we're not going to generate an epilogue, we still
02620        need to save the register so that EH works.  */
02621     if (! epilogue_p)
02622       emit_insn (gen_prologue_use (alt_reg));
02623   }
02624       else
02625   {
02626     alt_regno = next_scratch_gr_reg ();
02627     alt_reg = gen_rtx_REG (DImode, alt_regno);
02628     emit_move_insn (alt_reg, reg);
02629     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02630     cfa_off -= 8;
02631   }
02632     }
02633 
02634   /* Spill the rest of the BR registers.  */
02635   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
02636     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02637       {
  /* Each BR is first copied into a scratch GR and that GR is stored.  */
02638   alt_regno = next_scratch_gr_reg ();
02639   alt_reg = gen_rtx_REG (DImode, alt_regno);
02640   reg = gen_rtx_REG (DImode, regno);
02641   emit_move_insn (alt_reg, reg);
02642   do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02643   cfa_off -= 8;
02644       }
02645 
02646   /* Align the frame and spill all FR registers.  */
02647   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
02648     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02649       {
  /* FR slots are 16 bytes each and must sit on a 16 byte boundary.  */
02650         if (cfa_off & 15)
02651     abort ();
02652   reg = gen_rtx_REG (TFmode, regno);
02653   do_spill (gen_fr_spill_x, reg, cfa_off, reg);
02654   cfa_off -= 16;
02655       }
02656 
  /* Consistency check: every slot accounted for by spill_size and
     extra_spill_size must have been consumed.  */
02657   if (cfa_off != current_frame_info.spill_cfa_off)
02658     abort ();
02659 
02660   finish_spill_pointers ();
02661 }
02662 
02663 /* Called after register allocation to add any instructions needed for the
02664    epilogue.  Using an epilogue insn is favored compared to putting all of the
02665    instructions in output_function_epilogue(), since it allows the scheduler
02666    to intermix instructions with the restores of the caller saved registers.  In
02667    some cases, it might be necessary to emit a barrier instruction as the last
02668    insn to prevent such scheduling.  */
02669 
02670 void
02671 ia64_expand_epilogue (sibcall_p)
02672      int sibcall_p;
02673 {
02674   rtx insn, reg, alt_reg, ar_unat_save_reg;
02675   int regno, alt_regno, cfa_off;
02676 
02677   ia64_compute_frame_size (get_frame_size ());
02678 
02679   /* If there is a frame pointer, then we use it instead of the stack
02680      pointer, so that the stack pointer does not need to be valid when
02681      the epilogue starts.  See EXIT_IGNORE_STACK.  */
02682   if (frame_pointer_needed)
02683     setup_spill_pointers (current_frame_info.n_spilled,
02684         hard_frame_pointer_rtx, 0);
02685   else
02686     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 
02687         current_frame_info.total_size);
02688 
02689   if (current_frame_info.total_size != 0)
02690     {
02691       /* ??? At this point we must generate a magic insn that appears to
02692          modify the spill iterators and the frame pointer.  This would
02693    allow the most scheduling freedom.  For now, just hard stop.  */
02694       emit_insn (gen_blockage ());
02695     }
02696 
02697   /* Locate the bottom of the register save area.  */
02698   cfa_off = (current_frame_info.spill_cfa_off
02699        + current_frame_info.spill_size
02700        + current_frame_info.extra_spill_size);
02701 
02702   /* Restore the predicate registers.  */
02703   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
02704     {
02705       if (current_frame_info.reg_save_pr != 0)
02706   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
02707       else
02708   {
02709     alt_regno = next_scratch_gr_reg ();
02710     alt_reg = gen_rtx_REG (DImode, alt_regno);
02711     do_restore (gen_movdi_x, alt_reg, cfa_off);
02712     cfa_off -= 8;
02713   }
02714       reg = gen_rtx_REG (DImode, PR_REG (0));
02715       emit_move_insn (reg, alt_reg);
02716     }
02717 
02718   /* Restore the application registers.  */
02719 
02720   /* Load the saved unat from the stack, but do not restore it until
02721      after the GRs have been restored.  */
02722   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
02723     {
02724       if (current_frame_info.reg_save_ar_unat != 0)
02725         ar_unat_save_reg
02726     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
02727       else
02728   {
02729     alt_regno = next_scratch_gr_reg ();
02730     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
02731     current_frame_info.gr_used_mask |= 1 << alt_regno;
02732     do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
02733     cfa_off -= 8;
02734   }
02735     }
02736   else
02737     ar_unat_save_reg = NULL_RTX;
02738       
02739   if (current_frame_info.reg_save_ar_pfs != 0)
02740     {
02741       alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
02742       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02743       emit_move_insn (reg, alt_reg);
02744     }
02745   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
02746     {
02747       alt_regno = next_scratch_gr_reg ();
02748       alt_reg = gen_rtx_REG (DImode, alt_regno);
02749       do_restore (gen_movdi_x, alt_reg, cfa_off);
02750       cfa_off -= 8;
02751       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02752       emit_move_insn (reg, alt_reg);
02753     }
02754 
02755   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
02756     {
02757       if (current_frame_info.reg_save_ar_lc != 0)
02758   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
02759       else
02760   {
02761     alt_regno = next_scratch_gr_reg ();
02762     alt_reg = gen_rtx_REG (DImode, alt_regno);
02763     do_restore (gen_movdi_x, alt_reg, cfa_off);
02764     cfa_off -= 8;
02765   }
02766       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
02767       emit_move_insn (reg, alt_reg);
02768     }
02769 
02770   /* We should now be at the base of the gr/br/fr spill area.  */
02771   if (cfa_off != (current_frame_info.spill_cfa_off
02772       + current_frame_info.spill_size))
02773     abort ();
02774 
02775   /* The GP may be stored on the stack in the prologue, but it's
02776      never restored in the epilogue.  Skip the stack slot.  */
02777   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
02778     cfa_off -= 8;
02779 
02780   /* Restore all general registers.  */
02781   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
02782     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02783       {
02784   reg = gen_rtx_REG (DImode, regno);
02785   do_restore (gen_gr_restore, reg, cfa_off);
02786   cfa_off -= 8;
02787       }
02788   
02789   /* Restore the branch registers.  Handle B0 specially, as it may
02790      have gotten stored in some GR register.  */
02791   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
02792     {
02793       if (current_frame_info.reg_save_b0 != 0)
02794   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
02795       else
02796   {
02797     alt_regno = next_scratch_gr_reg ();
02798     alt_reg = gen_rtx_REG (DImode, alt_regno);
02799     do_restore (gen_movdi_x, alt_reg, cfa_off);
02800     cfa_off -= 8;
02801   }
02802       reg = gen_rtx_REG (DImode, BR_REG (0));
02803       emit_move_insn (reg, alt_reg);
02804     }
02805     
02806   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
02807     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02808       {
02809   alt_regno = next_scratch_gr_reg ();
02810   alt_reg = gen_rtx_REG (DImode, alt_regno);
02811   do_restore (gen_movdi_x, alt_reg, cfa_off);
02812   cfa_off -= 8;
02813   reg = gen_rtx_REG (DImode, regno);
02814   emit_move_insn (reg, alt_reg);
02815       }
02816 
02817   /* Restore floating point registers.  */
02818   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
02819     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02820       {
02821         if (cfa_off & 15)
02822     abort ();
02823   reg = gen_rtx_REG (TFmode, regno);
02824   do_restore (gen_fr_restore_x, reg, cfa_off);
02825   cfa_off -= 16;
02826       }
02827 
02828   /* Restore ar.unat for real.  */
02829   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
02830     {
02831       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
02832       emit_move_insn (reg, ar_unat_save_reg);
02833     }
02834 
02835   if (cfa_off != current_frame_info.spill_cfa_off)
02836     abort ();
02837 
02838   finish_spill_pointers ();
02839 
02840   if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
02841     {
02842       /* ??? At this point we must generate a magic insn that appears to
02843          modify the spill iterators, the stack pointer, and the frame
02844    pointer.  This would allow the most scheduling freedom.  For now,
02845    just hard stop.  */
02846       emit_insn (gen_blockage ());
02847     }
02848 
02849   if (cfun->machine->ia64_eh_epilogue_sp)
02850     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
02851   else if (frame_pointer_needed)
02852     {
02853       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
02854       RTX_FRAME_RELATED_P (insn) = 1;
02855     }
02856   else if (current_frame_info.total_size)
02857     {
02858       rtx offset, frame_size_rtx;
02859 
02860       frame_size_rtx = GEN_INT (current_frame_info.total_size);
02861       if (CONST_OK_FOR_I (current_frame_info.total_size))
02862   offset = frame_size_rtx;
02863       else
02864   {
02865     regno = next_scratch_gr_reg ();
02866     offset = gen_rtx_REG (DImode, regno);
02867     emit_move_insn (offset, frame_size_rtx);
02868   }
02869 
02870       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
02871             offset));
02872 
02873       RTX_FRAME_RELATED_P (insn) = 1;
02874       if (GET_CODE (offset) != CONST_INT)
02875   {
02876     REG_NOTES (insn)
02877       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02878       gen_rtx_SET (VOIDmode,
02879              stack_pointer_rtx,
02880              gen_rtx_PLUS (DImode,
02881                stack_pointer_rtx,
02882                frame_size_rtx)),
02883       REG_NOTES (insn));
02884   }
02885     }
02886 
02887   if (cfun->machine->ia64_eh_epilogue_bsp)
02888     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
02889  
02890   if (! sibcall_p)
02891     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
02892   else
02893     {
02894       int fp = GR_REG (2);
02895       /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
02896    first available call clobbered register.  If there was a frame_pointer 
02897    register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, 
02898    so we have to make sure we're using the string "r2" when emitting
02899    the register name for the assmbler.  */
02900       if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
02901   fp = HARD_FRAME_POINTER_REGNUM;
02902 
02903       /* We must emit an alloc to force the input registers to become output
02904    registers.  Otherwise, if the callee tries to pass its parameters
02905    through to another call without an intervening alloc, then these
02906    values get lost.  */
02907       /* ??? We don't need to preserve all input registers.  We only need to
02908    preserve those input registers used as arguments to the sibling call.
02909    It is unclear how to compute that number here.  */
02910       if (current_frame_info.n_input_regs != 0)
02911   emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
02912             GEN_INT (0), GEN_INT (0),
02913             GEN_INT (current_frame_info.n_input_regs),
02914             GEN_INT (0)));
02915     }
02916 }
02917 
02918 /* Return 1 if br.ret can do all the work required to return from a
02919    function.  */
02920 
02921 int
02922 ia64_direct_return ()
02923 {
02924   if (reload_completed && ! frame_pointer_needed)
02925     {
02926       ia64_compute_frame_size (get_frame_size ());
02927 
02928       return (current_frame_info.total_size == 0
02929         && current_frame_info.n_spilled == 0
02930         && current_frame_info.reg_save_b0 == 0
02931         && current_frame_info.reg_save_pr == 0
02932         && current_frame_info.reg_save_ar_pfs == 0
02933         && current_frame_info.reg_save_ar_unat == 0
02934         && current_frame_info.reg_save_ar_lc == 0);
02935     }
02936   return 0;
02937 }
02938 
02939 int
02940 ia64_hard_regno_rename_ok (from, to)
02941      int from;
02942      int to;
02943 {
02944   /* Don't clobber any of the registers we reserved for the prologue.  */
02945   if (to == current_frame_info.reg_fp
02946       || to == current_frame_info.reg_save_b0
02947       || to == current_frame_info.reg_save_pr
02948       || to == current_frame_info.reg_save_ar_pfs
02949       || to == current_frame_info.reg_save_ar_unat
02950       || to == current_frame_info.reg_save_ar_lc)
02951     return 0;
02952 
02953   if (from == current_frame_info.reg_fp
02954       || from == current_frame_info.reg_save_b0
02955       || from == current_frame_info.reg_save_pr
02956       || from == current_frame_info.reg_save_ar_pfs
02957       || from == current_frame_info.reg_save_ar_unat
02958       || from == current_frame_info.reg_save_ar_lc)
02959     return 0;
02960 
02961   /* Don't use output registers outside the register frame.  */
02962   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
02963     return 0;
02964 
02965   /* Retain even/oddness on predicate register pairs.  */
02966   if (PR_REGNO_P (from) && PR_REGNO_P (to))
02967     return (from & 1) == (to & 1);
02968 
02969   return 1;
02970 }
02971 
02972 /* Target hook for assembling integer objects.  Handle word-sized
02973    aligned objects and detect the cases when @fptr is needed.  */
02974 
02975 static bool
02976 ia64_assemble_integer (x, size, aligned_p)
02977      rtx x;
02978      unsigned int size;
02979      int aligned_p;
02980 {
02981   if (size == (TARGET_ILP32 ? 4 : 8)
02982       && aligned_p
02983       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
02984       && GET_CODE (x) == SYMBOL_REF
02985       && SYMBOL_REF_FLAG (x))
02986     {
02987       if (TARGET_ILP32)
02988   fputs ("\tdata4\t@fptr(", asm_out_file);
02989       else
02990   fputs ("\tdata8\t@fptr(", asm_out_file);
02991       output_addr_const (asm_out_file, x);
02992       fputs (")\n", asm_out_file);
02993       return true;
02994     }
02995   return default_assemble_integer (x, size, aligned_p);
02996 }
02997 
02998 /* Emit the function prologue.  */
02999 
03000 static void
03001 ia64_output_function_prologue (file, size)
03002      FILE *file;
03003      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
03004 {
03005   int mask, grsave, grsave_prev;
03006 
03007   if (current_frame_info.need_regstk)
03008     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
03009        current_frame_info.n_input_regs,
03010        current_frame_info.n_local_regs,
03011        current_frame_info.n_output_regs,
03012        current_frame_info.n_rotate_regs);
03013 
03014   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03015     return;
03016 
03017   /* Emit the .prologue directive.  */
03018 
03019   mask = 0;
03020   grsave = grsave_prev = 0;
03021   if (current_frame_info.reg_save_b0 != 0)
03022     {
03023       mask |= 8;
03024       grsave = grsave_prev = current_frame_info.reg_save_b0;
03025     }
03026   if (current_frame_info.reg_save_ar_pfs != 0
03027       && (grsave_prev == 0
03028     || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
03029     {
03030       mask |= 4;
03031       if (grsave_prev == 0)
03032   grsave = current_frame_info.reg_save_ar_pfs;
03033       grsave_prev = current_frame_info.reg_save_ar_pfs;
03034     }
03035   if (current_frame_info.reg_fp != 0
03036       && (grsave_prev == 0
03037     || current_frame_info.reg_fp == grsave_prev + 1))
03038     {
03039       mask |= 2;
03040       if (grsave_prev == 0)
03041   grsave = HARD_FRAME_POINTER_REGNUM;
03042       grsave_prev = current_frame_info.reg_fp;
03043     }
03044   if (current_frame_info.reg_save_pr != 0
03045       && (grsave_prev == 0
03046     || current_frame_info.reg_save_pr == grsave_prev + 1))
03047     {
03048       mask |= 1;
03049       if (grsave_prev == 0)
03050   grsave = current_frame_info.reg_save_pr;
03051     }
03052 
03053   if (mask)
03054     fprintf (file, "\t.prologue %d, %d\n", mask,
03055        ia64_dbx_register_number (grsave));
03056   else
03057     fputs ("\t.prologue\n", file);
03058 
03059   /* Emit a .spill directive, if necessary, to relocate the base of
03060      the register spill area.  */
03061   if (current_frame_info.spill_cfa_off != -16)
03062     fprintf (file, "\t.spill %ld\n",
03063        (long) (current_frame_info.spill_cfa_off
03064          + current_frame_info.spill_size));
03065 }
03066 
03067 /* Emit the .body directive at the scheduled end of the prologue.  */
03068 
03069 static void
03070 ia64_output_function_end_prologue (file)
03071      FILE *file;
03072 {
03073   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03074     return;
03075 
03076   fputs ("\t.body\n", file);
03077 }
03078 
03079 /* Emit the function epilogue.  */
03080 
03081 static void
03082 ia64_output_function_epilogue (file, size)
03083      FILE *file ATTRIBUTE_UNUSED;
03084      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
03085 {
03086   int i;
03087 
03088   /* Reset from the function's potential modifications.  */
03089   XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
03090 
03091   if (current_frame_info.reg_fp)
03092     {
03093       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
03094       reg_names[HARD_FRAME_POINTER_REGNUM]
03095   = reg_names[current_frame_info.reg_fp];
03096       reg_names[current_frame_info.reg_fp] = tmp;
03097     }
03098   if (! TARGET_REG_NAMES)
03099     {
03100       for (i = 0; i < current_frame_info.n_input_regs; i++)
03101   reg_names[IN_REG (i)] = ia64_input_reg_names[i];
03102       for (i = 0; i < current_frame_info.n_local_regs; i++)
03103   reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
03104       for (i = 0; i < current_frame_info.n_output_regs; i++)
03105   reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
03106     }
03107 
03108   current_frame_info.initialized = 0;
03109 }
03110 
03111 int
03112 ia64_dbx_register_number (regno)
03113      int regno;
03114 {
03115   /* In ia64_expand_prologue we quite literally renamed the frame pointer
03116      from its home at loc79 to something inside the register frame.  We
03117      must perform the same renumbering here for the debug info.  */
03118   if (current_frame_info.reg_fp)
03119     {
03120       if (regno == HARD_FRAME_POINTER_REGNUM)
03121   regno = current_frame_info.reg_fp;
03122       else if (regno == current_frame_info.reg_fp)
03123   regno = HARD_FRAME_POINTER_REGNUM;
03124     }
03125 
03126   if (IN_REGNO_P (regno))
03127     return 32 + regno - IN_REG (0);
03128   else if (LOC_REGNO_P (regno))
03129     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
03130   else if (OUT_REGNO_P (regno))
03131     return (32 + current_frame_info.n_input_regs
03132       + current_frame_info.n_local_regs + regno - OUT_REG (0));
03133   else
03134     return regno;
03135 }
03136 
03137 void
03138 ia64_initialize_trampoline (addr, fnaddr, static_chain)
03139      rtx addr, fnaddr, static_chain;
03140 {
03141   rtx addr_reg, eight = GEN_INT (8);
03142 
03143   /* Load up our iterator.  */
03144   addr_reg = gen_reg_rtx (Pmode);
03145   emit_move_insn (addr_reg, addr);
03146 
03147   /* The first two words are the fake descriptor:
03148      __ia64_trampoline, ADDR+16.  */
03149   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03150       gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
03151   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03152 
03153   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03154       copy_to_reg (plus_constant (addr, 16)));
03155   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03156 
03157   /* The third word is the target descriptor.  */
03158   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
03159   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03160 
03161   /* The fourth word is the static chain.  */
03162   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
03163 }
03164 
03165 /* Do any needed setup for a variadic function.  CUM has not been updated
03166    for the last named argument which has type TYPE and mode MODE.
03167 
03168    We generate the actual spill instructions during prologue generation.  */
03169 
03170 void
03171 ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
03172      CUMULATIVE_ARGS cum;
03173      int             int_mode;
03174      tree            type;
03175      int *           pretend_size;
03176      int       second_time ATTRIBUTE_UNUSED;
03177 {
03178   /* Skip the current argument.  */
03179   ia64_function_arg_advance (&cum, int_mode, type, 1);
03180 
03181   if (cum.words < MAX_ARGUMENT_SLOTS)
03182     {
03183       int n = MAX_ARGUMENT_SLOTS - cum.words;
03184       *pretend_size = n * UNITS_PER_WORD;
03185       cfun->machine->n_varargs = n;
03186     }
03187 }
03188 
03189 /* Check whether TYPE is a homogeneous floating point aggregate.  If
03190    it is, return the mode of the floating point type that appears
03191    in all leafs.  If it is not, return VOIDmode.
03192 
03193    An aggregate is a homogeneous floating point aggregate is if all
03194    fields/elements in it have the same floating point type (e.g,
03195    SFmode).  128-bit quad-precision floats are excluded.  */
03196 
03197 static enum machine_mode
03198 hfa_element_mode (type, nested)
03199      tree type;
03200      int nested;
03201 {
03202   enum machine_mode element_mode = VOIDmode;
03203   enum machine_mode mode;
03204   enum tree_code code = TREE_CODE (type);
03205   int know_element_mode = 0;
03206   tree t;
03207 
03208   switch (code)
03209     {
03210     case VOID_TYPE: case INTEGER_TYPE:  case ENUMERAL_TYPE:
03211     case BOOLEAN_TYPE:  case CHAR_TYPE:   case POINTER_TYPE:
03212     case OFFSET_TYPE: case REFERENCE_TYPE:  case METHOD_TYPE:
03213     case FILE_TYPE: case SET_TYPE:    case LANG_TYPE:
03214     case FUNCTION_TYPE:
03215       return VOIDmode;
03216 
03217       /* Fortran complex types are supposed to be HFAs, so we need to handle
03218    gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
03219    types though.  */
03220     case COMPLEX_TYPE:
03221       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
03222     && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
03223   return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
03224             * BITS_PER_UNIT, MODE_FLOAT, 0);
03225       else
03226   return VOIDmode;
03227 
03228     case REAL_TYPE:
03229       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
03230    mode if this is contained within an aggregate.  */
03231       if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
03232   return TYPE_MODE (type);
03233       else
03234   return VOIDmode;
03235 
03236     case ARRAY_TYPE:
03237       return hfa_element_mode (TREE_TYPE (type), 1);
03238 
03239     case RECORD_TYPE:
03240     case UNION_TYPE:
03241     case QUAL_UNION_TYPE:
03242       for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
03243   {
03244     if (TREE_CODE (t) != FIELD_DECL)
03245       continue;
03246 
03247     mode = hfa_element_mode (TREE_TYPE (t), 1);
03248     if (know_element_mode)
03249       {
03250         if (mode != element_mode)
03251     return VOIDmode;
03252       }
03253     else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
03254       return VOIDmode;
03255     else
03256       {
03257         know_element_mode = 1;
03258         element_mode = mode;
03259       }
03260   }
03261       return element_mode;
03262 
03263     default:
03264       /* If we reach here, we probably have some front-end specific type
03265    that the backend doesn't know about.  This can happen via the
03266    aggregate_value_p call in init_function_start.  All we can do is
03267    ignore unknown tree types.  */
03268       return VOIDmode;
03269     }
03270 
03271   return VOIDmode;
03272 }
03273 
03274 /* Return rtx for register where argument is passed, or zero if it is passed
03275    on the stack.  */
03276 
03277 /* ??? 128-bit quad-precision floats are always passed in general
03278    registers.  */
03279 
03280 rtx
03281 ia64_function_arg (cum, mode, type, named, incoming)
03282      CUMULATIVE_ARGS *cum;
03283      enum machine_mode mode;
03284      tree type;
03285      int named;
03286      int incoming;
03287 {
03288   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
03289   int words = (((mode == BLKmode ? int_size_in_bytes (type)
03290      : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
03291          / UNITS_PER_WORD);
03292   int offset = 0;
03293   enum machine_mode hfa_mode = VOIDmode;
03294 
03295   /* Integer and float arguments larger than 8 bytes start at the next even
03296      boundary.  Aggregates larger than 8 bytes start at the next even boundary
03297      if the aggregate has 16 byte alignment.  Net effect is that types with
03298      alignment greater than 8 start at the next even boundary.  */
03299   /* ??? The ABI does not specify how to handle aggregates with alignment from
03300      9 to 15 bytes, or greater than 16.   We handle them all as if they had
03301      16 byte alignment.  Such aggregates can occur only if gcc extensions are
03302      used.  */
03303   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
03304        : (words > 1))
03305       && (cum->words & 1))
03306     offset = 1;
03307 
03308   /* If all argument slots are used, then it must go on the stack.  */
03309   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
03310     return 0;
03311 
03312   /* Check for and handle homogeneous FP aggregates.  */
03313   if (type)
03314     hfa_mode = hfa_element_mode (type, 0);
03315 
03316   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
03317      and unprototyped hfas are passed specially.  */
03318   if (hfa_mode != VOIDmode && (! cum->prototype || named))
03319     {
03320       rtx loc[16];
03321       int i = 0;
03322       int fp_regs = cum->fp_regs;
03323       int int_regs = cum->words + offset;
03324       int hfa_size = GET_MODE_SIZE (hfa_mode);
03325       int byte_size;
03326       int args_byte_size;
03327 
03328       /* If prototyped, pass it in FR regs then GR regs.
03329    If not prototyped, pass it in both FR and GR regs.
03330 
03331    If this is an SFmode aggregate, then it is possible to run out of
03332    FR regs while GR regs are still left.  In that case, we pass the
03333    remaining part in the GR regs.  */
03334 
03335       /* Fill the FP regs.  We do this always.  We stop if we reach the end
03336    of the argument, the last FP register, or the last argument slot.  */
03337 
03338       byte_size = ((mode == BLKmode)
03339        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03340       args_byte_size = int_regs * UNITS_PER_WORD;
03341       offset = 0;
03342       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
03343         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
03344   {
03345     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03346               gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
03347                     + fp_regs)),
03348               GEN_INT (offset));
03349     offset += hfa_size;
03350     args_byte_size += hfa_size;
03351     fp_regs++;
03352   }
03353 
03354       /* If no prototype, then the whole thing must go in GR regs.  */
03355       if (! cum->prototype)
03356   offset = 0;
03357       /* If this is an SFmode aggregate, then we might have some left over
03358    that needs to go in GR regs.  */
03359       else if (byte_size != offset)
03360   int_regs += offset / UNITS_PER_WORD;
03361 
03362       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
03363 
03364       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
03365   {
03366     enum machine_mode gr_mode = DImode;
03367 
03368     /* If we have an odd 4 byte hunk because we ran out of FR regs,
03369        then this goes in a GR reg left adjusted/little endian, right
03370        adjusted/big endian.  */
03371     /* ??? Currently this is handled wrong, because 4-byte hunks are
03372        always right adjusted/little endian.  */
03373     if (offset & 0x4)
03374       gr_mode = SImode;
03375     /* If we have an even 4 byte hunk because the aggregate is a
03376        multiple of 4 bytes in size, then this goes in a GR reg right
03377        adjusted/little endian.  */
03378     else if (byte_size - offset == 4)
03379       gr_mode = SImode;
03380     /* Complex floats need to have float mode.  */
03381     if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
03382       gr_mode = hfa_mode;
03383 
03384     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03385               gen_rtx_REG (gr_mode, (basereg
03386                    + int_regs)),
03387               GEN_INT (offset));
03388     offset += GET_MODE_SIZE (gr_mode);
03389     int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
03390           ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
03391   }
03392 
03393       /* If we ended up using just one location, just return that one loc.  */
03394       if (i == 1)
03395   return XEXP (loc[0], 0);
03396       else
03397   return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
03398     }
03399 
03400   /* Integral and aggregates go in general registers.  If we have run out of
03401      FR registers, then FP values must also go in general registers.  This can
03402      happen when we have a SFmode HFA.  */
03403   else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
03404           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
03405     {
03406       int byte_size = ((mode == BLKmode)
03407                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03408       if (BYTES_BIG_ENDIAN
03409   && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
03410   && byte_size < UNITS_PER_WORD
03411   && byte_size > 0)
03412   {
03413     rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
03414             gen_rtx_REG (DImode,
03415                    (basereg + cum->words
03416               + offset)),
03417             const0_rtx);
03418     return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
03419   }
03420       else
03421   return gen_rtx_REG (mode, basereg + cum->words + offset);
03422 
03423     }
03424 
03425   /* If there is a prototype, then FP values go in a FR register when
03426      named, and in a GR registeer when unnamed.  */
03427   else if (cum->prototype)
03428     {
03429       if (! named)
03430   return gen_rtx_REG (mode, basereg + cum->words + offset);
03431       else
03432   return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
03433     }
03434   /* If there is no prototype, then FP values go in both FR and GR
03435      registers.  */
03436   else
03437     {
03438       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
03439               gen_rtx_REG (mode, (FR_ARG_FIRST
03440                 + cum->fp_regs)),
03441               const0_rtx);
03442       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
03443               gen_rtx_REG (mode,
03444                (basereg + cum->words
03445                 + offset)),
03446               const0_rtx);
03447 
03448       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
03449     }
03450 }
03451 
03452 /* Return number of words, at the beginning of the argument, that must be
03453    put in registers.  0 is the argument is entirely in registers or entirely
03454    in memory.  */
03455 
03456 int
03457 ia64_function_arg_partial_nregs (cum, mode, type, named)
03458      CUMULATIVE_ARGS *cum;
03459      enum machine_mode mode;
03460      tree type;
03461      int named ATTRIBUTE_UNUSED;
03462 {
03463   int words = (((mode == BLKmode ? int_size_in_bytes (type)
03464      : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
03465          / UNITS_PER_WORD);
03466   int offset = 0;
03467 
03468   /* Arguments with alignment larger than 8 bytes start at the next even
03469      boundary.  */
03470   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
03471        : (words > 1))
03472       && (cum->words & 1))
03473     offset = 1;
03474 
03475   /* If all argument slots are used, then it must go on the stack.  */
03476   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
03477     return 0;
03478 
03479   /* It doesn't matter whether the argument goes in FR or GR regs.  If
03480      it fits within the 8 argument slots, then it goes entirely in
03481      registers.  If it extends past the last argument slot, then the rest
03482      goes on the stack.  */
03483 
03484   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
03485     return 0;
03486 
03487   return MAX_ARGUMENT_SLOTS - cum->words - offset;
03488 }
03489 
03490 /* Update CUM to point after this argument.  This is patterned after
03491    ia64_function_arg.  */
03492 
03493 void
03494 ia64_function_arg_advance (cum, mode, type, named)
03495      CUMULATIVE_ARGS *cum;
03496      enum machine_mode mode;
03497      tree type;
03498      int named;
03499 {
03500   int words = (((mode == BLKmode ? int_size_in_bytes (type)
03501      : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
03502          / UNITS_PER_WORD);
03503   int offset = 0;
03504   enum machine_mode hfa_mode = VOIDmode;
03505 
03506   /* If all arg slots are already full, then there is nothing to do.  */
03507   if (cum->words >= MAX_ARGUMENT_SLOTS)
03508     return;
03509 
03510   /* Arguments with alignment larger than 8 bytes start at the next even
03511      boundary.  */
03512   if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
03513        : (words > 1))
03514       && (cum->words & 1))
03515     offset = 1;
03516 
03517   cum->words += words + offset;
03518 
03519   /* Check for and handle homogeneous FP aggregates.  */
03520   if (type)
03521     hfa_mode = hfa_element_mode (type, 0);
03522 
03523   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
03524      and unprototyped hfas are passed specially.  */
03525   if (hfa_mode != VOIDmode && (! cum->prototype || named))
03526     {
03527       int fp_regs = cum->fp_regs;
03528       /* This is the original value of cum->words + offset.  */
03529       int int_regs = cum->words - words;
03530       int hfa_size = GET_MODE_SIZE (hfa_mode);
03531       int byte_size;
03532       int args_byte_size;
03533 
03534       /* If prototyped, pass it in FR regs then GR regs.
03535    If not prototyped, pass it in both FR and GR regs.
03536 
03537    If this is an SFmode aggregate, then it is possible to run out of
03538    FR regs while GR regs are still left.  In that case, we pass the
03539    remaining part in the GR regs.  */
03540 
03541       /* Fill the FP regs.  We do this always.  We stop if we reach the end
03542    of the argument, the last FP register, or the last argument slot.  */
03543 
03544       byte_size = ((mode == BLKmode)
03545        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03546       args_byte_size = int_regs * UNITS_PER_WORD;
03547       offset = 0;
03548       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
03549         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
03550   {
03551     offset += hfa_size;
03552     args_byte_size += hfa_size;
03553     fp_regs++;
03554   }
03555 
03556       cum->fp_regs = fp_regs;
03557     }
03558 
03559   /* Integral and aggregates go in general registers.  If we have run out of
03560      FR registers, then FP values must also go in general registers.  This can
03561      happen when we have a SFmode HFA.  */
03562   else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
03563     cum->int_regs = cum->words;
03564 
03565   /* If there is a prototype, then FP values go in a FR register when
03566      named, and in a GR registeer when unnamed.  */
03567   else if (cum->prototype)
03568     {
03569       if (! named)
03570   cum->int_regs = cum->words;
03571       else
03572   /* ??? Complex types should not reach here.  */
03573   cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
03574     }
03575   /* If there is no prototype, then FP values go in both FR and GR
03576      registers.  */
03577   else
03578     { 
03579       /* ??? Complex types should not reach here.  */
03580       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
03581       cum->int_regs = cum->words;
03582     }
03583 }
03584 
03585 /* Variable sized types are passed by reference.  */
03586 /* ??? At present this is a GCC extension to the IA-64 ABI.  */
03587 
03588 int
03589 ia64_function_arg_pass_by_reference (cum, mode, type, named)
03590      CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
03591      enum machine_mode mode ATTRIBUTE_UNUSED;
03592      tree type;
03593      int named ATTRIBUTE_UNUSED;
03594 {
03595   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
03596 }
03597 
03598 
03599 /* Implement va_arg.  */
03600 
03601 rtx
03602 ia64_va_arg (valist, type)
03603      tree valist, type;
03604 {
03605   tree t;
03606 
03607   /* Variable sized types are passed by reference.  */
03608   if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
03609     {
03610       rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
03611       return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
03612     }
03613 
03614   /* Arguments with alignment larger than 8 bytes start at the next even
03615      boundary.  */
03616   if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
03617     {
03618       t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
03619      build_int_2 (2 * UNITS_PER_WORD - 1, 0));
03620       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
03621      build_int_2 (-2 * UNITS_PER_WORD, -1));
03622       t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
03623       TREE_SIDE_EFFECTS (t) = 1;
03624       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
03625     }
03626 
03627   return std_expand_builtin_va_arg (valist, type);
03628 }
03629 
03630 /* Return 1 if function return value returned in memory.  Return 0 if it is
03631    in a register.  */
03632 
03633 int
03634 ia64_return_in_memory (valtype)
03635      tree valtype;
03636 {
03637   enum machine_mode mode;
03638   enum machine_mode hfa_mode;
03639   HOST_WIDE_INT byte_size;
03640 
03641   mode = TYPE_MODE (valtype);
03642   byte_size = GET_MODE_SIZE (mode);
03643   if (mode == BLKmode)
03644     {
03645       byte_size = int_size_in_bytes (valtype);
03646       if (byte_size < 0)
03647   return 1;
03648     }
03649 
03650   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
03651 
03652   hfa_mode = hfa_element_mode (valtype, 0);
03653   if (hfa_mode != VOIDmode)
03654     {
03655       int hfa_size = GET_MODE_SIZE (hfa_mode);
03656 
03657       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
03658   return 1;
03659       else
03660   return 0;
03661     }
03662   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
03663     return 1;
03664   else
03665     return 0;
03666 }
03667 
03668 /* Return rtx for register that holds the function return value.  */
03669 
03670 rtx
03671 ia64_function_value (valtype, func)
03672      tree valtype;
03673      tree func ATTRIBUTE_UNUSED;
03674 {
03675   enum machine_mode mode;
03676   enum machine_mode hfa_mode;
03677 
03678   mode = TYPE_MODE (valtype);
03679   hfa_mode = hfa_element_mode (valtype, 0);
03680 
03681   if (hfa_mode != VOIDmode)
03682     {
03683       rtx loc[8];
03684       int i;
03685       int hfa_size;
03686       int byte_size;
03687       int offset;
03688 
03689       hfa_size = GET_MODE_SIZE (hfa_mode);
03690       byte_size = ((mode == BLKmode)
03691        ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
03692       offset = 0;
03693       for (i = 0; offset < byte_size; i++)
03694   {
03695     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03696               gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
03697               GEN_INT (offset));
03698     offset += hfa_size;
03699   }
03700 
03701       if (i == 1)
03702   return XEXP (loc[0], 0);
03703       else
03704   return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
03705     }
03706   else if (FLOAT_TYPE_P (valtype) &&
03707            ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
03708     return gen_rtx_REG (mode, FR_ARG_FIRST);
03709   else
03710     {
03711       if (BYTES_BIG_ENDIAN
03712     && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
03713   {
03714     rtx loc[8];
03715     int offset;
03716     int bytesize;
03717     int i;
03718 
03719     offset = 0;
03720     bytesize = int_size_in_bytes (valtype);
03721     for (i = 0; offset < bytesize; i++)
03722       {
03723         loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03724             gen_rtx_REG (DImode,
03725                    GR_RET_FIRST + i),
03726             GEN_INT (offset));
03727         offset += UNITS_PER_WORD;
03728       }
03729     return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
03730   }
03731       else
03732   return gen_rtx_REG (mode, GR_RET_FIRST);
03733     }
03734 }
03735 
03736 /* Print a memory address as an operand to reference that memory location.  */
03737 
03738 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
03739    also call this from ia64_print_operand for memory addresses.  */
03740 
03741 void
03742 ia64_print_operand_address (stream, address)
03743      FILE * stream ATTRIBUTE_UNUSED;
03744      rtx    address ATTRIBUTE_UNUSED;
03745 {
03746 }
03747 
03748 /* Print an operand to an assembler instruction.
03749    C  Swap and print a comparison operator.
03750    D  Print an FP comparison operator.
03751    E    Print 32 - constant, for SImode shifts as extract.
03752    e    Print 64 - constant, for DImode rotates.
03753    F  A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
03754         a floating point register emitted normally.
03755    I  Invert a predicate register by adding 1.
03756    J    Select the proper predicate register for a condition.
03757    j    Select the inverse predicate register for a condition.
03758    O  Append .acq for volatile load.
03759    P  Postincrement of a MEM.
03760    Q  Append .rel for volatile store.
03761    S  Shift amount for shladd instruction.
03762    T  Print an 8-bit sign extended number (K) as a 32-bit unsigned number
03763   for Intel assembler.
03764    U  Print an 8-bit sign extended number (K) as a 64-bit unsigned number
03765   for Intel assembler.
03766    r  Print register name, or constant 0 as r0.  HP compatibility for
03767   Linux kernel.  */
03768 void
03769 ia64_print_operand (file, x, code)
03770      FILE * file;
03771      rtx    x;
03772      int    code;
03773 {
03774   const char *str;
03775 
03776   switch (code)
03777     {
03778     case 0:
03779       /* Handled below.  */
03780       break;
03781 
03782     case 'C':
03783       {
03784   enum rtx_code c = swap_condition (GET_CODE (x));
03785   fputs (GET_RTX_NAME (c), file);
03786   return;
03787       }
03788 
03789     case 'D':
03790       switch (GET_CODE (x))
03791   {
03792   case NE:
03793     str = "neq";
03794     break;
03795   case UNORDERED:
03796     str = "unord";
03797     break;
03798   case ORDERED:
03799     str = "ord";
03800     break;
03801   default:
03802     str = GET_RTX_NAME (GET_CODE (x));
03803     break;
03804   }
03805       fputs (str, file);
03806       return;
03807 
03808     case 'E':
03809       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
03810       return;
03811 
03812     case 'e':
03813       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
03814       return;
03815 
03816     case 'F':
03817       if (x == CONST0_RTX (GET_MODE (x)))
03818   str = reg_names [FR_REG (0)];
03819       else if (x == CONST1_RTX (GET_MODE (x)))
03820   str = reg_names [FR_REG (1)];
03821       else if (GET_CODE (x) == REG)
03822   str = reg_names [REGNO (x)];
03823       else
03824   abort ();
03825       fputs (str, file);
03826       return;
03827 
03828     case 'I':
03829       fputs (reg_names [REGNO (x) + 1], file);
03830       return;
03831 
03832     case 'J':
03833     case 'j':
03834       {
03835   unsigned int regno = REGNO (XEXP (x, 0));
03836   if (GET_CODE (x) == EQ)
03837     regno += 1;
03838   if (code == 'j')
03839     regno ^= 1;
03840         fputs (reg_names [regno], file);
03841       }
03842       return;
03843 
03844     case 'O':
03845       if (MEM_VOLATILE_P (x))
03846   fputs(".acq", file);
03847       return;
03848 
03849     case 'P':
03850       {
03851   HOST_WIDE_INT value;
03852 
03853   switch (GET_CODE (XEXP (x, 0)))
03854     {
03855     default:
03856       return;
03857 
03858     case POST_MODIFY:
03859       x = XEXP (XEXP (XEXP (x, 0), 1), 1);
03860       if (GET_CODE (x) == CONST_INT)
03861         value = INTVAL (x);
03862       else if (GET_CODE (x) == REG)
03863         {
03864     fprintf (file, ", %s", reg_names[REGNO (x)]);
03865     return;
03866         }
03867       else
03868         abort ();
03869       break;
03870 
03871     case POST_INC:
03872       value = GET_MODE_SIZE (GET_MODE (x));
03873       break;
03874 
03875     case POST_DEC:
03876       value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
03877       break;
03878     }
03879 
03880   putc (',', file);
03881   putc (' ', file);
03882   fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
03883   return;
03884       }
03885 
03886     case 'Q':
03887       if (MEM_VOLATILE_P (x))
03888   fputs(".rel", file);
03889       return;
03890 
03891     case 'S':
03892       fprintf (file, "%d", exact_log2 (INTVAL (x)));
03893       return;
03894 
03895     case 'T':
03896       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
03897   {
03898     fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
03899     return;
03900   }
03901       break;
03902 
03903     case 'U':
03904       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
03905   {
03906     const char *prefix = "0x";
03907     if (INTVAL (x) & 0x80000000)
03908       {
03909         fprintf (file, "0xffffffff");
03910         prefix = "";
03911       }
03912     fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
03913     return;
03914   }
03915       break;
03916 
03917     case 'r':
03918       /* If this operand is the constant zero, write it as register zero.
03919    Any register, zero, or CONST_INT value is OK here.  */
03920       if (GET_CODE (x) == REG)
03921   fputs (reg_names[REGNO (x)], file);
03922       else if (x == CONST0_RTX (GET_MODE (x)))
03923   fputs ("r0", file);
03924       else if (GET_CODE (x) == CONST_INT)
03925   output_addr_const (file, x);
03926       else
03927   output_operand_lossage ("invalid %%r value");
03928       return;
03929 
03930     case '+':
03931       {
03932   const char *which;
03933   
03934   /* For conditional branches, returns or calls, substitute
03935      sptk, dptk, dpnt, or spnt for %s.  */
03936   x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
03937   if (x)
03938     {
03939       int pred_val = INTVAL (XEXP (x, 0));
03940 
03941       /* Guess top and bottom 10% statically predicted.  */
03942       if (pred_val < REG_BR_PROB_BASE / 50)
03943         which = ".spnt";
03944       else if (pred_val < REG_BR_PROB_BASE / 2)
03945         which = ".dpnt";
03946       else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
03947         which = ".dptk";
03948       else
03949         which = ".sptk";
03950     }
03951   else if (GET_CODE (current_output_insn) == CALL_INSN)
03952     which = ".sptk";
03953   else
03954     which = ".dptk";
03955 
03956   fputs (which, file);
03957   return;
03958       }
03959 
03960     case ',':
03961       x = current_insn_predicate;
03962       if (x)
03963   {
03964     unsigned int regno = REGNO (XEXP (x, 0));
03965     if (GET_CODE (x) == EQ)
03966       regno += 1;
03967           fprintf (file, "(%s) ", reg_names [regno]);
03968   }
03969       return;
03970 
03971     default:
03972       output_operand_lossage ("ia64_print_operand: unknown code");
03973       return;
03974     }
03975 
03976   switch (GET_CODE (x))
03977     {
03978       /* This happens for the spill/restore instructions.  */
03979     case POST_INC:
03980     case POST_DEC:
03981     case POST_MODIFY:
03982       x = XEXP (x, 0);
03983       /* ... fall through ...  */
03984 
03985     case REG:
03986       fputs (reg_names [REGNO (x)], file);
03987       break;
03988 
03989     case MEM:
03990       {
03991   rtx addr = XEXP (x, 0);
03992   if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
03993     addr = XEXP (addr, 0);
03994   fprintf (file, "[%s]", reg_names [REGNO (addr)]);
03995   break;
03996       }
03997 
03998     default:
03999       output_addr_const (file, x);
04000       break;
04001     }
04002 
04003   return;
04004 }
04005 
04006 /* Calulate the cost of moving data from a register in class FROM to
04007    one in class TO, using MODE.  */
04008 
04009 int
04010 ia64_register_move_cost (mode, from, to)
04011      enum machine_mode mode;
04012      enum reg_class from, to;
04013 {
04014   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
04015   if (to == ADDL_REGS)
04016     to = GR_REGS;
04017   if (from == ADDL_REGS)
04018     from = GR_REGS;
04019 
04020   /* All costs are symmetric, so reduce cases by putting the
04021      lower number class as the destination.  */
04022   if (from < to)
04023     {
04024       enum reg_class tmp = to;
04025       to = from, from = tmp;
04026     }
04027 
04028   /* Moving from FR<->GR in TFmode must be more expensive than 2,
04029      so that we get secondary memory reloads.  Between FR_REGS,
04030      we have to make this at least as expensive as MEMORY_MOVE_COST
04031      to avoid spectacularly poor register class preferencing.  */
04032   if (mode == TFmode)
04033     {
04034       if (to != GR_REGS || from != GR_REGS)
04035         return MEMORY_MOVE_COST (mode, to, 0);
04036       else
04037   return 3;
04038     }
04039 
04040   switch (to)
04041     {
04042     case PR_REGS:
04043       /* Moving between PR registers takes two insns.  */
04044       if (from == PR_REGS)
04045   return 3;
04046       /* Moving between PR and anything but GR is impossible.  */
04047       if (from != GR_REGS)
04048   return MEMORY_MOVE_COST (mode, to, 0);
04049       break;
04050 
04051     case BR_REGS:
04052       /* Moving between BR and anything but GR is impossible.  */
04053       if (from != GR_REGS && from != GR_AND_BR_REGS)
04054   return MEMORY_MOVE_COST (mode, to, 0);
04055       break;
04056 
04057     case AR_I_REGS:
04058     case AR_M_REGS:
04059       /* Moving between AR and anything but GR is impossible.  */
04060       if (from != GR_REGS)
04061   return MEMORY_MOVE_COST (mode, to, 0);
04062       break;
04063 
04064     case GR_REGS:
04065     case FR_REGS:
04066     case GR_AND_FR_REGS:
04067     case GR_AND_BR_REGS:
04068     case ALL_REGS:
04069       break;
04070 
04071     default:
04072       abort ();
04073     }
04074 
04075   return 2;
04076 }
04077 
04078 /* This function returns the register class required for a secondary
04079    register when copying between one of the registers in CLASS, and X,
04080    using MODE.  A return value of NO_REGS means that no secondary register
04081    is required.  */
04082 
04083 enum reg_class
04084 ia64_secondary_reload_class (class, mode, x)
04085      enum reg_class class;
04086      enum machine_mode mode ATTRIBUTE_UNUSED;
04087      rtx x;
04088 {
04089   int regno = -1;
04090 
04091   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
04092     regno = true_regnum (x);
04093 
04094   switch (class)
04095     {
04096     case BR_REGS:
04097     case AR_M_REGS:
04098     case AR_I_REGS:
04099       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
04100    interaction.  We end up with two pseudos with overlapping lifetimes
04101    both of which are equiv to the same constant, and both which need
04102    to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
04103    changes depending on the path length, which means the qty_first_reg
04104    check in make_regs_eqv can give different answers at different times.
04105    At some point I'll probably need a reload_indi pattern to handle
04106    this.
04107 
04108    We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
04109    wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
04110    non-general registers for good measure.  */
04111       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
04112   return GR_REGS;
04113 
04114       /* This is needed if a pseudo used as a call_operand gets spilled to a
04115    stack slot.  */
04116       if (GET_CODE (x) == MEM)
04117   return GR_REGS;
04118       break;
04119 
04120     case FR_REGS:
04121       /* Need to go through general regsters to get to other class regs.  */
04122       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
04123   return GR_REGS;
04124  
04125       /* This can happen when a paradoxical subreg is an operand to the
04126    muldi3 pattern.  */
04127       /* ??? This shouldn't be necessary after instruction scheduling is
04128    enabled, because paradoxical subregs are not accepted by
04129    register_operand when INSN_SCHEDULING is defined.  Or alternatively,
04130    stop the paradoxical subreg stupidity in the *_operand functions
04131    in recog.c.  */
04132       if (GET_CODE (x) == MEM
04133     && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
04134         || GET_MODE (x) == QImode))
04135   return GR_REGS;
04136 
04137       /* This can happen because of the ior/and/etc patterns that accept FP
04138    registers as operands.  If the third operand is a constant, then it
04139    needs to be reloaded into a FP register.  */
04140       if (GET_CODE (x) == CONST_INT)
04141   return GR_REGS;
04142 
04143       /* This can happen because of register elimination in a muldi3 insn.
04144    E.g. `26107 * (unsigned long)&u'.  */
04145       if (GET_CODE (x) == PLUS)
04146   return GR_REGS;
04147       break;
04148 
04149     case PR_REGS:
04150       /* ??? This happens if we cse/gcse a BImode value across a call,
04151    and the function has a nonlocal goto.  This is because global
04152    does not allocate call crossing pseudos to hard registers when
04153    current_function_has_nonlocal_goto is true.  This is relatively
04154    common for C++ programs that use exceptions.  To reproduce,
04155    return NO_REGS and compile libstdc++.  */
04156       if (GET_CODE (x) == MEM)
04157   return GR_REGS;
04158 
04159       /* This can happen when we take a BImode subreg of a DImode value,
04160    and that DImode value winds up in some non-GR register.  */
04161       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
04162   return GR_REGS;
04163       break;
04164 
04165     case GR_REGS:
04166       /* Since we have no offsettable memory addresses, we need a temporary
04167    to hold the address of the second word.  */
04168       if (mode == TImode)
04169   return GR_REGS;
04170       break;
04171 
04172     default:
04173       break;
04174     }
04175 
04176   return NO_REGS;
04177 }
04178 
04179 /* Emit text to declare externally defined variables and functions, because
04180    the Intel assembler does not support undefined externals.  */
04181 
04182 void
04183 ia64_asm_output_external (file, decl, name)
04184      FILE *file;
04185      tree decl;
04186      const char *name;
04187 {
04188   int save_referenced;
04189 
04190   /* GNU as does not need anything here, but the HP linker does need
04191      something for external functions.  */
04192 
04193   if (TARGET_GNU_AS
04194       && (!TARGET_HPUX_LD
04195     || TREE_CODE (decl) != FUNCTION_DECL
04196     || strstr(name, "__builtin_") == name))
04197     return;
04198 
04199   /* ??? The Intel assembler creates a reference that needs to be satisfied by
04200      the linker when we do this, so we need to be careful not to do this for
04201      builtin functions which have no library equivalent.  Unfortunately, we
04202      can't tell here whether or not a function will actually be called by
04203      expand_expr, so we pull in library functions even if we may not need
04204      them later.  */
04205   if (! strcmp (name, "__builtin_next_arg")
04206       || ! strcmp (name, "alloca")
04207       || ! strcmp (name, "__builtin_constant_p")
04208       || ! strcmp (name, "__builtin_args_info"))
04209     return;
04210 
04211   if (TARGET_HPUX_LD)
04212     ia64_hpux_add_extern_decl (name);
04213   else
04214     {
04215       /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
04216          restore it.  */
04217       save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
04218       if (TREE_CODE (decl) == FUNCTION_DECL)
04219         ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
04220       (*targetm.asm_out.globalize_label) (file, name);
04221       TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
04222     }
04223 }
04224 
04225 /* Parse the -mfixed-range= option string.  */
04226 
04227 static void
04228 fix_range (const_str)
04229      const char *const_str;
04230 {
04231   int i, first, last;
04232   char *str, *dash, *comma;
04233 
04234   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
04235      REG2 are either register names or register numbers.  The effect
04236      of this option is to mark the registers in the range from REG1 to
04237      REG2 as ``fixed'' so they won't be used by the compiler.  This is
04238      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
04239 
04240   i = strlen (const_str);
04241   str = (char *) alloca (i + 1);
04242   memcpy (str, const_str, i + 1);
04243 
04244   while (1)
04245     {
04246       dash = strchr (str, '-');
04247       if (!dash)
04248   {
04249     warning ("value of -mfixed-range must have form REG1-REG2");
04250     return;
04251   }
04252       *dash = '\0';
04253 
04254       comma = strchr (dash + 1, ',');
04255       if (comma)
04256   *comma = '\0';
04257 
04258       first = decode_reg_name (str);
04259       if (first < 0)
04260   {
04261     warning ("unknown register name: %s", str);
04262     return;
04263   }
04264 
04265       last = decode_reg_name (dash + 1);
04266       if (last < 0)
04267   {
04268     warning ("unknown register name: %s", dash + 1);
04269     return;
04270   }
04271 
04272       *dash = '-';
04273 
04274       if (first > last)
04275   {
04276     warning ("%s-%s is an empty range", str, dash + 1);
04277     return;
04278   }
04279 
04280       for (i = first; i <= last; ++i)
04281   fixed_regs[i] = call_used_regs[i] = 1;
04282 
04283       if (!comma)
04284   break;
04285 
04286       *comma = ',';
04287       str = comma + 1;
04288     }
04289 }
04290 
04291 static struct machine_function *
04292 ia64_init_machine_status ()
04293 {
04294   return ggc_alloc_cleared (sizeof (struct machine_function));
04295 }
04296 
04297 /* Handle TARGET_OPTIONS switches.  */
04298 
04299 void
04300 ia64_override_options ()
04301 {
04302   if (TARGET_AUTO_PIC)
04303     target_flags |= MASK_CONST_GP;
04304 
04305   if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
04306     {
04307       warning ("cannot optimize floating point division for both latency and throughput");
04308       target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
04309     }
04310 
04311   if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
04312     {
04313       warning ("cannot optimize integer division for both latency and throughput");
04314       target_flags &= ~MASK_INLINE_INT_DIV_THR;
04315     }
04316 
04317   if (ia64_fixed_range_string)
04318     fix_range (ia64_fixed_range_string);
04319 
04320   if (ia64_tls_size_string)
04321     {
04322       char *end;
04323       unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
04324       if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
04325   error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
04326       else
04327   ia64_tls_size = tmp;
04328     }
04329 
04330   ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
04331   flag_schedule_insns_after_reload = 0;
04332 
04333   ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
04334 
04335   init_machine_status = ia64_init_machine_status;
04336 
04337   /* Tell the compiler which flavor of TFmode we're using.  */
04338   if (INTEL_EXTENDED_IEEE_FORMAT)
04339     real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
04340 }
04341 
04342 static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
04343 static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
04344 static enum attr_type ia64_safe_type PARAMS((rtx));
04345 
04346 static enum attr_itanium_requires_unit0
04347 ia64_safe_itanium_requires_unit0 (insn)
04348      rtx insn;
04349 {
04350   if (recog_memoized (insn) >= 0)
04351     return get_attr_itanium_requires_unit0 (insn);
04352   else
04353     return ITANIUM_REQUIRES_UNIT0_NO;
04354 }
04355 
04356 static enum attr_itanium_class
04357 ia64_safe_itanium_class (insn)
04358      rtx insn;
04359 {
04360   if (recog_memoized (insn) >= 0)
04361     return get_attr_itanium_class (insn);
04362   else
04363     return ITANIUM_CLASS_UNKNOWN;
04364 }
04365 
04366 static enum attr_type
04367 ia64_safe_type (insn)
04368      rtx insn;
04369 {
04370   if (recog_memoized (insn) >= 0)
04371     return get_attr_type (insn);
04372   else
04373     return TYPE_UNKNOWN;
04374 }
04375 
04376 /* The following collection of routines emit instruction group stop bits as
04377    necessary to avoid dependencies.  */
04378 
04379 /* Need to track some additional registers as far as serialization is
04380    concerned so we can properly handle br.call and br.ret.  We could
04381    make these registers visible to gcc, but since these registers are
04382    never explicitly used in gcc generated code, it seems wasteful to
04383    do so (plus it would make the call and return patterns needlessly
04384    complex).  */
04385 #define REG_GP    (GR_REG (1))
04386 #define REG_RP    (BR_REG (0))
04387 #define REG_AR_CFM  (FIRST_PSEUDO_REGISTER + 1)
04388 /* This is used for volatile asms which may require a stop bit immediately
04389    before and after them.  */
04390 #define REG_VOLATILE  (FIRST_PSEUDO_REGISTER + 2)
04391 #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
04392 #define NUM_REGS  (AR_UNAT_BIT_0 + 64)
04393 
04394 /* For each register, we keep track of how it has been written in the
04395    current instruction group.
04396 
04397    If a register is written unconditionally (no qualifying predicate),
04398    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
04399 
04400    If a register is written if its qualifying predicate P is true, we
04401    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
04402    may be written again by the complement of P (P^1) and when this happens,
04403    WRITE_COUNT gets set to 2.
04404 
04405    The result of this is that whenever an insn attempts to write a register
04406    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
04407 
04408    If a predicate register is written by a floating-point insn, we set
04409    WRITTEN_BY_FP to true.
04410 
04411    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
04412    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
04413 
04414 struct reg_write_state
04415 {
04416   unsigned int write_count : 2;
04417   unsigned int first_pred : 16;
04418   unsigned int written_by_fp : 1;
04419   unsigned int written_by_and : 1;
04420   unsigned int written_by_or : 1;
04421 };
04422 
04423 /* Cumulative info for the current instruction group.  */
04424 struct reg_write_state rws_sum[NUM_REGS];
04425 /* Info for the current instruction.  This gets copied to rws_sum after a
04426    stop bit is emitted.  */
04427 struct reg_write_state rws_insn[NUM_REGS];
04428 
04429 /* Indicates whether this is the first instruction after a stop bit,
04430    in which case we don't need another stop bit.  Without this, we hit
04431    the abort in ia64_variable_issue when scheduling an alloc.  */
04432 static int first_instruction;
04433 
04434 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
04435    RTL for one instruction.  */
04436 struct reg_flags
04437 {
04438   unsigned int is_write : 1;  /* Is register being written?  */
04439   unsigned int is_fp : 1; /* Is register used as part of an fp op?  */
04440   unsigned int is_branch : 1; /* Is register used as part of a branch?  */
04441   unsigned int is_and : 1;  /* Is register used as part of and.orcm?  */
04442   unsigned int is_or : 1; /* Is register used as part of or.andcm?  */
04443   unsigned int is_sibcall : 1;  /* Is this a sibling or normal call?  */
04444 };
04445 
04446 static void rws_update PARAMS ((struct reg_write_state *, int,
04447         struct reg_flags, int));
04448 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
04449 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
04450 static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
04451 static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
04452 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
04453 static void init_insn_group_barriers PARAMS ((void));
04454 static int group_barrier_needed_p PARAMS ((rtx));
04455 static int safe_group_barrier_needed_p PARAMS ((rtx));
04456 
04457 /* Update *RWS for REGNO, which is being written by the current instruction,
04458    with predicate PRED, and associated register flags in FLAGS.  */
04459 
04460 static void
04461 rws_update (rws, regno, flags, pred)
04462      struct reg_write_state *rws;
04463      int regno;
04464      struct reg_flags flags;
04465      int pred;
04466 {
04467   if (pred)
04468     rws[regno].write_count++;
04469   else
04470     rws[regno].write_count = 2;
04471   rws[regno].written_by_fp |= flags.is_fp;
04472   /* ??? Not tracking and/or across differing predicates.  */
04473   rws[regno].written_by_and = flags.is_and;
04474   rws[regno].written_by_or = flags.is_or;
04475   rws[regno].first_pred = pred;
04476 }
04477 
04478 /* Handle an access to register REGNO of type FLAGS using predicate register
04479    PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
04480    a dependency with an earlier instruction in the same group.  */
04481 
04482 static int
04483 rws_access_regno (regno, flags, pred)
04484      int regno;
04485      struct reg_flags flags;
04486      int pred;
04487 {
04488   int need_barrier = 0;
04489 
04490   if (regno >= NUM_REGS)
04491     abort ();
04492 
04493   if (! PR_REGNO_P (regno))
04494     flags.is_and = flags.is_or = 0;
04495 
04496   if (flags.is_write)
04497     {
04498       int write_count;
04499 
04500       /* One insn writes same reg multiple times?  */
04501       if (rws_insn[regno].write_count > 0)
04502   abort ();
04503 
04504       /* Update info for current instruction.  */
04505       rws_update (rws_insn, regno, flags, pred);
04506       write_count = rws_sum[regno].write_count;
04507 
04508       switch (write_count)
04509   {
04510   case 0:
04511     /* The register has not been written yet.  */
04512     rws_update (rws_sum, regno, flags, pred);
04513     break;
04514 
04515   case 1:
04516     /* The register has been written via a predicate.  If this is
04517        not a complementary predicate, then we need a barrier.  */
04518     /* ??? This assumes that P and P+1 are always complementary
04519        predicates for P even.  */
04520     if (flags.is_and && rws_sum[regno].written_by_and)
04521       ; 
04522     else if (flags.is_or && rws_sum[regno].written_by_or)
04523       ;
04524     else if ((rws_sum[regno].first_pred ^ 1) != pred)
04525       need_barrier = 1;
04526     rws_update (rws_sum, regno, flags, pred);
04527     break;
04528 
04529   case 2:
04530     /* The register has been unconditionally written already.  We
04531        need a barrier.  */
04532     if (flags.is_and && rws_sum[regno].written_by_and)
04533       ;
04534     else if (flags.is_or && rws_sum[regno].written_by_or)
04535       ;
04536     else
04537       need_barrier = 1;
04538     rws_sum[regno].written_by_and = flags.is_and;
04539     rws_sum[regno].written_by_or = flags.is_or;
04540     break;
04541 
04542   default:
04543     abort ();
04544   }
04545     }
04546   else
04547     {
04548       if (flags.is_branch)
04549   {
04550     /* Branches have several RAW exceptions that allow to avoid
04551        barriers.  */
04552 
04553     if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
04554       /* RAW dependencies on branch regs are permissible as long
04555          as the writer is a non-branch instruction.  Since we
04556          never generate code that uses a branch register written
04557          by a branch instruction, handling this case is
04558          easy.  */
04559       return 0;
04560 
04561     if (REGNO_REG_CLASS (regno) == PR_REGS
04562         && ! rws_sum[regno].written_by_fp)
04563       /* The predicates of a branch are available within the
04564          same insn group as long as the predicate was written by
04565          something other than a floating-point instruction.  */
04566       return 0;
04567   }
04568 
04569       if (flags.is_and && rws_sum[regno].written_by_and)
04570   return 0;
04571       if (flags.is_or && rws_sum[regno].written_by_or)
04572   return 0;
04573 
04574       switch (rws_sum[regno].write_count)
04575   {
04576   case 0:
04577     /* The register has not been written yet.  */
04578     break;
04579 
04580   case 1:
04581     /* The register has been written via a predicate.  If this is
04582        not a complementary predicate, then we need a barrier.  */
04583     /* ??? This assumes that P and P+1 are always complementary
04584        predicates for P even.  */
04585     if ((rws_sum[regno].first_pred ^ 1) != pred)
04586       need_barrier = 1;
04587     break;
04588 
04589   case 2:
04590     /* The register has been unconditionally written already.  We
04591        need a barrier.  */
04592     need_barrier = 1;
04593     break;
04594 
04595   default:
04596     abort ();
04597   }
04598     }
04599 
04600   return need_barrier;
04601 }
04602 
04603 static int
04604 rws_access_reg (reg, flags, pred)
04605      rtx reg;
04606      struct reg_flags flags;
04607      int pred;
04608 {
04609   int regno = REGNO (reg);
04610   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
04611 
04612   if (n == 1)
04613     return rws_access_regno (regno, flags, pred);
04614   else
04615     {
04616       int need_barrier = 0;
04617       while (--n >= 0)
04618   need_barrier |= rws_access_regno (regno + n, flags, pred);
04619       return need_barrier;
04620     }
04621 }
04622 
04623 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
04624    the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
04625 
04626 static void
04627 update_set_flags (x, pflags, ppred, pcond)
04628      rtx x;
04629      struct reg_flags *pflags;
04630      int *ppred;
04631      rtx *pcond;
04632 {
04633   rtx src = SET_SRC (x);
04634 
04635   *pcond = 0;
04636 
04637   switch (GET_CODE (src))
04638     {
04639     case CALL:
04640       return;
04641 
04642     case IF_THEN_ELSE:
04643       if (SET_DEST (x) == pc_rtx)
04644   /* X is a conditional branch.  */
04645   return; 
04646       else
04647   {
04648     int is_complemented = 0;
04649 
04650     /* X is a conditional move.  */
04651     rtx cond = XEXP (src, 0);
04652     if (GET_CODE (cond) == EQ)
04653       is_complemented = 1;
04654     cond = XEXP (cond, 0);
04655     if (GET_CODE (cond) != REG
04656         && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
04657       abort ();
04658     *pcond = cond;
04659     if (XEXP (src, 1) == SET_DEST (x)
04660         || XEXP (src, 2) == SET_DEST (x))
04661       {
04662         /* X is a conditional move that conditionally writes the
04663      destination.  */
04664 
04665         /* We need another complement in this case.  */
04666         if (XEXP (src, 1) == SET_DEST (x))
04667     is_complemented = ! is_complemented;
04668 
04669         *ppred = REGNO (cond);
04670         if (is_complemented)
04671     ++*ppred;
04672       }
04673 
04674     /* ??? If this is a conditional write to the dest, then this
04675        instruction does not actually read one source.  This probably
04676        doesn't matter, because that source is also the dest.  */
04677     /* ??? Multiple writes to predicate registers are allowed
04678        if they are all AND type compares, or if they are all OR
04679        type compares.  We do not generate such instructions
04680        currently.  */
04681   }
04682       /* ... fall through ...  */
04683 
04684     default:
04685       if (GET_RTX_CLASS (GET_CODE (src)) == '<'
04686     && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
04687   /* Set pflags->is_fp to 1 so that we know we're dealing
04688      with a floating point comparison when processing the
04689      destination of the SET.  */
04690   pflags->is_fp = 1;
04691 
04692       /* Discover if this is a parallel comparison.  We only handle
04693    and.orcm and or.andcm at present, since we must retain a
04694    strict inverse on the predicate pair.  */
04695       else if (GET_CODE (src) == AND)
04696   pflags->is_and = 1;
04697       else if (GET_CODE (src) == IOR)
04698   pflags->is_or = 1;
04699 
04700       break;
04701     }
04702 }
04703 
04704 /* Subroutine of rtx_needs_barrier; this function determines whether the
04705    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
04706    are as in rtx_needs_barrier.  COND is an rtx that holds the condition
04707    for this insn.  */
04708    
04709 static int
04710 set_src_needs_barrier (x, flags, pred, cond)
04711      rtx x;
04712      struct reg_flags flags;
04713      int pred;
04714      rtx cond;
04715 {
04716   int need_barrier = 0;
04717   rtx dst;
04718   rtx src = SET_SRC (x);
04719 
04720   if (GET_CODE (src) == CALL)
04721     /* We don't need to worry about the result registers that
04722        get written by subroutine call.  */
04723     return rtx_needs_barrier (src, flags, pred);
04724   else if (SET_DEST (x) == pc_rtx)
04725     {
04726       /* X is a conditional branch.  */
04727       /* ??? This seems redundant, as the caller sets this bit for
04728    all JUMP_INSNs.  */
04729       flags.is_branch = 1;
04730       return rtx_needs_barrier (src, flags, pred);
04731     }
04732 
04733   need_barrier = rtx_needs_barrier (src, flags, pred);
04734 
04735   /* This instruction unconditionally uses a predicate register.  */
04736   if (cond)
04737     need_barrier |= rws_access_reg (cond, flags, 0);
04738 
04739   dst = SET_DEST (x);
04740   if (GET_CODE (dst) == ZERO_EXTRACT)
04741     {
04742       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
04743       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
04744       dst = XEXP (dst, 0);
04745     }
04746   return need_barrier;
04747 }
04748 
04749 /* Handle an access to rtx X of type FLAGS using predicate register PRED.
04750    Return 1 is this access creates a dependency with an earlier instruction
04751    in the same group.  */
04752 
04753 static int
04754 rtx_needs_barrier (x, flags, pred)
04755      rtx x;
04756      struct reg_flags flags;
04757      int pred;
04758 {
04759   int i, j;
04760   int is_complemented = 0;
04761   int need_barrier = 0;
04762   const char *format_ptr;
04763   struct reg_flags new_flags;
04764   rtx cond = 0;
04765 
04766   if (! x)
04767     return 0;
04768 
04769   new_flags = flags;
04770 
04771   switch (GET_CODE (x))
04772     {
04773     case SET:      
04774       update_set_flags (x, &new_flags, &pred, &cond);
04775       need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
04776       if (GET_CODE (SET_SRC (x)) != CALL)
04777   {
04778     new_flags.is_write = 1;
04779     need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
04780   }
04781       break;
04782 
04783     case CALL:
04784       new_flags.is_write = 0;
04785       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
04786 
04787       /* Avoid multiple register writes, in case this is a pattern with
04788    multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
04789       if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
04790   {
04791     new_flags.is_write = 1;
04792     need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
04793     need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
04794     need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
04795   }
04796       break;
04797 
04798     case COND_EXEC:
04799       /* X is a predicated instruction.  */
04800 
04801       cond = COND_EXEC_TEST (x);
04802       if (pred)
04803   abort ();
04804       need_barrier = rtx_needs_barrier (cond, flags, 0);
04805 
04806       if (GET_CODE (cond) == EQ)
04807   is_complemented = 1;
04808       cond = XEXP (cond, 0);
04809       if (GET_CODE (cond) != REG
04810     && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
04811   abort ();
04812       pred = REGNO (cond);
04813       if (is_complemented)
04814   ++pred;
04815 
04816       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
04817       return need_barrier;
04818 
04819     case CLOBBER:
04820     case USE:
04821       /* Clobber & use are for earlier compiler-phases only.  */
04822       break;
04823 
04824     case ASM_OPERANDS:
04825     case ASM_INPUT:
04826       /* We always emit stop bits for traditional asms.  We emit stop bits
04827    for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
04828       if (GET_CODE (x) != ASM_OPERANDS
04829     || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
04830   {
04831     /* Avoid writing the register multiple times if we have multiple
04832        asm outputs.  This avoids an abort in rws_access_reg.  */
04833     if (! rws_insn[REG_VOLATILE].write_count)
04834       {
04835         new_flags.is_write = 1;
04836         rws_access_regno (REG_VOLATILE, new_flags, pred);
04837       }
04838     return 1;
04839   }
04840 
04841       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
04842    We can not just fall through here since then we would be confused
04843    by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
04844    traditional asms unlike their normal usage.  */
04845 
04846       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
04847   if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
04848     need_barrier = 1;
04849       break;
04850 
04851     case PARALLEL:
04852       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
04853   {
04854     rtx pat = XVECEXP (x, 0, i);
04855     if (GET_CODE (pat) == SET)
04856       {
04857         update_set_flags (pat, &new_flags, &pred, &cond);
04858         need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
04859       }
04860     else if (GET_CODE (pat) == USE
04861        || GET_CODE (pat) == CALL
04862        || GET_CODE (pat) == ASM_OPERANDS)
04863       need_barrier |= rtx_needs_barrier (pat, flags, pred);
04864     else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
04865       abort ();
04866   }
04867       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
04868   {
04869     rtx pat = XVECEXP (x, 0, i);
04870     if (GET_CODE (pat) == SET)
04871       {
04872         if (GET_CODE (SET_SRC (pat)) != CALL)
04873     {
04874       new_flags.is_write = 1;
04875       need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
04876                  pred);
04877     }
04878       }
04879     else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
04880       need_barrier |= rtx_needs_barrier (pat, flags, pred);
04881   }
04882       break;
04883 
04884     case SUBREG:
04885       x = SUBREG_REG (x);
04886       /* FALLTHRU */
04887     case REG:
04888       if (REGNO (x) == AR_UNAT_REGNUM)
04889   {
04890     for (i = 0; i < 64; ++i)
04891       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
04892   }
04893       else
04894   need_barrier = rws_access_reg (x, flags, pred);
04895       break;
04896 
04897     case MEM:
04898       /* Find the regs used in memory address computation.  */
04899       new_flags.is_write = 0;
04900       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
04901       break;
04902 
04903     case CONST_INT:   case CONST_DOUBLE:
04904     case SYMBOL_REF:  case LABEL_REF:     case CONST:
04905       break;
04906 
04907       /* Operators with side-effects.  */
04908     case POST_INC:    case POST_DEC:
04909       if (GET_CODE (XEXP (x, 0)) != REG)
04910   abort ();
04911 
04912       new_flags.is_write = 0;
04913       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
04914       new_flags.is_write = 1;
04915       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
04916       break;
04917 
04918     case POST_MODIFY:
04919       if (GET_CODE (XEXP (x, 0)) != REG)
04920   abort ();
04921 
04922       new_flags.is_write = 0;
04923       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
04924       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
04925       new_flags.is_write = 1;
04926       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
04927       break;
04928 
04929       /* Handle common unary and binary ops for efficiency.  */
04930     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
04931     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
04932     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
04933     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
04934     case NE:       case EQ:      case GE:      case GT:        case LE:
04935     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
04936       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
04937       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
04938       break;
04939 
04940     case NEG:      case NOT:          case SIGN_EXTEND:     case ZERO_EXTEND:
04941     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
04942     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
04943     case SQRT:     case FFS:
04944       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
04945       break;
04946 
04947     case UNSPEC:
04948       switch (XINT (x, 1))
04949   {
04950   case UNSPEC_LTOFF_DTPMOD:
04951   case UNSPEC_LTOFF_DTPREL:
04952   case UNSPEC_DTPREL:
04953   case UNSPEC_LTOFF_TPREL:
04954   case UNSPEC_TPREL:
04955   case UNSPEC_PRED_REL_MUTEX:
04956   case UNSPEC_PIC_CALL:
04957         case UNSPEC_MF:
04958         case UNSPEC_FETCHADD_ACQ:
04959   case UNSPEC_BSP_VALUE:
04960   case UNSPEC_FLUSHRS:
04961   case UNSPEC_BUNDLE_SELECTOR:
04962           break;
04963 
04964   case UNSPEC_GR_SPILL:
04965   case UNSPEC_GR_RESTORE:
04966     {
04967       HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
04968       HOST_WIDE_INT bit = (offset >> 3) & 63;
04969 
04970       need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
04971       new_flags.is_write = (XINT (x, 1) == 1);
04972       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
04973                 new_flags, pred);
04974       break;
04975     }
04976     
04977   case UNSPEC_FR_SPILL:
04978   case UNSPEC_FR_RESTORE:
04979   case UNSPEC_POPCNT:
04980     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
04981     break;
04982 
04983         case UNSPEC_ADDP4:
04984     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
04985     break;
04986 
04987   case UNSPEC_FR_RECIP_APPROX:
04988     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
04989     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
04990     break;
04991 
04992         case UNSPEC_CMPXCHG_ACQ:
04993     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
04994     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
04995     break;
04996 
04997   default:
04998     abort ();
04999   }
05000       break;
05001 
05002     case UNSPEC_VOLATILE:
05003       switch (XINT (x, 1))
05004   {
05005   case UNSPECV_ALLOC:
05006     /* Alloc must always be the first instruction of a group.
05007        We force this by always returning true.  */
05008     /* ??? We might get better scheduling if we explicitly check for
05009        input/local/output register dependencies, and modify the
05010        scheduler so that alloc is always reordered to the start of
05011        the current group.  We could then eliminate all of the
05012        first_instruction code.  */
05013     rws_access_regno (AR_PFS_REGNUM, flags, pred);
05014 
05015     new_flags.is_write = 1;
05016     rws_access_regno (REG_AR_CFM, new_flags, pred);
05017     return 1;
05018 
05019   case UNSPECV_SET_BSP:
05020     need_barrier = 1;
05021           break;
05022 
05023   case UNSPECV_BLOCKAGE:
05024   case UNSPECV_INSN_GROUP_BARRIER:
05025   case UNSPECV_BREAK:
05026   case UNSPECV_PSAC_ALL:
05027   case UNSPECV_PSAC_NORMAL:
05028     return 0;
05029 
05030   default:
05031     abort ();
05032   }
05033       break;
05034 
05035     case RETURN:
05036       new_flags.is_write = 0;
05037       need_barrier  = rws_access_regno (REG_RP, flags, pred);
05038       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
05039 
05040       new_flags.is_write = 1;
05041       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
05042       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
05043       break;
05044 
05045     default:
05046       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
05047       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
05048   switch (format_ptr[i])
05049     {
05050     case '0': /* unused field */
05051     case 'i': /* integer */
05052     case 'n': /* note */
05053     case 'w': /* wide integer */
05054     case 's': /* pointer to string */
05055     case 'S': /* optional pointer to string */
05056       break;
05057 
05058     case 'e':
05059       if (rtx_needs_barrier (XEXP (x, i), flags, pred))
05060         need_barrier = 1;
05061       break;
05062 
05063     case 'E':
05064       for (j = XVECLEN (x, i) - 1; j >= 0; --j)
05065         if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
05066     need_barrier = 1;
05067       break;
05068 
05069     default:
05070       abort ();
05071     }
05072       break;
05073     }
05074   return need_barrier;
05075 }
05076 
05077 /* Clear out the state for group_barrier_needed_p at the start of a
05078    sequence of insns.  */
05079 
05080 static void
05081 init_insn_group_barriers ()
05082 {
05083   memset (rws_sum, 0, sizeof (rws_sum));
05084   first_instruction = 1;
05085 }
05086 
05087 /* Given the current state, recorded by previous calls to this function,
05088    determine whether a group barrier (a stop bit) is necessary before INSN.
05089    Return nonzero if so.  */
05090 
05091 static int
05092 group_barrier_needed_p (insn)
05093      rtx insn;
05094 {
05095   rtx pat;
05096   int need_barrier = 0;
05097   struct reg_flags flags;
05098 
05099   memset (&flags, 0, sizeof (flags));
05100   switch (GET_CODE (insn))
05101     {
05102     case NOTE:
05103       break;
05104 
05105     case BARRIER:
05106       /* A barrier doesn't imply an instruction group boundary.  */
05107       break;
05108 
05109     case CODE_LABEL:
05110       memset (rws_insn, 0, sizeof (rws_insn));
05111       return 1;
05112 
05113     case CALL_INSN:
05114       flags.is_branch = 1;
05115       flags.is_sibcall = SIBLING_CALL_P (insn);
05116       memset (rws_insn, 0, sizeof (rws_insn));
05117 
05118       /* Don't bundle a call following another call.  */
05119       if ((pat = prev_active_insn (insn))
05120     && GET_CODE (pat) == CALL_INSN)
05121   {
05122     need_barrier = 1;
05123     break;
05124   }
05125 
05126       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
05127       break;
05128 
05129     case JUMP_INSN:
05130       flags.is_branch = 1;
05131 
05132       /* Don't bundle a jump following a call.  */
05133       if ((pat = prev_active_insn (insn))
05134     && GET_CODE (pat) == CALL_INSN)
05135   {
05136     need_barrier = 1;
05137     break;
05138   }
05139       /* FALLTHRU */
05140 
05141     case INSN:
05142       if (GET_CODE (PATTERN (insn)) == USE
05143     || GET_CODE (PATTERN (insn)) == CLOBBER)
05144   /* Don't care about USE and CLOBBER "insns"---those are used to
05145      indicate to the optimizer that it shouldn't get rid of
05146      certain operations.  */
05147   break;
05148 
05149       pat = PATTERN (insn);
05150 
05151       /* Ug.  Hack hacks hacked elsewhere.  */
05152       switch (recog_memoized (insn))
05153   {
05154     /* We play dependency tricks with the epilogue in order
05155        to get proper schedules.  Undo this for dv analysis.  */
05156   case CODE_FOR_epilogue_deallocate_stack:
05157   case CODE_FOR_prologue_allocate_stack:
05158     pat = XVECEXP (pat, 0, 0);
05159     break;
05160 
05161     /* The pattern we use for br.cloop confuses the code above.
05162        The second element of the vector is representative.  */
05163   case CODE_FOR_doloop_end_internal:
05164     pat = XVECEXP (pat, 0, 1);
05165     break;
05166 
05167     /* Doesn't generate code.  */
05168   case CODE_FOR_pred_rel_mutex:
05169   case CODE_FOR_prologue_use:
05170     return 0;
05171 
05172   default:
05173     break;
05174   }
05175 
05176       memset (rws_insn, 0, sizeof (rws_insn));
05177       need_barrier = rtx_needs_barrier (pat, flags, 0);
05178 
05179       /* Check to see if the previous instruction was a volatile
05180    asm.  */
05181       if (! need_barrier)
05182   need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
05183       break;
05184 
05185     default:
05186       abort ();
05187     }
05188 
05189   if (first_instruction)
05190     {
05191       need_barrier = 0;
05192       first_instruction = 0;
05193     }
05194 
05195   return need_barrier;
05196 }
05197 
05198 /* Like group_barrier_needed_p, but do not clobber the current state.  */
05199 
05200 static int
05201 safe_group_barrier_needed_p (insn)
05202      rtx insn;
05203 {
05204   struct reg_write_state rws_saved[NUM_REGS];
05205   int saved_first_instruction;
05206   int t;
05207 
05208   memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
05209   saved_first_instruction = first_instruction;
05210 
05211   t = group_barrier_needed_p (insn);
05212 
05213   memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
05214   first_instruction = saved_first_instruction;
05215 
05216   return t;
05217 }
05218 
05219 /* INSNS is an chain of instructions.  Scan the chain, and insert stop bits
05220    as necessary to eliminate dependendencies.  This function assumes that
05221    a final instruction scheduling pass has been run which has already
05222    inserted most of the necessary stop bits.  This function only inserts
05223    new ones at basic block boundaries, since these are invisible to the
05224    scheduler.  */
05225 
05226 static void
05227 emit_insn_group_barriers (dump, insns)
05228      FILE *dump;
05229      rtx insns;
05230 {
05231   rtx insn;
05232   rtx last_label = 0;
05233   int insns_since_last_label = 0;
05234 
05235   init_insn_group_barriers ();
05236 
05237   for (insn = insns; insn; insn = NEXT_INSN (insn))
05238     {
05239       if (GET_CODE (insn) == CODE_LABEL)
05240   {
05241     if (insns_since_last_label)
05242       last_label = insn;
05243     insns_since_last_label = 0;
05244   }
05245       else if (GET_CODE (insn) == NOTE
05246          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
05247   {
05248     if (insns_since_last_label)
05249       last_label = insn;
05250     insns_since_last_label = 0;
05251   }
05252       else if (GET_CODE (insn) == INSN
05253          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
05254          && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
05255   {
05256     init_insn_group_barriers ();
05257     last_label = 0;
05258   }
05259       else if (INSN_P (insn))
05260   {
05261     insns_since_last_label = 1;
05262 
05263     if (group_barrier_needed_p (insn))
05264       {
05265         if (last_label)
05266     {
05267       if (dump)
05268         fprintf (dump, "Emitting stop before label %d\n",
05269            INSN_UID (last_label));
05270       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
05271       insn = last_label;
05272 
05273       init_insn_group_barriers ();
05274       last_label = 0;
05275     }
05276       }
05277   }
05278     }
05279 }
05280 
05281 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
05282    This function has to emit all necessary group barriers.  */
05283 
05284 static void
05285 emit_all_insn_group_barriers (dump, insns)
05286      FILE *dump ATTRIBUTE_UNUSED;
05287      rtx insns;
05288 {
05289   rtx insn;
05290 
05291   init_insn_group_barriers ();
05292 
05293   for (insn = insns; insn; insn = NEXT_INSN (insn))
05294     {
05295       if (GET_CODE (insn) == BARRIER)
05296   {
05297     rtx last = prev_active_insn (insn);
05298 
05299     if (! last)
05300       continue;
05301     if (GET_CODE (last) == JUMP_INSN
05302         && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
05303       last = prev_active_insn (last);
05304     if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
05305       emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
05306 
05307     init_insn_group_barriers ();
05308   }
05309       else if (INSN_P (insn))
05310   {
05311     if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
05312       init_insn_group_barriers ();
05313     else if (group_barrier_needed_p (insn))
05314       {
05315         emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
05316         init_insn_group_barriers ();
05317         group_barrier_needed_p (insn);
05318       }
05319   }
05320     }
05321 }
05322 
05323 static int errata_find_address_regs PARAMS ((rtx *, void *));
05324 static void errata_emit_nops PARAMS ((rtx));
05325 static void fixup_errata PARAMS ((void));
05326 
05327 /* This structure is used to track some details about the previous insns
05328    groups so we can determine if it may be necessary to insert NOPs to
05329    workaround hardware errata.  */
05330 static struct group
05331 {
05332   HARD_REG_SET p_reg_set;
05333   HARD_REG_SET gr_reg_conditionally_set;
05334 } last_group[2];
05335 
05336 /* Index into the last_group array.  */
05337 static int group_idx;
05338 
05339 /* Called through for_each_rtx; determines if a hard register that was
05340    conditionally set in the previous group is used as an address register.
05341    It ensures that for_each_rtx returns 1 in that case.  */
05342 static int
05343 errata_find_address_regs (xp, data)
05344      rtx *xp;
05345      void *data ATTRIBUTE_UNUSED;
05346 {
05347   rtx x = *xp;
05348   if (GET_CODE (x) != MEM)
05349     return 0;
05350   x = XEXP (x, 0);
05351   if (GET_CODE (x) == POST_MODIFY)
05352     x = XEXP (x, 0);
05353   if (GET_CODE (x) == REG)
05354     {
05355       struct group *prev_group = last_group + (group_idx ^ 1);
05356       if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
05357            REGNO (x)))
05358   return 1;
05359       return -1;
05360     }
05361   return 0;
05362 }
05363 
05364 /* Called for each insn; this function keeps track of the state in
05365    last_group and emits additional NOPs if necessary to work around
05366    an Itanium A/B step erratum.  */
05367 static void
05368 errata_emit_nops (insn)
05369      rtx insn;
05370 {
05371   struct group *this_group = last_group + group_idx;
05372   struct group *prev_group = last_group + (group_idx ^ 1);
05373   rtx pat = PATTERN (insn);
05374   rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
05375   rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
05376   enum attr_type type;
05377   rtx set = real_pat;
05378 
05379   if (GET_CODE (real_pat) == USE
05380       || GET_CODE (real_pat) == CLOBBER
05381       || GET_CODE (real_pat) == ASM_INPUT
05382       || GET_CODE (real_pat) == ADDR_VEC
05383       || GET_CODE (real_pat) == ADDR_DIFF_VEC
05384       || asm_noperands (PATTERN (insn)) >= 0)
05385     return;
05386 
05387   /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
05388      parts of it.  */
05389 
05390   if (GET_CODE (set) == PARALLEL)
05391     {
05392       int i;
05393       set = XVECEXP (real_pat, 0, 0);
05394       for (i = 1; i < XVECLEN (real_pat, 0); i++)
05395   if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
05396       && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
05397     {
05398       set = 0;
05399       break;
05400     }
05401     }
05402 
05403   if (set && GET_CODE (set) != SET)
05404     set = 0;
05405 
05406   type  = get_attr_type (insn);
05407 
05408   if (type == TYPE_F
05409       && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
05410     SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
05411 
05412   if ((type == TYPE_M || type == TYPE_A) && cond && set
05413       && REG_P (SET_DEST (set))
05414       && GET_CODE (SET_SRC (set)) != PLUS
05415       && GET_CODE (SET_SRC (set)) != MINUS
05416       && (GET_CODE (SET_SRC (set)) != ASHIFT
05417     || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
05418       && (GET_CODE (SET_SRC (set)) != MEM
05419     || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
05420       && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
05421     {
05422       if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
05423     || ! REG_P (XEXP (cond, 0)))
05424   abort ();
05425 
05426       if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
05427   SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
05428     }
05429   if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
05430     {
05431       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
05432       emit_insn_before (gen_nop (), insn);
05433       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
05434       group_idx = 0;
05435       memset (last_group, 0, sizeof last_group);
05436     }
05437 }
05438 
05439 /* Emit extra nops if they are required to work around hardware errata.  */
05440 
05441 static void
05442 fixup_errata ()
05443 {
05444   rtx insn;
05445 
05446   if (! TARGET_B_STEP)
05447     return;
05448 
05449   group_idx = 0;
05450   memset (last_group, 0, sizeof last_group);
05451 
05452   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
05453     {
05454       if (!INSN_P (insn))
05455   continue;
05456 
05457       if (ia64_safe_type (insn) == TYPE_S)
05458   {
05459     group_idx ^= 1;
05460     memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
05461   }
05462       else
05463   errata_emit_nops (insn);
05464     }
05465 }
05466 
05467 /* Instruction scheduling support.  */
05468 /* Describe one bundle.  */
05469 
05470 struct bundle
05471 {
05472   /* Zero if there's no possibility of a stop in this bundle other than
05473      at the end, otherwise the position of the optional stop bit.  */
05474   int possible_stop;
05475   /* The types of the three slots.  */
05476   enum attr_type t[3];
05477   /* The pseudo op to be emitted into the assembler output.  */
05478   const char *name;
05479 };
05480 
05481 #define NR_BUNDLES 10
05482 
05483 /* A list of all available bundles.  */
05484 
05485 static const struct bundle bundle[NR_BUNDLES] =
05486 {
05487   { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
05488   { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
05489   { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
05490   { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
05491 #if NR_BUNDLES == 10
05492   { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
05493   { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
05494 #endif
05495   { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
05496   { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
05497   { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
05498   /* .mfi needs to occur earlier than .mlx, so that we only generate it if
05499      it matches an L type insn.  Otherwise we'll try to generate L type
05500      nops.  */
05501   { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
05502 };
05503 
05504 /* Describe a packet of instructions.  Packets consist of two bundles that
05505    are visible to the hardware in one scheduling window.  */
05506 
05507 struct ia64_packet
05508 {
05509   const struct bundle *t1, *t2;
05510   /* Precomputed value of the first split issue in this packet if a cycle
05511      starts at its beginning.  */
05512   int first_split;
05513   /* For convenience, the insn types are replicated here so we don't have
05514      to go through T1 and T2 all the time.  */
05515   enum attr_type t[6];
05516 };
05517 
05518 /* An array containing all possible packets.  */
05519 #define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
05520 static struct ia64_packet packets[NR_PACKETS];
05521 
/* Printable name for each attr_type value; indexed by the attr_type when
   writing scheduling dumps.  NOTE(review): the order presumably mirrors
   the attr_type enumeration -- confirm when that enumeration changes.  */

static const char *const type_names[] =
{
  "UNKNOWN",
  "A",
  "I",
  "M",
  "F",
  "B",
  "L",
  "X",
  "S"
};
05528 
/* Set to a nonzero value when the schedule being built should have stop
   bits inserted into it.  */
int ia64_final_schedule = 0;
05531 
05532 static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
05533 static rtx ia64_single_set PARAMS ((rtx));
05534 static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
05535 static void ia64_emit_insn_before PARAMS ((rtx, rtx));
05536 static void maybe_rotate PARAMS ((FILE *));
05537 static void finish_last_head PARAMS ((FILE *, int));
05538 static void rotate_one_bundle PARAMS ((FILE *));
05539 static void rotate_two_bundles PARAMS ((FILE *));
05540 static void nop_cycles_until PARAMS ((int, FILE *));
05541 static void cycle_end_fill_slots PARAMS ((FILE *));
05542 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
05543 static int get_split PARAMS ((const struct ia64_packet *, int));
05544 static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
05545            const struct ia64_packet *, int));
05546 static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
05547               rtx *, enum attr_type *, int));
05548 static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
05549 static void dump_current_packet PARAMS ((FILE *));
05550 static void schedule_stop PARAMS ((FILE *));
05551 static rtx gen_nop_type PARAMS ((enum attr_type));
05552 static void ia64_emit_nops PARAMS ((void));
05553 
05554 /* Map a bundle number to its pseudo-op.  */
05555 
05556 const char *
05557 get_bundle_name (b)
05558      int b;
05559 {
05560   return bundle[b].name;
05561 }
05562 
05563 /* Compute the slot which will cause a split issue in packet P if the
05564    current cycle begins at slot BEGIN.  */
05565 
05566 static int
05567 itanium_split_issue (p, begin)
05568      const struct ia64_packet *p;
05569      int begin;
05570 {
05571   int type_count[TYPE_S];
05572   int i;
05573   int split = 6;
05574 
05575   if (begin < 3)
05576     {
05577       /* Always split before and after MMF.  */
05578       if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
05579   return 3;
05580       if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
05581   return 3;
05582       /* Always split after MBB and BBB.  */
05583       if (p->t[1] == TYPE_B)
05584   return 3;
05585       /* Split after first bundle in MIB BBB combination.  */
05586       if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
05587   return 3;
05588     }
05589 
05590   memset (type_count, 0, sizeof type_count);
05591   for (i = begin; i < split; i++)
05592     {
05593       enum attr_type t0 = p->t[i];
05594       /* An MLX bundle reserves the same units as an MFI bundle.  */
05595       enum attr_type t = (t0 == TYPE_L ? TYPE_F
05596         : t0 == TYPE_X ? TYPE_I
05597         : t0);
05598 
05599       /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
05600    2 integer per cycle.  */
05601       int max = (t == TYPE_B ? 3 : 2);
05602       if (type_count[t] == max)
05603   return i;
05604 
05605       type_count[t]++;
05606     }
05607   return split;
05608 }
05609 
/* Return the largest number of instructions the cpu can issue; this is
   always six.  */

static int
ia64_issue_rate ()
{
  const int max_issue = 6;

  return max_issue;
}
05617 
05618 /* Helper function - like single_set, but look inside COND_EXEC.  */
05619 
05620 static rtx
05621 ia64_single_set (insn)
05622      rtx insn;
05623 {
05624   rtx x = PATTERN (insn), ret;
05625   if (GET_CODE (x) == COND_EXEC)
05626     x = COND_EXEC_CODE (x);
05627   if (GET_CODE (x) == SET)
05628     return x;
05629 
05630   /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
05631      Although they are not classical single set, the second set is there just
05632      to protect it from moving past FP-relative stack accesses.  */
05633   switch (recog_memoized (insn))
05634     {
05635     case CODE_FOR_prologue_allocate_stack:
05636     case CODE_FOR_epilogue_deallocate_stack:
05637       ret = XVECEXP (x, 0, 0);
05638       break;
05639 
05640     default:
05641       ret = single_set_2 (insn, x);
05642       break;
05643     }
05644 
05645   return ret;
05646 }
05647 
05648 /* Adjust the cost of a scheduling dependency.  Return the new cost of
05649    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
05650 
05651 static int
05652 ia64_adjust_cost (insn, link, dep_insn, cost)
05653      rtx insn, link, dep_insn;
05654      int cost;
05655 {
05656   enum attr_type dep_type;
05657   enum attr_itanium_class dep_class;
05658   enum attr_itanium_class insn_class;
05659   rtx dep_set, set, src, addr;
05660 
05661   if (GET_CODE (PATTERN (insn)) == CLOBBER
05662       || GET_CODE (PATTERN (insn)) == USE
05663       || GET_CODE (PATTERN (dep_insn)) == CLOBBER
05664       || GET_CODE (PATTERN (dep_insn)) == USE
05665       /* @@@ Not accurate for indirect calls.  */
05666       || GET_CODE (insn) == CALL_INSN
05667       || ia64_safe_type (insn) == TYPE_S)
05668     return 0;
05669 
05670   if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
05671       || REG_NOTE_KIND (link) == REG_DEP_ANTI)
05672     return 0;
05673 
05674   dep_type = ia64_safe_type (dep_insn);
05675   dep_class = ia64_safe_itanium_class (dep_insn);
05676   insn_class = ia64_safe_itanium_class (insn);
05677 
05678   /* Compares that feed a conditional branch can execute in the same
05679      cycle.  */
05680   dep_set = ia64_single_set (dep_insn);
05681   set = ia64_single_set (insn);
05682 
05683   if (dep_type != TYPE_F
05684       && dep_set
05685       && GET_CODE (SET_DEST (dep_set)) == REG
05686       && PR_REG (REGNO (SET_DEST (dep_set)))
05687       && GET_CODE (insn) == JUMP_INSN)
05688     return 0;
05689 
05690   if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
05691     {
05692       /* ??? Can't find any information in the documenation about whether
05693    a sequence
05694      st [rx] = ra
05695      ld rb = [ry]
05696    splits issue.  Assume it doesn't.  */
05697       return 0;
05698     }
05699 
05700   src = set ? SET_SRC (set) : 0;
05701   addr = 0;
05702   if (set)
05703     {
05704       if (GET_CODE (SET_DEST (set)) == MEM)
05705   addr = XEXP (SET_DEST (set), 0);
05706       else if (GET_CODE (SET_DEST (set)) == SUBREG
05707          && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
05708   addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
05709       else
05710   {
05711     addr = src;
05712     if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
05713       addr = XVECEXP (addr, 0, 0);
05714     while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
05715       addr = XEXP (addr, 0);
05716 
05717     /* Note that LO_SUM is used for GOT loads.  */
05718     if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
05719       addr = XEXP (addr, 0);
05720     else
05721       addr = 0;
05722   }
05723     }
05724 
05725   if (addr && GET_CODE (addr) == POST_MODIFY)
05726     addr = XEXP (addr, 0);
05727 
05728   set = ia64_single_set (dep_insn);
05729 
05730   if ((dep_class == ITANIUM_CLASS_IALU
05731        || dep_class == ITANIUM_CLASS_ILOG
05732        || dep_class == ITANIUM_CLASS_LD)
05733       && (insn_class == ITANIUM_CLASS_LD
05734     || insn_class == ITANIUM_CLASS_ST))
05735     {
05736       if (! addr || ! set)
05737   abort ();
05738       /* This isn't completely correct - an IALU that feeds an address has
05739    a latency of 1 cycle if it's issued in an M slot, but 2 cycles
05740    otherwise.  Unfortunately there's no good way to describe this.  */
05741       if (reg_overlap_mentioned_p (SET_DEST (set), addr))
05742   return cost + 1;
05743     }
05744 
05745   if ((dep_class == ITANIUM_CLASS_IALU
05746        || dep_class == ITANIUM_CLASS_ILOG
05747        || dep_class == ITANIUM_CLASS_LD)
05748       && (insn_class == ITANIUM_CLASS_MMMUL
05749     || insn_class == ITANIUM_CLASS_MMSHF
05750     || insn_class == ITANIUM_CLASS_MMSHFI))
05751     return 3;
05752 
05753   if (dep_class == ITANIUM_CLASS_FMAC
05754       && (insn_class == ITANIUM_CLASS_FMISC
05755     || insn_class == ITANIUM_CLASS_FCVTFX
05756     || insn_class == ITANIUM_CLASS_XMPY))
05757     return 7;
05758 
05759   if ((dep_class == ITANIUM_CLASS_FMAC
05760        || dep_class == ITANIUM_CLASS_FMISC
05761        || dep_class == ITANIUM_CLASS_FCVTFX
05762        || dep_class == ITANIUM_CLASS_XMPY)
05763       && insn_class == ITANIUM_CLASS_STF)
05764     return 8;
05765 
05766   /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
05767      but HP engineers say any non-MM operation.  */
05768   if ((dep_class == ITANIUM_CLASS_MMMUL
05769        || dep_class == ITANIUM_CLASS_MMSHF
05770        || dep_class == ITANIUM_CLASS_MMSHFI)
05771       && insn_class != ITANIUM_CLASS_MMMUL
05772       && insn_class != ITANIUM_CLASS_MMSHF
05773       && insn_class != ITANIUM_CLASS_MMSHFI)
05774     return 4;
05775 
05776   return cost;
05777 }
05778 
05779 /* Describe the current state of the Itanium pipeline.  */
05780 static struct
05781 {
05782   /* The first slot that is used in the current cycle.  */
05783   int first_slot;
05784   /* The next slot to fill.  */
05785   int cur;
05786   /* The packet we have selected for the current issue window.  */
05787   const struct ia64_packet *packet;
05788   /* The position of the split issue that occurs due to issue width
05789      limitations (6 if there's no split issue).  */
05790   int split;
05791   /* Record data about the insns scheduled so far in the same issue
05792      window.  The elements up to but not including FIRST_SLOT belong
05793      to the previous cycle, the ones starting with FIRST_SLOT belong
05794      to the current cycle.  */
05795   enum attr_type types[6];
05796   rtx insns[6];
05797   int stopbit[6];
05798   /* Nonzero if we decided to schedule a stop bit.  */
05799   int last_was_stop;
05800 } sched_data;
05801 
05802 /* Temporary arrays; they have enough elements to hold all insns that
05803    can be ready at the same time while scheduling of the current block.
05804    SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
05805 static rtx *sched_ready;
05806 static enum attr_type *sched_types;
05807 
05808 /* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
05809    of packet P.  */
05810 
05811 static int
05812 insn_matches_slot (p, itype, slot, insn)
05813      const struct ia64_packet *p;
05814      enum attr_type itype;
05815      int slot;
05816      rtx insn;
05817 {
05818   enum attr_itanium_requires_unit0 u0;
05819   enum attr_type stype = p->t[slot];
05820 
05821   if (insn)
05822     {
05823       u0 = ia64_safe_itanium_requires_unit0 (insn);
05824       if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
05825   {
05826     int i;
05827     for (i = sched_data.first_slot; i < slot; i++)
05828       if (p->t[i] == stype
05829     || (stype == TYPE_F && p->t[i] == TYPE_L)
05830     || (stype == TYPE_I && p->t[i] == TYPE_X))
05831         return 0;
05832   }
05833       if (GET_CODE (insn) == CALL_INSN)
05834   {
05835     /* Reject calls in multiway branch packets.  We want to limit
05836        the number of multiway branches we generate (since the branch
05837        predictor is limited), and this seems to work fairly well.
05838        (If we didn't do this, we'd have to add another test here to
05839        force calls into the third slot of the bundle.)  */
05840     if (slot < 3)
05841       {
05842         if (p->t[1] == TYPE_B)
05843     return 0;
05844       }
05845     else
05846       {
05847         if (p->t[4] == TYPE_B)
05848     return 0;
05849       }
05850   }
05851     }
05852 
05853   if (itype == stype)
05854     return 1;
05855   if (itype == TYPE_A)
05856     return stype == TYPE_M || stype == TYPE_I;
05857   return 0;
05858 }
05859 
05860 /* Like emit_insn_before, but skip cycle_display notes.
05861    ??? When cycle display notes are implemented, update this.  */
05862 
05863 static void
05864 ia64_emit_insn_before (insn, before)
05865      rtx insn, before;
05866 {
05867   emit_insn_before (insn, before);
05868 }
05869 
05870 /* When rotating a bundle out of the issue window, insert a bundle selector
05871    insn in front of it.  DUMP is the scheduling dump file or NULL.  START
05872    is either 0 or 3, depending on whether we want to emit a bundle selector
05873    for the first bundle or the second bundle in the current issue window.
05874 
05875    The selector insns are emitted this late because the selected packet can
05876    be changed until parts of it get rotated out.  */
05877 
05878 static void
05879 finish_last_head (dump, start)
05880      FILE *dump;
05881      int start;
05882 {
05883   const struct ia64_packet *p = sched_data.packet;
05884   const struct bundle *b = start == 0 ? p->t1 : p->t2;
05885   int bundle_type = b - bundle;
05886   rtx insn;
05887   int i;
05888 
05889   if (! ia64_final_schedule)
05890     return;
05891 
05892   for (i = start; sched_data.insns[i] == 0; i++)
05893     if (i == start + 3)
05894       abort ();
05895   insn = sched_data.insns[i];
05896 
05897   if (dump)
05898     fprintf (dump, "//    Emitting template before %d: %s\n",
05899        INSN_UID (insn), b->name);
05900 
05901   ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
05902 }
05903 
05904 /* We can't schedule more insns this cycle.  Fix up the scheduling state
05905    and advance FIRST_SLOT and CUR.
05906    We have to distribute the insns that are currently found between
05907    FIRST_SLOT and CUR into the slots of the packet we have selected.  So
05908    far, they are stored successively in the fields starting at FIRST_SLOT;
05909    now they must be moved to the correct slots.
05910    DUMP is the current scheduling dump file, or NULL.  */
05911 
05912 static void
05913 cycle_end_fill_slots (dump)
05914      FILE *dump;
05915 {
05916   const struct ia64_packet *packet = sched_data.packet;
05917   int slot, i;
05918   enum attr_type tmp_types[6];
05919   rtx tmp_insns[6];
05920 
05921   memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
05922   memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
05923 
05924   for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
05925     {
05926       enum attr_type t = tmp_types[i];
05927       if (t != ia64_safe_type (tmp_insns[i]))
05928   abort ();
05929       while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
05930   {
05931     if (slot > sched_data.split)
05932       abort ();
05933     if (dump)
05934       fprintf (dump, "// Packet needs %s, have %s\n",
05935          type_names[packet->t[slot]], type_names[t]);
05936     sched_data.types[slot] = packet->t[slot];
05937     sched_data.insns[slot] = 0;
05938     sched_data.stopbit[slot] = 0;
05939 
05940     /* ??? TYPE_L instructions always fill up two slots, but we don't
05941        support TYPE_L nops.  */
05942     if (packet->t[slot] == TYPE_L)
05943       abort ();
05944 
05945     slot++;
05946   }
05947 
05948       /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
05949    actual slot type later.  */
05950       sched_data.types[slot] = packet->t[slot];
05951       sched_data.insns[slot] = tmp_insns[i];
05952       sched_data.stopbit[slot] = 0;
05953       slot++;
05954 
05955       /* TYPE_L instructions always fill up two slots.  */
05956       if (t == TYPE_L)
05957   {
05958     sched_data.types[slot] = packet->t[slot];
05959     sched_data.insns[slot] = 0;
05960     sched_data.stopbit[slot] = 0;
05961     slot++;
05962   }
05963     }
05964 
05965   /* This isn't right - there's no need to pad out until the forced split;
05966      the CPU will automatically split if an insn isn't ready.  */
05967 #if 0
05968   while (slot < sched_data.split)
05969     {
05970       sched_data.types[slot] = packet->t[slot];
05971       sched_data.insns[slot] = 0;
05972       sched_data.stopbit[slot] = 0;
05973       slot++;
05974     }
05975 #endif
05976 
05977   sched_data.first_slot = sched_data.cur = slot;
05978 }
05979 
05980 /* Bundle rotations, as described in the Itanium optimization manual.
05981    We can rotate either one or both bundles out of the issue window.
05982    DUMP is the current scheduling dump file, or NULL.  */
05983 
05984 static void
05985 rotate_one_bundle (dump)
05986      FILE *dump;
05987 {
05988   if (dump)
05989     fprintf (dump, "// Rotating one bundle.\n");
05990 
05991   finish_last_head (dump, 0);
05992   if (sched_data.cur > 3)
05993     {
05994       sched_data.cur -= 3;
05995       sched_data.first_slot -= 3;
05996       memmove (sched_data.types,
05997          sched_data.types + 3,
05998          sched_data.cur * sizeof *sched_data.types);
05999       memmove (sched_data.stopbit,
06000          sched_data.stopbit + 3,
06001          sched_data.cur * sizeof *sched_data.stopbit);
06002       memmove (sched_data.insns,
06003          sched_data.insns + 3,
06004          sched_data.cur * sizeof *sched_data.insns);
06005       sched_data.packet
06006   = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
06007     }
06008   else
06009     {
06010       sched_data.cur = 0;
06011       sched_data.first_slot = 0;
06012     }
06013 }
06014 
06015 static void
06016 rotate_two_bundles (dump)
06017      FILE *dump;
06018 {
06019   if (dump)
06020     fprintf (dump, "// Rotating two bundles.\n");
06021 
06022   if (sched_data.cur == 0)
06023     return;
06024 
06025   finish_last_head (dump, 0);
06026   if (sched_data.cur > 3)
06027     finish_last_head (dump, 3);
06028   sched_data.cur = 0;
06029   sched_data.first_slot = 0;
06030 }
06031 
06032 /* We're beginning a new block.  Initialize data structures as necessary.  */
06033 
06034 static void
06035 ia64_sched_init (dump, sched_verbose, max_ready)
06036      FILE *dump ATTRIBUTE_UNUSED;
06037      int sched_verbose ATTRIBUTE_UNUSED;
06038      int max_ready;
06039 {
06040   static int initialized = 0;
06041 
06042   if (! initialized)
06043     {
06044       int b1, b2, i;
06045 
06046       initialized = 1;
06047 
06048       for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
06049   {
06050     const struct bundle *t1 = bundle + b1;
06051     for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
06052       {
06053         const struct bundle *t2 = bundle + b2;
06054 
06055         packets[i].t1 = t1;
06056         packets[i].t2 = t2;
06057       }
06058   }
06059       for (i = 0; i < NR_PACKETS; i++)
06060   {
06061     int j;
06062     for (j = 0; j < 3; j++)
06063       packets[i].t[j] = packets[i].t1->t[j];
06064     for (j = 0; j < 3; j++)
06065       packets[i].t[j + 3] = packets[i].t2->t[j];
06066     packets[i].first_split = itanium_split_issue (packets + i, 0);
06067   }
06068   
06069     }
06070 
06071   init_insn_group_barriers ();
06072 
06073   memset (&sched_data, 0, sizeof sched_data);
06074   sched_types = (enum attr_type *) xmalloc (max_ready
06075               * sizeof (enum attr_type));
06076   sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
06077 }
06078 
06079 /* See if the packet P can match the insns we have already scheduled.  Return
06080    nonzero if so.  In *PSLOT, we store the first slot that is available for
06081    more instructions if we choose this packet.
06082    SPLIT holds the last slot we can use, there's a split issue after it so
06083    scheduling beyond it would cause us to use more than one cycle.  */
06084 
06085 static int
06086 packet_matches_p (p, split, pslot)
06087      const struct ia64_packet *p;
06088      int split;
06089      int *pslot;
06090 {
06091   int filled = sched_data.cur;
06092   int first = sched_data.first_slot;
06093   int i, slot;
06094 
06095   /* First, check if the first of the two bundles must be a specific one (due
06096      to stop bits).  */
06097   if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
06098     return 0;
06099   if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
06100     return 0;
06101 
06102   for (i = 0; i < first; i++)
06103     if (! insn_matches_slot (p, sched_data.types[i], i,
06104            sched_data.insns[i]))
06105       return 0;
06106   for (i = slot = first; i < filled; i++)
06107     {
06108       while (slot < split)
06109   {
06110     if (insn_matches_slot (p, sched_data.types[i], slot,
06111          sched_data.insns[i]))
06112       break;
06113     slot++;
06114   }
06115       if (slot == split)
06116   return 0;
06117       slot++;
06118     }
06119 
06120   if (pslot)
06121     *pslot = slot;
06122   return 1;
06123 }
06124 
06125 /* A frontend for itanium_split_issue.  For a packet P and a slot
06126    number FIRST that describes the start of the current clock cycle,
06127    return the slot number of the first split issue.  This function
06128    uses the cached number found in P if possible.  */
06129 
06130 static int
06131 get_split (p, first)
06132      const struct ia64_packet *p;
06133      int first;
06134 {
06135   if (first == 0)
06136     return p->first_split;
06137   return itanium_split_issue (p, first);
06138 }
06139 
06140 /* Given N_READY insns in the array READY, whose types are found in the
06141    corresponding array TYPES, return the insn that is best suited to be
06142    scheduled in slot SLOT of packet P.  */
06143 
06144 static int
06145 find_best_insn (ready, types, n_ready, p, slot)
06146      rtx *ready;
06147      enum attr_type *types;
06148      int n_ready;
06149      const struct ia64_packet *p;
06150      int slot;
06151 {
06152   int best = -1;
06153   int best_pri = 0;
06154   while (n_ready-- > 0)
06155     {
06156       rtx insn = ready[n_ready];
06157       if (! insn)
06158   continue;
06159       if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
06160   break;
06161       /* If we have equally good insns, one of which has a stricter
06162    slot requirement, prefer the one with the stricter requirement.  */
06163       if (best >= 0 && types[n_ready] == TYPE_A)
06164   continue;
06165       if (insn_matches_slot (p, types[n_ready], slot, insn))
06166   {
06167     best = n_ready;
06168     best_pri = INSN_PRIORITY (ready[best]);
06169 
06170     /* If there's no way we could get a stricter requirement, stop
06171        looking now.  */
06172     if (types[n_ready] != TYPE_A
06173         && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
06174       break;
06175     break;
06176   }
06177     }
06178   return best;
06179 }
06180 
06181 /* Select the best packet to use given the current scheduler state and the
06182    current ready list.
06183    READY is an array holding N_READY ready insns; TYPES is a corresponding
06184    array that holds their types.  Store the best packet in *PPACKET and the
06185    number of insns that can be scheduled in the current cycle in *PBEST.  */
06186 
06187 static void
06188 find_best_packet (pbest, ppacket, ready, types, n_ready)
06189      int *pbest;
06190      const struct ia64_packet **ppacket;
06191      rtx *ready;
06192      enum attr_type *types;
06193      int n_ready;
06194 {
06195   int first = sched_data.first_slot;
06196   int best = 0;
06197   int lowest_end = 6;
06198   const struct ia64_packet *best_packet = NULL;
06199   int i;
06200 
06201   for (i = 0; i < NR_PACKETS; i++)
06202     {
06203       const struct ia64_packet *p = packets + i;
06204       int slot;
06205       int split = get_split (p, first);
06206       int win = 0;
06207       int first_slot, last_slot;
06208       int b_nops = 0;
06209 
06210       if (! packet_matches_p (p, split, &first_slot))
06211   continue;
06212 
06213       memcpy (sched_ready, ready, n_ready * sizeof (rtx));
06214 
06215       win = 0;
06216       last_slot = 6;
06217       for (slot = first_slot; slot < split; slot++)
06218   {
06219     int insn_nr;
06220 
06221     /* Disallow a degenerate case where the first bundle doesn't
06222        contain anything but NOPs!  */
06223     if (first_slot == 0 && win == 0 && slot == 3)
06224       {
06225         win = -1;
06226         break;
06227       }
06228 
06229     insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
06230     if (insn_nr >= 0)
06231       {
06232         sched_ready[insn_nr] = 0;
06233         last_slot = slot;
06234         win++;
06235       }
06236     else if (p->t[slot] == TYPE_B)
06237       b_nops++;
06238   }
06239       /* We must disallow MBB/BBB packets if any of their B slots would be
06240    filled with nops.  */
06241       if (last_slot < 3)
06242   {
06243     if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
06244       win = -1;
06245   }
06246       else
06247   {
06248     if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
06249       win = -1;
06250   }
06251 
06252       if (win > best
06253     || (win == best && last_slot < lowest_end))
06254   {
06255     best = win;
06256     lowest_end = last_slot;
06257     best_packet = p;
06258   }
06259     }
06260   *pbest = best;
06261   *ppacket = best_packet;
06262 }
06263 
06264 /* Reorder the ready list so that the insns that can be issued in this cycle
06265    are found in the correct order at the end of the list.
06266    DUMP is the scheduling dump file, or NULL.  READY points to the start,
06267    E_READY to the end of the ready list.  MAY_FAIL determines what should be
06268    done if no insns can be scheduled in this cycle: if it is zero, we abort,
06269    otherwise we return 0.
06270    Return 1 if any insns can be scheduled in this cycle.  */
06271 
06272 static int
06273 itanium_reorder (dump, ready, e_ready, may_fail)
06274      FILE *dump;
06275      rtx *ready;
06276      rtx *e_ready;
06277      int may_fail;
06278 {
06279   const struct ia64_packet *best_packet;
06280   int n_ready = e_ready - ready;
06281   int first = sched_data.first_slot;
06282   int i, best, best_split, filled;
06283 
06284   for (i = 0; i < n_ready; i++)
06285     sched_types[i] = ia64_safe_type (ready[i]);
06286 
06287   find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
06288 
06289   if (best == 0)
06290     {
06291       if (may_fail)
06292   return 0;
06293       abort ();
06294     }
06295 
06296   if (dump)
06297     {
06298       fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
06299          best_packet->t1->name,
06300          best_packet->t2 ? best_packet->t2->name : NULL, best);
06301     }
06302 
06303   best_split = itanium_split_issue (best_packet, first);
06304   packet_matches_p (best_packet, best_split, &filled);
06305 
06306   for (i = filled; i < best_split; i++)
06307     {
06308       int insn_nr;
06309 
06310       insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
06311       if (insn_nr >= 0)
06312   {
06313     rtx insn = ready[insn_nr];
06314     memmove (ready + insn_nr, ready + insn_nr + 1,
06315        (n_ready - insn_nr - 1) * sizeof (rtx));
06316     memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
06317        (n_ready - insn_nr - 1) * sizeof (enum attr_type));
06318     ready[--n_ready] = insn;
06319   }
06320     }
06321 
06322   sched_data.packet = best_packet;
06323   sched_data.split = best_split;
06324   return 1;
06325 }
06326 
06327 /* Dump information about the current scheduling state to file DUMP.  */
06328 
06329 static void
06330 dump_current_packet (dump)
06331      FILE *dump;
06332 {
06333   int i;
06334   fprintf (dump, "//    %d slots filled:", sched_data.cur);
06335   for (i = 0; i < sched_data.first_slot; i++)
06336     {
06337       rtx insn = sched_data.insns[i];
06338       fprintf (dump, " %s", type_names[sched_data.types[i]]);
06339       if (insn)
06340   fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
06341       if (sched_data.stopbit[i])
06342   fprintf (dump, " ;;");
06343     }
06344   fprintf (dump, " :::");
06345   for (i = sched_data.first_slot; i < sched_data.cur; i++)
06346     {
06347       rtx insn = sched_data.insns[i];
06348       enum attr_type t = ia64_safe_type (insn);
06349       fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
06350     }
06351   fprintf (dump, "\n");
06352 }
06353 
06354 /* Schedule a stop bit.  DUMP is the current scheduling dump file, or
06355    NULL.  */
06356 
06357 static void
06358 schedule_stop (dump)
06359      FILE *dump;
06360 {
06361   const struct ia64_packet *best = sched_data.packet;
06362   int i;
06363   int best_stop = 6;
06364 
06365   if (dump)
06366     fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
06367 
06368   if (sched_data.cur == 0)
06369     {
06370       if (dump)
06371   fprintf (dump, "//   At start of bundle, so nothing to do.\n");
06372 
06373       rotate_two_bundles (NULL);
06374       return;
06375     }
06376 
06377   for (i = -1; i < NR_PACKETS; i++)
06378     {
06379       /* This is a slight hack to give the current packet the first chance.
06380    This is done to avoid e.g. switching from MIB to MBB bundles.  */
06381       const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
06382       int split = get_split (p, sched_data.first_slot);
06383       const struct bundle *compare;
06384       int next, stoppos;
06385 
06386       if (! packet_matches_p (p, split, &next))
06387   continue;
06388 
06389       compare = next > 3 ? p->t2 : p->t1;
06390 
06391       stoppos = 3;
06392       if (compare->possible_stop)
06393   stoppos = compare->possible_stop;
06394       if (next > 3)
06395   stoppos += 3;
06396 
06397       if (stoppos < next || stoppos >= best_stop)
06398   {
06399     if (compare->possible_stop == 0)
06400       continue;
06401     stoppos = (next > 3 ? 6 : 3);
06402   }
06403       if (stoppos < next || stoppos >= best_stop)
06404   continue;
06405 
06406       if (dump)
06407   fprintf (dump, "//   switching from %s %s to %s %s (stop at %d)\n",
06408      best->t1->name, best->t2->name, p->t1->name, p->t2->name,
06409      stoppos);
06410 
06411       best_stop = stoppos;
06412       best = p;
06413     }
06414 
06415   sched_data.packet = best;
06416   cycle_end_fill_slots (dump);
06417   while (sched_data.cur < best_stop)
06418     {
06419       sched_data.types[sched_data.cur] = best->t[sched_data.cur];
06420       sched_data.insns[sched_data.cur] = 0;
06421       sched_data.stopbit[sched_data.cur] = 0;
06422       sched_data.cur++;
06423     }
06424   sched_data.stopbit[sched_data.cur - 1] = 1;
06425   sched_data.first_slot = best_stop;
06426 
06427   if (dump)
06428     dump_current_packet (dump);
06429 }
06430 
06431 /* If necessary, perform one or two rotations on the scheduling state.  
06432    This should only be called if we are starting a new cycle.  */
06433 
06434 static void
06435 maybe_rotate (dump)
06436      FILE *dump;
06437 {
06438   cycle_end_fill_slots (dump);
06439   if (sched_data.cur == 6)
06440     rotate_two_bundles (dump);
06441   else if (sched_data.cur >= 3)
06442     rotate_one_bundle (dump);
06443   sched_data.first_slot = sched_data.cur;
06444 }
06445 
/* Clock cycle passed to the most recent ia64_internal_sched_reorder
   call.  nop_cycles_until uses it as the start of the gap to pad.  */
static int prev_cycle;

/* Value of sched_data.first_slot saved by ia64_internal_sched_reorder,
   i.e. the first slot used in the previous cycle.  nop_cycles_until
   passes it to itanium_split_issue.  */
static int prev_first;
06452 
06453 /* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
06454    pad out the delay between MM (shifts, etc.) and integer operations.  */
06455 
06456 static void
06457 nop_cycles_until (clock_var, dump)
06458      int clock_var;
06459      FILE *dump;
06460 {
06461   int prev_clock = prev_cycle;
06462   int cycles_left = clock_var - prev_clock;
06463   bool did_stop = false;
06464 
06465   /* Finish the previous cycle; pad it out with NOPs.  */
06466   if (sched_data.cur == 3)
06467     {
06468       sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
06469       did_stop = true;
06470       maybe_rotate (dump);
06471     }
06472   else if (sched_data.cur > 0)
06473     {
06474       int need_stop = 0;
06475       int split = itanium_split_issue (sched_data.packet, prev_first);
06476 
06477       if (sched_data.cur < 3 && split > 3)
06478   {
06479     split = 3;
06480     need_stop = 1;
06481   }
06482 
06483       if (split > sched_data.cur)
06484   {
06485     int i;
06486     for (i = sched_data.cur; i < split; i++)
06487       {
06488         rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
06489         sched_data.types[i] = sched_data.packet->t[i];
06490         sched_data.insns[i] = t;
06491         sched_data.stopbit[i] = 0;
06492       }
06493     sched_data.cur = split;
06494   }
06495 
06496       if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
06497     && cycles_left > 1)
06498   {
06499     int i;
06500     for (i = sched_data.cur; i < 6; i++)
06501       {
06502         rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
06503         sched_data.types[i] = sched_data.packet->t[i];
06504         sched_data.insns[i] = t;
06505         sched_data.stopbit[i] = 0;
06506       }
06507     sched_data.cur = 6;
06508     cycles_left--;
06509     need_stop = 1;
06510   }
06511 
06512       if (need_stop || sched_data.cur == 6)
06513   {
06514     sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
06515     did_stop = true;
06516   }
06517       maybe_rotate (dump);
06518     }
06519 
06520   cycles_left--;
06521   while (cycles_left > 0)
06522     {
06523       sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
06524       sched_emit_insn (gen_nop_type (TYPE_M));
06525       sched_emit_insn (gen_nop_type (TYPE_I));
06526       if (cycles_left > 1)
06527   {
06528     sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
06529     cycles_left--;
06530   }
06531       sched_emit_insn (gen_nop_type (TYPE_I));
06532       sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
06533       did_stop = true;
06534       cycles_left--;
06535     }
06536 
06537   if (did_stop)
06538     init_insn_group_barriers ();
06539 }
06540 
06541 /* We are about to being issuing insns for this clock cycle.
06542    Override the default sort algorithm to better slot instructions.  */
06543 
06544 static int
06545 ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
06546         reorder_type, clock_var)
06547      FILE *dump ATTRIBUTE_UNUSED;
06548      int sched_verbose ATTRIBUTE_UNUSED;
06549      rtx *ready;
06550      int *pn_ready;
06551      int reorder_type, clock_var;
06552 {
06553   int n_asms;
06554   int n_ready = *pn_ready;
06555   rtx *e_ready = ready + n_ready;
06556   rtx *insnp;
06557 
06558   if (sched_verbose)
06559     {
06560       fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
06561       dump_current_packet (dump);
06562     }
06563 
06564   /* Work around the pipeline flush that will occurr if the results of
06565      an MM instruction are accessed before the result is ready.  Intel
06566      documentation says this only happens with IALU, ISHF, ILOG, LD,
06567      and ST consumers, but experimental evidence shows that *any* non-MM
06568      type instruction will incurr the flush.  */
06569   if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
06570     {
06571       for (insnp = ready; insnp < e_ready; insnp++)
06572   {
06573     rtx insn = *insnp, link;
06574     enum attr_itanium_class t = ia64_safe_itanium_class (insn);
06575 
06576     if (t == ITANIUM_CLASS_MMMUL
06577         || t == ITANIUM_CLASS_MMSHF
06578         || t == ITANIUM_CLASS_MMSHFI)
06579       continue;
06580 
06581     for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
06582       if (REG_NOTE_KIND (link) == 0)
06583         {
06584     rtx other = XEXP (link, 0);
06585     enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
06586     if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
06587       {
06588         nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
06589         goto out;
06590       }
06591         }
06592   }
06593     }
06594  out:
06595 
06596   prev_first = sched_data.first_slot;
06597   prev_cycle = clock_var;
06598 
06599   if (reorder_type == 0)
06600     maybe_rotate (sched_verbose ? dump : NULL);
06601 
06602   /* First, move all USEs, CLOBBERs and other crud out of the way.  */
06603   n_asms = 0;
06604   for (insnp = ready; insnp < e_ready; insnp++)
06605     if (insnp < e_ready)
06606       {
06607   rtx insn = *insnp;
06608   enum attr_type t = ia64_safe_type (insn);
06609   if (t == TYPE_UNKNOWN)
06610     {
06611       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06612     || asm_noperands (PATTERN (insn)) >= 0)
06613         {
06614     rtx lowest = ready[n_asms];
06615     ready[n_asms] = insn;
06616     *insnp = lowest;
06617     n_asms++;
06618         }
06619       else
06620         {
06621     rtx highest = ready[n_ready - 1];
06622     ready[n_ready - 1] = insn;
06623     *insnp = highest;
06624     if (ia64_final_schedule && group_barrier_needed_p (insn))
06625       {
06626         schedule_stop (sched_verbose ? dump : NULL);
06627         sched_data.last_was_stop = 1;
06628         maybe_rotate (sched_verbose ? dump : NULL);
06629       }
06630 
06631     return 1;
06632         }
06633     }
06634       }
06635   if (n_asms < n_ready)
06636     {
06637       /* Some normal insns to process.  Skip the asms.  */
06638       ready += n_asms;
06639       n_ready -= n_asms;
06640     }
06641   else if (n_ready > 0)
06642     {
06643       /* Only asm insns left.  */
06644       if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
06645   {
06646     schedule_stop (sched_verbose ? dump : NULL);
06647     sched_data.last_was_stop = 1;
06648     maybe_rotate (sched_verbose ? dump : NULL);
06649   }
06650       cycle_end_fill_slots (sched_verbose ? dump : NULL);
06651       return 1;
06652     }
06653 
06654   if (ia64_final_schedule)
06655     {
06656       int nr_need_stop = 0;
06657 
06658       for (insnp = ready; insnp < e_ready; insnp++)
06659   if (safe_group_barrier_needed_p (*insnp))
06660     nr_need_stop++;
06661 
06662       /* Schedule a stop bit if
06663           - all insns require a stop bit, or
06664           - we are starting a new cycle and _any_ insns require a stop bit.
06665          The reason for the latter is that if our schedule is accurate, then
06666          the additional stop won't decrease performance at this point (since
06667    there's a split issue at this point anyway), but it gives us more
06668          freedom when scheduling the currently ready insns.  */
06669       if ((reorder_type == 0 && nr_need_stop)
06670     || (reorder_type == 1 && n_ready == nr_need_stop))
06671   {
06672     schedule_stop (sched_verbose ? dump : NULL);
06673     sched_data.last_was_stop = 1;
06674     maybe_rotate (sched_verbose ? dump : NULL);
06675     if (reorder_type == 1)
06676       return 0;
06677   }
06678       else
06679   {
06680     int deleted = 0;
06681     insnp = e_ready;
06682     /* Move down everything that needs a stop bit, preserving relative
06683        order.  */
06684     while (insnp-- > ready + deleted)
06685       while (insnp >= ready + deleted)
06686         {
06687     rtx insn = *insnp;
06688     if (! safe_group_barrier_needed_p (insn))
06689       break;
06690     memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
06691     *ready = insn;
06692     deleted++;
06693         }
06694     n_ready -= deleted;
06695     ready += deleted;
06696     if (deleted != nr_need_stop)
06697       abort ();
06698   }
06699     }
06700 
06701   return itanium_reorder (sched_verbose ? dump : NULL,
06702         ready, e_ready, reorder_type == 1);
06703 }
06704 
06705 static int
06706 ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
06707      FILE *dump;
06708      int sched_verbose;
06709      rtx *ready;
06710      int *pn_ready;
06711      int clock_var;
06712 {
06713   return ia64_internal_sched_reorder (dump, sched_verbose, ready,
06714               pn_ready, 0, clock_var);
06715 }
06716 
06717 /* Like ia64_sched_reorder, but called after issuing each insn.
06718    Override the default sort algorithm to better slot instructions.  */
06719 
06720 static int
06721 ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
06722      FILE *dump ATTRIBUTE_UNUSED;
06723      int sched_verbose ATTRIBUTE_UNUSED;
06724      rtx *ready;
06725      int *pn_ready;
06726      int clock_var;
06727 {
06728   if (sched_data.last_was_stop)
06729     return 0;
06730 
06731   /* Detect one special case and try to optimize it.
06732      If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
06733      then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
06734   if (sched_data.first_slot == 1
06735       && sched_data.stopbit[0]
06736       && ((sched_data.cur == 4
06737      && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
06738      && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
06739      && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
06740     || (sched_data.cur == 3
06741         && (sched_data.types[1] == TYPE_M
06742       || sched_data.types[1] == TYPE_A)
06743         && (sched_data.types[2] != TYPE_M
06744       && sched_data.types[2] != TYPE_I
06745       && sched_data.types[2] != TYPE_A))))
06746       
06747     {
06748       int i, best;
06749       rtx stop = sched_data.insns[1];
06750 
06751       /* Search backward for the stop bit that must be there.  */
06752       while (1)
06753   {
06754     int insn_code;
06755 
06756     stop = PREV_INSN (stop);
06757     if (GET_CODE (stop) != INSN)
06758       abort ();
06759     insn_code = recog_memoized (stop);
06760 
06761     /* Ignore .pred.rel.mutex.
06762 
06763        ??? Update this to ignore cycle display notes too
06764        ??? once those are implemented  */
06765     if (insn_code == CODE_FOR_pred_rel_mutex
06766         || insn_code == CODE_FOR_prologue_use)
06767       continue;
06768 
06769     if (insn_code == CODE_FOR_insn_group_barrier)
06770       break;
06771     abort ();
06772   }
06773 
06774       /* Adjust the stop bit's slot selector.  */
06775       if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
06776   abort ();
06777       XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
06778 
06779       sched_data.stopbit[0] = 0;
06780       sched_data.stopbit[2] = 1;
06781 
06782       sched_data.types[5] = sched_data.types[3];
06783       sched_data.types[4] = sched_data.types[2];
06784       sched_data.types[3] = sched_data.types[1];
06785       sched_data.insns[5] = sched_data.insns[3];
06786       sched_data.insns[4] = sched_data.insns[2];
06787       sched_data.insns[3] = sched_data.insns[1];
06788       sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
06789       sched_data.cur += 2;
06790       sched_data.first_slot = 3;
06791       for (i = 0; i < NR_PACKETS; i++)
06792   {
06793     const struct ia64_packet *p = packets + i;
06794     if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
06795       {
06796         sched_data.packet = p;
06797         break;
06798       }
06799   }
06800       rotate_one_bundle (sched_verbose ? dump : NULL);
06801 
06802       best = 6;
06803       for (i = 0; i < NR_PACKETS; i++)
06804   {
06805     const struct ia64_packet *p = packets + i;
06806     int split = get_split (p, sched_data.first_slot);
06807     int next;
06808 
06809     /* Disallow multiway branches here.  */
06810     if (p->t[1] == TYPE_B)
06811       continue;
06812 
06813     if (packet_matches_p (p, split, &next) && next < best)
06814       {
06815         best = next;
06816         sched_data.packet = p;
06817         sched_data.split = split;
06818       }
06819   }
06820       if (best == 6)
06821   abort ();
06822     }
06823 
06824   if (*pn_ready > 0)
06825     {
06826       int more = ia64_internal_sched_reorder (dump, sched_verbose,
06827                 ready, pn_ready, 1,
06828                 clock_var);
06829       if (more)
06830   return more;
06831       /* Did we schedule a stop?  If so, finish this cycle.  */
06832       if (sched_data.cur == sched_data.first_slot)
06833   return 0;
06834     }
06835 
06836   if (sched_verbose)
06837     fprintf (dump, "//   Can't issue more this cycle; updating type array.\n");
06838 
06839   cycle_end_fill_slots (sched_verbose ? dump : NULL);
06840   if (sched_verbose)
06841     dump_current_packet (dump);
06842   return 0;
06843 }
06844 
06845 /* We are about to issue INSN.  Return the number of insns left on the
06846    ready queue that can be issued this cycle.  */
06847 
06848 static int
06849 ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
06850      FILE *dump;
06851      int sched_verbose;
06852      rtx insn;
06853      int can_issue_more ATTRIBUTE_UNUSED;
06854 {
06855   enum attr_type t = ia64_safe_type (insn);
06856 
06857   if (sched_data.last_was_stop)
06858     {
06859       int t = sched_data.first_slot;
06860       if (t == 0)
06861   t = 3;
06862       ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
06863       init_insn_group_barriers ();
06864       sched_data.last_was_stop = 0;
06865     }
06866 
06867   if (t == TYPE_UNKNOWN)
06868     {
06869       if (sched_verbose)
06870   fprintf (dump, "// Ignoring type %s\n", type_names[t]);
06871       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06872     || asm_noperands (PATTERN (insn)) >= 0)
06873   {
06874     /* This must be some kind of asm.  Clear the scheduling state.  */
06875     rotate_two_bundles (sched_verbose ? dump : NULL);
06876     if (ia64_final_schedule)
06877       group_barrier_needed_p (insn);
06878   }
06879       return 1;
06880     }
06881 
06882   /* This is _not_ just a sanity check.  group_barrier_needed_p will update
06883      important state info.  Don't delete this test.  */
06884   if (ia64_final_schedule
06885       && group_barrier_needed_p (insn))
06886     abort ();
06887 
06888   sched_data.stopbit[sched_data.cur] = 0;
06889   sched_data.insns[sched_data.cur] = insn;
06890   sched_data.types[sched_data.cur] = t;
06891 
06892   sched_data.cur++;
06893   if (sched_verbose)
06894     fprintf (dump, "// Scheduling insn %d of type %s\n",
06895        INSN_UID (insn), type_names[t]);
06896 
06897   if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
06898     {
06899       schedule_stop (sched_verbose ? dump : NULL);
06900       sched_data.last_was_stop = 1;
06901     }
06902 
06903   return 1;
06904 }
06905 
06906 /* Free data allocated by ia64_sched_init.  */
06907 
06908 static void
06909 ia64_sched_finish (dump, sched_verbose)
06910      FILE *dump;
06911      int sched_verbose;
06912 {
06913   if (sched_verbose)
06914     fprintf (dump, "// Finishing schedule.\n");
06915   rotate_two_bundles (NULL);
06916   free (sched_types);
06917   free (sched_ready);
06918 }
06919 
06920 /* Emit pseudo-ops for the assembler to describe predicate relations.
06921    At present this assumes that we only consider predicate pairs to
06922    be mutex, and that the assembler can deduce proper values from
06923    straight-line code.  */
06924 
06925 static void
06926 emit_predicate_relation_info ()
06927 {
06928   basic_block bb;
06929 
06930   FOR_EACH_BB_REVERSE (bb)
06931     {
06932       int r;
06933       rtx head = bb->head;
06934 
06935       /* We only need such notes at code labels.  */
06936       if (GET_CODE (head) != CODE_LABEL)
06937   continue;
06938       if (GET_CODE (NEXT_INSN (head)) == NOTE
06939     && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
06940   head = NEXT_INSN (head);
06941 
06942       for (r = PR_REG (0); r < PR_REG (64); r += 2)
06943   if (REGNO_REG_SET_P (bb->global_live_at_start, r))
06944     {
06945       rtx p = gen_rtx_REG (BImode, r);
06946       rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
06947       if (head == bb->end)
06948         bb->end = n;
06949       head = n;
06950     }
06951     }
06952 
06953   /* Look for conditional calls that do not return, and protect predicate
06954      relations around them.  Otherwise the assembler will assume the call
06955      returns, and complain about uses of call-clobbered predicates after
06956      the call.  */
06957   FOR_EACH_BB_REVERSE (bb)
06958     {
06959       rtx insn = bb->head;
06960       
06961       while (1)
06962   {
06963     if (GET_CODE (insn) == CALL_INSN
06964         && GET_CODE (PATTERN (insn)) == COND_EXEC
06965         &&