• Main Page
  • Modules
  • Data Types
  • Files

osprey-gcc-4.2.0/gcc/config/ia64/ia64.c

Go to the documentation of this file.
00001 /* Definitions of target machine for GNU compiler.
00002    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
00003    Free Software Foundation, Inc.
00004    Contributed by James E. Wilson <wilson@cygnus.com> and
00005       David Mosberger <davidm@hpl.hp.com>.
00006 
00007 This file is part of GCC.
00008 
00009 GCC is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation; either version 2, or (at your option)
00012 any later version.
00013 
00014 GCC is distributed in the hope that it will be useful,
00015 but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 GNU General Public License for more details.
00018 
00019 You should have received a copy of the GNU General Public License
00020 along with GCC; see the file COPYING.  If not, write to
00021 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
00022 Boston, MA 02110-1301, USA.  */
00023 
00024 #include "config.h"
00025 #include "system.h"
00026 #include "coretypes.h"
00027 #include "tm.h"
00028 #include "rtl.h"
00029 #include "tree.h"
00030 #include "regs.h"
00031 #include "hard-reg-set.h"
00032 #include "real.h"
00033 #include "insn-config.h"
00034 #include "conditions.h"
00035 #include "output.h"
00036 #include "insn-attr.h"
00037 #include "flags.h"
00038 #include "recog.h"
00039 #include "expr.h"
00040 #include "optabs.h"
00041 #include "except.h"
00042 #include "function.h"
00043 #include "ggc.h"
00044 #include "basic-block.h"
00045 #include "toplev.h"
00046 #include "sched-int.h"
00047 #include "timevar.h"
00048 #include "target.h"
00049 #include "target-def.h"
00050 #include "tm_p.h"
00051 #include "hashtab.h"
00052 #include "langhooks.h"
00053 #include "cfglayout.h"
00054 #include "tree-gimple.h"
00055 #include "intl.h"
00056 #include "debug.h"
00057 #include "params.h"
00058 
00059 /* This is used for communication between ASM_OUTPUT_LABEL and
00060    ASM_OUTPUT_LABELREF.  */
00061 int ia64_asm_output_label = 0;
00062 
00063 /* Define the information needed to generate branch and scc insns.  This is
00064    stored from the compare operation.  */
00065 struct rtx_def * ia64_compare_op0;
00066 struct rtx_def * ia64_compare_op1;
00067 
/* Numeric spellings "r32" through "r127" of the 96 stacked general
   registers; used by ia64_expand_prologue.  Entry I names register 32 + I.  */
static const char * const ia64_reg_numbers[96] =
{
  "r32",  "r33",  "r34",  "r35",  "r36",  "r37",
  "r38",  "r39",  "r40",  "r41",  "r42",  "r43",
  "r44",  "r45",  "r46",  "r47",  "r48",  "r49",
  "r50",  "r51",  "r52",  "r53",  "r54",  "r55",
  "r56",  "r57",  "r58",  "r59",  "r60",  "r61",
  "r62",  "r63",  "r64",  "r65",  "r66",  "r67",
  "r68",  "r69",  "r70",  "r71",  "r72",  "r73",
  "r74",  "r75",  "r76",  "r77",  "r78",  "r79",
  "r80",  "r81",  "r82",  "r83",  "r84",  "r85",
  "r86",  "r87",  "r88",  "r89",  "r90",  "r91",
  "r92",  "r93",  "r94",  "r95",  "r96",  "r97",
  "r98",  "r99",  "r100", "r101", "r102", "r103",
  "r104", "r105", "r106", "r107", "r108", "r109",
  "r110", "r111", "r112", "r113", "r114", "r115",
  "r116", "r117", "r118", "r119", "r120", "r121",
  "r122", "r123", "r124", "r125", "r126", "r127"
};
00082 
/* Symbolic names "in0" .. "in7" of the eight input registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{
  "in0", "in1", "in2", "in3",
  "in4", "in5", "in6", "in7"
};
00086 
/* Symbolic names "loc0" .. "loc79" of the eighty local registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{
  "loc0",  "loc1",  "loc2",  "loc3",  "loc4",
  "loc5",  "loc6",  "loc7",  "loc8",  "loc9",
  "loc10", "loc11", "loc12", "loc13", "loc14",
  "loc15", "loc16", "loc17", "loc18", "loc19",
  "loc20", "loc21", "loc22", "loc23", "loc24",
  "loc25", "loc26", "loc27", "loc28", "loc29",
  "loc30", "loc31", "loc32", "loc33", "loc34",
  "loc35", "loc36", "loc37", "loc38", "loc39",
  "loc40", "loc41", "loc42", "loc43", "loc44",
  "loc45", "loc46", "loc47", "loc48", "loc49",
  "loc50", "loc51", "loc52", "loc53", "loc54",
  "loc55", "loc56", "loc57", "loc58", "loc59",
  "loc60", "loc61", "loc62", "loc63", "loc64",
  "loc65", "loc66", "loc67", "loc68", "loc69",
  "loc70", "loc71", "loc72", "loc73", "loc74",
  "loc75", "loc76", "loc77", "loc78", "loc79"
};
00099 
/* Symbolic names "out0" .. "out7" of the eight output registers.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{
  "out0", "out1", "out2", "out3",
  "out4", "out5", "out6", "out7"
};
00103 
00104 /* Which cpu are we scheduling for.  */
00105 enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
00106 
00107 /* Determines whether we run our final scheduling pass or not.  We always
00108    avoid the normal second scheduling pass.  */
00109 static int ia64_flag_schedule_insns2;
00110 
00111 /* Determines whether we run variable tracking in machine dependent
00112    reorganization.  */
00113 static int ia64_flag_var_tracking;
00114 
00115 /* Variables which are this size or smaller are put in the sdata/sbss
00116    sections.  */
00117 
00118 unsigned int ia64_section_threshold;
00119 
00120 /* The following variable is used by the DFA insn scheduler.  The value is
00121    TRUE if we do insn bundling instead of insn scheduling.  */
00122 int bundling_p = 0;
00123 
00124 /* Structure to be filled in by ia64_compute_frame_size with register
00125    save masks and offsets for the current function.  */
00126 
00127 struct ia64_frame_info
00128 {
00129   HOST_WIDE_INT total_size; /* size of the stack frame, not including
00130            the caller's scratch area.  */
00131   HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
00132   HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area.  */
00133   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
00134   HARD_REG_SET mask;    /* mask of saved registers.  */
00135   unsigned int gr_used_mask;  /* mask of registers in use as gr spill
00136            registers or long-term scratches.  */
00137   int n_spilled;    /* number of spilled registers.  */
00138   int reg_fp;     /* register for fp.  */
00139   int reg_save_b0;    /* save register for b0.  */
00140   int reg_save_pr;    /* save register for prs.  */
00141   int reg_save_ar_pfs;    /* save register for ar.pfs.  */
00142   int reg_save_ar_unat;   /* save register for ar.unat.  */
00143   int reg_save_ar_lc;   /* save register for ar.lc.  */
00144   int reg_save_gp;    /* save register for gp.  */
00145   int n_input_regs;   /* number of input registers used.  */
00146   int n_local_regs;   /* number of local registers used.  */
00147   int n_output_regs;    /* number of output registers used.  */
00148   int n_rotate_regs;    /* number of rotating registers used.  */
00149 
00150   char need_regstk;   /* true if a .regstk directive needed.  */
00151   char initialized;   /* true if the data is finalized.  */
00152 };
00153 
00154 /* Current frame information calculated by ia64_compute_frame_size.  */
00155 static struct ia64_frame_info current_frame_info;
00156 
00157 static int ia64_first_cycle_multipass_dfa_lookahead (void);
00158 static void ia64_dependencies_evaluation_hook (rtx, rtx);
00159 static void ia64_init_dfa_pre_cycle_insn (void);
00160 static rtx ia64_dfa_pre_cycle_insn (void);
00161 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
00162 static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
00163 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
00164 static void ia64_h_i_d_extended (void);
00165 static int ia64_mode_to_int (enum machine_mode);
00166 static void ia64_set_sched_flags (spec_info_t);
00167 static int ia64_speculate_insn (rtx, ds_t, rtx *);
00168 static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
00169 static bool ia64_needs_block_p (rtx);
00170 static rtx ia64_gen_check (rtx, rtx, bool);
00171 static int ia64_spec_check_p (rtx);
00172 static int ia64_spec_check_src_p (rtx);
00173 static rtx gen_tls_get_addr (void);
00174 static rtx gen_thread_pointer (void);
00175 static int find_gr_spill (int);
00176 static int next_scratch_gr_reg (void);
00177 static void mark_reg_gr_used_mask (rtx, void *);
00178 static void ia64_compute_frame_size (HOST_WIDE_INT);
00179 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
00180 static void finish_spill_pointers (void);
00181 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
00182 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
00183 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
00184 static rtx gen_movdi_x (rtx, rtx, rtx);
00185 static rtx gen_fr_spill_x (rtx, rtx, rtx);
00186 static rtx gen_fr_restore_x (rtx, rtx, rtx);
00187 
00188 static enum machine_mode hfa_element_mode (tree, bool);
00189 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
00190            tree, int *, int);
00191 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
00192            tree, bool);
00193 static bool ia64_function_ok_for_sibcall (tree, tree);
00194 static bool ia64_return_in_memory (tree, tree);
00195 static bool ia64_rtx_costs (rtx, int, int, int *);
00196 static void fix_range (const char *);
00197 static bool ia64_handle_option (size_t, const char *, int);
00198 static struct machine_function * ia64_init_machine_status (void);
00199 static void emit_insn_group_barriers (FILE *);
00200 static void emit_all_insn_group_barriers (FILE *);
00201 static void final_emit_insn_group_barriers (FILE *);
00202 static void emit_predicate_relation_info (void);
00203 static void ia64_reorg (void);
00204 static bool ia64_in_small_data_p (tree);
00205 static void process_epilogue (FILE *, rtx, bool, bool);
00206 static int process_set (FILE *, rtx, rtx, bool, bool);
00207 
00208 static bool ia64_assemble_integer (rtx, unsigned int, int);
00209 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
00210 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
00211 static void ia64_output_function_end_prologue (FILE *);
00212 
00213 static int ia64_issue_rate (void);
00214 static int ia64_adjust_cost_2 (rtx, int, rtx, int);
00215 static void ia64_sched_init (FILE *, int, int);
00216 static void ia64_sched_init_global (FILE *, int, int);
00217 static void ia64_sched_finish_global (FILE *, int);
00218 static void ia64_sched_finish (FILE *, int);
00219 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
00220 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
00221 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
00222 static int ia64_variable_issue (FILE *, int, rtx, int);
00223 
00224 static struct bundle_state *get_free_bundle_state (void);
00225 static void free_bundle_state (struct bundle_state *);
00226 static void initiate_bundle_states (void);
00227 static void finish_bundle_states (void);
00228 static unsigned bundle_state_hash (const void *);
00229 static int bundle_state_eq_p (const void *, const void *);
00230 static int insert_bundle_state (struct bundle_state *);
00231 static void initiate_bundle_state_table (void);
00232 static void finish_bundle_state_table (void);
00233 static int try_issue_nops (struct bundle_state *, int);
00234 static int try_issue_insn (struct bundle_state *, rtx);
00235 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
00236 static int get_max_pos (state_t);
00237 static int get_template (state_t, int);
00238 
00239 static rtx get_next_important_insn (rtx, rtx);
00240 static void bundling (FILE *, int, rtx, rtx);
00241 
00242 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
00243           HOST_WIDE_INT, tree);
00244 static void ia64_file_start (void);
00245 
00246 static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
00247 static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
00248 static section *ia64_select_rtx_section (enum machine_mode, rtx,
00249            unsigned HOST_WIDE_INT);
00250 static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
00251      ATTRIBUTE_UNUSED;
00252 static unsigned int ia64_section_type_flags (tree, const char *, int);
00253 static void ia64_hpux_add_extern_decl (tree decl)
00254      ATTRIBUTE_UNUSED;
00255 static void ia64_hpux_file_end (void)
00256      ATTRIBUTE_UNUSED;
00257 static void ia64_init_libfuncs (void)
00258      ATTRIBUTE_UNUSED;
00259 static void ia64_hpux_init_libfuncs (void)
00260      ATTRIBUTE_UNUSED;
00261 static void ia64_sysv4_init_libfuncs (void)
00262      ATTRIBUTE_UNUSED;
00263 static void ia64_vms_init_libfuncs (void)
00264      ATTRIBUTE_UNUSED;
00265 
00266 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
00267 static void ia64_encode_section_info (tree, rtx, int);
00268 static rtx ia64_struct_value_rtx (tree, int);
00269 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
00270 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
00271 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
00272 static bool ia64_cannot_force_const_mem (rtx);
00273 static const char *ia64_mangle_fundamental_type (tree);
00274 static const char *ia64_invalid_conversion (tree, tree);
00275 static const char *ia64_invalid_unary_op (int, tree);
00276 static const char *ia64_invalid_binary_op (int, tree, tree);
00277 
00278 /* Table of valid machine attributes.  */
00279 static const struct attribute_spec ia64_attribute_table[] =
00280 {
00281   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
00282   { "syscall_linkage", 0, 0, false, true,  true,  NULL },
00283   { "model",         1, 1, true, false, false, ia64_handle_model_attribute },
00284   { NULL,        0, 0, false, false, false, NULL }
00285 };
00286 
00287 /* Initialize the GCC target structure.  */
00288 #undef TARGET_ATTRIBUTE_TABLE
00289 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
00290 
00291 #undef TARGET_INIT_BUILTINS
00292 #define TARGET_INIT_BUILTINS ia64_init_builtins
00293 
00294 #undef TARGET_EXPAND_BUILTIN
00295 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
00296 
00297 #undef TARGET_ASM_BYTE_OP
00298 #define TARGET_ASM_BYTE_OP "\tdata1\t"
00299 #undef TARGET_ASM_ALIGNED_HI_OP
00300 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
00301 #undef TARGET_ASM_ALIGNED_SI_OP
00302 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
00303 #undef TARGET_ASM_ALIGNED_DI_OP
00304 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
00305 #undef TARGET_ASM_UNALIGNED_HI_OP
00306 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
00307 #undef TARGET_ASM_UNALIGNED_SI_OP
00308 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
00309 #undef TARGET_ASM_UNALIGNED_DI_OP
00310 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
00311 #undef TARGET_ASM_INTEGER
00312 #define TARGET_ASM_INTEGER ia64_assemble_integer
00313 
00314 #undef TARGET_ASM_FUNCTION_PROLOGUE
00315 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
00316 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
00317 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
00318 #undef TARGET_ASM_FUNCTION_EPILOGUE
00319 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
00320 
00321 #undef TARGET_IN_SMALL_DATA_P
00322 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
00323 
00324 #undef TARGET_SCHED_ADJUST_COST_2
00325 #define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
00326 #undef TARGET_SCHED_ISSUE_RATE
00327 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
00328 #undef TARGET_SCHED_VARIABLE_ISSUE
00329 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
00330 #undef TARGET_SCHED_INIT
00331 #define TARGET_SCHED_INIT ia64_sched_init
00332 #undef TARGET_SCHED_FINISH
00333 #define TARGET_SCHED_FINISH ia64_sched_finish
00334 #undef TARGET_SCHED_INIT_GLOBAL
00335 #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
00336 #undef TARGET_SCHED_FINISH_GLOBAL
00337 #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
00338 #undef TARGET_SCHED_REORDER
00339 #define TARGET_SCHED_REORDER ia64_sched_reorder
00340 #undef TARGET_SCHED_REORDER2
00341 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
00342 
00343 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
00344 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
00345 
00346 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
00347 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
00348 
00349 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
00350 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
00351 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
00352 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
00353 
00354 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
00355 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
00356   ia64_first_cycle_multipass_dfa_lookahead_guard
00357 
00358 #undef TARGET_SCHED_DFA_NEW_CYCLE
00359 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
00360 
00361 #undef TARGET_SCHED_H_I_D_EXTENDED
00362 #define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
00363 
00364 #undef TARGET_SCHED_SET_SCHED_FLAGS
00365 #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
00366 
00367 #undef TARGET_SCHED_SPECULATE_INSN
00368 #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
00369 
00370 #undef TARGET_SCHED_NEEDS_BLOCK_P
00371 #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
00372 
00373 #undef TARGET_SCHED_GEN_CHECK
00374 #define TARGET_SCHED_GEN_CHECK ia64_gen_check
00375 
00376 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
00377 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
00378   ia64_first_cycle_multipass_dfa_lookahead_guard_spec
00379 
00380 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
00381 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
00382 #undef TARGET_ARG_PARTIAL_BYTES
00383 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
00384 
00385 #undef TARGET_ASM_OUTPUT_MI_THUNK
00386 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
00387 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
00388 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
00389 
00390 #undef TARGET_ASM_FILE_START
00391 #define TARGET_ASM_FILE_START ia64_file_start
00392 
00393 #undef TARGET_RTX_COSTS
00394 #define TARGET_RTX_COSTS ia64_rtx_costs
00395 #undef TARGET_ADDRESS_COST
00396 #define TARGET_ADDRESS_COST hook_int_rtx_0
00397 
00398 #undef TARGET_MACHINE_DEPENDENT_REORG
00399 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
00400 
00401 #undef TARGET_ENCODE_SECTION_INFO
00402 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
00403 
00404 #undef  TARGET_SECTION_TYPE_FLAGS
00405 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
00406 
00407 #ifdef HAVE_AS_TLS
00408 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
00409 #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
00410 #endif
00411 
00412 /* ??? ABI doesn't allow us to define this.  */
00413 #if 0
00414 #undef TARGET_PROMOTE_FUNCTION_ARGS
00415 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
00416 #endif
00417 
00418 /* ??? ABI doesn't allow us to define this.  */
00419 #if 0
00420 #undef TARGET_PROMOTE_FUNCTION_RETURN
00421 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
00422 #endif
00423 
00424 /* ??? Investigate.  */
00425 #if 0
00426 #undef TARGET_PROMOTE_PROTOTYPES
00427 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
00428 #endif
00429 
00430 #undef TARGET_STRUCT_VALUE_RTX
00431 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
00432 #undef TARGET_RETURN_IN_MEMORY
00433 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
00434 #undef TARGET_SETUP_INCOMING_VARARGS
00435 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
00436 #undef TARGET_STRICT_ARGUMENT_NAMING
00437 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
00438 #undef TARGET_MUST_PASS_IN_STACK
00439 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
00440 
00441 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
00442 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
00443 
00444 #undef TARGET_UNWIND_EMIT
00445 #define TARGET_UNWIND_EMIT process_for_unwind_directive
00446 
00447 #undef TARGET_SCALAR_MODE_SUPPORTED_P
00448 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
00449 #undef TARGET_VECTOR_MODE_SUPPORTED_P
00450 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
00451 
00452 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
00453    in an order different from the specified program order.  */
00454 #undef TARGET_RELAXED_ORDERING
00455 #define TARGET_RELAXED_ORDERING true
00456 
00457 #undef TARGET_DEFAULT_TARGET_FLAGS
00458 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
00459 #undef TARGET_HANDLE_OPTION
00460 #define TARGET_HANDLE_OPTION ia64_handle_option
00461 
00462 #undef TARGET_CANNOT_FORCE_CONST_MEM
00463 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
00464 
00465 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
00466 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
00467 
00468 #undef TARGET_INVALID_CONVERSION
00469 #define TARGET_INVALID_CONVERSION ia64_invalid_conversion
00470 #undef TARGET_INVALID_UNARY_OP
00471 #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
00472 #undef TARGET_INVALID_BINARY_OP
00473 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
00474 
00475 struct gcc_target targetm = TARGET_INITIALIZER;
00476 
/* Address areas a declaration can be placed in.  */
typedef enum
{
  /* Normal address area.  */
  ADDR_AREA_NORMAL = 0,

  /* Area addressable by "addl" (-2MB < addr < 2MB).  */
  ADDR_AREA_SMALL = 1
} ia64_addr_area;
00483 
00484 static GTY(()) tree small_ident1;
00485 static GTY(()) tree small_ident2;
00486 
00487 static void
00488 init_idents (void)
00489 {
00490   if (small_ident1 == 0)
00491     {
00492       small_ident1 = get_identifier ("small");
00493       small_ident2 = get_identifier ("__small__");
00494     }
00495 }
00496 
00497 /* Retrieve the address area that has been chosen for the given decl.  */
00498 
00499 static ia64_addr_area
00500 ia64_get_addr_area (tree decl)
00501 {
00502   tree model_attr;
00503 
00504   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
00505   if (model_attr)
00506     {
00507       tree id;
00508 
00509       init_idents ();
00510       id = TREE_VALUE (TREE_VALUE (model_attr));
00511       if (id == small_ident1 || id == small_ident2)
00512   return ADDR_AREA_SMALL;
00513     }
00514   return ADDR_AREA_NORMAL;
00515 }
00516 
00517 static tree
00518 ia64_handle_model_attribute (tree *node, tree name, tree args,
00519            int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
00520 {
00521   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
00522   ia64_addr_area area;
00523   tree arg, decl = *node;
00524 
00525   init_idents ();
00526   arg = TREE_VALUE (args);
00527   if (arg == small_ident1 || arg == small_ident2)
00528     {
00529       addr_area = ADDR_AREA_SMALL;
00530     }
00531   else
00532     {
00533       warning (OPT_Wattributes, "invalid argument of %qs attribute",
00534          IDENTIFIER_POINTER (name));
00535       *no_add_attrs = true;
00536     }
00537 
00538   switch (TREE_CODE (decl))
00539     {
00540     case VAR_DECL:
00541       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
00542      == FUNCTION_DECL)
00543     && !TREE_STATIC (decl))
00544   {
00545     error ("%Jan address area attribute cannot be specified for "
00546      "local variables", decl);
00547     *no_add_attrs = true;
00548   }
00549       area = ia64_get_addr_area (decl);
00550       if (area != ADDR_AREA_NORMAL && addr_area != area)
00551   {
00552     error ("address area of %q+D conflicts with previous "
00553      "declaration", decl);
00554     *no_add_attrs = true;
00555   }
00556       break;
00557 
00558     case FUNCTION_DECL:
00559       error ("%Jaddress area attribute cannot be specified for functions",
00560        decl);
00561       *no_add_attrs = true;
00562       break;
00563 
00564     default:
00565       warning (OPT_Wattributes, "%qs attribute ignored",
00566          IDENTIFIER_POINTER (name));
00567       *no_add_attrs = true;
00568       break;
00569     }
00570 
00571   return NULL_TREE;
00572 }
00573 
00574 static void
00575 ia64_encode_addr_area (tree decl, rtx symbol)
00576 {
00577   int flags;
00578 
00579   flags = SYMBOL_REF_FLAGS (symbol);
00580   switch (ia64_get_addr_area (decl))
00581     {
00582     case ADDR_AREA_NORMAL: break;
00583     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
00584     default: gcc_unreachable ();
00585     }
00586   SYMBOL_REF_FLAGS (symbol) = flags;
00587 }
00588 
00589 static void
00590 ia64_encode_section_info (tree decl, rtx rtl, int first)
00591 {
00592   default_encode_section_info (decl, rtl, first);
00593 
00594   /* Careful not to prod global register variables.  */
00595   if (TREE_CODE (decl) == VAR_DECL
00596       && GET_CODE (DECL_RTL (decl)) == MEM
00597       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
00598       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
00599     ia64_encode_addr_area (decl, XEXP (rtl, 0));
00600 }
00601 
00602 /* Implement CONST_OK_FOR_LETTER_P.  */
00603 
00604 bool
00605 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
00606 {
00607   switch (c)
00608     {
00609     case 'I':
00610       return CONST_OK_FOR_I (value);
00611     case 'J':
00612       return CONST_OK_FOR_J (value);
00613     case 'K':
00614       return CONST_OK_FOR_K (value);
00615     case 'L':
00616       return CONST_OK_FOR_L (value);
00617     case 'M':
00618       return CONST_OK_FOR_M (value);
00619     case 'N':
00620       return CONST_OK_FOR_N (value);
00621     case 'O':
00622       return CONST_OK_FOR_O (value);
00623     case 'P':
00624       return CONST_OK_FOR_P (value);
00625     default:
00626       return false;
00627     }
00628 }
00629 
00630 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
00631 
00632 bool
00633 ia64_const_double_ok_for_letter_p (rtx value, char c)
00634 {
00635   switch (c)
00636     {
00637     case 'G':
00638       return CONST_DOUBLE_OK_FOR_G (value);
00639     default:
00640       return false;
00641     }
00642 }
00643 
00644 /* Implement EXTRA_CONSTRAINT.  */
00645 
00646 bool
00647 ia64_extra_constraint (rtx value, char c)
00648 {
00649   switch (c)
00650     {
00651     case 'Q':
00652       /* Non-volatile memory for FP_REG loads/stores.  */
00653       return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
00654 
00655     case 'R':
00656       /* 1..4 for shladd arguments.  */
00657       return (GET_CODE (value) == CONST_INT
00658         && INTVAL (value) >= 1 && INTVAL (value) <= 4);
00659 
00660     case 'S':
00661       /* Non-post-inc memory for asms and other unsavory creatures.  */
00662       return (GET_CODE (value) == MEM
00663         && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
00664         && (reload_in_progress || memory_operand (value, VOIDmode)));
00665 
00666     case 'T':
00667       /* Symbol ref to small-address-area.  */
00668       return small_addr_symbolic_operand (value, VOIDmode);
00669 
00670     case 'U':
00671       /* Vector zero.  */
00672       return value == CONST0_RTX (GET_MODE (value));
00673 
00674     case 'W':
00675       /* An integer vector, such that conversion to an integer yields a
00676    value appropriate for an integer 'J' constraint.  */
00677       if (GET_CODE (value) == CONST_VECTOR
00678     && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
00679   {
00680     value = simplify_subreg (DImode, value, GET_MODE (value), 0);
00681     return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
00682   }
00683       return false;
00684 
00685     case 'Y':
00686       /* A V2SF vector containing elements that satisfy 'G'.  */
00687       return
00688   (GET_CODE (value) == CONST_VECTOR
00689    && GET_MODE (value) == V2SFmode
00690    && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
00691    && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
00692 
00693     default:
00694       return false;
00695     }
00696 }
00697 
00698 /* Return 1 if the operands of a move are ok.  */
00699 
00700 int
00701 ia64_move_ok (rtx dst, rtx src)
00702 {
00703   /* If we're under init_recog_no_volatile, we'll not be able to use
00704      memory_operand.  So check the code directly and don't worry about
00705      the validity of the underlying address, which should have been
00706      checked elsewhere anyway.  */
00707   if (GET_CODE (dst) != MEM)
00708     return 1;
00709   if (GET_CODE (src) == MEM)
00710     return 0;
00711   if (register_operand (src, VOIDmode))
00712     return 1;
00713 
00714   /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
00715   if (INTEGRAL_MODE_P (GET_MODE (dst)))
00716     return src == const0_rtx;
00717   else
00718     return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
00719 }
00720 
00721 /* Return 1 if the operands are ok for a floating point load pair.  */
00722 
00723 int
00724 ia64_load_pair_ok (rtx dst, rtx src)
00725 {
00726   if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
00727     return 0;
00728   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
00729     return 0;
00730   switch (GET_CODE (XEXP (src, 0)))
00731     {
00732     case REG:
00733     case POST_INC:
00734       break;
00735     case POST_DEC:
00736       return 0;
00737     case POST_MODIFY:
00738       {
00739   rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
00740 
00741   if (GET_CODE (adjust) != CONST_INT
00742       || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
00743     return 0;
00744       }
00745       break;
00746     default:
00747       abort ();
00748     }
00749   return 1;
00750 }
00751 
00752 int
00753 addp4_optimize_ok (rtx op1, rtx op2)
00754 {
00755   return (basereg_operand (op1, GET_MODE(op1)) !=
00756     basereg_operand (op2, GET_MODE(op2)));
00757 }
00758 
00759 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
00760    Return the length of the field, or <= 0 on failure.  */
00761 
00762 int
00763 ia64_depz_field_mask (rtx rop, rtx rshift)
00764 {
00765   unsigned HOST_WIDE_INT op = INTVAL (rop);
00766   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
00767 
00768   /* Get rid of the zero bits we're shifting in.  */
00769   op >>= shift;
00770 
00771   /* We must now have a solid block of 1's at bit 0.  */
00772   return exact_log2 (op + 1);
00773 }
00774 
00775 /* Return the TLS model to use for ADDR.  */
00776 
00777 static enum tls_model
00778 tls_symbolic_operand_type (rtx addr)
00779 {
00780   enum tls_model tls_kind = 0;
00781 
00782   if (GET_CODE (addr) == CONST)
00783     {
00784       if (GET_CODE (XEXP (addr, 0)) == PLUS
00785     && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
00786         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
00787     }
00788   else if (GET_CODE (addr) == SYMBOL_REF)
00789     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
00790 
00791   return tls_kind;
00792 }
00793 
00794 /* Return true if X is a constant that is valid for some immediate
00795    field in an instruction.  */
00796 
00797 bool
00798 ia64_legitimate_constant_p (rtx x)
00799 {
00800   switch (GET_CODE (x))
00801     {
00802     case CONST_INT:
00803     case LABEL_REF:
00804       return true;
00805 
00806     case CONST_DOUBLE:
00807       if (GET_MODE (x) == VOIDmode)
00808   return true;
00809       return CONST_DOUBLE_OK_FOR_G (x);
00810 
00811     case CONST:
00812     case SYMBOL_REF:
00813       /* ??? Short term workaround for PR 28490.  We must make the code here
00814    match the code in ia64_expand_move and move_operand, even though they
00815    are both technically wrong.  */
00816       if (tls_symbolic_operand_type (x) == 0)
00817   {
00818     HOST_WIDE_INT addend = 0;
00819     rtx op = x;
00820 
00821     if (GET_CODE (op) == CONST
00822         && GET_CODE (XEXP (op, 0)) == PLUS
00823         && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
00824       {
00825         addend = INTVAL (XEXP (XEXP (op, 0), 1));
00826         op = XEXP (XEXP (op, 0), 0);
00827       }
00828 
00829           if (any_offset_symbol_operand (op, GET_MODE (op))
00830               || function_operand (op, GET_MODE (op)))
00831             return true;
00832     if (aligned_offset_symbol_operand (op, GET_MODE (op)))
00833       return (addend & 0x3fff) == 0;
00834     return false;
00835   }
00836       return false;
00837 
00838     case CONST_VECTOR:
00839       {
00840   enum machine_mode mode = GET_MODE (x);
00841 
00842   if (mode == V2SFmode)
00843     return ia64_extra_constraint (x, 'Y');
00844 
00845   return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
00846     && GET_MODE_SIZE (mode) <= 8);
00847       }
00848 
00849     default:
00850       return false;
00851     }
00852 }
00853 
00854 /* Don't allow TLS addresses to get spilled to memory.  */
00855 
00856 static bool
00857 ia64_cannot_force_const_mem (rtx x)
00858 {
00859   return tls_symbolic_operand_type (x) != 0;
00860 }
00861 
00862 /* Expand a symbolic constant load.  */
00863 
00864 bool
00865 ia64_expand_load_address (rtx dest, rtx src)
00866 {
00867   gcc_assert (GET_CODE (dest) == REG);
00868 
00869   /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
00870      having to pointer-extend the value afterward.  Other forms of address
00871      computation below are also more natural to compute as 64-bit quantities.
00872      If we've been given an SImode destination register, change it.  */
00873   if (GET_MODE (dest) != Pmode)
00874     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
00875 
00876   if (TARGET_NO_PIC)
00877     return false;
00878   if (small_addr_symbolic_operand (src, VOIDmode))
00879     return false;
00880 
00881   if (TARGET_AUTO_PIC)
00882     emit_insn (gen_load_gprel64 (dest, src));
00883   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
00884     emit_insn (gen_load_fptr (dest, src));
00885   else if (sdata_symbolic_operand (src, VOIDmode))
00886     emit_insn (gen_load_gprel (dest, src));
00887   else
00888     {
00889       HOST_WIDE_INT addend = 0;
00890       rtx tmp;
00891 
00892       /* We did split constant offsets in ia64_expand_move, and we did try
00893    to keep them split in move_operand, but we also allowed reload to
00894    rematerialize arbitrary constants rather than spill the value to
00895    the stack and reload it.  So we have to be prepared here to split
00896    them apart again.  */
00897       if (GET_CODE (src) == CONST)
00898   {
00899     HOST_WIDE_INT hi, lo;
00900 
00901     hi = INTVAL (XEXP (XEXP (src, 0), 1));
00902     lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
00903     hi = hi - lo;
00904 
00905     if (lo != 0)
00906       {
00907         addend = lo;
00908         src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
00909       }
00910   }
00911 
00912       tmp = gen_rtx_HIGH (Pmode, src);
00913       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
00914       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00915 
00916       tmp = gen_rtx_LO_SUM (Pmode, dest, src);
00917       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00918 
00919       if (addend)
00920   {
00921     tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
00922     emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00923   }
00924     }
00925 
00926   return true;
00927 }
00928 
00929 static GTY(()) rtx gen_tls_tga;
00930 static rtx
00931 gen_tls_get_addr (void)
00932 {
00933   if (!gen_tls_tga)
00934     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
00935   return gen_tls_tga;
00936 }
00937 
00938 static GTY(()) rtx thread_pointer_rtx;
00939 static rtx
00940 gen_thread_pointer (void)
00941 {
00942   if (!thread_pointer_rtx)
00943     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
00944   return thread_pointer_rtx;
00945 }
00946 
00947 static rtx
00948 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
00949        rtx orig_op1, HOST_WIDE_INT addend)
00950 {
00951   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
00952   rtx orig_op0 = op0;
00953   HOST_WIDE_INT addend_lo, addend_hi;
00954 
00955   switch (tls_kind)
00956     {
00957     case TLS_MODEL_GLOBAL_DYNAMIC:
00958       start_sequence ();
00959 
00960       tga_op1 = gen_reg_rtx (Pmode);
00961       emit_insn (gen_load_dtpmod (tga_op1, op1));
00962 
00963       tga_op2 = gen_reg_rtx (Pmode);
00964       emit_insn (gen_load_dtprel (tga_op2, op1));
00965 
00966       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
00967            LCT_CONST, Pmode, 2, tga_op1,
00968            Pmode, tga_op2, Pmode);
00969 
00970       insns = get_insns ();
00971       end_sequence ();
00972 
00973       if (GET_MODE (op0) != Pmode)
00974   op0 = tga_ret;
00975       emit_libcall_block (insns, op0, tga_ret, op1);
00976       break;
00977 
00978     case TLS_MODEL_LOCAL_DYNAMIC:
00979       /* ??? This isn't the completely proper way to do local-dynamic
00980    If the call to __tls_get_addr is used only by a single symbol,
00981    then we should (somehow) move the dtprel to the second arg
00982    to avoid the extra add.  */
00983       start_sequence ();
00984 
00985       tga_op1 = gen_reg_rtx (Pmode);
00986       emit_insn (gen_load_dtpmod (tga_op1, op1));
00987 
00988       tga_op2 = const0_rtx;
00989 
00990       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
00991            LCT_CONST, Pmode, 2, tga_op1,
00992            Pmode, tga_op2, Pmode);
00993 
00994       insns = get_insns ();
00995       end_sequence ();
00996 
00997       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
00998         UNSPEC_LD_BASE);
00999       tmp = gen_reg_rtx (Pmode);
01000       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
01001 
01002       if (!register_operand (op0, Pmode))
01003   op0 = gen_reg_rtx (Pmode);
01004       if (TARGET_TLS64)
01005   {
01006     emit_insn (gen_load_dtprel (op0, op1));
01007     emit_insn (gen_adddi3 (op0, tmp, op0));
01008   }
01009       else
01010   emit_insn (gen_add_dtprel (op0, op1, tmp));
01011       break;
01012 
01013     case TLS_MODEL_INITIAL_EXEC:
01014       addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
01015       addend_hi = addend - addend_lo;
01016 
01017       op1 = plus_constant (op1, addend_hi);
01018       addend = addend_lo;
01019 
01020       tmp = gen_reg_rtx (Pmode);
01021       emit_insn (gen_load_tprel (tmp, op1));
01022 
01023       if (!register_operand (op0, Pmode))
01024   op0 = gen_reg_rtx (Pmode);
01025       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
01026       break;
01027 
01028     case TLS_MODEL_LOCAL_EXEC:
01029       if (!register_operand (op0, Pmode))
01030   op0 = gen_reg_rtx (Pmode);
01031 
01032       op1 = orig_op1;
01033       addend = 0;
01034       if (TARGET_TLS64)
01035   {
01036     emit_insn (gen_load_tprel (op0, op1));
01037     emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
01038   }
01039       else
01040   emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
01041       break;
01042 
01043     default:
01044       gcc_unreachable ();
01045     }
01046 
01047   if (addend)
01048     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
01049              orig_op0, 1, OPTAB_DIRECT);
01050   if (orig_op0 == op0)
01051     return NULL_RTX;
01052   if (GET_MODE (orig_op0) == Pmode)
01053     return op0;
01054   return gen_lowpart (GET_MODE (orig_op0), op0);
01055 }
01056 
01057 rtx
01058 ia64_expand_move (rtx op0, rtx op1)
01059 {
01060   enum machine_mode mode = GET_MODE (op0);
01061 
01062   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
01063     op1 = force_reg (mode, op1);
01064 
01065   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
01066     {
01067       HOST_WIDE_INT addend = 0;
01068       enum tls_model tls_kind;
01069       rtx sym = op1;
01070 
01071       if (GET_CODE (op1) == CONST
01072     && GET_CODE (XEXP (op1, 0)) == PLUS
01073     && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
01074   {
01075     addend = INTVAL (XEXP (XEXP (op1, 0), 1));
01076     sym = XEXP (XEXP (op1, 0), 0);
01077   }
01078 
01079       tls_kind = tls_symbolic_operand_type (sym);
01080       if (tls_kind)
01081   return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
01082 
01083       if (any_offset_symbol_operand (sym, mode))
01084   addend = 0;
01085       else if (aligned_offset_symbol_operand (sym, mode))
01086   {
01087     HOST_WIDE_INT addend_lo, addend_hi;
01088         
01089     addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
01090     addend_hi = addend - addend_lo;
01091 
01092     if (addend_lo != 0)
01093       {
01094         op1 = plus_constant (sym, addend_hi);
01095         addend = addend_lo;
01096       }
01097     else
01098       addend = 0;
01099   }
01100       else
01101   op1 = sym;
01102 
01103       if (reload_completed)
01104   {
01105     /* We really should have taken care of this offset earlier.  */
01106     gcc_assert (addend == 0);
01107     if (ia64_expand_load_address (op0, op1))
01108       return NULL_RTX;
01109   }
01110 
01111       if (addend)
01112   {
01113     rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
01114 
01115     emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
01116 
01117     op1 = expand_simple_binop (mode, PLUS, subtarget,
01118              GEN_INT (addend), op0, 1, OPTAB_DIRECT);
01119     if (op0 == op1)
01120       return NULL_RTX;
01121   }
01122     }
01123 
01124   return op1;
01125 }
01126 
01127 /* Split a move from OP1 to OP0 conditional on COND.  */
01128 
01129 void
01130 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
01131 {
01132   rtx insn, first = get_last_insn ();
01133 
01134   emit_move_insn (op0, op1);
01135 
01136   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
01137     if (INSN_P (insn))
01138       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
01139             PATTERN (insn));
01140 }
01141 
01142 /* Split a post-reload TImode or TFmode reference into two DImode
01143    components.  This is made extra difficult by the fact that we do
01144    not get any scratch registers to work with, because reload cannot
01145    be prevented from giving us a scratch that overlaps the register
01146    pair involved.  So instead, when addressing memory, we tweak the
01147    pointer register up and back down with POST_INCs.  Or up and not
01148    back down when we can get away with it.
01149 
01150    REVERSED is true when the loads must be done in reversed order
01151    (high word first) for correctness.  DEAD is true when the pointer
01152    dies with the second insn we generate and therefore the second
01153    address must not carry a postmodify.
01154 
01155    May return an insn which is to be emitted after the moves.  */
01156 
01157 static rtx
01158 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
01159 {
01160   rtx fixup = 0;
01161 
01162   switch (GET_CODE (in))
01163     {
01164     case REG:
01165       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
01166       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
01167       break;
01168 
01169     case CONST_INT:
01170     case CONST_DOUBLE:
01171       /* Cannot occur reversed.  */
01172       gcc_assert (!reversed);
01173       
01174       if (GET_MODE (in) != TFmode)
01175   split_double (in, &out[0], &out[1]);
01176       else
01177   /* split_double does not understand how to split a TFmode
01178      quantity into a pair of DImode constants.  */
01179   {
01180     REAL_VALUE_TYPE r;
01181     unsigned HOST_WIDE_INT p[2];
01182     long l[4];  /* TFmode is 128 bits */
01183 
01184     REAL_VALUE_FROM_CONST_DOUBLE (r, in);
01185     real_to_target (l, &r, TFmode);
01186 
01187     if (FLOAT_WORDS_BIG_ENDIAN)
01188       {
01189         p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
01190         p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
01191       }
01192     else
01193       {
01194         p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
01195         p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
01196       }
01197     out[0] = GEN_INT (p[0]);
01198     out[1] = GEN_INT (p[1]);
01199   }
01200       break;
01201 
01202     case MEM:
01203       {
01204   rtx base = XEXP (in, 0);
01205   rtx offset;
01206 
01207   switch (GET_CODE (base))
01208     {
01209     case REG:
01210       if (!reversed)
01211         {
01212     out[0] = adjust_automodify_address
01213       (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01214     out[1] = adjust_automodify_address
01215       (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
01216         }
01217       else
01218         {
01219     /* Reversal requires a pre-increment, which can only
01220        be done as a separate insn.  */
01221     emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
01222     out[0] = adjust_automodify_address
01223       (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
01224     out[1] = adjust_address (in, DImode, 0);
01225         }
01226       break;
01227 
01228     case POST_INC:
01229       gcc_assert (!reversed && !dead);
01230       
01231       /* Just do the increment in two steps.  */
01232       out[0] = adjust_automodify_address (in, DImode, 0, 0);
01233       out[1] = adjust_automodify_address (in, DImode, 0, 8);
01234       break;
01235 
01236     case POST_DEC:
01237       gcc_assert (!reversed && !dead);
01238       
01239       /* Add 8, subtract 24.  */
01240       base = XEXP (base, 0);
01241       out[0] = adjust_automodify_address
01242         (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01243       out[1] = adjust_automodify_address
01244         (in, DImode,
01245          gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
01246          8);
01247       break;
01248 
01249     case POST_MODIFY:
01250       gcc_assert (!reversed && !dead);
01251 
01252       /* Extract and adjust the modification.  This case is
01253          trickier than the others, because we might have an
01254          index register, or we might have a combined offset that
01255          doesn't fit a signed 9-bit displacement field.  We can
01256          assume the incoming expression is already legitimate.  */
01257       offset = XEXP (base, 1);
01258       base = XEXP (base, 0);
01259 
01260       out[0] = adjust_automodify_address
01261         (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01262 
01263       if (GET_CODE (XEXP (offset, 1)) == REG)
01264         {
01265     /* Can't adjust the postmodify to match.  Emit the
01266        original, then a separate addition insn.  */
01267     out[1] = adjust_automodify_address (in, DImode, 0, 8);
01268     fixup = gen_adddi3 (base, base, GEN_INT (-8));
01269         }
01270       else
01271         {
01272     gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
01273     if (INTVAL (XEXP (offset, 1)) < -256 + 8)
01274       {
01275         /* Again the postmodify cannot be made to match,
01276            but in this case it's more efficient to get rid
01277            of the postmodify entirely and fix up with an
01278            add insn.  */
01279         out[1] = adjust_automodify_address (in, DImode, base, 8);
01280         fixup = gen_adddi3
01281           (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
01282       }
01283     else
01284       {
01285         /* Combined offset still fits in the displacement field.
01286            (We cannot overflow it at the high end.)  */
01287         out[1] = adjust_automodify_address
01288           (in, DImode, gen_rtx_POST_MODIFY
01289            (Pmode, base, gen_rtx_PLUS
01290       (Pmode, base,
01291        GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
01292            8);
01293       }
01294         }
01295       break;
01296 
01297     default:
01298       gcc_unreachable ();
01299     }
01300   break;
01301       }
01302 
01303     default:
01304       gcc_unreachable ();
01305     }
01306 
01307   return fixup;
01308 }
01309 
01310 /* Split a TImode or TFmode move instruction after reload.
01311    This is used by *movtf_internal and *movti_internal.  */
01312 void
01313 ia64_split_tmode_move (rtx operands[])
01314 {
01315   rtx in[2], out[2], insn;
01316   rtx fixup[2];
01317   bool dead = false;
01318   bool reversed = false;
01319 
01320   /* It is possible for reload to decide to overwrite a pointer with
01321      the value it points to.  In that case we have to do the loads in
01322      the appropriate order so that the pointer is not destroyed too
01323      early.  Also we must not generate a postmodify for that second
01324      load, or rws_access_regno will die.  */
01325   if (GET_CODE (operands[1]) == MEM
01326       && reg_overlap_mentioned_p (operands[0], operands[1]))
01327     {
01328       rtx base = XEXP (operands[1], 0);
01329       while (GET_CODE (base) != REG)
01330   base = XEXP (base, 0);
01331 
01332       if (REGNO (base) == REGNO (operands[0]))
01333   reversed = true;
01334       dead = true;
01335     }
01336   /* Another reason to do the moves in reversed order is if the first
01337      element of the target register pair is also the second element of
01338      the source register pair.  */
01339   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
01340       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
01341     reversed = true;
01342 
01343   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
01344   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
01345 
01346 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)       \
01347   if (GET_CODE (EXP) == MEM           \
01348       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY     \
01349     || GET_CODE (XEXP (EXP, 0)) == POST_INC     \
01350     || GET_CODE (XEXP (EXP, 0)) == POST_DEC))     \
01351     REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,      \
01352             XEXP (XEXP (EXP, 0), 0),  \
01353             REG_NOTES (INSN))
01354 
01355   insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
01356   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
01357   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
01358 
01359   insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
01360   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
01361   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
01362 
01363   if (fixup[0])
01364     emit_insn (fixup[0]);
01365   if (fixup[1])
01366     emit_insn (fixup[1]);
01367 
01368 #undef MAYBE_ADD_REG_INC_NOTE
01369 }
01370 
01371 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
01372    through memory plus an extra GR scratch register.  Except that you can
01373    either get the first from SECONDARY_MEMORY_NEEDED or the second from
01374    SECONDARY_RELOAD_CLASS, but not both.
01375 
01376    We got into problems in the first place by allowing a construct like
01377    (subreg:XF (reg:TI)), which we got from a union containing a long double.
01378    This solution attempts to prevent this situation from occurring.  When
01379    we see something like the above, we spill the inner register to memory.  */
01380 
01381 static rtx
01382 spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
01383 {
01384   if (GET_CODE (in) == SUBREG
01385       && GET_MODE (SUBREG_REG (in)) == TImode
01386       && GET_CODE (SUBREG_REG (in)) == REG)
01387     {
01388       rtx memt = assign_stack_temp (TImode, 16, 0);
01389       emit_move_insn (memt, SUBREG_REG (in));
01390       return adjust_address (memt, mode, 0);
01391     }
01392   else if (force && GET_CODE (in) == REG)
01393     {
01394       rtx memx = assign_stack_temp (mode, 16, 0);
01395       emit_move_insn (memx, in);
01396       return memx;
01397     }
01398   else
01399     return in;
01400 }
01401 
01402 /* Expand the movxf or movrf pattern (MODE says which) with the given
01403    OPERANDS, returning true if the pattern should then invoke
01404    DONE.  */
01405 
01406 bool
01407 ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
01408 {
01409   rtx op0 = operands[0];
01410 
01411   if (GET_CODE (op0) == SUBREG)
01412     op0 = SUBREG_REG (op0);
01413 
01414   /* We must support XFmode loads into general registers for stdarg/vararg,
01415      unprototyped calls, and a rare case where a long double is passed as
01416      an argument after a float HFA fills the FP registers.  We split them into
01417      DImode loads for convenience.  We also need to support XFmode stores
01418      for the last case.  This case does not happen for stdarg/vararg routines,
01419      because we do a block store to memory of unnamed arguments.  */
01420 
01421   if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
01422     {
01423       rtx out[2];
01424 
01425       /* We're hoping to transform everything that deals with XFmode
01426    quantities and GR registers early in the compiler.  */
01427       gcc_assert (!no_new_pseudos);
01428 
01429       /* Struct to register can just use TImode instead.  */
01430       if ((GET_CODE (operands[1]) == SUBREG
01431      && GET_MODE (SUBREG_REG (operands[1])) == TImode)
01432     || (GET_CODE (operands[1]) == REG
01433         && GR_REGNO_P (REGNO (operands[1]))))
01434   {
01435     rtx op1 = operands[1];
01436 
01437     if (GET_CODE (op1) == SUBREG)
01438       op1 = SUBREG_REG (op1);
01439     else
01440       op1 = gen_rtx_REG (TImode, REGNO (op1));
01441 
01442     emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
01443     return true;
01444   }
01445 
01446       if (GET_CODE (operands[1]) == CONST_DOUBLE)
01447   {
01448     /* Don't word-swap when reading in the constant.  */
01449     emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
01450         operand_subword (operands[1], WORDS_BIG_ENDIAN,
01451              0, mode));
01452     emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
01453         operand_subword (operands[1], !WORDS_BIG_ENDIAN,
01454              0, mode));
01455     return true;
01456   }
01457 
01458       /* If the quantity is in a register not known to be GR, spill it.  */
01459       if (register_operand (operands[1], mode))
01460   operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
01461 
01462       gcc_assert (GET_CODE (operands[1]) == MEM);
01463 
01464       /* Don't word-swap when reading in the value.  */
01465       out[0] = gen_rtx_REG (DImode, REGNO (op0));
01466       out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
01467 
01468       emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
01469       emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
01470       return true;
01471     }
01472 
01473   if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
01474     {
01475       /* We're hoping to transform everything that deals with XFmode
01476    quantities and GR registers early in the compiler.  */
01477       gcc_assert (!no_new_pseudos);
01478 
01479       /* Op0 can't be a GR_REG here, as that case is handled above.
01480    If op0 is a register, then we spill op1, so that we now have a
01481    MEM operand.  This requires creating an XFmode subreg of a TImode reg
01482    to force the spill.  */
01483       if (register_operand (operands[0], mode))
01484   {
01485     rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
01486     op1 = gen_rtx_SUBREG (mode, op1, 0);
01487     operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
01488   }
01489 
01490       else
01491   {
01492     rtx in[2];
01493 
01494     gcc_assert (GET_CODE (operands[0]) == MEM);
01495 
01496     /* Don't word-swap when writing out the value.  */
01497     in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
01498     in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
01499 
01500     emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
01501     emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
01502     return true;
01503   }
01504     }
01505 
01506   if (!reload_in_progress && !reload_completed)
01507     {
01508       operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
01509 
01510       if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
01511   {
01512     rtx memt, memx, in = operands[1];
01513     if (CONSTANT_P (in))
01514       in = validize_mem (force_const_mem (mode, in));
01515     if (GET_CODE (in) == MEM)
01516       memt = adjust_address (in, TImode, 0);
01517     else
01518       {
01519         memt = assign_stack_temp (TImode, 16, 0);
01520         memx = adjust_address (memt, mode, 0);
01521         emit_move_insn (memx, in);
01522       }
01523     emit_move_insn (op0, memt);
01524     return true;
01525   }
01526 
01527       if (!ia64_move_ok (operands[0], operands[1]))
01528   operands[1] = force_reg (mode, operands[1]);
01529     }
01530 
01531   return false;
01532 }
01533 
01534 /* Emit comparison instruction if necessary, returning the expression
01535    that holds the compare result in the proper mode.  */
01536 
01537 static GTY(()) rtx cmptf_libfunc;
01538 
01539 rtx
01540 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
01541 {
01542   rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
01543   rtx cmp;
01544 
01545   /* If we have a BImode input, then we already have a compare result, and
01546      do not need to emit another comparison.  */
01547   if (GET_MODE (op0) == BImode)
01548     {
01549       gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
01550       cmp = op0;
01551     }
01552   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
01553      magic number as its third argument, that indicates what to do.
01554      The return value is an integer to be compared against zero.  */
01555   else if (GET_MODE (op0) == TFmode)
01556     {
01557       enum qfcmp_magic {
01558   QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect.  */
01559   QCMP_UNORD = 2,
01560   QCMP_EQ = 4,
01561   QCMP_LT = 8,
01562   QCMP_GT = 16
01563       } magic;
01564       enum rtx_code ncode;
01565       rtx ret, insns;
01566       
01567       gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
01568       switch (code)
01569   {
01570     /* 1 = equal, 0 = not equal.  Equality operators do
01571        not raise FP_INVALID when given an SNaN operand.  */
01572   case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
01573   case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
01574     /* isunordered() from C99.  */
01575   case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
01576   case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
01577     /* Relational operators raise FP_INVALID when given
01578        an SNaN operand.  */
01579   case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
01580   case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
01581   case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
01582   case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
01583     /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
01584        Expanders for buneq etc. weuld have to be added to ia64.md
01585        for this to be useful.  */
01586   default: gcc_unreachable ();
01587   }
01588 
01589       start_sequence ();
01590 
01591       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
01592              op0, TFmode, op1, TFmode,
01593              GEN_INT (magic), DImode);
01594       cmp = gen_reg_rtx (BImode);
01595       emit_insn (gen_rtx_SET (VOIDmode, cmp,
01596             gen_rtx_fmt_ee (ncode, BImode,
01597                 ret, const0_rtx)));
01598 
01599       insns = get_insns ();
01600       end_sequence ();
01601 
01602       emit_libcall_block (insns, cmp, cmp,
01603         gen_rtx_fmt_ee (code, BImode, op0, op1));
01604       code = NE;
01605     }
01606   else
01607     {
01608       cmp = gen_reg_rtx (BImode);
01609       emit_insn (gen_rtx_SET (VOIDmode, cmp,
01610             gen_rtx_fmt_ee (code, BImode, op0, op1)));
01611       code = NE;
01612     }
01613 
01614   return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
01615 }
01616 
01617 /* Generate an integral vector comparison.  Return true if the condition has
01618    been reversed, and so the sense of the comparison should be inverted.  */
01619 
01620 static bool
01621 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
01622           rtx dest, rtx op0, rtx op1)
01623 {
01624   bool negate = false;
01625   rtx x;
01626 
01627   /* Canonicalize the comparison to EQ, GT, GTU.  */
01628   switch (code)
01629     {
01630     case EQ:
01631     case GT:
01632     case GTU:
01633       break;
01634 
01635     case NE:
01636     case LE:
01637     case LEU:
01638       code = reverse_condition (code);
01639       negate = true;
01640       break;
01641 
01642     case GE:
01643     case GEU:
01644       code = reverse_condition (code);
01645       negate = true;
01646       /* FALLTHRU */
01647 
01648     case LT:
01649     case LTU:
01650       code = swap_condition (code);
01651       x = op0, op0 = op1, op1 = x;
01652       break;
01653 
01654     default:
01655       gcc_unreachable ();
01656     }
01657 
01658   /* Unsigned parallel compare is not supported by the hardware.  Play some
01659      tricks to turn this into a signed comparison against 0.  */
01660   if (code == GTU)
01661     {
01662       switch (mode)
01663   {
01664   case V2SImode:
01665     {
01666       rtx t1, t2, mask;
01667 
01668       /* Perform a parallel modulo subtraction.  */
01669       t1 = gen_reg_rtx (V2SImode);
01670       emit_insn (gen_subv2si3 (t1, op0, op1));
01671 
01672       /* Extract the original sign bit of op0.  */
01673       mask = GEN_INT (-0x80000000);
01674       mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
01675       mask = force_reg (V2SImode, mask);
01676       t2 = gen_reg_rtx (V2SImode);
01677       emit_insn (gen_andv2si3 (t2, op0, mask));
01678 
01679       /* XOR it back into the result of the subtraction.  This results
01680          in the sign bit set iff we saw unsigned underflow.  */
01681       x = gen_reg_rtx (V2SImode);
01682       emit_insn (gen_xorv2si3 (x, t1, t2));
01683 
01684       code = GT;
01685       op0 = x;
01686       op1 = CONST0_RTX (mode);
01687     }
01688     break;
01689 
01690   case V8QImode:
01691   case V4HImode:
01692     /* Perform a parallel unsigned saturating subtraction.  */
01693     x = gen_reg_rtx (mode);
01694     emit_insn (gen_rtx_SET (VOIDmode, x,
01695           gen_rtx_US_MINUS (mode, op0, op1)));
01696 
01697     code = EQ;
01698     op0 = x;
01699     op1 = CONST0_RTX (mode);
01700     negate = !negate;
01701     break;
01702 
01703   default:
01704     gcc_unreachable ();
01705   }
01706     }
01707 
01708   x = gen_rtx_fmt_ee (code, mode, op0, op1);
01709   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
01710 
01711   return negate;
01712 }
01713 
01714 /* Emit an integral vector conditional move.  */
01715 
01716 void
01717 ia64_expand_vecint_cmov (rtx operands[])
01718 {
01719   enum machine_mode mode = GET_MODE (operands[0]);
01720   enum rtx_code code = GET_CODE (operands[3]);
01721   bool negate;
01722   rtx cmp, x, ot, of;
01723 
01724   cmp = gen_reg_rtx (mode);
01725   negate = ia64_expand_vecint_compare (code, mode, cmp,
01726                operands[4], operands[5]);
01727 
01728   ot = operands[1+negate];
01729   of = operands[2-negate];
01730 
01731   if (ot == CONST0_RTX (mode))
01732     {
01733       if (of == CONST0_RTX (mode))
01734   {
01735     emit_move_insn (operands[0], ot);
01736     return;
01737   }
01738 
01739       x = gen_rtx_NOT (mode, cmp);
01740       x = gen_rtx_AND (mode, x, of);
01741       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01742     }
01743   else if (of == CONST0_RTX (mode))
01744     {
01745       x = gen_rtx_AND (mode, cmp, ot);
01746       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01747     }
01748   else
01749     {
01750       rtx t, f;
01751 
01752       t = gen_reg_rtx (mode);
01753       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
01754       emit_insn (gen_rtx_SET (VOIDmode, t, x));
01755 
01756       f = gen_reg_rtx (mode);
01757       x = gen_rtx_NOT (mode, cmp);
01758       x = gen_rtx_AND (mode, x, operands[2-negate]);
01759       emit_insn (gen_rtx_SET (VOIDmode, f, x));
01760 
01761       x = gen_rtx_IOR (mode, t, f);
01762       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01763     }
01764 }
01765 
01766 /* Emit an integral vector min or max operation.  Return true if all done.  */
01767 
01768 bool
01769 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
01770          rtx operands[])
01771 {
01772   rtx xops[6];
01773 
01774   /* These four combinations are supported directly.  */
01775   if (mode == V8QImode && (code == UMIN || code == UMAX))
01776     return false;
01777   if (mode == V4HImode && (code == SMIN || code == SMAX))
01778     return false;
01779 
01780   /* This combination can be implemented with only saturating subtraction.  */
01781   if (mode == V4HImode && code == UMAX)
01782     {
01783       rtx x, tmp = gen_reg_rtx (mode);
01784 
01785       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
01786       emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
01787 
01788       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
01789       return true;
01790     }
01791 
01792   /* Everything else implemented via vector comparisons.  */
01793   xops[0] = operands[0];
01794   xops[4] = xops[1] = operands[1];
01795   xops[5] = xops[2] = operands[2];
01796 
01797   switch (code)
01798     {
01799     case UMIN:
01800       code = LTU;
01801       break;
01802     case UMAX:
01803       code = GTU;
01804       break;
01805     case SMIN:
01806       code = LT;
01807       break;
01808     case SMAX:
01809       code = GT;
01810       break;
01811     default:
01812       gcc_unreachable ();
01813     }
01814   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
01815 
01816   ia64_expand_vecint_cmov (xops);
01817   return true;
01818 }
01819 
01820 /* Emit an integral vector widening sum operations.  */
01821 
01822 void
01823 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
01824 {
01825   rtx l, h, x, s;
01826   enum machine_mode wmode, mode;
01827   rtx (*unpack_l) (rtx, rtx, rtx);
01828   rtx (*unpack_h) (rtx, rtx, rtx);
01829   rtx (*plus) (rtx, rtx, rtx);
01830 
01831   wmode = GET_MODE (operands[0]);
01832   mode = GET_MODE (operands[1]);
01833 
01834   switch (mode)
01835     {
01836     case V8QImode:
01837       unpack_l = gen_unpack1_l;
01838       unpack_h = gen_unpack1_h;
01839       plus = gen_addv4hi3;
01840       break;
01841     case V4HImode:
01842       unpack_l = gen_unpack2_l;
01843       unpack_h = gen_unpack2_h;
01844       plus = gen_addv2si3;
01845       break;
01846     default:
01847       gcc_unreachable ();
01848     }
01849 
01850   /* Fill in x with the sign extension of each element in op1.  */
01851   if (unsignedp)
01852     x = CONST0_RTX (mode);
01853   else
01854     {
01855       bool neg;
01856 
01857       x = gen_reg_rtx (mode);
01858 
01859       neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
01860           CONST0_RTX (mode));
01861       gcc_assert (!neg);
01862     }
01863 
01864   l = gen_reg_rtx (wmode);
01865   h = gen_reg_rtx (wmode);
01866   s = gen_reg_rtx (wmode);
01867 
01868   emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
01869   emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
01870   emit_insn (plus (s, l, operands[2]));
01871   emit_insn (plus (operands[0], h, s));
01872 }
01873 
01874 /* Emit a signed or unsigned V8QI dot product operation.  */
01875 
01876 void
01877 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
01878 {
01879   rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
01880 
01881   /* Fill in x1 and x2 with the sign extension of each element.  */
01882   if (unsignedp)
01883     x1 = x2 = CONST0_RTX (V8QImode);
01884   else
01885     {
01886       bool neg;
01887 
01888       x1 = gen_reg_rtx (V8QImode);
01889       x2 = gen_reg_rtx (V8QImode);
01890 
01891       neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
01892           CONST0_RTX (V8QImode));
01893       gcc_assert (!neg);
01894       neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
01895           CONST0_RTX (V8QImode));
01896       gcc_assert (!neg);
01897     }
01898 
01899   l1 = gen_reg_rtx (V4HImode);
01900   l2 = gen_reg_rtx (V4HImode);
01901   h1 = gen_reg_rtx (V4HImode);
01902   h2 = gen_reg_rtx (V4HImode);
01903 
01904   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
01905   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
01906   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
01907   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
01908 
01909   p1 = gen_reg_rtx (V2SImode);
01910   p2 = gen_reg_rtx (V2SImode);
01911   p3 = gen_reg_rtx (V2SImode);
01912   p4 = gen_reg_rtx (V2SImode);
01913   emit_insn (gen_pmpy2_r (p1, l1, l2));
01914   emit_insn (gen_pmpy2_l (p2, l1, l2));
01915   emit_insn (gen_pmpy2_r (p3, h1, h2));
01916   emit_insn (gen_pmpy2_l (p4, h1, h2));
01917 
01918   s1 = gen_reg_rtx (V2SImode);
01919   s2 = gen_reg_rtx (V2SImode);
01920   s3 = gen_reg_rtx (V2SImode);
01921   emit_insn (gen_addv2si3 (s1, p1, p2));
01922   emit_insn (gen_addv2si3 (s2, p3, p4));
01923   emit_insn (gen_addv2si3 (s3, s1, operands[3]));
01924   emit_insn (gen_addv2si3 (operands[0], s2, s3));
01925 }
01926 
01927 /* Emit the appropriate sequence for a call.  */
01928 
01929 void
01930 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
01931       int sibcall_p)
01932 {
01933   rtx insn, b0;
01934 
01935   addr = XEXP (addr, 0);
01936   addr = convert_memory_address (DImode, addr);
01937   b0 = gen_rtx_REG (DImode, R_BR (0));
01938 
01939   /* ??? Should do this for functions known to bind local too.  */
01940   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
01941     {
01942       if (sibcall_p)
01943   insn = gen_sibcall_nogp (addr);
01944       else if (! retval)
01945   insn = gen_call_nogp (addr, b0);
01946       else
01947   insn = gen_call_value_nogp (retval, addr, b0);
01948       insn = emit_call_insn (insn);
01949     }
01950   else
01951     {
01952       if (sibcall_p)
01953   insn = gen_sibcall_gp (addr);
01954       else if (! retval)
01955   insn = gen_call_gp (addr, b0);
01956       else
01957   insn = gen_call_value_gp (retval, addr, b0);
01958       insn = emit_call_insn (insn);
01959 
01960       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
01961     }
01962 
01963   if (sibcall_p)
01964     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
01965 }
01966 
01967 void
01968 ia64_reload_gp (void)
01969 {
01970   rtx tmp;
01971 
01972   if (current_frame_info.reg_save_gp)
01973     tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
01974   else
01975     {
01976       HOST_WIDE_INT offset;
01977 
01978       offset = (current_frame_info.spill_cfa_off
01979           + current_frame_info.spill_size);
01980       if (frame_pointer_needed)
01981         {
01982           tmp = hard_frame_pointer_rtx;
01983           offset = -offset;
01984         }
01985       else
01986         {
01987           tmp = stack_pointer_rtx;
01988           offset = current_frame_info.total_size - offset;
01989         }
01990 
01991       if (CONST_OK_FOR_I (offset))
01992         emit_insn (gen_adddi3 (pic_offset_table_rtx,
01993              tmp, GEN_INT (offset)));
01994       else
01995         {
01996           emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
01997           emit_insn (gen_adddi3 (pic_offset_table_rtx,
01998                pic_offset_table_rtx, tmp));
01999         }
02000 
02001       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
02002     }
02003 
02004   emit_move_insn (pic_offset_table_rtx, tmp);
02005 }
02006 
02007 void
02008 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
02009      rtx scratch_b, int noreturn_p, int sibcall_p)
02010 {
02011   rtx insn;
02012   bool is_desc = false;
02013 
02014   /* If we find we're calling through a register, then we're actually
02015      calling through a descriptor, so load up the values.  */
02016   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
02017     {
02018       rtx tmp;
02019       bool addr_dead_p;
02020 
02021       /* ??? We are currently constrained to *not* use peep2, because
02022    we can legitimately change the global lifetime of the GP
02023    (in the form of killing where previously live).  This is
02024    because a call through a descriptor doesn't use the previous
02025    value of the GP, while a direct call does, and we do not
02026    commit to either form until the split here.
02027 
02028    That said, this means that we lack precise life info for
02029    whether ADDR is dead after this call.  This is not terribly
02030    important, since we can fix things up essentially for free
02031    with the POST_DEC below, but it's nice to not use it when we
02032    can immediately tell it's not necessary.  */
02033       addr_dead_p = ((noreturn_p || sibcall_p
02034           || TEST_HARD_REG_BIT (regs_invalidated_by_call,
02035               REGNO (addr)))
02036          && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
02037 
02038       /* Load the code address into scratch_b.  */
02039       tmp = gen_rtx_POST_INC (Pmode, addr);
02040       tmp = gen_rtx_MEM (Pmode, tmp);
02041       emit_move_insn (scratch_r, tmp);
02042       emit_move_insn (scratch_b, scratch_r);
02043 
02044       /* Load the GP address.  If ADDR is not dead here, then we must
02045    revert the change made above via the POST_INCREMENT.  */
02046       if (!addr_dead_p)
02047   tmp = gen_rtx_POST_DEC (Pmode, addr);
02048       else
02049   tmp = addr;
02050       tmp = gen_rtx_MEM (Pmode, tmp);
02051       emit_move_insn (pic_offset_table_rtx, tmp);
02052 
02053       is_desc = true;
02054       addr = scratch_b;
02055     }
02056 
02057   if (sibcall_p)
02058     insn = gen_sibcall_nogp (addr);
02059   else if (retval)
02060     insn = gen_call_value_nogp (retval, addr, retaddr);
02061   else
02062     insn = gen_call_nogp (addr, retaddr);
02063   emit_call_insn (insn);
02064 
02065   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
02066     ia64_reload_gp ();
02067 }
02068 
02069 /* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
02070 
02071    This differs from the generic code in that we know about the zero-extending
02072    properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
02073    also know that ld.acq+cmpxchg.rel equals a full barrier.
02074 
02075    The loop we want to generate looks like
02076 
02077   cmp_reg = mem;
02078       label:
02079         old_reg = cmp_reg;
02080   new_reg = cmp_reg op val;
02081   cmp_reg = compare-and-swap(mem, old_reg, new_reg)
02082   if (cmp_reg != old_reg)
02083     goto label;
02084 
02085    Note that we only do the plain load from memory once.  Subsequent
02086    iterations use the value loaded by the compare-and-swap pattern.  */
02087 
02088 void
02089 ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
02090            rtx old_dst, rtx new_dst)
02091 {
02092   enum machine_mode mode = GET_MODE (mem);
02093   rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
02094   enum insn_code icode;
02095 
02096   /* Special case for using fetchadd.  */
02097   if ((mode == SImode || mode == DImode)
02098       && (code == PLUS || code == MINUS)
02099       && fetchadd_operand (val, mode))
02100     {
02101       if (code == MINUS)
02102   val = GEN_INT (-INTVAL (val));
02103 
02104       if (!old_dst)
02105         old_dst = gen_reg_rtx (mode);
02106 
02107       emit_insn (gen_memory_barrier ());
02108 
02109       if (mode == SImode)
02110   icode = CODE_FOR_fetchadd_acq_si;
02111       else
02112   icode = CODE_FOR_fetchadd_acq_di;
02113       emit_insn (GEN_FCN (icode) (old_dst, mem, val));
02114 
02115       if (new_dst)
02116   {
02117     new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
02118            true, OPTAB_WIDEN);
02119     if (new_reg != new_dst)
02120       emit_move_insn (new_dst, new_reg);
02121   }
02122       return;
02123     }
02124 
02125   /* Because of the volatile mem read, we get an ld.acq, which is the
02126      front half of the full barrier.  The end half is the cmpxchg.rel.  */
02127   gcc_assert (MEM_VOLATILE_P (mem));
02128 
02129   old_reg = gen_reg_rtx (DImode);
02130   cmp_reg = gen_reg_rtx (DImode);
02131   label = gen_label_rtx ();
02132 
02133   if (mode != DImode)
02134     {
02135       val = simplify_gen_subreg (DImode, val, mode, 0);
02136       emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
02137     }
02138   else
02139     emit_move_insn (cmp_reg, mem);
02140 
02141   emit_label (label);
02142 
02143   ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
02144   emit_move_insn (old_reg, cmp_reg);
02145   emit_move_insn (ar_ccv, cmp_reg);
02146 
02147   if (old_dst)
02148     emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
02149 
02150   new_reg = cmp_reg;
02151   if (code == NOT)
02152     {
02153       new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
02154       code = AND;
02155     }
02156   new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
02157          true, OPTAB_DIRECT);
02158 
02159   if (mode != DImode)
02160     new_reg = gen_lowpart (mode, new_reg);
02161   if (new_dst)
02162     emit_move_insn (new_dst, new_reg);
02163 
02164   switch (mode)
02165     {
02166     case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
02167     case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
02168     case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
02169     case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
02170     default:
02171       gcc_unreachable ();
02172     }
02173 
02174   emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
02175 
02176   emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
02177 }
02178 
02179 /* Begin the assembly file.  */
02180 
02181 static void
02182 ia64_file_start (void)
02183 {
02184   /* Variable tracking should be run after all optimizations which change order
02185      of insns.  It also needs a valid CFG.  This can't be done in
02186      ia64_override_options, because flag_var_tracking is finalized after
02187      that.  */
02188   ia64_flag_var_tracking = flag_var_tracking;
02189   flag_var_tracking = 0;
02190 
02191   default_file_start ();
02192   emit_safe_across_calls ();
02193 }
02194 
02195 void
02196 emit_safe_across_calls (void)
02197 {
02198   unsigned int rs, re;
02199   int out_state;
02200 
02201   rs = 1;
02202   out_state = 0;
02203   while (1)
02204     {
02205       while (rs < 64 && call_used_regs[PR_REG (rs)])
02206   rs++;
02207       if (rs >= 64)
02208   break;
02209       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
02210   continue;
02211       if (out_state == 0)
02212   {
02213     fputs ("\t.pred.safe_across_calls ", asm_out_file);
02214     out_state = 1;
02215   }
02216       else
02217   fputc (',', asm_out_file);
02218       if (re == rs + 1)
02219   fprintf (asm_out_file, "p%u", rs);
02220       else
02221   fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
02222       rs = re + 1;
02223     }
02224   if (out_state)
02225     fputc ('\n', asm_out_file);
02226 }
02227 
02228 /* Helper function for ia64_compute_frame_size: find an appropriate general
02229    register to spill some special register to.  SPECIAL_SPILL_MASK contains
02230    bits in GR0 to GR31 that have already been allocated by this routine.
02231    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
02232 
02233 static int
02234 find_gr_spill (int try_locals)
02235 {
02236   int regno;
02237 
02238   /* If this is a leaf function, first try an otherwise unused
02239      call-clobbered register.  */
02240   if (current_function_is_leaf)
02241     {
02242       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
02243   if (! regs_ever_live[regno]
02244       && call_used_regs[regno]
02245       && ! fixed_regs[regno]
02246       && ! global_regs[regno]
02247       && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
02248     {
02249       current_frame_info.gr_used_mask |= 1 << regno;
02250       return regno;
02251     }
02252     }
02253 
02254   if (try_locals)
02255     {
02256       regno = current_frame_info.n_local_regs;
02257       /* If there is a frame pointer, then we can't use loc79, because
02258    that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
02259    reg_name switching code in ia64_expand_prologue.  */
02260       if (regno < (80 - frame_pointer_needed))
02261   {
02262     current_frame_info.n_local_regs = regno + 1;
02263     return LOC_REG (0) + regno;
02264   }
02265     }
02266 
02267   /* Failed to find a general register to spill to.  Must use stack.  */
02268   return 0;
02269 }
02270 
02271 /* In order to make for nice schedules, we try to allocate every temporary
02272    to a different register.  We must of course stay away from call-saved,
02273    fixed, and global registers.  We must also stay away from registers
02274    allocated in current_frame_info.gr_used_mask, since those include regs
02275    used all through the prologue.
02276 
02277    Any register allocated here must be used immediately.  The idea is to
02278    aid scheduling, not to solve data flow problems.  */
02279 
02280 static int last_scratch_gr_reg;
02281 
02282 static int
02283 next_scratch_gr_reg (void)
02284 {
02285   int i, regno;
02286 
02287   for (i = 0; i < 32; ++i)
02288     {
02289       regno = (last_scratch_gr_reg + i + 1) & 31;
02290       if (call_used_regs[regno]
02291     && ! fixed_regs[regno]
02292     && ! global_regs[regno]
02293     && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
02294   {
02295     last_scratch_gr_reg = regno;
02296     return regno;
02297   }
02298     }
02299 
02300   /* There must be _something_ available.  */
02301   gcc_unreachable ();
02302 }
02303 
02304 /* Helper function for ia64_compute_frame_size, called through
02305    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
02306 
02307 static void
02308 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
02309 {
02310   unsigned int regno = REGNO (reg);
02311   if (regno < 32)
02312     {
02313       unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
02314       for (i = 0; i < n; ++i)
02315   current_frame_info.gr_used_mask |= 1 << (regno + i);
02316     }
02317 }
02318 
02319 /* Fill in current_frame_info with the frame layout of the current function
02320    (nothing is returned).  SIZE is the number of bytes of space needed for
02321    local variables.  The result is only cached once reload has completed.  */
02322 
02323 static void
02324 ia64_compute_frame_size (HOST_WIDE_INT size)
02325 {
02326   HOST_WIDE_INT total_size;
02327   HOST_WIDE_INT spill_size = 0;
02328   HOST_WIDE_INT extra_spill_size = 0;
02329   HOST_WIDE_INT pretend_args_size;
02330   HARD_REG_SET mask;
02331   int n_spilled = 0;
02332   int spilled_gr_p = 0;
02333   int spilled_fr_p = 0;
02334   unsigned int regno;
02335   int i;
02336 
02337   if (current_frame_info.initialized)
02338     return;  /* Layout already computed and cached.  */
02339 
02340   memset (&current_frame_info, 0, sizeof current_frame_info);
02341   CLEAR_HARD_REG_SET (mask);
02342 
02343   /* Don't allocate scratches to the return register.  */
02344   diddle_return_value (mark_reg_gr_used_mask, NULL);
02345 
02346   /* Don't allocate scratches to the EH scratch registers.  */
02347   if (cfun->machine->ia64_eh_epilogue_sp)
02348     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
02349   if (cfun->machine->ia64_eh_epilogue_bsp)
02350     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
02351 
02352   /* Find the size of the register stack frame.  We have only 80 local
02353      registers, because we reserve 8 for the inputs and 8 for the
02354      outputs.  */
02355 
02356   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
02357      since we'll be adjusting that down later.  */
02358   regno = LOC_REG (78) + ! frame_pointer_needed;
02359   for (; regno >= LOC_REG (0); regno--)
02360     if (regs_ever_live[regno])
02361       break;
02362   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
02363 
02364   /* For functions marked with the syscall_linkage attribute, we must mark
02365      all eight input registers as in use, so that locals aren't visible to
02366      the caller.  The same is done for functions with varargs.  */
02367 
02368   if (cfun->machine->n_varargs > 0
02369       || lookup_attribute ("syscall_linkage",
02370          TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
02371     current_frame_info.n_input_regs = 8;
02372   else
02373     {
02374       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
02375   if (regs_ever_live[regno])
02376     break;
02377       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
02378     }
02379 
02380   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
02381     if (regs_ever_live[regno])
02382       break;
02383   i = regno - OUT_REG (0) + 1;
02384 
02385 #ifndef PROFILE_HOOK
02386   /* When -p profiling, we need one output register for the mcount argument.
02387      Likewise for -a profiling for the bb_init_func argument.  For -ax
02388      profiling, we need two output registers for the two bb_init_trace_func
02389      arguments.  */
02390   if (current_function_profile)
02391     i = MAX (i, 1);
02392 #endif
02393   current_frame_info.n_output_regs = i;
02394 
02395   /* ??? No rotating register support yet.  */
02396   current_frame_info.n_rotate_regs = 0;
02397 
02398   /* Discover which registers need spilling, and how much room that
02399      will take.  Begin with floating point, general and branch registers,
02400      which will always wind up on the stack.  */
02401 
02402   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
02403     if (regs_ever_live[regno] && ! call_used_regs[regno])
02404       {
02405   SET_HARD_REG_BIT (mask, regno);
02406   spill_size += 16;
02407   n_spilled += 1;
02408   spilled_fr_p = 1;
02409       }
02410 
02411   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
02412     if (regs_ever_live[regno] && ! call_used_regs[regno])
02413       {
02414   SET_HARD_REG_BIT (mask, regno);
02415   spill_size += 8;
02416   n_spilled += 1;
02417   spilled_gr_p = 1;
02418       }
02419 
02420   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
02421     if (regs_ever_live[regno] && ! call_used_regs[regno])
02422       {
02423   SET_HARD_REG_BIT (mask, regno);
02424   spill_size += 8;
02425   n_spilled += 1;
02426       }
02427 
02428   /* Now come all special registers that might get saved in other
02429      general registers.  For each, find_gr_spill returning 0 means no
02430      register was available and a stack slot is accounted for instead.  */
02431   if (frame_pointer_needed)
02432     {
02433       current_frame_info.reg_fp = find_gr_spill (1);
02434       /* If we did not get a register, then we take LOC79.  This is guaranteed
02435    to be free, even if regs_ever_live is already set, because this is
02436    HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
02437    as we don't count loc79 above.  */
02438       if (current_frame_info.reg_fp == 0)
02439   {
02440     current_frame_info.reg_fp = LOC_REG (79);
02441     current_frame_info.n_local_regs++;
02442   }
02443     }
02444 
02445   if (! current_function_is_leaf)
02446     {
02447       /* Emit a save of BR0 if we call other functions.  Do this even
02448    if this function doesn't return, as EH depends on this to be
02449    able to unwind the stack.  */
02450       SET_HARD_REG_BIT (mask, BR_REG (0));
02451 
02452       current_frame_info.reg_save_b0 = find_gr_spill (1);
02453       if (current_frame_info.reg_save_b0 == 0)
02454   {
02455     extra_spill_size += 8;
02456     n_spilled += 1;
02457   }
02458 
02459       /* Similarly for ar.pfs.  */
02460       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
02461       current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
02462       if (current_frame_info.reg_save_ar_pfs == 0)
02463   {
02464     extra_spill_size += 8;
02465     n_spilled += 1;
02466   }
02467 
02468       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
02469    registers are clobbered, so we fall back to the stack.  */
02470       current_frame_info.reg_save_gp
02471   = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
02472       if (current_frame_info.reg_save_gp == 0)
02473   {
02474     SET_HARD_REG_BIT (mask, GR_REG (1));
02475     spill_size += 8;
02476     n_spilled += 1;
02477   }
02478     }
02479   else
02480     {
02481       if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
02482   {
02483     SET_HARD_REG_BIT (mask, BR_REG (0));
02484     extra_spill_size += 8;
02485     n_spilled += 1;
02486   }
02487 
02488       if (regs_ever_live[AR_PFS_REGNUM])
02489   {
02490     SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
02491     current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
02492     if (current_frame_info.reg_save_ar_pfs == 0)
02493       {
02494         extra_spill_size += 8;
02495         n_spilled += 1;
02496       }
02497   }
02498     }
02499 
02500   /* Unwind descriptor hackery: things are most efficient if we allocate
02501      consecutive GR save registers for RP, PFS, FP in that order. However,
02502      it is absolutely critical that FP get the only hard register that's
02503      guaranteed to be free, so we allocated it first.  If all three did
02504      happen to be allocated hard regs, and are consecutive, rearrange them
02505      into the preferred order now.  */
02506   if (current_frame_info.reg_fp != 0
02507       && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
02508       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
02509     {
02510       current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
02511       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
02512       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
02513     }
02514 
02515   /* See if we need to store the predicate register block.  */
02516   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
02517     if (regs_ever_live[regno] && ! call_used_regs[regno])
02518       break;
02519   if (regno <= PR_REG (63))
02520     {
02521       SET_HARD_REG_BIT (mask, PR_REG (0));
02522       current_frame_info.reg_save_pr = find_gr_spill (1);
02523       if (current_frame_info.reg_save_pr == 0)
02524   {
02525     extra_spill_size += 8;
02526     n_spilled += 1;
02527   }
02528 
02529       /* ??? Mark them all as used so that register renaming and such
02530    are free to use them.  */
02531       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
02532   regs_ever_live[regno] = 1;
02533     }
02534 
02535   /* If we're forced to use st8.spill, we're forced to save and restore
02536      ar.unat as well.  The check for existing liveness allows inline asm
02537      to touch ar.unat.  */
02538   if (spilled_gr_p || cfun->machine->n_varargs
02539       || regs_ever_live[AR_UNAT_REGNUM])
02540     {
02541       regs_ever_live[AR_UNAT_REGNUM] = 1;
02542       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
02543       current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
02544       if (current_frame_info.reg_save_ar_unat == 0)
02545   {
02546     extra_spill_size += 8;
02547     n_spilled += 1;
02548   }
02549     }
02550 
02551   if (regs_ever_live[AR_LC_REGNUM])
02552     {
02553       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
02554       current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
02555       if (current_frame_info.reg_save_ar_lc == 0)
02556   {
02557     extra_spill_size += 8;
02558     n_spilled += 1;
02559   }
02560     }
02561 
02562   /* If we have an odd number of words of pretend arguments written to
02563      the stack, then the FR save area will be unaligned.  We round the
02564      size of this area up to keep things 16 byte aligned.  */
02565   if (spilled_fr_p)
02566     pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
02567   else
02568     pretend_args_size = current_function_pretend_args_size;
02569 
02570   total_size = (spill_size + extra_spill_size + size + pretend_args_size
02571     + current_function_outgoing_args_size);
02572   total_size = IA64_STACK_ALIGN (total_size);
02573 
02574   /* We always use the 16-byte scratch area provided by the caller, but
02575      if we are a leaf function, there's no one to which we need to provide
02576      a scratch area.  */
02577   if (current_function_is_leaf)
02578     total_size = MAX (0, total_size - 16);
02579 
02580   current_frame_info.total_size = total_size;
02581   current_frame_info.spill_cfa_off = pretend_args_size - 16;
02582   current_frame_info.spill_size = spill_size;
02583   current_frame_info.extra_spill_size = extra_spill_size;
02584   COPY_HARD_REG_SET (current_frame_info.mask, mask);
02585   current_frame_info.n_spilled = n_spilled;
02586   current_frame_info.initialized = reload_completed;  /* Cache after reload.  */
02587 }
02588 
02589 /* Compute the initial difference between the specified pair of registers.  */
02590 
02591 HOST_WIDE_INT
02592 ia64_initial_elimination_offset (int from, int to)
02593 {
02594   HOST_WIDE_INT offset;
02595 
02596   ia64_compute_frame_size (get_frame_size ());
02597   switch (from)
02598     {
02599     case FRAME_POINTER_REGNUM:
02600       switch (to)
02601   {
02602   case HARD_FRAME_POINTER_REGNUM:
02603     if (current_function_is_leaf)
02604       offset = -current_frame_info.total_size;
02605     else
02606       offset = -(current_frame_info.total_size
02607            - current_function_outgoing_args_size - 16);
02608     break;
02609 
02610   case STACK_POINTER_REGNUM:
02611     if (current_function_is_leaf)
02612       offset = 0;
02613     else
02614       offset = 16 + current_function_outgoing_args_size;
02615     break;
02616 
02617   default:
02618     gcc_unreachable ();
02619   }
02620       break;
02621 
02622     case ARG_POINTER_REGNUM:
02623       /* Arguments start above the 16 byte save area, unless stdarg
02624    in which case we store through the 16 byte save area.  */
02625       switch (to)
02626   {
02627   case HARD_FRAME_POINTER_REGNUM:
02628     offset = 16 - current_function_pretend_args_size;
02629     break;
02630 
02631   case STACK_POINTER_REGNUM:
02632     offset = (current_frame_info.total_size
02633         + 16 - current_function_pretend_args_size);
02634     break;
02635 
02636   default:
02637     gcc_unreachable ();
02638   }
02639       break;
02640 
02641     default:
02642       gcc_unreachable ();
02643     }
02644 
02645   return offset;
02646 }
02647 
02648 /* If there are more than a trivial number of register spills, we use
02649    two interleaved iterators so that we can get two memory references
02650    per insn group.
02651 
02652    In order to simplify things in the prologue and epilogue expanders,
02653    we use helper functions to fix up the memory references after the
02654    fact with the appropriate offsets to a POST_MODIFY memory mode.
02655    The following data structure tracks the state of the two iterators
02656    while insns are being emitted.  */
02657 
02658 struct spill_fill_data
02659 {
02660   rtx init_after;   /* point at which to emit initializations */
02661   rtx init_reg[2];    /* initial base register */
02662   rtx iter_reg[2];    /* the iterator registers */
02663   rtx *prev_addr[2];    /* address of last memory use */
02664   rtx prev_insn[2];   /* the insn corresponding to prev_addr */
02665   HOST_WIDE_INT prev_off[2];  /* last offset */
02666   int n_iter;     /* number of iterators in use */
02667   int next_iter;    /* next iterator to use */
02668   unsigned int save_gr_used_mask;
02669 };
02670 
02671 static struct spill_fill_data spill_fill_data;
02672 
02673 static void
02674 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
02675 {
02676   int i;
02677 
02678   spill_fill_data.init_after = get_last_insn ();
02679   spill_fill_data.init_reg[0] = init_reg;
02680   spill_fill_data.init_reg[1] = init_reg;
02681   spill_fill_data.prev_addr[0] = NULL;
02682   spill_fill_data.prev_addr[1] = NULL;
02683   spill_fill_data.prev_insn[0] = NULL;
02684   spill_fill_data.prev_insn[1] = NULL;
02685   spill_fill_data.prev_off[0] = cfa_off;
02686   spill_fill_data.prev_off[1] = cfa_off;
02687   spill_fill_data.next_iter = 0;
02688   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
02689 
02690   spill_fill_data.n_iter = 1 + (n_spills > 2);
02691   for (i = 0; i < spill_fill_data.n_iter; ++i)
02692     {
02693       int regno = next_scratch_gr_reg ();
02694       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
02695       current_frame_info.gr_used_mask |= 1 << regno;
02696     }
02697 }
02698 
02699 static void
02700 finish_spill_pointers (void)
02701 {
02702   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
02703 }
02704 
02705 static rtx
02706 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
02707 {
02708   int iter = spill_fill_data.next_iter;
02709   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
02710   rtx disp_rtx = GEN_INT (disp);
02711   rtx mem;
02712 
02713   if (spill_fill_data.prev_addr[iter])
02714     {
02715       if (CONST_OK_FOR_N (disp))
02716   {
02717     *spill_fill_data.prev_addr[iter]
02718       = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
02719            gen_rtx_PLUS (DImode,
02720              spill_fill_data.iter_reg[iter],
02721              disp_rtx));
02722     REG_NOTES (spill_fill_data.prev_insn[iter])
02723       = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
02724          REG_NOTES (spill_fill_data.prev_insn[iter]));
02725   }
02726       else
02727   {
02728     /* ??? Could use register post_modify for loads.  */
02729     if (! CONST_OK_FOR_I (disp))
02730       {
02731         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02732         emit_move_insn (tmp, disp_rtx);
02733         disp_rtx = tmp;
02734       }
02735     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02736          spill_fill_data.iter_reg[iter], disp_rtx));
02737   }
02738     }
02739   /* Micro-optimization: if we've created a frame pointer, it's at
02740      CFA 0, which may allow the real iterator to be initialized lower,
02741      slightly increasing parallelism.  Also, if there are few saves
02742      it may eliminate the iterator entirely.  */
02743   else if (disp == 0
02744      && spill_fill_data.init_reg[iter] == stack_pointer_rtx
02745      && frame_pointer_needed)
02746     {
02747       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
02748       set_mem_alias_set (mem, get_varargs_alias_set ());
02749       return mem;
02750     }
02751   else
02752     {
02753       rtx seq, insn;
02754 
02755       if (disp == 0)
02756   seq = gen_movdi (spill_fill_data.iter_reg[iter],
02757        spill_fill_data.init_reg[iter]);
02758       else
02759   {
02760     start_sequence ();
02761 
02762     if (! CONST_OK_FOR_I (disp))
02763       {
02764         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02765         emit_move_insn (tmp, disp_rtx);
02766         disp_rtx = tmp;
02767       }
02768 
02769     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02770          spill_fill_data.init_reg[iter],
02771          disp_rtx));
02772 
02773     seq = get_insns ();
02774     end_sequence ();
02775   }
02776 
02777       /* Careful for being the first insn in a sequence.  */
02778       if (spill_fill_data.init_after)
02779   insn = emit_insn_after (seq, spill_fill_data.init_after);
02780       else
02781   {
02782     rtx first = get_insns ();
02783     if (first)
02784       insn = emit_insn_before (seq, first);
02785     else
02786       insn = emit_insn (seq);
02787   }
02788       spill_fill_data.init_after = insn;
02789 
02790       /* If DISP is 0, we may or may not have a further adjustment
02791    afterward.  If we do, then the load/store insn may be modified
02792    to be a post-modify.  If we don't, then this copy may be
02793    eliminated by copyprop_hardreg_forward, which makes this
02794    insn garbage, which runs afoul of the sanity check in
02795    propagate_one_insn.  So mark this insn as legal to delete.  */
02796       if (disp == 0)
02797   REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
02798                REG_NOTES (insn));
02799     }
02800 
02801   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
02802 
02803   /* ??? Not all of the spills are for varargs, but some of them are.
02804      The rest of the spills belong in an alias set of their own.  But
02805      it doesn't actually hurt to include them here.  */
02806   set_mem_alias_set (mem, get_varargs_alias_set ());
02807 
02808   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
02809   spill_fill_data.prev_off[iter] = cfa_off;
02810 
02811   if (++iter >= spill_fill_data.n_iter)
02812     iter = 0;
02813   spill_fill_data.next_iter = iter;
02814 
02815   return mem;
02816 }
02817 
02818 static void
02819 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
02820     rtx frame_reg)
02821 {
02822   int iter = spill_fill_data.next_iter;
02823   rtx mem, insn;
02824 
02825   mem = spill_restore_mem (reg, cfa_off);
02826   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
02827   spill_fill_data.prev_insn[iter] = insn;
02828 
02829   if (frame_reg)
02830     {
02831       rtx base;
02832       HOST_WIDE_INT off;
02833 
02834       RTX_FRAME_RELATED_P (insn) = 1;
02835 
02836       /* Don't even pretend that the unwind code can intuit its way
02837    through a pair of interleaved post_modify iterators.  Just
02838    provide the correct answer.  */
02839 
02840       if (frame_pointer_needed)
02841   {
02842     base = hard_frame_pointer_rtx;
02843     off = - cfa_off;
02844   }
02845       else
02846   {
02847     base = stack_pointer_rtx;
02848     off = current_frame_info.total_size - cfa_off;
02849   }
02850 
02851       REG_NOTES (insn)
02852   = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02853     gen_rtx_SET (VOIDmode,
02854            gen_rtx_MEM (GET_MODE (reg),
02855             plus_constant (base, off)),
02856            frame_reg),
02857     REG_NOTES (insn));
02858     }
02859 }
02860 
02861 static void
02862 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
02863 {
02864   int iter = spill_fill_data.next_iter;
02865   rtx insn;
02866 
02867   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
02868         GEN_INT (cfa_off)));
02869   spill_fill_data.prev_insn[iter] = insn;
02870 }
02871 
02872 /* Wrapper functions that discards the CONST_INT spill offset.  These
02873    exist so that we can give gr_spill/gr_fill the offset they need and
02874    use a consistent function interface.  */
02875 
02876 static rtx
02877 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02878 {
02879   return gen_movdi (dest, src);
02880 }
02881 
02882 static rtx
02883 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02884 {
02885   return gen_fr_spill (dest, src);
02886 }
02887 
02888 static rtx
02889 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02890 {
02891   return gen_fr_restore (dest, src);
02892 }
02893 
02894 /* Called after register allocation to add any instructions needed for the
02895    prologue.  Using a prologue insn is favored compared to putting all of the
02896    instructions in output_function_prologue(), since it allows the scheduler
02897    to intermix instructions with the saves of the caller saved registers.  In
02898    some cases, it might be necessary to emit a barrier instruction as the last
02899    insn to prevent such scheduling.
02900 
02901    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
02902    so that the debug info generation code can handle them properly.
02903 
02904    The register save area is layed out like so:
02905    cfa+16
02906   [ varargs spill area ]
02907   [ fr register spill area ]
02908   [ br register spill area ]
02909   [ ar register spill area ]
02910   [ pr register spill area ]
02911   [ gr register spill area ] */
02912 
02913 /* ??? Get inefficient code when the frame size is larger than can fit in an
02914    adds instruction.  */
02915 
02916 void
02917 ia64_expand_prologue (void)
02918 {
02919   rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
02920   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
02921   rtx reg, alt_reg;
02922 
02923   ia64_compute_frame_size (get_frame_size ());
02924   last_scratch_gr_reg = 15;
02925 
02926   /* If there is no epilogue, then we don't need some prologue insns.
02927      We need to avoid emitting the dead prologue insns, because flow
02928      will complain about them.  */
02929   if (optimize)
02930     {
02931       edge e;
02932       edge_iterator ei;
02933 
02934       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
02935   if ((e->flags & EDGE_FAKE) == 0
02936       && (e->flags & EDGE_FALLTHRU) != 0)
02937     break;
02938       epilogue_p = (e != NULL);
02939     }
02940   else
02941     epilogue_p = 1;
02942 
02943   /* Set the local, input, and output register names.  We need to do this
02944      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
02945      half.  If we use in/loc/out register names, then we get assembler errors
02946      in crtn.S because there is no alloc insn or regstk directive in there.  */
02947   if (! TARGET_REG_NAMES)
02948     {
02949       int inputs = current_frame_info.n_input_regs;
02950       int locals = current_frame_info.n_local_regs;
02951       int outputs = current_frame_info.n_output_regs;
02952 
02953       for (i = 0; i < inputs; i++)
02954   reg_names[IN_REG (i)] = ia64_reg_numbers[i];
02955       for (i = 0; i < locals; i++)
02956   reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
02957       for (i = 0; i < outputs; i++)
02958   reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
02959     }
02960 
02961   /* Set the frame pointer register name.  The regnum is logically loc79,
02962      but of course we'll not have allocated that many locals.  Rather than
02963      worrying about renumbering the existing rtxs, we adjust the name.  */
02964   /* ??? This code means that we can never use one local register when
02965      there is a frame pointer.  loc79 gets wasted in this case, as it is
02966      renamed to a register that will never be used.  See also the try_locals
02967      code in find_gr_spill.  */
02968   if (current_frame_info.reg_fp)
02969     {
02970       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
02971       reg_names[HARD_FRAME_POINTER_REGNUM]
02972   = reg_names[current_frame_info.reg_fp];
02973       reg_names[current_frame_info.reg_fp] = tmp;
02974     }
02975 
02976   /* We don't need an alloc instruction if we've used no outputs or locals.  */
02977   if (current_frame_info.n_local_regs == 0
02978       && current_frame_info.n_output_regs == 0
02979       && current_frame_info.n_input_regs <= current_function_args_info.int_regs
02980       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
02981     {
02982       /* If there is no alloc, but there are input registers used, then we
02983    need a .regstk directive.  */
02984       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
02985       ar_pfs_save_reg = NULL_RTX;
02986     }
02987   else
02988     {
02989       current_frame_info.need_regstk = 0;
02990 
02991       if (current_frame_info.reg_save_ar_pfs)
02992   regno = current_frame_info.reg_save_ar_pfs;
02993       else
02994   regno = next_scratch_gr_reg ();
02995       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
02996 
02997       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
02998            GEN_INT (current_frame_info.n_input_regs),
02999            GEN_INT (current_frame_info.n_local_regs),
03000            GEN_INT (current_frame_info.n_output_regs),
03001            GEN_INT (current_frame_info.n_rotate_regs)));
03002       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
03003     }
03004 
03005   /* Set up frame pointer, stack pointer, and spill iterators.  */
03006 
03007   n_varargs = cfun->machine->n_varargs;
03008   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
03009       stack_pointer_rtx, 0);
03010 
03011   if (frame_pointer_needed)
03012     {
03013       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
03014       RTX_FRAME_RELATED_P (insn) = 1;
03015     }
03016 
03017   if (current_frame_info.total_size != 0)
03018     {
03019       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
03020       rtx offset;
03021 
03022       if (CONST_OK_FOR_I (- current_frame_info.total_size))
03023   offset = frame_size_rtx;
03024       else
03025   {
03026     regno = next_scratch_gr_reg ();
03027     offset = gen_rtx_REG (DImode, regno);
03028     emit_move_insn (offset, frame_size_rtx);
03029   }
03030 
03031       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
03032             stack_pointer_rtx, offset));
03033 
03034       if (! frame_pointer_needed)
03035   {
03036     RTX_FRAME_RELATED_P (insn) = 1;
03037     if (GET_CODE (offset) != CONST_INT)
03038       {
03039         REG_NOTES (insn)
03040     = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
03041       gen_rtx_SET (VOIDmode,
03042              stack_pointer_rtx,
03043              gen_rtx_PLUS (DImode,
03044                stack_pointer_rtx,
03045                frame_size_rtx)),
03046       REG_NOTES (insn));
03047       }
03048   }
03049 
03050       /* ??? At this point we must generate a magic insn that appears to
03051    modify the stack pointer, the frame pointer, and all spill
03052    iterators.  This would allow the most scheduling freedom.  For
03053    now, just hard stop.  */
03054       emit_insn (gen_blockage ());
03055     }
03056 
03057   /* Must copy out ar.unat before doing any integer spills.  */
03058   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
03059     {
03060       if (current_frame_info.reg_save_ar_unat)
03061   ar_unat_save_reg
03062     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
03063       else
03064   {
03065     alt_regno = next_scratch_gr_reg ();
03066     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
03067     current_frame_info.gr_used_mask |= 1 << alt_regno;
03068   }
03069 
03070       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
03071       insn = emit_move_insn (ar_unat_save_reg, reg);
03072       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
03073 
03074       /* Even if we're not going to generate an epilogue, we still
03075    need to save the register so that EH works.  */
03076       if (! epilogue_p && current_frame_info.reg_save_ar_unat)
03077   emit_insn (gen_prologue_use (ar_unat_save_reg));
03078     }
03079   else
03080     ar_unat_save_reg = NULL_RTX;
03081 
03082   /* Spill all varargs registers.  Do this before spilling any GR registers,
03083      since we want the UNAT bits for the GR registers to override the UNAT
03084      bits from varargs, which we don't care about.  */
03085 
03086   cfa_off = -16;
03087   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
03088     {
03089       reg = gen_rtx_REG (DImode, regno);
03090       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
03091     }
03092 
03093   /* Locate the bottom of the register save area.  */
03094   cfa_off = (current_frame_info.spill_cfa_off
03095        + current_frame_info.spill_size
03096        + current_frame_info.extra_spill_size);
03097 
03098   /* Save the predicate register block either in a register or in memory.  */
03099   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
03100     {
03101       reg = gen_rtx_REG (DImode, PR_REG (0));
03102       if (current_frame_info.reg_save_pr != 0)
03103   {
03104     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
03105     insn = emit_move_insn (alt_reg, reg);
03106 
03107     /* ??? Denote pr spill/fill by a DImode move that modifies all
03108        64 hard registers.  */
03109     RTX_FRAME_RELATED_P (insn) = 1;
03110     REG_NOTES (insn)
03111       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
03112       gen_rtx_SET (VOIDmode, alt_reg, reg),
03113       REG_NOTES (insn));
03114 
03115     /* Even if we're not going to generate an epilogue, we still
03116        need to save the register so that EH works.  */
03117     if (! epilogue_p)
03118       emit_insn (gen_prologue_use (alt_reg));
03119   }
03120       else
03121   {
03122     alt_regno = next_scratch_gr_reg ();
03123     alt_reg = gen_rtx_REG (DImode, alt_regno);
03124     insn = emit_move_insn (alt_reg, reg);
03125     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
03126     cfa_off -= 8;
03127   }
03128     }
03129 
03130   /* Handle AR regs in numerical order.  All of them get special handling.  */
03131   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
03132       && current_frame_info.reg_save_ar_unat == 0)
03133     {
03134       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
03135       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
03136       cfa_off -= 8;
03137     }
03138 
03139   /* The alloc insn already copied ar.pfs into a general register.  The
03140      only thing we have to do now is copy that register to a stack slot
03141      if we'd not allocated a local register for the job.  */
03142   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
03143       && current_frame_info.reg_save_ar_pfs == 0)
03144     {
03145       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
03146       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
03147       cfa_off -= 8;
03148     }
03149 
03150   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
03151     {
03152       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
03153       if (current_frame_info.reg_save_ar_lc != 0)
03154   {
03155     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
03156     insn = emit_move_insn (alt_reg, reg);
03157     RTX_FRAME_RELATED_P (insn) = 1;
03158 
03159     /* Even if we're not going to generate an epilogue, we still
03160        need to save the register so that EH works.  */
03161     if (! epilogue_p)
03162       emit_insn (gen_prologue_use (alt_reg));
03163   }
03164       else
03165   {
03166     alt_regno = next_scratch_gr_reg ();
03167     alt_reg = gen_rtx_REG (DImode, alt_regno);
03168     emit_move_insn (alt_reg, reg);
03169     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
03170     cfa_off -= 8;
03171   }
03172     }
03173 
03174   /* Save the return pointer.  */
03175   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
03176     {
03177       reg = gen_rtx_REG (DImode, BR_REG (0));
03178       if (current_frame_info.reg_save_b0 != 0)
03179   {
03180     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
03181     insn = emit_move_insn (alt_reg, reg);
03182     RTX_FRAME_RELATED_P (insn) = 1;
03183 
03184     /* Even if we're not going to generate an epilogue, we still
03185        need to save the register so that EH works.  */
03186     if (! epilogue_p)
03187       emit_insn (gen_prologue_use (alt_reg));
03188   }
03189       else
03190   {
03191     alt_regno = next_scratch_gr_reg ();
03192     alt_reg = gen_rtx_REG (DImode, alt_regno);
03193     emit_move_insn (alt_reg, reg);
03194     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
03195     cfa_off -= 8;
03196   }
03197     }
03198 
03199   if (current_frame_info.reg_save_gp)
03200     {
03201       insn = emit_move_insn (gen_rtx_REG (DImode,
03202             current_frame_info.reg_save_gp),
03203            pic_offset_table_rtx);
03204       /* We don't know for sure yet if this is actually needed, since
03205    we've not split the PIC call patterns.  If all of the calls
03206    are indirect, and not followed by any uses of the gp, then
03207    this save is dead.  Allow it to go away.  */
03208       REG_NOTES (insn)
03209   = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
03210     }
03211 
03212   /* We should now be at the base of the gr/br/fr spill area.  */
03213   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
03214         + current_frame_info.spill_size));
03215 
03216   /* Spill all general registers.  */
03217   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
03218     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03219       {
03220   reg = gen_rtx_REG (DImode, regno);
03221   do_spill (gen_gr_spill, reg, cfa_off, reg);
03222   cfa_off -= 8;
03223       }
03224 
03225   /* Spill the rest of the BR registers.  */
03226   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
03227     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03228       {
03229   alt_regno = next_scratch_gr_reg ();
03230   alt_reg = gen_rtx_REG (DImode, alt_regno);
03231   reg = gen_rtx_REG (DImode, regno);
03232   emit_move_insn (alt_reg, reg);
03233   do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
03234   cfa_off -= 8;
03235       }
03236 
03237   /* Align the frame and spill all FR registers.  */
03238   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
03239     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03240       {
03241         gcc_assert (!(cfa_off & 15));
03242   reg = gen_rtx_REG (XFmode, regno);
03243   do_spill (gen_fr_spill_x, reg, cfa_off, reg);
03244   cfa_off -= 16;
03245       }
03246 
03247   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
03248 
03249   finish_spill_pointers ();
03250 }
03251 
03252 /* Called after register allocation to add any instructions needed for the
03253    epilogue.  Using an epilogue insn is favored compared to putting all of the
03254    instructions in output_function_prologue(), since it allows the scheduler
03255    to intermix instructions with the saves of the caller saved registers.  In
03256    some cases, it might be necessary to emit a barrier instruction as the last
03257    insn to prevent such scheduling.  */
03258 
03259 void
03260 ia64_expand_epilogue (int sibcall_p)
03261 {
03262   rtx insn, reg, alt_reg, ar_unat_save_reg;
03263   int regno, alt_regno, cfa_off;
03264 
03265   ia64_compute_frame_size (get_frame_size ());
03266 
03267   /* If there is a frame pointer, then we use it instead of the stack
03268      pointer, so that the stack pointer does not need to be valid when
03269      the epilogue starts.  See EXIT_IGNORE_STACK.  */
03270   if (frame_pointer_needed)
03271     setup_spill_pointers (current_frame_info.n_spilled,
03272         hard_frame_pointer_rtx, 0);
03273   else
03274     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
03275         current_frame_info.total_size);
03276 
03277   if (current_frame_info.total_size != 0)
03278     {
03279       /* ??? At this point we must generate a magic insn that appears to
03280          modify the spill iterators and the frame pointer.  This would
03281    allow the most scheduling freedom.  For now, just hard stop.  */
03282       emit_insn (gen_blockage ());
03283     }
03284 
03285   /* Locate the bottom of the register save area.  */
03286   cfa_off = (current_frame_info.spill_cfa_off
03287        + current_frame_info.spill_size
03288        + current_frame_info.extra_spill_size);
03289 
03290   /* Restore the predicate registers.  */
03291   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
03292     {
03293       if (current_frame_info.reg_save_pr != 0)
03294   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
03295       else
03296   {
03297     alt_regno = next_scratch_gr_reg ();
03298     alt_reg = gen_rtx_REG (DImode, alt_regno);
03299     do_restore (gen_movdi_x, alt_reg, cfa_off);
03300     cfa_off -= 8;
03301   }
03302       reg = gen_rtx_REG (DImode, PR_REG (0));
03303       emit_move_insn (reg, alt_reg);
03304     }
03305 
03306   /* Restore the application registers.  */
03307 
03308   /* Load the saved unat from the stack, but do not restore it until
03309      after the GRs have been restored.  */
03310   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
03311     {
03312       if (current_frame_info.reg_save_ar_unat != 0)
03313         ar_unat_save_reg
03314     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
03315       else
03316   {
03317     alt_regno = next_scratch_gr_reg ();
03318     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
03319     current_frame_info.gr_used_mask |= 1 << alt_regno;
03320     do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
03321     cfa_off -= 8;
03322   }
03323     }
03324   else
03325     ar_unat_save_reg = NULL_RTX;
03326 
03327   if (current_frame_info.reg_save_ar_pfs != 0)
03328     {
03329       alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
03330       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
03331       emit_move_insn (reg, alt_reg);
03332     }
03333   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
03334     {
03335       alt_regno = next_scratch_gr_reg ();
03336       alt_reg = gen_rtx_REG (DImode, alt_regno);
03337       do_restore (gen_movdi_x, alt_reg, cfa_off);
03338       cfa_off -= 8;
03339       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
03340       emit_move_insn (reg, alt_reg);
03341     }
03342 
03343   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
03344     {
03345       if (current_frame_info.reg_save_ar_lc != 0)
03346   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
03347       else
03348   {
03349     alt_regno = next_scratch_gr_reg ();
03350     alt_reg = gen_rtx_REG (DImode, alt_regno);
03351     do_restore (gen_movdi_x, alt_reg, cfa_off);
03352     cfa_off -= 8;
03353   }
03354       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
03355       emit_move_insn (reg, alt_reg);
03356     }
03357 
03358   /* Restore the return pointer.  */
03359   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
03360     {
03361       if (current_frame_info.reg_save_b0 != 0)
03362   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
03363       else
03364   {
03365     alt_regno = next_scratch_gr_reg ();
03366     alt_reg = gen_rtx_REG (DImode, alt_regno);
03367     do_restore (gen_movdi_x, alt_reg, cfa_off);
03368     cfa_off -= 8;
03369   }
03370       reg = gen_rtx_REG (DImode, BR_REG (0));
03371       emit_move_insn (reg, alt_reg);
03372     }
03373 
03374   /* We should now be at the base of the gr/br/fr spill area.  */
03375   gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
03376         + current_frame_info.spill_size));
03377 
03378   /* The GP may be stored on the stack in the prologue, but it's
03379      never restored in the epilogue.  Skip the stack slot.  */
03380   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
03381     cfa_off -= 8;
03382 
03383   /* Restore all general registers.  */
03384   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
03385     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03386       {
03387   reg = gen_rtx_REG (DImode, regno);
03388   do_restore (gen_gr_restore, reg, cfa_off);
03389   cfa_off -= 8;
03390       }
03391 
03392   /* Restore the branch registers.  */
03393   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
03394     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03395       {
03396   alt_regno = next_scratch_gr_reg ();
03397   alt_reg = gen_rtx_REG (DImode, alt_regno);
03398   do_restore (gen_movdi_x, alt_reg, cfa_off);
03399   cfa_off -= 8;
03400   reg = gen_rtx_REG (DImode, regno);
03401   emit_move_insn (reg, alt_reg);
03402       }
03403 
03404   /* Restore floating point registers.  */
03405   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
03406     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03407       {
03408         gcc_assert (!(cfa_off & 15));
03409   reg = gen_rtx_REG (XFmode, regno);
03410   do_restore (gen_fr_restore_x, reg, cfa_off);
03411   cfa_off -= 16;
03412       }
03413 
03414   /* Restore ar.unat for real.  */
03415   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
03416     {
03417       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
03418       emit_move_insn (reg, ar_unat_save_reg);
03419     }
03420 
03421   gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
03422 
03423   finish_spill_pointers ();
03424 
03425   if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
03426     {
03427       /* ??? At this point we must generate a magic insn that appears to
03428          modify the spill iterators, the stack pointer, and the frame
03429    pointer.  This would allow the most scheduling freedom.  For now,
03430    just hard stop.  */
03431       emit_insn (gen_blockage ());
03432     }
03433 
03434   if (cfun->machine->ia64_eh_epilogue_sp)
03435     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
03436   else if (frame_pointer_needed)
03437     {
03438       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
03439       RTX_FRAME_RELATED_P (insn) = 1;
03440     }
03441   else if (current_frame_info.total_size)
03442     {
03443       rtx offset, frame_size_rtx;
03444 
03445       frame_size_rtx = GEN_INT (current_frame_info.total_size);
03446       if (CONST_OK_FOR_I (current_frame_info.total_size))
03447   offset = frame_size_rtx;
03448       else
03449   {
03450     regno = next_scratch_gr_reg ();
03451     offset = gen_rtx_REG (DImode, regno);
03452     emit_move_insn (offset, frame_size_rtx);
03453   }
03454 
03455       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
03456             offset));
03457 
03458       RTX_FRAME_RELATED_P (insn) = 1;
03459       if (GET_CODE (offset) != CONST_INT)
03460   {
03461     REG_NOTES (insn)
03462       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
03463       gen_rtx_SET (VOIDmode,
03464              stack_pointer_rtx,
03465              gen_rtx_PLUS (DImode,
03466                stack_pointer_rtx,
03467                frame_size_rtx)),
03468       REG_NOTES (insn));
03469   }
03470     }
03471 
03472   if (cfun->machine->ia64_eh_epilogue_bsp)
03473     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
03474 
03475   if (! sibcall_p)
03476     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
03477   else
03478     {
03479       int fp = GR_REG (2);
03480       /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
03481    first available call clobbered register.  If there was a frame_pointer
03482    register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
03483    so we have to make sure we're using the string "r2" when emitting
03484    the register name for the assembler.  */
03485       if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
03486   fp = HARD_FRAME_POINTER_REGNUM;
03487 
03488       /* We must emit an alloc to force the input registers to become output
03489    registers.  Otherwise, if the callee tries to pass its parameters
03490    through to another call without an intervening alloc, then these
03491    values get lost.  */
03492       /* ??? We don't need to preserve all input registers.  We only need to
03493    preserve those input registers used as arguments to the sibling call.
03494    It is unclear how to compute that number here.  */
03495       if (current_frame_info.n_input_regs != 0)
03496   {
03497     rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
03498     insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
03499         const0_rtx, const0_rtx,
03500         n_inputs, const0_rtx));
03501     RTX_FRAME_RELATED_P (insn) = 1;
03502   }
03503     }
03504 }
03505 
03506 /* Return 1 if br.ret can do all the work required to return from a
03507    function.  */
03508 
03509 int
03510 ia64_direct_return (void)
03511 {
03512   if (reload_completed && ! frame_pointer_needed)
03513     {
03514       ia64_compute_frame_size (get_frame_size ());
03515 
03516       return (current_frame_info.total_size == 0
03517         && current_frame_info.n_spilled == 0
03518         && current_frame_info.reg_save_b0 == 0
03519         && current_frame_info.reg_save_pr == 0
03520         && current_frame_info.reg_save_ar_pfs == 0
03521         && current_frame_info.reg_save_ar_unat == 0
03522         && current_frame_info.reg_save_ar_lc == 0);
03523     }
03524   return 0;
03525 }
03526 
03527 /* Return the magic cookie that we use to hold the return address
03528    during early compilation.  */
03529 
03530 rtx
03531 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
03532 {
03533   if (count != 0)
03534     return NULL;
03535   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
03536 }
03537 
03538 /* Split this value after reload, now that we know where the return
03539    address is saved.  */
03540 
03541 void
03542 ia64_split_return_addr_rtx (rtx dest)
03543 {
03544   rtx src;
03545 
03546   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
03547     {
03548       if (current_frame_info.reg_save_b0 != 0)
03549   src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
03550       else
03551   {
03552     HOST_WIDE_INT off;
03553     unsigned int regno;
03554 
03555     /* Compute offset from CFA for BR0.  */
03556     /* ??? Must be kept in sync with ia64_expand_prologue.  */
03557     off = (current_frame_info.spill_cfa_off
03558      + current_frame_info.spill_size);
03559     for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
03560       if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03561         off -= 8;
03562 
03563     /* Convert CFA offset to a register based offset.  */
03564     if (frame_pointer_needed)
03565       src = hard_frame_pointer_rtx;
03566     else
03567       {
03568         src = stack_pointer_rtx;
03569         off += current_frame_info.total_size;
03570       }
03571 
03572     /* Load address into scratch register.  */
03573     if (CONST_OK_FOR_I (off))
03574       emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
03575     else
03576       {
03577         emit_move_insn (dest, GEN_INT (off));
03578         emit_insn (gen_adddi3 (dest, src, dest));
03579       }
03580 
03581     src = gen_rtx_MEM (Pmode, dest);
03582   }
03583     }
03584   else
03585     src = gen_rtx_REG (DImode, BR_REG (0));
03586 
03587   emit_move_insn (dest, src);
03588 }
03589 
03590 int
03591 ia64_hard_regno_rename_ok (int from, int to)
03592 {
03593   /* Don't clobber any of the registers we reserved for the prologue.  */
03594   if (to == current_frame_info.reg_fp
03595       || to == current_frame_info.reg_save_b0
03596       || to == current_frame_info.reg_save_pr
03597       || to == current_frame_info.reg_save_ar_pfs
03598       || to == current_frame_info.reg_save_ar_unat
03599       || to == current_frame_info.reg_save_ar_lc)
03600     return 0;
03601 
03602   if (from == current_frame_info.reg_fp
03603       || from == current_frame_info.reg_save_b0
03604       || from == current_frame_info.reg_save_pr
03605       || from == current_frame_info.reg_save_ar_pfs
03606       || from == current_frame_info.reg_save_ar_unat
03607       || from == current_frame_info.reg_save_ar_lc)
03608     return 0;
03609 
03610   /* Don't use output registers outside the register frame.  */
03611   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
03612     return 0;
03613 
03614   /* Retain even/oddness on predicate register pairs.  */
03615   if (PR_REGNO_P (from) && PR_REGNO_P (to))
03616     return (from & 1) == (to & 1);
03617 
03618   return 1;
03619 }
03620 
03621 /* Target hook for assembling integer objects.  Handle word-sized
03622    aligned objects and detect the cases when @fptr is needed.  */
03623 
03624 static bool
03625 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
03626 {
03627   if (size == POINTER_SIZE / BITS_PER_UNIT
03628       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
03629       && GET_CODE (x) == SYMBOL_REF
03630       && SYMBOL_REF_FUNCTION_P (x))
03631     {
03632       static const char * const directive[2][2] = {
03633     /* 64-bit pointer */  /* 32-bit pointer */
03634   { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
03635   { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}  /* aligned */
03636       };
03637       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
03638       output_addr_const (asm_out_file, x);
03639       fputs (")\n", asm_out_file);
03640       return true;
03641     }
03642   return default_assemble_integer (x, size, aligned_p);
03643 }
03644 
03645 /* Emit the function prologue.  */
03646 
03647 static void
03648 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
03649 {
03650   int mask, grsave, grsave_prev;
03651 
03652   if (current_frame_info.need_regstk)
03653     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
03654        current_frame_info.n_input_regs,
03655        current_frame_info.n_local_regs,
03656        current_frame_info.n_output_regs,
03657        current_frame_info.n_rotate_regs);
03658 
03659   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03660     return;
03661 
03662   /* Emit the .prologue directive.  */
03663 
03664   mask = 0;
03665   grsave = grsave_prev = 0;
03666   if (current_frame_info.reg_save_b0 != 0)
03667     {
03668       mask |= 8;
03669       grsave = grsave_prev = current_frame_info.reg_save_b0;
03670     }
03671   if (current_frame_info.reg_save_ar_pfs != 0
03672       && (grsave_prev == 0
03673     || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
03674     {
03675       mask |= 4;
03676       if (grsave_prev == 0)
03677   grsave = current_frame_info.reg_save_ar_pfs;
03678       grsave_prev = current_frame_info.reg_save_ar_pfs;
03679     }
03680   if (current_frame_info.reg_fp != 0
03681       && (grsave_prev == 0
03682     || current_frame_info.reg_fp == grsave_prev + 1))
03683     {
03684       mask |= 2;
03685       if (grsave_prev == 0)
03686   grsave = HARD_FRAME_POINTER_REGNUM;
03687       grsave_prev = current_frame_info.reg_fp;
03688     }
03689   if (current_frame_info.reg_save_pr != 0
03690       && (grsave_prev == 0
03691     || current_frame_info.reg_save_pr == grsave_prev + 1))
03692     {
03693       mask |= 1;
03694       if (grsave_prev == 0)
03695   grsave = current_frame_info.reg_save_pr;
03696     }
03697 
03698   if (mask && TARGET_GNU_AS)
03699     fprintf (file, "\t.prologue %d, %d\n", mask,
03700        ia64_dbx_register_number (grsave));
03701   else
03702     fputs ("\t.prologue\n", file);
03703 
03704   /* Emit a .spill directive, if necessary, to relocate the base of
03705      the register spill area.  */
03706   if (current_frame_info.spill_cfa_off != -16)
03707     fprintf (file, "\t.spill %ld\n",
03708        (long) (current_frame_info.spill_cfa_off
03709          + current_frame_info.spill_size));
03710 }
03711 
03712 /* Emit the .body directive at the scheduled end of the prologue.  */
03713 
03714 static void
03715 ia64_output_function_end_prologue (FILE *file)
03716 {
03717   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03718     return;
03719 
03720   fputs ("\t.body\n", file);
03721 }
03722 
03723 /* Emit the function epilogue.  */
03724 
03725 static void
03726 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
03727              HOST_WIDE_INT size ATTRIBUTE_UNUSED)
03728 {
03729   int i;
03730 
03731   if (current_frame_info.reg_fp)
03732     {
03733       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
03734       reg_names[HARD_FRAME_POINTER_REGNUM]
03735   = reg_names[current_frame_info.reg_fp];
03736       reg_names[current_frame_info.reg_fp] = tmp;
03737     }
03738   if (! TARGET_REG_NAMES)
03739     {
03740       for (i = 0; i < current_frame_info.n_input_regs; i++)
03741   reg_names[IN_REG (i)] = ia64_input_reg_names[i];
03742       for (i = 0; i < current_frame_info.n_local_regs; i++)
03743   reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
03744       for (i = 0; i < current_frame_info.n_output_regs; i++)
03745   reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
03746     }
03747 
03748   current_frame_info.initialized = 0;
03749 }
03750 
03751 int
03752 ia64_dbx_register_number (int regno)
03753 {
03754   /* In ia64_expand_prologue we quite literally renamed the frame pointer
03755      from its home at loc79 to something inside the register frame.  We
03756      must perform the same renumbering here for the debug info.  */
03757   if (current_frame_info.reg_fp)
03758     {
03759       if (regno == HARD_FRAME_POINTER_REGNUM)
03760   regno = current_frame_info.reg_fp;
03761       else if (regno == current_frame_info.reg_fp)
03762   regno = HARD_FRAME_POINTER_REGNUM;
03763     }
03764 
03765   if (IN_REGNO_P (regno))
03766     return 32 + regno - IN_REG (0);
03767   else if (LOC_REGNO_P (regno))
03768     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
03769   else if (OUT_REGNO_P (regno))
03770     return (32 + current_frame_info.n_input_regs
03771       + current_frame_info.n_local_regs + regno - OUT_REG (0));
03772   else
03773     return regno;
03774 }
03775 
03776 void
03777 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
03778 {
03779   rtx addr_reg, eight = GEN_INT (8);
03780 
03781   /* The Intel assembler requires that the global __ia64_trampoline symbol
03782      be declared explicitly */
03783   if (!TARGET_GNU_AS)
03784     {
03785       static bool declared_ia64_trampoline = false;
03786 
03787       if (!declared_ia64_trampoline)
03788   {
03789     declared_ia64_trampoline = true;
03790     (*targetm.asm_out.globalize_label) (asm_out_file,
03791                 "__ia64_trampoline");
03792   }
03793     }
03794 
03795   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
03796   addr = convert_memory_address (Pmode, addr);
03797   fnaddr = convert_memory_address (Pmode, fnaddr);
03798   static_chain = convert_memory_address (Pmode, static_chain);
03799 
03800   /* Load up our iterator.  */
03801   addr_reg = gen_reg_rtx (Pmode);
03802   emit_move_insn (addr_reg, addr);
03803 
03804   /* The first two words are the fake descriptor:
03805      __ia64_trampoline, ADDR+16.  */
03806   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03807       gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
03808   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03809 
03810   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03811       copy_to_reg (plus_constant (addr, 16)));
03812   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03813 
03814   /* The third word is the target descriptor.  */
03815   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
03816   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03817 
03818   /* The fourth word is the static chain.  */
03819   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
03820 }
03821 
03822 /* Do any needed setup for a variadic function.  CUM has not been updated
03823    for the last named argument which has type TYPE and mode MODE.
03824 
03825    We generate the actual spill instructions during prologue generation.  */
03826 
03827 static void
03828 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
03829            tree type, int * pretend_size,
03830            int second_time ATTRIBUTE_UNUSED)
03831 {
03832   CUMULATIVE_ARGS next_cum = *cum;
03833 
03834   /* Skip the current argument.  */
03835   ia64_function_arg_advance (&next_cum, mode, type, 1);
03836 
03837   if (next_cum.words < MAX_ARGUMENT_SLOTS)
03838     {
03839       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
03840       *pretend_size = n * UNITS_PER_WORD;
03841       cfun->machine->n_varargs = n;
03842     }
03843 }
03844 
03845 /* Check whether TYPE is a homogeneous floating point aggregate.  If
03846    it is, return the mode of the floating point type that appears
03847    in all leafs.  If it is not, return VOIDmode.
03848 
03849    An aggregate is a homogeneous floating point aggregate is if all
03850    fields/elements in it have the same floating point type (e.g,
03851    SFmode).  128-bit quad-precision floats are excluded.
03852 
03853    Variable sized aggregates should never arrive here, since we should
03854    have already decided to pass them by reference.  Top-level zero-sized
03855    aggregates are excluded because our parallels crash the middle-end.  */
03856 
03857 static enum machine_mode
03858 hfa_element_mode (tree type, bool nested)
03859 {
03860   enum machine_mode element_mode = VOIDmode;
03861   enum machine_mode mode;
03862   enum tree_code code = TREE_CODE (type);
03863   int know_element_mode = 0;
03864   tree t;
03865 
03866   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
03867     return VOIDmode;
03868 
03869   switch (code)
03870     {
03871     case VOID_TYPE: case INTEGER_TYPE:  case ENUMERAL_TYPE:
03872     case BOOLEAN_TYPE:  case POINTER_TYPE:
03873     case OFFSET_TYPE: case REFERENCE_TYPE:  case METHOD_TYPE:
03874     case LANG_TYPE:   case FUNCTION_TYPE:
03875       return VOIDmode;
03876 
03877       /* Fortran complex types are supposed to be HFAs, so we need to handle
03878    gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
03879    types though.  */
03880     case COMPLEX_TYPE:
03881       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
03882     && TYPE_MODE (type) != TCmode)
03883   return GET_MODE_INNER (TYPE_MODE (type));
03884       else
03885   return VOIDmode;
03886 
03887     case REAL_TYPE:
03888       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
03889    mode if this is contained within an aggregate.  */
03890       if (nested && TYPE_MODE (type) != TFmode)
03891   return TYPE_MODE (type);
03892       else
03893   return VOIDmode;
03894 
03895     case ARRAY_TYPE:
03896       return hfa_element_mode (TREE_TYPE (type), 1);
03897 
03898     case RECORD_TYPE:
03899     case UNION_TYPE:
03900     case QUAL_UNION_TYPE:
03901       for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
03902   {
03903     if (TREE_CODE (t) != FIELD_DECL)
03904       continue;
03905 
03906     mode = hfa_element_mode (TREE_TYPE (t), 1);
03907     if (know_element_mode)
03908       {
03909         if (mode != element_mode)
03910     return VOIDmode;
03911       }
03912     else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
03913       return VOIDmode;
03914     else
03915       {
03916         know_element_mode = 1;
03917         element_mode = mode;
03918       }
03919   }
03920       return element_mode;
03921 
03922     default:
03923       /* If we reach here, we probably have some front-end specific type
03924    that the backend doesn't know about.  This can happen via the
03925    aggregate_value_p call in init_function_start.  All we can do is
03926    ignore unknown tree types.  */
03927       return VOIDmode;
03928     }
03929 
03930   return VOIDmode;
03931 }
03932 
03933 /* Return the number of words required to hold a quantity of TYPE and MODE
03934    when passed as an argument.  */
03935 static int
03936 ia64_function_arg_words (tree type, enum machine_mode mode)
03937 {
03938   int words;
03939 
03940   if (mode == BLKmode)
03941     words = int_size_in_bytes (type);
03942   else
03943     words = GET_MODE_SIZE (mode);
03944 
03945   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
03946 }
03947 
03948 /* Return the number of registers that should be skipped so the current
03949    argument (described by TYPE and WORDS) will be properly aligned.
03950 
03951    Integer and float arguments larger than 8 bytes start at the next
03952    even boundary.  Aggregates larger than 8 bytes start at the next
03953    even boundary if the aggregate has 16 byte alignment.  Note that
03954    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
03955    but are still to be aligned in registers.
03956 
03957    ??? The ABI does not specify how to handle aggregates with
03958    alignment from 9 to 15 bytes, or greater than 16.  We handle them
03959    all as if they had 16 byte alignment.  Such aggregates can occur
03960    only if gcc extensions are used.  */
03961 static int
03962 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
03963 {
03964   if ((cum->words & 1) == 0)
03965     return 0;
03966 
03967   if (type
03968       && TREE_CODE (type) != INTEGER_TYPE
03969       && TREE_CODE (type) != REAL_TYPE)
03970     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
03971   else
03972     return words > 1;
03973 }
03974 
03975 /* Return rtx for register where argument is passed, or zero if it is passed
03976    on the stack.  */
03977 /* ??? 128-bit quad-precision floats are always passed in general
03978    registers.  */
03979 
03980 rtx
03981 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
03982        int named, int incoming)
03983 {
03984   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
03985   int words = ia64_function_arg_words (type, mode);
03986   int offset = ia64_function_arg_offset (cum, type, words);
03987   enum machine_mode hfa_mode = VOIDmode;
03988 
03989   /* If all argument slots are used, then it must go on the stack.  */
03990   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
03991     return 0;
03992 
03993   /* Check for and handle homogeneous FP aggregates.  */
03994   if (type)
03995     hfa_mode = hfa_element_mode (type, 0);
03996 
03997   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
03998      and unprototyped hfas are passed specially.  */
03999   if (hfa_mode != VOIDmode && (! cum->prototype || named))
04000     {
04001       rtx loc[16];
04002       int i = 0;
04003       int fp_regs = cum->fp_regs;
04004       int int_regs = cum->words + offset;
04005       int hfa_size = GET_MODE_SIZE (hfa_mode);
04006       int byte_size;
04007       int args_byte_size;
04008 
04009       /* If prototyped, pass it in FR regs then GR regs.
04010    If not prototyped, pass it in both FR and GR regs.
04011 
04012    If this is an SFmode aggregate, then it is possible to run out of
04013    FR regs while GR regs are still left.  In that case, we pass the
04014    remaining part in the GR regs.  */
04015 
04016       /* Fill the FP regs.  We do this always.  We stop if we reach the end
04017    of the argument, the last FP register, or the last argument slot.  */
04018 
04019       byte_size = ((mode == BLKmode)
04020        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
04021       args_byte_size = int_regs * UNITS_PER_WORD;
04022       offset = 0;
04023       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
04024         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
04025   {
04026     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
04027               gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
04028                     + fp_regs)),
04029               GEN_INT (offset));
04030     offset += hfa_size;
04031     args_byte_size += hfa_size;
04032     fp_regs++;
04033   }
04034 
04035       /* If no prototype, then the whole thing must go in GR regs.  */
04036       if (! cum->prototype)
04037   offset = 0;
04038       /* If this is an SFmode aggregate, then we might have some left over
04039    that needs to go in GR regs.  */
04040       else if (byte_size != offset)
04041   int_regs += offset / UNITS_PER_WORD;
04042 
04043       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
04044 
04045       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
04046   {
04047     enum machine_mode gr_mode = DImode;
04048     unsigned int gr_size;
04049 
04050     /* If we have an odd 4 byte hunk because we ran out of FR regs,
04051        then this goes in a GR reg left adjusted/little endian, right
04052        adjusted/big endian.  */
04053     /* ??? Currently this is handled wrong, because 4-byte hunks are
04054        always right adjusted/little endian.  */
04055     if (offset & 0x4)
04056       gr_mode = SImode;
04057     /* If we have an even 4 byte hunk because the aggregate is a
04058        multiple of 4 bytes in size, then this goes in a GR reg right
04059        adjusted/little endian.  */
04060     else if (byte_size - offset == 4)
04061       gr_mode = SImode;
04062 
04063     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
04064               gen_rtx_REG (gr_mode, (basereg
04065                    + int_regs)),
04066               GEN_INT (offset));
04067 
04068     gr_size = GET_MODE_SIZE (gr_mode);
04069     offset += gr_size;
04070     if (gr_size == UNITS_PER_WORD
04071         || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
04072       int_regs++;
04073     else if (gr_size > UNITS_PER_WORD)
04074       int_regs += gr_size / UNITS_PER_WORD;
04075   }
04076       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
04077     }
04078 
04079   /* Integral and aggregates go in general registers.  If we have run out of
04080      FR registers, then FP values must also go in general registers.  This can
04081      happen when we have a SFmode HFA.  */
04082   else if (mode == TFmode || mode == TCmode
04083      || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
04084     {
04085       int byte_size = ((mode == BLKmode)
04086                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
04087       if (BYTES_BIG_ENDIAN
04088   && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
04089   && byte_size < UNITS_PER_WORD
04090   && byte_size > 0)
04091   {
04092     rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
04093             gen_rtx_REG (DImode,
04094                    (basereg + cum->words
04095               + offset)),
04096             const0_rtx);
04097     return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
04098   }
04099       else
04100   return gen_rtx_REG (mode, basereg + cum->words + offset);
04101 
04102     }
04103 
04104   /* If there is a prototype, then FP values go in a FR register when
04105      named, and in a GR register when unnamed.  */
04106   else if (cum->prototype)
04107     {
04108       if (named)
04109   return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
04110       /* In big-endian mode, an anonymous SFmode value must be represented
04111          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
04112    the value into the high half of the general register.  */
04113       else if (BYTES_BIG_ENDIAN && mode == SFmode)
04114   return gen_rtx_PARALLEL (mode,
04115      gen_rtvec (1,
04116                    gen_rtx_EXPR_LIST (VOIDmode,
04117          gen_rtx_REG (DImode, basereg + cum->words + offset),
04118               const0_rtx)));
04119       else
04120   return gen_rtx_REG (mode, basereg + cum->words + offset);
04121     }
04122   /* If there is no prototype, then FP values go in both FR and GR
04123      registers.  */
04124   else
04125     {
04126       /* See comment above.  */
04127       enum machine_mode inner_mode =
04128   (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
04129 
04130       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
04131               gen_rtx_REG (mode, (FR_ARG_FIRST
04132                 + cum->fp_regs)),
04133               const0_rtx);
04134       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
04135               gen_rtx_REG (inner_mode,
04136                (basereg + cum->words
04137                 + offset)),
04138               const0_rtx);
04139 
04140       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
04141     }
04142 }
04143 
04144 /* Return number of bytes, at the beginning of the argument, that must be
04145    put in registers.  0 is the argument is entirely in registers or entirely
04146    in memory.  */
04147 
04148 static int
04149 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
04150       tree type, bool named ATTRIBUTE_UNUSED)
04151 {
04152   int words = ia64_function_arg_words (type, mode);
04153   int offset = ia64_function_arg_offset (cum, type, words);
04154 
04155   /* If all argument slots are used, then it must go on the stack.  */
04156   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
04157     return 0;
04158 
04159   /* It doesn't matter whether the argument goes in FR or GR regs.  If
04160      it fits within the 8 argument slots, then it goes entirely in
04161      registers.  If it extends past the last argument slot, then the rest
04162      goes on the stack.  */
04163 
04164   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
04165     return 0;
04166 
04167   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
04168 }
04169 
04170 /* Update CUM to point after this argument.  This is patterned after
04171    ia64_function_arg.  */
04172 
04173 void
04174 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
04175          tree type, int named)
04176 {
04177   int words = ia64_function_arg_words (type, mode);
04178   int offset = ia64_function_arg_offset (cum, type, words);
04179   enum machine_mode hfa_mode = VOIDmode;
04180 
04181   /* If all arg slots are already full, then there is nothing to do.  */
04182   if (cum->words >= MAX_ARGUMENT_SLOTS)
04183     return;
04184 
04185   cum->words += words + offset;
04186 
04187   /* Check for and handle homogeneous FP aggregates.  */
04188   if (type)
04189     hfa_mode = hfa_element_mode (type, 0);
04190 
04191   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
04192      and unprototyped hfas are passed specially.  */
04193   if (hfa_mode != VOIDmode && (! cum->prototype || named))
04194     {
04195       int fp_regs = cum->fp_regs;
04196       /* This is the original value of cum->words + offset.  */
04197       int int_regs = cum->words - words;
04198       int hfa_size = GET_MODE_SIZE (hfa_mode);
04199       int byte_size;
04200       int args_byte_size;
04201 
04202       /* If prototyped, pass it in FR regs then GR regs.
04203    If not prototyped, pass it in both FR and GR regs.
04204 
04205    If this is an SFmode aggregate, then it is possible to run out of
04206    FR regs while GR regs are still left.  In that case, we pass the
04207    remaining part in the GR regs.  */
04208 
04209       /* Fill the FP regs.  We do this always.  We stop if we reach the end
04210    of the argument, the last FP register, or the last argument slot.  */
04211 
04212       byte_size = ((mode == BLKmode)
04213        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
04214       args_byte_size = int_regs * UNITS_PER_WORD;
04215       offset = 0;
04216       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
04217         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
04218   {
04219     offset += hfa_size;
04220     args_byte_size += hfa_size;
04221     fp_regs++;
04222   }
04223 
04224       cum->fp_regs = fp_regs;
04225     }
04226 
04227   /* Integral and aggregates go in general registers.  So do TFmode FP values.
04228      If we have run out of FR registers, then other FP values must also go in
04229      general registers.  This can happen when we have a SFmode HFA.  */
04230   else if (mode == TFmode || mode == TCmode
04231            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
04232     cum->int_regs = cum->words;
04233 
04234   /* If there is a prototype, then FP values go in a FR register when
04235      named, and in a GR register when unnamed.  */
04236   else if (cum->prototype)
04237     {
04238       if (! named)
04239   cum->int_regs = cum->words;
04240       else
04241   /* ??? Complex types should not reach here.  */
04242   cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
04243     }
04244   /* If there is no prototype, then FP values go in both FR and GR
04245      registers.  */
04246   else
04247     {
04248       /* ??? Complex types should not reach here.  */
04249       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
04250       cum->int_regs = cum->words;
04251     }
04252 }
04253 
04254 /* Arguments with alignment larger than 8 bytes start at the next even
04255    boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
04256    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
04257 
04258 int
04259 ia64_function_arg_boundary (enum machine_mode mode, tree type)
04260 {
04261 
04262   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
04263     return PARM_BOUNDARY * 2;
04264 
04265   if (type)
04266     {
04267       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
04268         return PARM_BOUNDARY * 2;
04269       else
04270         return PARM_BOUNDARY;
04271     }
04272 
04273   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
04274     return PARM_BOUNDARY * 2;
04275   else
04276     return PARM_BOUNDARY;
04277 }
04278 
04279 /* True if it is OK to do sibling call optimization for the specified
04280    call expression EXP.  DECL will be the called function, or NULL if
04281    this is an indirect call.  */
04282 static bool
04283 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
04284 {
04285   /* We can't perform a sibcall if the current function has the syscall_linkage
04286      attribute.  */
04287   if (lookup_attribute ("syscall_linkage",
04288       TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
04289     return false;
04290 
04291   /* We must always return with our current GP.  This means we can
04292      only sibcall to functions defined in the current module.  */
04293   return decl && (*targetm.binds_local_p) (decl);
04294 }
04295 
04296 
04297 /* Implement va_arg.  */
04298 
04299 static tree
04300 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
04301 {
04302   /* Variable sized types are passed by reference.  */
04303   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
04304     {
04305       tree ptrtype = build_pointer_type (type);
04306       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
04307       return build_va_arg_indirect_ref (addr);
04308     }
04309 
04310   /* Aggregate arguments with alignment larger than 8 bytes start at
04311      the next even boundary.  Integer and floating point arguments
04312      do so if they are larger than 8 bytes, whether or not they are
04313      also aligned larger than 8 bytes.  */
04314   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
04315       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
04316     {
04317       tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
04318            build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
04319       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
04320       build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
04321       t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
04322       gimplify_and_add (t, pre_p);
04323     }
04324 
04325   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
04326 }
04327 
04328 /* Return 1 if function return value returned in memory.  Return 0 if it is
04329    in a register.  */
04330 
04331 static bool
04332 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
04333 {
04334   enum machine_mode mode;
04335   enum machine_mode hfa_mode;
04336   HOST_WIDE_INT byte_size;
04337 
04338   mode = TYPE_MODE (valtype);
04339   byte_size = GET_MODE_SIZE (mode);
04340   if (mode == BLKmode)
04341     {
04342       byte_size = int_size_in_bytes (valtype);
04343       if (byte_size < 0)
04344   return true;
04345     }
04346 
04347   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
04348 
04349   hfa_mode = hfa_element_mode (valtype, 0);
04350   if (hfa_mode != VOIDmode)
04351     {
04352       int hfa_size = GET_MODE_SIZE (hfa_mode);
04353 
04354       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
04355   return true;
04356       else
04357   return false;
04358     }
04359   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
04360     return true;
04361   else
04362     return false;
04363 }
04364 
04365 /* Return rtx for register that holds the function return value.  */
04366 
04367 rtx
04368 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
04369 {
04370   enum machine_mode mode;
04371   enum machine_mode hfa_mode;
04372 
04373   mode = TYPE_MODE (valtype);
04374   hfa_mode = hfa_element_mode (valtype, 0);
04375 
04376   if (hfa_mode != VOIDmode)
04377     {
04378       rtx loc[8];
04379       int i;
04380       int hfa_size;
04381       int byte_size;
04382       int offset;
04383 
04384       hfa_size = GET_MODE_SIZE (hfa_mode);
04385       byte_size = ((mode == BLKmode)
04386        ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
04387       offset = 0;
04388       for (i = 0; offset < byte_size; i++)
04389   {
04390     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
04391               gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
04392               GEN_INT (offset));
04393     offset += hfa_size;
04394   }
04395       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
04396     }
04397   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
04398     return gen_rtx_REG (mode, FR_ARG_FIRST);
04399   else
04400     {
04401       bool need_parallel = false;
04402 
04403       /* In big-endian mode, we need to manage the layout of aggregates
04404    in the registers so that we get the bits properly aligned in
04405    the highpart of the registers.  */
04406       if (BYTES_BIG_ENDIAN
04407     && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
04408   need_parallel = true;
04409 
04410       /* Something like struct S { long double x; char a[0] } is not an
04411    HFA structure, and therefore doesn't go in fp registers.  But
04412    the middle-end will give it XFmode anyway, and XFmode values
04413    don't normally fit in integer registers.  So we need to smuggle
04414    the value inside a parallel.  */
04415       else if (mode == XFmode || mode == XCmode || mode == RFmode)
04416   need_parallel = true;
04417 
04418       if (need_parallel)
04419   {
04420     rtx loc[8];
04421     int offset;
04422     int bytesize;
04423     int i;
04424 
04425     offset = 0;
04426     bytesize = int_size_in_bytes (valtype);
04427     /* An empty PARALLEL is invalid here, but the return value
04428        doesn't matter for empty structs.  */
04429     if (bytesize == 0)
04430       return gen_rtx_REG (mode, GR_RET_FIRST);
04431     for (i = 0; offset < bytesize; i++)
04432       {
04433         loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
04434             gen_rtx_REG (DImode,
04435                    GR_RET_FIRST + i),
04436             GEN_INT (offset));
04437         offset += UNITS_PER_WORD;
04438       }
04439     return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
04440   }
04441 
04442       return gen_rtx_REG (mode, GR_RET_FIRST);
04443     }
04444 }
04445 
04446 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
04447    We need to emit DTP-relative relocations.  */
04448 
04449 static void
04450 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
04451 {
04452   gcc_assert (size == 4 || size == 8);
04453   if (size == 4)
04454     fputs ("\tdata4.ua\t@dtprel(", file);
04455   else
04456     fputs ("\tdata8.ua\t@dtprel(", file);
04457   output_addr_const (file, x);
04458   fputs (")", file);
04459 }
04460 
/* Print a memory address as an operand to reference that memory location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
                            rtx address ATTRIBUTE_UNUSED)
{
  /* Nothing is written: the body is empty and both parameters are
     unused.  */
}
04471 
/* Print an operand to an assembler instruction.
   C    Swap and print a comparison operator.
   D    Print an FP comparison operator.
   E    Print 32 - constant, for SImode shifts as extract.
   e    Print 64 - constant, for DImode rotates.
   F    A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
        a floating point register emitted normally.
   I    Invert a predicate register by adding 1.
   J    Select the proper predicate register for a condition.
   j    Select the inverse predicate register for a condition.
   O    Append .acq for volatile load.
   P    Postincrement of a MEM.
   Q    Append .rel for volatile store.
   S    Shift amount for shladd instruction.
   T    Print an 8-bit sign extended number (K) as a 32-bit unsigned number
        for Intel assembler.
   U    Print an 8-bit sign extended number (K) as a 64-bit unsigned number
        for Intel assembler.
   X    A pair of floating point registers.
   r    Print register name, or constant 0 as r0.  HP compatibility for
        Linux kernel.
   v    Print vector constant value as an 8-byte integer value.
   +    Print a branch hint (.sptk, .spnt, .dptk or .dpnt) chosen from the
        REG_BR_PROB note of the insn being output; X is ignored.
   ,    Print the predicate of the insn being output as "(REG) ", or
        nothing when there is none; X is ignored.

   With no code letter (CODE == 0), and for T, U and v once they have
   finished their own handling, X is printed by the generic switch at the
   end of the function: a register name, "[REG]" for a MEM, or a constant
   via output_addr_const.  */

void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  /* First switch: handle the code letter.  Cases that finish the job
     return; cases that only preprocess X break out to the generic
     printing switch below.  */
  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'C':
      {
        enum rtx_code c = swap_condition (GET_CODE (x));
        fputs (GET_RTX_NAME (c), file);
        return;
      }

    case 'D':
      /* NE, UNORDERED and ORDERED get dedicated spellings; every other
         code is printed under its rtx name.  */
      switch (GET_CODE (x))
        {
        case NE:
          str = "neq";
          break;
        case UNORDERED:
          str = "unord";
          break;
        case ORDERED:
          str = "ord";
          break;
        default:
          str = GET_RTX_NAME (GET_CODE (x));
          break;
        }
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      /* Constants 0 and 1 of X's mode map to FR_REG (0) and FR_REG (1);
         anything else must be a register.  */
      if (x == CONST0_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
        str = reg_names [FR_REG (1)];
      else
        {
          gcc_assert (GET_CODE (x) == REG);
          str = reg_names [REGNO (x)];
        }
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
        /* Start from the register compared in X; an EQ condition selects
           the next register, and 'j' then flips the low bit to get the
           other register of the pair.  */
        unsigned int regno = REGNO (XEXP (x, 0));
        if (GET_CODE (x) == EQ)
          regno += 1;
        if (code == 'j')
          regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
        fputs(".acq", file);
      return;

    case 'P':
      {
        HOST_WIDE_INT value;

        /* Print ", AMOUNT" for an auto-modified address; print nothing
           for any other address form.  */
        switch (GET_CODE (XEXP (x, 0)))
          {
          default:
            return;

          case POST_MODIFY:
            /* The increment is the second operand of the expression held
               in the POST_MODIFY; it is either a constant or a
               register.  */
            x = XEXP (XEXP (XEXP (x, 0), 1), 1);
            if (GET_CODE (x) == CONST_INT)
              value = INTVAL (x);
            else
              {
                gcc_assert (GET_CODE (x) == REG);
                fprintf (file, ", %s", reg_names[REGNO (x)]);
                return;
              }
            break;

          case POST_INC:
            value = GET_MODE_SIZE (GET_MODE (x));
            break;

          case POST_DEC:
            value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
            break;
          }

        fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
        return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
        fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      /* Only when not targeting GNU as and X is a CONST_INT; otherwise
         fall out to the generic printing below.  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'U':
      /* Same condition as 'T'.  When bit 31 is set, the upper 32 bits are
         written out explicitly as 0xffffffff and the low word follows
         without a second "0x" prefix.  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
        {
          const char *prefix = "0x";
          if (INTVAL (x) & 0x80000000)
            {
              fprintf (file, "0xffffffff");
              prefix = "";
            }
          fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
          return;
        }
      break;

    case 'X':
      {
        unsigned int regno = REGNO (x);
        fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
         Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
        fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
        fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
        output_addr_const (file, x);
      else
        output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      /* Reinterpret the vector constant as a DImode value and let the
         generic code below print it.  */
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      {
        const char *which;

        /* For conditional branches, returns or calls, substitute
           sptk, dptk, dpnt, or spnt for %s.  */
        x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
        if (x)
          {
            int pred_val = INTVAL (XEXP (x, 0));

            /* Treat the bottom 2% (below REG_BR_PROB_BASE / 50) and the
               top 2% (at or above REG_BR_PROB_BASE / 100 * 98) as
               statically predicted, but only when the note is reliable;
               everything else gets a dynamic hint.  */
            if (pred_val < REG_BR_PROB_BASE / 50
                && br_prob_note_reliable_p (x))
              which = ".spnt";
            else if (pred_val < REG_BR_PROB_BASE / 2)
              which = ".dpnt";
            else if (pred_val < REG_BR_PROB_BASE / 100 * 98
                     || !br_prob_note_reliable_p (x))
              which = ".dptk";
            else
              which = ".sptk";
          }
        /* No probability note: calls get .sptk, everything else .dptk.  */
        else if (GET_CODE (current_output_insn) == CALL_INSN)
          which = ".sptk";
        else
          which = ".dptk";

        fputs (which, file);
        return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
        {
          /* As for 'J': an EQ predicate selects the next register.  */
          unsigned int regno = REGNO (XEXP (x, 0));
          if (GET_CODE (x) == EQ)
            regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
        }
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  /* Generic printing, reached for code 0 and for the cases above that
     break instead of returning.  */
  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
        /* Print the base register in brackets, looking through an
           auto-increment address to the register it modifies.  */
        rtx addr = XEXP (x, 0);
        if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
          addr = XEXP (addr, 0);
        fprintf (file, "[%s]", reg_names [REGNO (addr)]);
        break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
04741 
04742 /* Compute a (partial) cost for rtx X.  Return true if the complete
04743    cost has been computed, and false if subexpressions should be
04744    scanned.  In either case, *TOTAL contains the cost result.  */
04745 /* ??? This is incomplete.  */
04746 
04747 static bool
04748 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
04749 {
04750   switch (code)
04751     {
04752     case CONST_INT:
04753       switch (outer_code)
04754         {
04755         case SET:
04756     *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
04757     return true;
04758         case PLUS:
04759     if (CONST_OK_FOR_I (INTVAL (x)))
04760       *total = 0;
04761     else if (CONST_OK_FOR_J (INTVAL (x)))
04762       *total = 1;
04763     else
04764       *total = COSTS_N_INSNS (1);
04765     return true;
04766         default:
04767     if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
04768       *total = 0;
04769     else
04770       *total = COSTS_N_INSNS (1);
04771     return true;
04772   }
04773 
04774     case CONST_DOUBLE:
04775       *total = COSTS_N_INSNS (1);
04776       return true;
04777 
04778     case CONST:
04779     case SYMBOL_REF:
04780     case LABEL_REF:
04781       *total = COSTS_N_INSNS (3);
04782       return true;
04783 
04784     case MULT:
04785       /* For multiplies wider than HImode, we have to go to the FPU,
04786          which normally involves copies.  Plus there's the latency
04787          of the multiply itself, and the latency of the instructions to
04788          transfer integer regs to FP regs.  */
04789       /* ??? Check for FP mode.  */
04790       if (GET_MODE_SIZE (GET_MODE (x)) > 2)
04791         *total = COSTS_N_INSNS (10);
04792       else
04793   *total = COSTS_N_INSNS (2);
04794       return true;
04795 
04796     case PLUS:
04797     case MINUS:
04798     case ASHIFT:
04799     case ASHIFTRT:
04800     case LSHIFTRT:
04801       *total = COSTS_N_INSNS (1);
04802       return true;
04803 
04804     case DIV:
04805     case UDIV:
04806     case MOD:
04807     case UMOD:
04808       /* We make divide expensive, so that divide-by-constant will be
04809          optimized to a multiply.  */
04810       *total = COSTS_N_INSNS (60);
04811       return true;
04812 
04813     default:
04814       return false;
04815     }
04816 }
04817 
04818 /* Calculate the cost of moving data from a register in class FROM to
04819    one in class TO, using MODE.  */
04820 
04821 int
04822 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
04823        enum reg_class to)
04824 {
04825   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
04826   if (to == ADDL_REGS)
04827     to = GR_REGS;
04828   if (from == ADDL_REGS)
04829     from = GR_REGS;
04830 
04831   /* All costs are symmetric, so reduce cases by putting the
04832      lower number class as the destination.  */
04833   if (from < to)
04834     {
04835       enum reg_class tmp = to;
04836       to = from, from = tmp;
04837     }
04838 
04839   /* Moving from FR<->GR in XFmode must be more expensive than 2,
04840      so that we get secondary memory reloads.  Between FR_REGS,
04841      we have to make this at least as expensive as MEMORY_MOVE_COST
04842      to avoid spectacularly poor register class preferencing.  */
04843   if (mode == XFmode || mode == RFmode)
04844     {
04845       if (to != GR_REGS || from != GR_REGS)
04846         return MEMORY_MOVE_COST (mode, to, 0);
04847       else
04848   return 3;
04849     }
04850 
04851   switch (to)
04852     {
04853     case PR_REGS:
04854       /* Moving between PR registers takes two insns.  */
04855       if (from == PR_REGS)
04856   return 3;
04857       /* Moving between PR and anything but GR is impossible.  */
04858       if (from != GR_REGS)
04859   return MEMORY_MOVE_COST (mode, to, 0);
04860       break;
04861 
04862     case BR_REGS:
04863       /* Moving between BR and anything but GR is impossible.  */
04864       if (from != GR_REGS && from != GR_AND_BR_REGS)
04865   return MEMORY_MOVE_COST (mode, to, 0);
04866       break;
04867 
04868     case AR_I_REGS:
04869     case AR_M_REGS:
04870       /* Moving between AR and anything but GR is impossible.  */
04871       if (from != GR_REGS)
04872   return MEMORY_MOVE_COST (mode, to, 0);
04873       break;
04874 
04875     case GR_REGS:
04876     case FR_REGS:
04877     case FP_REGS:
04878     case GR_AND_FR_REGS:
04879     case GR_AND_BR_REGS:
04880     case ALL_REGS:
04881       break;
04882 
04883     default:
04884       gcc_unreachable ();
04885     }
04886 
04887   return 2;
04888 }
04889 
04890 /* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
04891    to use when copying X into that class.  */
04892 
04893 enum reg_class
04894 ia64_preferred_reload_class (rtx x, enum reg_class class)
04895 {
04896   switch (class)
04897     {
04898     case FR_REGS:
04899     case FP_REGS:
04900       /* Don't allow volatile mem reloads into floating point registers.
04901    This is defined to force reload to choose the r/m case instead
04902    of the f/f case when reloading (set (reg fX) (mem/v)).  */
04903       if (MEM_P (x) && MEM_VOLATILE_P (x))
04904   return NO_REGS;
04905       
04906       /* Force all unrecognized constants into the constant pool.  */
04907       if (CONSTANT_P (x))
04908   return NO_REGS;
04909       break;
04910 
04911     case AR_M_REGS:
04912     case AR_I_REGS:
04913       if (!OBJECT_P (x))
04914   return NO_REGS;
04915       break;
04916 
04917     default:
04918       break;
04919     }
04920 
04921   return class;
04922 }
04923 
04924 /* This function returns the register class required for a secondary
04925    register when copying between one of the registers in CLASS, and X,
04926    using MODE.  A return value of NO_REGS means that no secondary register
04927    is required.  */
04928 
04929 enum reg_class
04930 ia64_secondary_reload_class (enum reg_class class,
04931            enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
04932 {
04933   int regno = -1;
04934 
04935   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
04936     regno = true_regnum (x);
04937 
04938   switch (class)
04939     {
04940     case BR_REGS:
04941     case AR_M_REGS:
04942     case AR_I_REGS:
04943       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
04944    interaction.  We end up with two pseudos with overlapping lifetimes
04945    both of which are equiv to the same constant, and both which need
04946    to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
04947    changes depending on the path length, which means the qty_first_reg
04948    check in make_regs_eqv can give different answers at different times.
04949    At some point I'll probably need a reload_indi pattern to handle
04950    this.
04951 
04952    We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
04953    wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
04954    non-general registers for good measure.  */
04955       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
04956   return GR_REGS;
04957 
04958       /* This is needed if a pseudo used as a call_operand gets spilled to a
04959    stack slot.  */
04960       if (GET_CODE (x) == MEM)
04961   return GR_REGS;
04962       break;
04963 
04964     case FR_REGS:
04965     case FP_REGS:
04966       /* Need to go through general registers to get to other class regs.  */
04967       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
04968   return GR_REGS;
04969 
04970       /* This can happen when a paradoxical subreg is an operand to the
04971    muldi3 pattern.  */
04972       /* ??? This shouldn't be necessary after instruction scheduling is
04973    enabled, because paradoxical subregs are not accepted by
04974    register_operand when INSN_SCHEDULING is defined.  Or alternatively,
04975    stop the paradoxical subreg stupidity in the *_operand functions
04976    in recog.c.  */
04977       if (GET_CODE (x) == MEM
04978     && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
04979         || GET_MODE (x) == QImode))
04980   return GR_REGS;
04981 
04982       /* This can happen because of the ior/and/etc patterns that accept FP
04983    registers as operands.  If the third operand is a constant, then it
04984    needs to be reloaded into a FP register.  */
04985       if (GET_CODE (x) == CONST_INT)
04986   return GR_REGS;
04987 
04988       /* This can happen because of register elimination in a muldi3 insn.
04989    E.g. `26107 * (unsigned long)&u'.  */
04990       if (GET_CODE (x) == PLUS)
04991   return GR_REGS;
04992       break;
04993 
04994     case PR_REGS:
04995       /* ??? This happens if we cse/gcse a BImode value across a call,
04996    and the function has a nonlocal goto.  This is because global
04997    does not allocate call crossing pseudos to hard registers when
04998    current_function_has_nonlocal_goto is true.  This is relatively
04999    common for C++ programs that use exceptions.  To reproduce,
05000    return NO_REGS and compile libstdc++.  */
05001       if (GET_CODE (x) == MEM)
05002   return GR_REGS;
05003 
05004       /* This can happen when we take a BImode subreg of a DImode value,
05005    and that DImode value winds up in some non-GR register.  */
05006       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
05007   return GR_REGS;
05008       break;
05009 
05010     default:
05011       break;
05012     }
05013 
05014   return NO_REGS;
05015 }
05016 
05017 
05018 /* Emit text to declare externally defined variables and functions, because
05019    the Intel assembler does not support undefined externals.  */
05020 
05021 void
05022 ia64_asm_output_external (FILE *file, tree decl, const char *name)
05023 {
05024   int save_referenced;
05025 
05026   /* GNU as does not need anything here, but the HP linker does need
05027      something for external functions.  */
05028 
05029   if (TARGET_GNU_AS
05030       && (!TARGET_HPUX_LD
05031     || TREE_CODE (decl) != FUNCTION_DECL
05032     || strstr (name, "__builtin_") == name))
05033     return;
05034 
05035   /* ??? The Intel assembler creates a reference that needs to be satisfied by
05036      the linker when we do this, so we need to be careful not to do this for
05037      builtin functions which have no library equivalent.  Unfortunately, we
05038      can't tell here whether or not a function will actually be called by
05039      expand_expr, so we pull in library functions even if we may not need
05040      them later.  */
05041   if (! strcmp (name, "__builtin_next_arg")
05042       || ! strcmp (name, "alloca")
05043       || ! strcmp (name, "__builtin_constant_p")
05044       || ! strcmp (name, "__builtin_args_info"))
05045     return;
05046 
05047   if (TARGET_HPUX_LD)
05048     ia64_hpux_add_extern_decl (decl);
05049   else
05050     {
05051       /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
05052          restore it.  */
05053       save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
05054       if (TREE_CODE (decl) == FUNCTION_DECL)
05055         ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
05056       (*targetm.asm_out.globalize_label) (file, name);
05057       TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
05058     }
05059 }
05060 
05061 /* Parse the -mfixed-range= option string.  */
05062 
05063 static void
05064 fix_range (const char *const_str)
05065 {
05066   int i, first, last;
05067   char *str, *dash, *comma;
05068 
05069   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
05070      REG2 are either register names or register numbers.  The effect
05071      of this option is to mark the registers in the range from REG1 to
05072      REG2 as ``fixed'' so they won't be used by the compiler.  This is
05073      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
05074 
05075   i = strlen (const_str);
05076   str = (char *) alloca (i + 1);
05077   memcpy (str, const_str, i + 1);
05078 
05079   while (1)
05080     {
05081       dash = strchr (str, '-');
05082       if (!dash)
05083   {
05084     warning (0, "value of -mfixed-range must have form REG1-REG2");
05085     return;
05086   }
05087       *dash = '\0';
05088 
05089       comma = strchr (dash + 1, ',');
05090       if (comma)
05091   *comma = '\0';
05092 
05093       first = decode_reg_name (str);
05094       if (first < 0)
05095   {
05096     warning (0, "unknown register name: %s", str);
05097     return;
05098   }
05099 
05100       last = decode_reg_name (dash + 1);
05101       if (last < 0)
05102   {
05103     warning (0, "unknown register name: %s", dash + 1);
05104     return;
05105   }
05106 
05107       *dash = '-';
05108 
05109       if (first > last)
05110   {
05111     warning (0, "%s-%s is an empty range", str, dash + 1);
05112     return;
05113   }
05114 
05115       for (i = first; i <= last; ++i)
05116   fixed_regs[i] = call_used_regs[i] = 1;
05117 
05118       if (!comma)
05119   break;
05120 
05121       *comma = ',';
05122       str = comma + 1;
05123     }
05124 }
05125 
05126 /* Implement TARGET_HANDLE_OPTION.  */
05127 
05128 static bool
05129 ia64_handle_option (size_t code, const char *arg, int value)
05130 {
05131   switch (code)
05132     {
05133     case OPT_mfixed_range_:
05134       fix_range (arg);
05135       return true;
05136 
05137     case OPT_mtls_size_:
05138       if (value != 14 && value != 22 && value != 64)
05139   error ("bad value %<%s%> for -mtls-size= switch", arg);
05140       return true;
05141 
05142     case OPT_mtune_:
05143       {
05144   static struct pta
05145     {
05146       const char *name;   /* processor name or nickname.  */
05147       enum processor_type processor;
05148     }
05149   const processor_alias_table[] =
05150     {
05151       {"itanium", PROCESSOR_ITANIUM},
05152       {"itanium1", PROCESSOR_ITANIUM},
05153       {"merced", PROCESSOR_ITANIUM},
05154       {"itanium2", PROCESSOR_ITANIUM2},
05155       {"mckinley", PROCESSOR_ITANIUM2},
05156     };
05157   int const pta_size = ARRAY_SIZE (processor_alias_table);
05158   int i;
05159 
05160   for (i = 0; i < pta_size; i++)
05161     if (!strcmp (arg, processor_alias_table[i].name))
05162       {
05163         ia64_tune = processor_alias_table[i].processor;
05164         break;
05165       }
05166   if (i == pta_size)
05167     error ("bad value %<%s%> for -mtune= switch", arg);
05168   return true;
05169       }
05170 
05171     default:
05172       return true;
05173     }
05174 }
05175 
05176 /* Implement OVERRIDE_OPTIONS.  */
05177 
05178 void
05179 ia64_override_options (void)
05180 {
05181   if (TARGET_AUTO_PIC)
05182     target_flags |= MASK_CONST_GP;
05183 
05184   if (TARGET_INLINE_SQRT == INL_MIN_LAT)
05185     {
05186       warning (0, "not yet implemented: latency-optimized inline square root");
05187       TARGET_INLINE_SQRT = INL_MAX_THR;
05188     }
05189 
05190   ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
05191   flag_schedule_insns_after_reload = 0;
05192 
05193   ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
05194 
05195   init_machine_status = ia64_init_machine_status;
05196 }
05197 
05198 static struct machine_function *
05199 ia64_init_machine_status (void)
05200 {
05201   return ggc_alloc_cleared (sizeof (struct machine_function));
05202 }
05203 
/* Forward declarations of the two attribute accessors defined just below.
   Each returns the UNKNOWN value of its enumeration when recog_memoized
   gives a negative result for the insn.  */
static enum attr_itanium_class ia64_safe_itanium_class (rtx);
static enum attr_type ia64_safe_type (rtx);
05206 
05207 static enum attr_itanium_class
05208 ia64_safe_itanium_class (rtx insn)
05209 {
05210   if (recog_memoized (insn) >= 0)
05211     return get_attr_itanium_class (insn);
05212   else
05213     return ITANIUM_CLASS_UNKNOWN;
05214 }
05215 
05216 static enum attr_type
05217 ia64_safe_type (rtx insn)
05218 {
05219   if (recog_memoized (insn) >= 0)
05220     return get_attr_type (insn);
05221   else
05222     return TYPE_UNKNOWN;
05223 }
05224 
/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.  We could
   make these registers visible to gcc, but since these registers are
   never explicitly used in gcc generated code, it seems wasteful to
   do so (plus it would make the call and return patterns needlessly
   complex).

   REG_RP is branch register 0.  The other numbers defined here start at
   FIRST_PSEUDO_REGISTER + 1 and serve as indices for dependency tracking
   only.  */
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (FIRST_PSEUDO_REGISTER + 2)
/* First of 64 consecutive tracking numbers; NUM_REGS lies just past the
   last of them.  NOTE(review): presumably one number per ar.unat bit --
   confirm against the users of AR_UNAT_BIT_0.  */
#define AR_UNAT_BIT_0   (FIRST_PSEUDO_REGISTER + 3)
/* Number of entries needed by arrays indexed with the numbers above.  */
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
05241 
05242 /* For each register, we keep track of how it has been written in the
05243    current instruction group.
05244 
05245    If a register is written unconditionally (no qualifying predicate),
05246    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
05247 
05248    If a register is written if its qualifying predicate P is true, we
05249    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
05250    may be written again by the complement of P (P^1) and when this happens,
05251    WRITE_COUNT gets set to 2.
05252 
05253    The result of this is that whenever an insn attempts to write a register
05254    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
05255 
05256    If a predicate register is written by a floating-point insn, we set
05257    WRITTEN_BY_FP to true.
05258 
05259    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
05260    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
05261 
/* Per-register record of writes seen in the current instruction group.  */
struct reg_write_state
{
  /* 1 after a single predicated write; 2 after an unconditional write, or
     after writes under both a predicate and its complement.  A further
     write to a register whose count is 2 needs a group barrier first.  */
  unsigned int write_count : 2;
  /* Qualifying predicate of the first predicated write; ignored once
     write_count is 2 because of an unconditional write.  */
  unsigned int first_pred : 16;
  /* Set when a predicate register was written by a floating-point insn.  */
  unsigned int written_by_fp : 1;
  /* Set when a predicate register was written by an AND.ORCM.  */
  unsigned int written_by_and : 1;
  /* Set when a predicate register was written by an OR.ANDCM.  */
  unsigned int written_by_or : 1;
};
05270 
/* Cumulative info for the current instruction group.  One entry per
   tracked register number, NUM_REGS in all.  */
struct reg_write_state rws_sum[NUM_REGS];
/* Info for the current instruction.  This gets copied to rws_sum after a
   stop bit is emitted.  Same size as rws_sum.  */
struct reg_write_state rws_insn[NUM_REGS];

/* Indicates whether this is the first instruction after a stop bit,
   in which case we don't need another stop bit.  Without this,
   ia64_variable_issue will die when scheduling an alloc.  */
static int first_instruction;
05281 
05282 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
05283    RTL for one instruction.  Passed by value, so callees may adjust a copy.  */
05284 struct reg_flags
05285 {
05286   unsigned int is_write : 1;  /* Is register being written?  */
05287   unsigned int is_fp : 1; /* Is register used as part of an fp op?  */
05288   unsigned int is_branch : 1; /* Is register used as part of a branch?  */
05289   unsigned int is_and : 1;  /* Is register used as part of and.orcm?  */
05290   unsigned int is_or : 1; /* Is register used as part of or.andcm?  */
05291   unsigned int is_sibcall : 1;  /* Set from SIBLING_CALL_P: 1 for a sibling call, 0 for a normal call.  */
05292 };
05293 
05294 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
05295 static int rws_access_regno (int, struct reg_flags, int);
05296 static int rws_access_reg (rtx, struct reg_flags, int);
05297 static void update_set_flags (rtx, struct reg_flags *);
05298 static int set_src_needs_barrier (rtx, struct reg_flags, int);
05299 static int rtx_needs_barrier (rtx, struct reg_flags, int);
05300 static void init_insn_group_barriers (void);
05301 static int group_barrier_needed (rtx);
05302 static int safe_group_barrier_needed (rtx);
05303 
05304 /* Update *RWS for REGNO, which is being written by the current instruction,
05305    with predicate PRED, and associated register flags in FLAGS.  */
05306 
05307 static void
05308 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
05309 {
05310   if (pred)
05311     rws[regno].write_count++;
05312   else
05313     rws[regno].write_count = 2;
05314   rws[regno].written_by_fp |= flags.is_fp;
05315   /* ??? Not tracking and/or across differing predicates.  */
05316   rws[regno].written_by_and = flags.is_and;
05317   rws[regno].written_by_or = flags.is_or;
05318   rws[regno].first_pred = pred;
05319 }
05320 
05321 /* Handle an access to register REGNO of type FLAGS using predicate register
05322    PRED (0 when unpredicated).  A write updates rws_insn and rws_sum; a read
05323    updates neither.  Return 1 if this access creates a dependency with an
      earlier instruction in the same group, else 0.  */
05324 
05325 static int
05326 rws_access_regno (int regno, struct reg_flags flags, int pred)
05327 {
05328   int need_barrier = 0;
05329 
05330   gcc_assert (regno < NUM_REGS);
05331 
05332   if (! PR_REGNO_P (regno))  /* and/or markers are only kept for predicate regs.  */
05333     flags.is_and = flags.is_or = 0;
05334 
05335   if (flags.is_write)
05336     {
05337       int write_count;
05338 
05339       /* One insn writes same reg multiple times?  */
05340       gcc_assert (!rws_insn[regno].write_count);
05341 
05342       /* Update info for current instruction.  */
05343       rws_update (rws_insn, regno, flags, pred);
05344       write_count = rws_sum[regno].write_count;  /* Group state before this write.  */
05345 
05346       switch (write_count)
05347   {
05348   case 0:
05349     /* The register has not been written yet.  */
05350     rws_update (rws_sum, regno, flags, pred);
05351     break;
05352 
05353   case 1:
05354     /* The register has been written via a predicate.  If this is
05355        not a complementary predicate, then we need a barrier, unless
      both writes are and-type or both are or-type.  */
05356     /* ??? This assumes that P and P+1 are always complementary
05357        predicates for P even.  */
05358     if (flags.is_and && rws_sum[regno].written_by_and)
05359       ;
05360     else if (flags.is_or && rws_sum[regno].written_by_or)
05361       ;
05362     else if ((rws_sum[regno].first_pred ^ 1) != pred)
05363       need_barrier = 1;
05364     rws_update (rws_sum, regno, flags, pred);
05365     break;
05366 
05367   case 2:
05368     /* The register has been unconditionally written already.  We
05369        need a barrier, unless both writes are and-type or both are
      or-type.  rws_update is not called, so write_count stays at 2.  */
05370     if (flags.is_and && rws_sum[regno].written_by_and)
05371       ;
05372     else if (flags.is_or && rws_sum[regno].written_by_or)
05373       ;
05374     else
05375       need_barrier = 1;
05376     rws_sum[regno].written_by_and = flags.is_and;
05377     rws_sum[regno].written_by_or = flags.is_or;
05378     break;
05379 
05380   default:
05381     gcc_unreachable ();
05382   }
05383     }
05384   else
05385     {
      /* Read access: only rws_sum is consulted, nothing is updated.  */
05386       if (flags.is_branch)
05387   {
05388     /* Branches have several RAW exceptions that allow to avoid
05389        barriers.  */
05390 
05391     if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
05392       /* RAW dependencies on branch regs are permissible as long
05393          as the writer is a non-branch instruction.  Since we
05394          never generate code that uses a branch register written
05395          by a branch instruction, handling this case is
05396          easy.  */
05397       return 0;
05398 
05399     if (REGNO_REG_CLASS (regno) == PR_REGS
05400         && ! rws_sum[regno].written_by_fp)
05401       /* The predicates of a branch are available within the
05402          same insn group as long as the predicate was written by
05403          something other than a floating-point instruction.  */
05404       return 0;
05405   }
05406 
      /* An and-type (or-type) use of a register last written by an
         and-type (or-type) compare needs no barrier.  */
05407       if (flags.is_and && rws_sum[regno].written_by_and)
05408   return 0;
05409       if (flags.is_or && rws_sum[regno].written_by_or)
05410   return 0;
05411 
05412       switch (rws_sum[regno].write_count)
05413   {
05414   case 0:
05415     /* The register has not been written yet.  */
05416     break;
05417 
05418   case 1:
05419     /* The register has been written via a predicate.  If this is
05420        not a complementary predicate, then we need a barrier.  */
05421     /* ??? This assumes that P and P+1 are always complementary
05422        predicates for P even.  */
05423     if ((rws_sum[regno].first_pred ^ 1) != pred)
05424       need_barrier = 1;
05425     break;
05426 
05427   case 2:
05428     /* The register has been unconditionally written already.  We
05429        need a barrier.  */
05430     need_barrier = 1;
05431     break;
05432 
05433   default:
05434     gcc_unreachable ();
05435   }
05436     }
05437 
05438   return need_barrier;
05439 }
05440 
05441 static int
05442 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
05443 {
05444   int regno = REGNO (reg);
05445   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
05446 
05447   if (n == 1)
05448     return rws_access_regno (regno, flags, pred);
05449   else
05450     {
05451       int need_barrier = 0;
05452       while (--n >= 0)
05453   need_barrier |= rws_access_regno (regno + n, flags, pred);
05454       return need_barrier;
05455     }
05456 }
05457 
05458 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
05459    the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
05460 
05461 static void
05462 update_set_flags (rtx x, struct reg_flags *pflags)
05463 {
05464   rtx src = SET_SRC (x);
05465 
05466   switch (GET_CODE (src))
05467     {
05468     case CALL:
05469       return;
05470 
05471     case IF_THEN_ELSE:
05472       /* There are four cases here:
05473    (1) The destination is (pc), in which case this is a branch,
05474    nothing here applies.
05475    (2) The destination is ar.lc, in which case this is a
05476    doloop_end_internal,
05477    (3) The destination is an fp register, in which case this is
05478    an fselect instruction.
05479    (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case 
05480    this is a check load.
05481    In all cases, nothing we do in this function applies.  */
05482       return;
05483 
05484     default:
05485       if (COMPARISON_P (src)
05486     && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
05487   /* Set pflags->is_fp to 1 so that we know we're dealing
05488      with a floating point comparison when processing the
05489      destination of the SET.  */
05490   pflags->is_fp = 1;
05491 
05492       /* Discover if this is a parallel comparison.  We only handle
05493    and.orcm and or.andcm at present, since we must retain a
05494    strict inverse on the predicate pair.  */
05495       else if (GET_CODE (src) == AND)
05496   pflags->is_and = 1;
05497       else if (GET_CODE (src) == IOR)
05498   pflags->is_or = 1;
05499 
05500       break;
05501     }
05502 }
05503 
05504 /* Subroutine of rtx_needs_barrier; this function determines whether the
05505    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
05506    are as in rtx_needs_barrier.  COND is an rtx that holds the condition
05507    for this insn.  */
05508 
05509 static int
05510 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
05511 {
05512   int need_barrier = 0;
05513   rtx dst;
05514   rtx src = SET_SRC (x);
05515 
05516   if (GET_CODE (src) == CALL)
05517     /* We don't need to worry about the result registers that
05518        get written by subroutine call.  */
05519     return rtx_needs_barrier (src, flags, pred);
05520   else if (SET_DEST (x) == pc_rtx)
05521     {
05522       /* X is a conditional branch.  */
05523       /* ??? This seems redundant, as the caller sets this bit for
05524    all JUMP_INSNs.  */
05525       if (!ia64_spec_check_src_p (src))
05526   flags.is_branch = 1;
05527       return rtx_needs_barrier (src, flags, pred);
05528     }
05529 
05530   if (ia64_spec_check_src_p (src))
05531     /* Avoid checking one register twice (in condition 
05532        and in 'then' section) for ldc pattern.  */
05533     {
05534       gcc_assert (REG_P (XEXP (src, 2)));
05535       need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
05536       
05537       /* We process MEM below.  */
05538       src = XEXP (src, 1);
05539     }
05540 
05541   need_barrier |= rtx_needs_barrier (src, flags, pred);
05542 
05543   dst = SET_DEST (x);
05544   if (GET_CODE (dst) == ZERO_EXTRACT)
05545     {
05546       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
05547       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
05548     }
05549   return need_barrier;
05550 }
05551 
05552 /* Handle an access to rtx X of type FLAGS using predicate register
05553    PRED.  Return 1 if this access creates a dependency with an earlier
05554    instruction in the same group.  A null X yields 0.  Register state is
      updated as a side effect through rws_access_regno.  */
05555 
05556 static int
05557 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
05558 {
05559   int i, j;
05560   int is_complemented = 0;
05561   int need_barrier = 0;
05562   const char *format_ptr;
05563   struct reg_flags new_flags;
05564   rtx cond;
05565 
05566   if (! x)
05567     return 0;
05568 
05569   new_flags = flags;  /* Working copy; FLAGS itself stays as passed in.  */
05570 
05571   switch (GET_CODE (x))
05572     {
05573     case SET:
      /* Sources are processed before the destination is recorded as written.  */
05574       update_set_flags (x, &new_flags);
05575       need_barrier = set_src_needs_barrier (x, new_flags, pred);
05576       if (GET_CODE (SET_SRC (x)) != CALL)
05577   {
05578     new_flags.is_write = 1;
05579     need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
05580   }
05581       break;
05582 
05583     case CALL:
05584       new_flags.is_write = 0;
05585       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
05586 
05587       /* Avoid multiple register writes, in case this is a pattern with
05588    multiple CALL rtx.  This avoids the write_count assert in rws_access_regno.  */
05589       if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
05590   {
05591     new_flags.is_write = 1;
05592     need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
05593     need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
05594     need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
05595   }
05596       break;
05597 
05598     case COND_EXEC:
05599       /* X is a predicated instruction.  */
05600 
05601       cond = COND_EXEC_TEST (x);
05602       gcc_assert (!pred);  /* Nested predication is not expected.  */
05603       need_barrier = rtx_needs_barrier (cond, flags, 0);
05604 
05605       if (GET_CODE (cond) == EQ)
05606   is_complemented = 1;
05607       cond = XEXP (cond, 0);
05608       gcc_assert (GET_CODE (cond) == REG
05609       && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
05610       pred = REGNO (cond);
05611       if (is_complemented)
05612   ++pred;  /* An EQ test uses the next predicate register number.  */
05613 
05614       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
05615       return need_barrier;
05616 
05617     case CLOBBER:
05618     case USE:
05619       /* Clobber & use are for earlier compiler-phases only.  */
05620       break;
05621 
05622     case ASM_OPERANDS:
05623     case ASM_INPUT:
05624       /* We always emit stop bits for traditional asms.  We emit stop bits
05625    for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
05626       if (GET_CODE (x) != ASM_OPERANDS
05627     || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
05628   {
05629     /* Avoid writing the register multiple times if we have multiple
05630        asm outputs.  This avoids the write_count assert in rws_access_regno.  */
05631     if (! rws_insn[REG_VOLATILE].write_count)
05632       {
05633         new_flags.is_write = 1;
05634         rws_access_regno (REG_VOLATILE, new_flags, pred);
05635       }
05636     return 1;
05637   }
05638 
05639       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
05640    We cannot just fall through here since then we would be confused
05641    by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
05642    traditional asms unlike their normal usage.  */
05643 
05644       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
05645   if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
05646     need_barrier = 1;
05647       break;
05648 
05649     case PARALLEL:
      /* First pass: the reads of every element.  NOTE(review): new_flags is
         not reset between elements, so flags set by update_set_flags for one
         SET carry over to later ones.  */
05650       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
05651   {
05652     rtx pat = XVECEXP (x, 0, i);
05653     switch (GET_CODE (pat))
05654       {
05655       case SET:
05656         update_set_flags (pat, &new_flags);
05657         need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
05658         break;
05659 
05660       case USE:
05661       case CALL:
05662       case ASM_OPERANDS:
05663         need_barrier |= rtx_needs_barrier (pat, flags, pred);
05664         break;
05665 
05666       case CLOBBER:
05667       case RETURN:
05668         break;
05669 
05670       default:
05671         gcc_unreachable ();
05672       }
05673   }
      /* Second pass: the SET destinations, plus CLOBBER and RETURN.  */
05674       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
05675   {
05676     rtx pat = XVECEXP (x, 0, i);
05677     if (GET_CODE (pat) == SET)
05678       {
05679         if (GET_CODE (SET_SRC (pat)) != CALL)
05680     {
05681       new_flags.is_write = 1;
05682       need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
05683                  pred);
05684     }
05685       }
05686     else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
05687       need_barrier |= rtx_needs_barrier (pat, flags, pred);
05688   }
05689       break;
05690 
05691     case SUBREG:
05692       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
05693       break;
05694     case REG:
05695       if (REGNO (x) == AR_UNAT_REGNUM)
05696   {
      /* AR_UNAT_REGNUM is tracked as 64 separate slots; touch them all.  */
05697     for (i = 0; i < 64; ++i)
05698       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
05699   }
05700       else
05701   need_barrier = rws_access_reg (x, flags, pred);
05702       break;
05703 
05704     case MEM:
05705       /* Find the regs used in memory address computation.  */
05706       new_flags.is_write = 0;
05707       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
05708       break;
05709 
05710     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
05711     case SYMBOL_REF:  case LABEL_REF:     case CONST:
05712       break;
05713 
05714       /* Operators with side-effects.  */
05715     case POST_INC:    case POST_DEC:
05716       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
05717 
      /* The address register is both read and written.  */
05718       new_flags.is_write = 0;
05719       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
05720       new_flags.is_write = 1;
05721       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
05722       break;
05723 
05724     case POST_MODIFY:
05725       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
05726 
      /* Read the base and the modifier expression, then write the base.  */
05727       new_flags.is_write = 0;
05728       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
05729       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
05730       new_flags.is_write = 1;
05731       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
05732       break;
05733 
05734       /* Handle common unary and binary ops for efficiency.  */
05735     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
05736     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
05737     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
05738     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
05739     case NE:       case EQ:      case GE:      case GT:        case LE:
05740     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
05741       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
05742       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
05743       break;
05744 
05745     case NEG:      case NOT:          case SIGN_EXTEND:     case ZERO_EXTEND:
05746     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
05747     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
05748     case SQRT:     case FFS:    case POPCOUNT:
05749       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
05750       break;
05751 
05752     case VEC_SELECT:
05753       /* VEC_SELECT's second argument is a PARALLEL with integers that
05754    describe the elements selected.  On ia64, those integers are
05755    always constants.  Avoid walking the PARALLEL so that we don't
05756    get confused with "normal" parallels and then die.  */
05757       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
05758       break;
05759 
05760     case UNSPEC:
05761       switch (XINT (x, 1))
05762   {
      /* These unspecs have no operands that are scanned here.  */
05763   case UNSPEC_LTOFF_DTPMOD:
05764   case UNSPEC_LTOFF_DTPREL:
05765   case UNSPEC_DTPREL:
05766   case UNSPEC_LTOFF_TPREL:
05767   case UNSPEC_TPREL:
05768   case UNSPEC_PRED_REL_MUTEX:
05769   case UNSPEC_PIC_CALL:
05770         case UNSPEC_MF:
05771         case UNSPEC_FETCHADD_ACQ:
05772   case UNSPEC_BSP_VALUE:
05773   case UNSPEC_FLUSHRS:
05774   case UNSPEC_BUNDLE_SELECTOR:
05775           break;
05776 
05777   case UNSPEC_GR_SPILL:
05778   case UNSPEC_GR_RESTORE:
05779     {
05780       HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
05781       HOST_WIDE_INT bit = (offset >> 3) & 63;  /* Slot (offset / 8) mod 64.  */
05782 
05783       need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05784       new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);  /* Spill writes the slot, restore reads it.  */
05785       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
05786                 new_flags, pred);
05787       break;
05788     }
05789 
      /* Unspecs for which only operand 0 is scanned.  */
05790   case UNSPEC_FR_SPILL:
05791   case UNSPEC_FR_RESTORE:
05792   case UNSPEC_GETF_EXP:
05793   case UNSPEC_SETF_EXP:
05794         case UNSPEC_ADDP4:
05795   case UNSPEC_FR_SQRT_RECIP_APPROX:
05796   case UNSPEC_LDA:
05797   case UNSPEC_LDS:
05798   case UNSPEC_LDSA:
05799   case UNSPEC_CHKACLR:
05800         case UNSPEC_CHKS:
05801     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05802     break;
05803 
      /* Unspecs for which operands 0 and 1 are scanned.  */
05804   case UNSPEC_FR_RECIP_APPROX:
05805   case UNSPEC_SHRP:
05806   case UNSPEC_COPYSIGN:
05807     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05808     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
05809     break;
05810 
05811         case UNSPEC_CMPXCHG_ACQ:
      /* Only operands 1 and 2 are scanned; operand 0 is skipped.  */
05812     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
05813     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
05814     break;
05815 
05816   default:
05817     gcc_unreachable ();
05818   }
05819       break;
05820 
05821     case UNSPEC_VOLATILE:
05822       switch (XINT (x, 1))
05823   {
05824   case UNSPECV_ALLOC:
05825     /* Alloc must always be the first instruction of a group.
05826        We force this by always returning true.  */
05827     /* ??? We might get better scheduling if we explicitly check for
05828        input/local/output register dependencies, and modify the
05829        scheduler so that alloc is always reordered to the start of
05830        the current group.  We could then eliminate all of the
05831        first_instruction code.  */
      /* The calls below only record state; their results are ignored.  */
05832     rws_access_regno (AR_PFS_REGNUM, flags, pred);
05833 
05834     new_flags.is_write = 1;
05835     rws_access_regno (REG_AR_CFM, new_flags, pred);
05836     return 1;
05837 
05838   case UNSPECV_SET_BSP:
05839     need_barrier = 1;
05840           break;
05841 
05842   case UNSPECV_BLOCKAGE:
05843   case UNSPECV_INSN_GROUP_BARRIER:
05844   case UNSPECV_BREAK:
05845   case UNSPECV_PSAC_ALL:
05846   case UNSPECV_PSAC_NORMAL:
05847     return 0;
05848 
05849   default:
05850     gcc_unreachable ();
05851   }
05852       break;
05853 
05854     case RETURN:
05855       new_flags.is_write = 0;  /* NOTE(review): dead store; the two reads below pass FLAGS.  */
05856       need_barrier  = rws_access_regno (REG_RP, flags, pred);
05857       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
05858 
05859       new_flags.is_write = 1;
05860       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
05861       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
05862       break;
05863 
05864     default:
      /* Any other code: walk the operands according to the rtx format string.  */
05865       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
05866       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
05867   switch (format_ptr[i])
05868     {
05869     case '0': /* unused field */
05870     case 'i': /* integer */
05871     case 'n': /* note */
05872     case 'w': /* wide integer */
05873     case 's': /* pointer to string */
05874     case 'S': /* optional pointer to string */
05875       break;
05876 
05877     case 'e':  /* sub-expression */
05878       if (rtx_needs_barrier (XEXP (x, i), flags, pred))
05879         need_barrier = 1;
05880       break;
05881 
05882     case 'E':  /* vector of sub-expressions */
05883       for (j = XVECLEN (x, i) - 1; j >= 0; --j)
05884         if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
05885     need_barrier = 1;
05886       break;
05887 
05888     default:
05889       gcc_unreachable ();
05890     }
05891       break;
05892     }
05893   return need_barrier;
05894 }
05895 
05896 /* Clear out the state for group_barrier_needed at the start of a
05897    sequence of insns.  */
05898 
05899 static void
05900 init_insn_group_barriers (void)
05901 {
05902   memset (rws_sum, 0, sizeof (rws_sum));
05903   first_instruction = 1;
05904 }
05905 
05906 /* Given the current state, determine whether a group barrier (a stop bit) is
05907    necessary before INSN.  Return nonzero if so.  This modifies the state to
05908    include the effects of INSN as a side-effect (rws_sum, rws_insn and
      first_instruction may all change).  */
05909 
05910 static int
05911 group_barrier_needed (rtx insn)
05912 {
05913   rtx pat;
05914   int need_barrier = 0;
05915   struct reg_flags flags;
05916 
05917   memset (&flags, 0, sizeof (flags));
05918   switch (GET_CODE (insn))
05919     {
05920     case NOTE:
05921       break;
05922 
05923     case BARRIER:
05924       /* A barrier doesn't imply an instruction group boundary.  */
05925       break;
05926 
05927     case CODE_LABEL:
05928       memset (rws_insn, 0, sizeof (rws_insn));
05929       return 1;  /* A label always requests a barrier.  */
05930 
05931     case CALL_INSN:
05932       flags.is_branch = 1;
05933       flags.is_sibcall = SIBLING_CALL_P (insn);
05934       memset (rws_insn, 0, sizeof (rws_insn));
05935 
05936       /* Don't bundle a call following another call.  */
05937       if ((pat = prev_active_insn (insn))  /* PAT is used as a scratch here.  */
05938     && GET_CODE (pat) == CALL_INSN)
05939   {
05940     need_barrier = 1;
05941     break;
05942   }
05943 
05944       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
05945       break;
05946 
05947     case JUMP_INSN:
05948       if (!ia64_spec_check_p (insn))
05949   flags.is_branch = 1;
05950 
05951       /* Don't bundle a jump following a call.  */
05952       if ((pat = prev_active_insn (insn))
05953     && GET_CODE (pat) == CALL_INSN)
05954   {
05955     need_barrier = 1;
05956     break;
05957   }
05958       /* FALLTHRU */
05959 
05960     case INSN:
05961       if (GET_CODE (PATTERN (insn)) == USE
05962     || GET_CODE (PATTERN (insn)) == CLOBBER)
05963   /* Don't care about USE and CLOBBER "insns"---those are used to
05964      indicate to the optimizer that it shouldn't get rid of
05965      certain operations.  */
05966   break;
05967 
05968       pat = PATTERN (insn);
05969 
05970       /* Ug.  Hack hacks hacked elsewhere.  */
05971       switch (recog_memoized (insn))
05972   {
05973     /* We play dependency tricks with the epilogue in order
05974        to get proper schedules.  Undo this for dv analysis.  */
05975   case CODE_FOR_epilogue_deallocate_stack:
05976   case CODE_FOR_prologue_allocate_stack:
05977     pat = XVECEXP (pat, 0, 0);  /* Analyze only the first element.  */
05978     break;
05979 
05980     /* The pattern we use for br.cloop confuses the code above.
05981        The second element of the vector is representative.  */
05982   case CODE_FOR_doloop_end_internal:
05983     pat = XVECEXP (pat, 0, 1);
05984     break;
05985 
05986     /* Doesn't generate code.  Note that this early return also skips
       the first_instruction handling at the end of the function.  */
05987   case CODE_FOR_pred_rel_mutex:
05988   case CODE_FOR_prologue_use:
05989     return 0;
05990 
05991   default:
05992     break;
05993   }
05994 
05995       memset (rws_insn, 0, sizeof (rws_insn));
05996       need_barrier = rtx_needs_barrier (pat, flags, 0);
05997 
05998       /* Check to see if the previous instruction was a volatile
05999    asm.  This is a read of the REG_VOLATILE tracking slot.  */
06000       if (! need_barrier)
06001   need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
06002       break;
06003 
06004     default:
06005       gcc_unreachable ();
06006     }
06007 
06008   if (first_instruction && INSN_P (insn)
06009       && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
06010       && GET_CODE (PATTERN (insn)) != USE
06011       && GET_CODE (PATTERN (insn)) != CLOBBER)
06012     {
06013       need_barrier = 0;  /* First real insn after a stop bit: no new stop needed.  */
06014       first_instruction = 0;
06015     }
06016 
06017   return need_barrier;
06018 }
06019 
06020 /* Like group_barrier_needed, but do not clobber the current state.  */
06021 
06022 static int
06023 safe_group_barrier_needed (rtx insn)
06024 {
06025   struct reg_write_state rws_saved[NUM_REGS];
06026   int saved_first_instruction;
06027   int t;
06028 
06029   memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
06030   saved_first_instruction = first_instruction;
06031 
06032   t = group_barrier_needed (insn);
06033 
06034   memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
06035   first_instruction = saved_first_instruction;
06036 
06037   return t;
06038 }
06039 
06040 /* Scan the current function and insert stop bits as necessary to
06041    eliminate dependencies.  This function assumes that a final
06042    instruction scheduling pass has been run which has already
06043    inserted most of the necessary stop bits.  This function only
06044    inserts new ones at basic block boundaries, since these are
06045    invisible to the scheduler.  */
06046 
06047 static void
06048 emit_insn_group_barriers (FILE *dump)
06049 {
06050   rtx insn;
06051   rtx last_label = 0;
06052   int insns_since_last_label = 0;
06053 
06054   init_insn_group_barriers ();
06055 
06056   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
06057     {
06058       if (GET_CODE (insn) == CODE_LABEL)
06059   {
06060     if (insns_since_last_label)
06061       last_label = insn;
06062     insns_since_last_label = 0;
06063   }
06064       else if (GET_CODE (insn) == NOTE
06065          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
06066   {
06067     if (insns_since_last_label)
06068       last_label = insn;
06069     insns_since_last_label = 0;
06070   }
06071       else if (GET_CODE (insn) == INSN
06072          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
06073          && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
06074   {
06075     init_insn_group_barriers ();
06076     last_label = 0;
06077   }
06078       else if (INSN_P (insn))
06079   {
06080     insns_since_last_label = 1;
06081 
06082     if (group_barrier_needed (insn))
06083       {
06084         if (last_label)
06085     {
06086       if (dump)
06087         fprintf (dump, "Emitting stop before label %d\n",
06088            INSN_UID (last_label));
06089       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
06090       insn = last_label;
06091 
06092       init_insn_group_barriers ();
06093       last_label = 0;
06094     }
06095       }
06096   }
06097     }
06098 }
06099 
06100 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
06101    This function has to emit all necessary group barriers.  */
06102 
06103 static void
06104 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
06105 {
06106   rtx insn;
06107 
06108   init_insn_group_barriers ();
06109 
06110   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
06111     {
06112       if (GET_CODE (insn) == BARRIER)
06113   {
06114     rtx last = prev_active_insn (insn);
06115 
06116     if (! last)
06117       continue;
06118     if (GET_CODE (last) == JUMP_INSN
06119         && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
06120       last = prev_active_insn (last);
06121     if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
06122       emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
06123 
06124     init_insn_group_barriers ();
06125   }
06126       else if (INSN_P (insn))
06127   {
06128     if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
06129       init_insn_group_barriers ();
06130     else if (group_barrier_needed (insn))
06131       {
06132         emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
06133         init_insn_group_barriers ();
06134         group_barrier_needed (insn);
06135       }
06136   }
06137     }
06138 }
06139 
06140 
06141 
06142 /* Instruction scheduling support.  */
06143 
06144 #define NR_BUNDLES 10  /* Number of entries in bundle_name.  */
06145 
06146 /* Names of all available bundles, indexed by bundle number (see get_bundle_name).  */
06147 
06148 static const char *bundle_name [NR_BUNDLES] =
06149 {
06150   ".mii",
06151   ".mmi",
06152   ".mfi",
06153   ".mmf",
06154 #if NR_BUNDLES == 10  /* These two entries exist only in the 10-entry table.  */
06155   ".bbb",
06156   ".mbb",
06157 #endif
06158   ".mib",
06159   ".mmb",
06160   ".mfb",
06161   ".mlx"
06162 };
06163 
06164 /* Nonzero if we should insert stop bits into the schedule.  */
06165 
06166 int ia64_final_schedule = 0;
06167 
06168 /* Codes of the corresponding queried units (one per bundle name above;
   NOTE(review): the 0/1 prefix presumably selects a bundle slot -- confirm):  */
06169 
06170 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
06171 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
06172 
06173 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
06174 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
06175 
06176 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
06177 
06178 /* The value of the following variable is an insn group barrier.  */
06179 
06180 static rtx dfa_stop_insn;
06181 
06182 /* The value of the following variable is the last issued insn.  */
06183 
06184 static rtx last_scheduled_insn;
06185 
06186 /* The value of the following variable is the size of the DFA state.  */
06187 
06188 static size_t dfa_state_size;
06189 
06190 /* The value of the following variable is a pointer to a DFA state used
06191    as a temporary.  */
06192 
06193 static state_t temp_dfa_state = NULL;
06194 
06195 /* The value of the following variable is the DFA state after issuing
06196    the last insn.  */
06197 
06198 static state_t prev_cycle_state = NULL;
06199 
06200 /* The elements of the following array are TRUE if stop bits must be
06201    added before the corresponding insn.  */
06202 
06203 static char *stops_p = NULL;
06204 
06205 /* The elements of the following array are ZERO for non-speculative
06206    instructions and hold the corresponding speculation check number for
06207    speculative instructions.  */
06208 static int *spec_check_no = NULL;
06209 
06210 /* Size of spec_check_no array.  */
06211 static int max_uid = 0;
06212 
06213 /* The following variable is used to set up an array mentioned above
   (NOTE(review): presumably stops_p -- confirm against its users).  */
06214 
06215 static int stop_before_p = 0;
06216 
06217 /* The value of the following variable is the length of the arrays
06218    `clocks' and `add_cycles'. */
06219 
06220 static int clocks_length;
06221 
06222 /* The elements of the following array are the cycles on which the
06223    corresponding insn will be issued.  The array is used only for
06224    Itanium1.  */
06225 
06226 static int *clocks;
06227 
06228 /* The elements of the following array are the numbers of cycles that should
06229    be added to improve insn scheduling for MM_insns for Itanium1.  */
06230 
06231 static int *add_cycles;
06232 
06233 /* The value of the following variable is the number of data speculations in progress.  */
06234 static int pending_data_specs = 0;
06235 
06236 static rtx ia64_single_set (rtx);
06237 static void ia64_emit_insn_before (rtx, rtx);
06238 
06239 /* Map a bundle number to its pseudo-op.  */
06240 
06241 const char *
06242 get_bundle_name (int b)
06243 {
06244   return bundle_name[b];
06245 }
06246 
06247 
/* Return the maximum number of instructions a cpu can issue.  The value
   is the same constant, 6, on every call.  */

static int
ia64_issue_rate (void)
{
  enum { MAX_ISSUE = 6 };

  return MAX_ISSUE;
}
06255 
06256 /* Helper function - like single_set, but look inside COND_EXEC.  */
06257 
06258 static rtx
06259 ia64_single_set (rtx insn)
06260 {
06261   rtx x = PATTERN (insn), ret;
06262   if (GET_CODE (x) == COND_EXEC)
06263     x = COND_EXEC_CODE (x);
06264   if (GET_CODE (x) == SET)
06265     return x;
06266 
06267   /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
06268      Although they are not classical single set, the second set is there just
06269      to protect it from moving past FP-relative stack accesses.  */
06270   switch (recog_memoized (insn))
06271     {
06272     case CODE_FOR_prologue_allocate_stack:
06273     case CODE_FOR_epilogue_deallocate_stack:
06274       ret = XVECEXP (x, 0, 0);
06275       break;
06276 
06277     default:
06278       ret = single_set_2 (insn, x);
06279       break;
06280     }
06281 
06282   return ret;
06283 }
06284 
06285 /* Adjust the cost of a scheduling dependency.
06286    Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
06287    COST is the current cost.  */
06288 
06289 static int
06290 ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
06291 {
06292   enum reg_note dep_type = (enum reg_note) dep_type1;
06293   enum attr_itanium_class dep_class;
06294   enum attr_itanium_class insn_class;
06295 
06296   if (dep_type != REG_DEP_OUTPUT)
06297     return cost;
06298 
06299   insn_class = ia64_safe_itanium_class (insn);
06300   dep_class = ia64_safe_itanium_class (dep_insn);
06301   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
06302       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
06303     return 0;
06304 
06305   return cost;
06306 }
06307 
06308 /* Like emit_insn_before, but skip cycle_display notes.
06309    ??? When cycle display notes are implemented, update this.  */
06310 
06311 static void
06312 ia64_emit_insn_before (rtx insn, rtx before)
06313 {
06314   emit_insn_before (insn, before);
06315 }
06316 
06317 /* The following function marks insns who produce addresses for load
06318    and store insns.  Such insns will be placed into M slots because it
06319    decrease latency time for Itanium1 (see function
06320    `ia64_produce_address_p' and the DFA descriptions).  */
06321 
06322 static void
06323 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
06324 {
06325   rtx insn, link, next, next_tail;
06326 
06327   /* Before reload, which_alternative is not set, which means that
06328      ia64_safe_itanium_class will produce wrong results for (at least)
06329      move instructions.  */
06330   if (!reload_completed)
06331     return;
06332 
06333   next_tail = NEXT_INSN (tail);
06334   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
06335     if (INSN_P (insn))
06336       insn->call = 0;
06337   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
06338     if (INSN_P (insn)
06339   && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
06340       {
06341   for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
06342     {
06343       enum attr_itanium_class c;
06344 
06345       if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
06346         continue;
06347       next = XEXP (link, 0);
06348       c = ia64_safe_itanium_class (next);
06349       if ((c == ITANIUM_CLASS_ST
06350      || c == ITANIUM_CLASS_STF)
06351     && ia64_st_address_bypass_p (insn, next))
06352         break;
06353       else if ((c == ITANIUM_CLASS_LD
06354           || c == ITANIUM_CLASS_FLD
06355           || c == ITANIUM_CLASS_FLDP)
06356          && ia64_ld_address_bypass_p (insn, next))
06357         break;
06358     }
06359   insn->call = link != 0;
06360       }
06361 }
06362 
06363 /* We're beginning a new block.  Initialize data structures as necessary.  */
06364 
06365 static void
06366 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
06367      int sched_verbose ATTRIBUTE_UNUSED,
06368      int max_ready ATTRIBUTE_UNUSED)
06369 {
06370 #ifdef ENABLE_CHECKING
06371   rtx insn;
06372 
06373   if (reload_completed)
06374     for (insn = NEXT_INSN (current_sched_info->prev_head);
06375    insn != current_sched_info->next_tail;
06376    insn = NEXT_INSN (insn))
06377       gcc_assert (!SCHED_GROUP_P (insn));
06378 #endif
06379   last_scheduled_insn = NULL_RTX;
06380   init_insn_group_barriers ();
06381 }
06382 
06383 /* We're beginning a scheduling pass.  Check assertion.  */
06384 
06385 static void
06386 ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
06387                         int sched_verbose ATTRIBUTE_UNUSED,
06388                         int max_ready ATTRIBUTE_UNUSED)
06389 {  
06390   gcc_assert (!pending_data_specs);
06391 }
06392 
06393 /* Scheduling pass is now finished.  Free/reset static variable.  */
06394 static void
06395 ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
06396         int sched_verbose ATTRIBUTE_UNUSED)
06397 {
06398   free (spec_check_no);
06399   spec_check_no = 0;
06400   max_uid = 0;
06401 }
06402 
06403 /* We are about to being issuing insns for this clock cycle.
06404    Override the default sort algorithm to better slot instructions.  */
06405 
06406 static int
06407 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
06408       int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
06409       int reorder_type)
06410 {
06411   int n_asms;
06412   int n_ready = *pn_ready;
06413   rtx *e_ready = ready + n_ready;
06414   rtx *insnp;
06415 
06416   if (sched_verbose)
06417     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
06418 
06419   if (reorder_type == 0)
06420     {
06421       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
06422       n_asms = 0;
06423       for (insnp = ready; insnp < e_ready; insnp++)
06424   if (insnp < e_ready)
06425     {
06426       rtx insn = *insnp;
06427       enum attr_type t = ia64_safe_type (insn);
06428       if (t == TYPE_UNKNOWN)
06429         {
06430     if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06431         || asm_noperands (PATTERN (insn)) >= 0)
06432       {
06433         rtx lowest = ready[n_asms];
06434         ready[n_asms] = insn;
06435         *insnp = lowest;
06436         n_asms++;
06437       }
06438     else
06439       {
06440         rtx highest = ready[n_ready - 1];
06441         ready[n_ready - 1] = insn;
06442         *insnp = highest;
06443         return 1;
06444       }
06445         }
06446     }
06447 
06448       if (n_asms < n_ready)
06449   {
06450     /* Some normal insns to process.  Skip the asms.  */
06451     ready += n_asms;
06452     n_ready -= n_asms;
06453   }
06454       else if (n_ready > 0)
06455   return 1;
06456     }
06457 
06458   if (ia64_final_schedule)
06459     {
06460       int deleted = 0;
06461       int nr_need_stop = 0;
06462 
06463       for (insnp = ready; insnp < e_ready; insnp++)
06464   if (safe_group_barrier_needed (*insnp))
06465     nr_need_stop++;
06466 
06467       if (reorder_type == 1 && n_ready == nr_need_stop)
06468   return 0;
06469       if (reorder_type == 0)
06470   return 1;
06471       insnp = e_ready;
06472       /* Move down everything that needs a stop bit, preserving
06473    relative order.  */
06474       while (insnp-- > ready + deleted)
06475   while (insnp >= ready + deleted)
06476     {
06477       rtx insn = *insnp;
06478       if (! safe_group_barrier_needed (insn))
06479         break;
06480       memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
06481       *ready = insn;
06482       deleted++;
06483     }
06484       n_ready -= deleted;
06485       ready += deleted;
06486     }
06487 
06488   return 1;
06489 }
06490 
06491 /* We are about to being issuing insns for this clock cycle.  Override
06492    the default sort algorithm to better slot instructions.  */
06493 
06494 static int
06495 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
06496         int clock_var)
06497 {
06498   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
06499          pn_ready, clock_var, 0);
06500 }
06501 
06502 /* Like ia64_sched_reorder, but called after issuing each insn.
06503    Override the default sort algorithm to better slot instructions.  */
06504 
06505 static int
06506 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
06507          int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
06508          int *pn_ready, int clock_var)
06509 {
06510   if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
06511     clocks [INSN_UID (last_scheduled_insn)] = clock_var;
06512   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
06513          clock_var, 1);
06514 }
06515 
06516 /* We are about to issue INSN.  Return the number of insns left on the
06517    ready queue that can be issued this cycle.  */
06518 
06519 static int
06520 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
06521          int sched_verbose ATTRIBUTE_UNUSED,
06522          rtx insn ATTRIBUTE_UNUSED,
06523          int can_issue_more ATTRIBUTE_UNUSED)
06524 {
06525   if (current_sched_info->flags & DO_SPECULATION)
06526     /* Modulo scheduling does not extend h_i_d when emitting
06527        new instructions.  Deal with it.  */
06528     {
06529       if (DONE_SPEC (insn) & BEGIN_DATA)
06530   pending_data_specs++;
06531       if (CHECK_SPEC (insn) & BEGIN_DATA)
06532   pending_data_specs--;
06533     }
06534 
06535   last_scheduled_insn = insn;
06536   memcpy (prev_cycle_state, curr_state, dfa_state_size);
06537   if (reload_completed)
06538     {
06539       int needed = group_barrier_needed (insn);
06540       
06541       gcc_assert (!needed);
06542       if (GET_CODE (insn) == CALL_INSN)
06543   init_insn_group_barriers ();
06544       stops_p [INSN_UID (insn)] = stop_before_p;
06545       stop_before_p = 0;
06546     }
06547   return 1;
06548 }
06549 
06550 /* We are choosing insn from the ready queue.  Return nonzero if INSN
06551    can be chosen.  */
06552 
06553 static int
06554 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
06555 {
06556   gcc_assert (insn  && INSN_P (insn));
06557   return ((!reload_completed
06558      || !safe_group_barrier_needed (insn))
06559     && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
06560 }
06561 
06562 /* We are choosing insn from the ready queue.  Return nonzero if INSN
06563    can be chosen.  */
06564 
06565 static bool
06566 ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
06567 {
06568   gcc_assert (insn  && INSN_P (insn));
06569   /* Size of ALAT is 32.  As far as we perform conservative data speculation,
06570      we keep ALAT half-empty.  */
06571   return (pending_data_specs < 16
06572     || !(TODO_SPEC (insn) & BEGIN_DATA));
06573 }
06574 
/* Pseudo-insn used by the DFA insn scheduler to change the DFA state
   when the simulated clock is increased.  */

static rtx dfa_pre_cycle_insn;
06580 
06581 /* We are about to being issuing INSN.  Return nonzero if we cannot
06582    issue it on given cycle CLOCK and return zero if we should not sort
06583    the ready queue on the next clock start.  */
06584 
06585 static int
06586 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
06587         int clock, int *sort_p)
06588 {
06589   int setup_clocks_p = FALSE;
06590 
06591   gcc_assert (insn && INSN_P (insn));
06592   if ((reload_completed && safe_group_barrier_needed (insn))
06593       || (last_scheduled_insn
06594     && (GET_CODE (last_scheduled_insn) == CALL_INSN
06595         || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
06596         || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
06597     {
06598       init_insn_group_barriers ();
06599       if (verbose && dump)
06600   fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
06601      last_clock == clock ? " + cycle advance" : "");
06602       stop_before_p = 1;
06603       if (last_clock == clock)
06604   {
06605     state_transition (curr_state, dfa_stop_insn);
06606     if (TARGET_EARLY_STOP_BITS)
06607       *sort_p = (last_scheduled_insn == NULL_RTX
06608            || GET_CODE (last_scheduled_insn) != CALL_INSN);
06609     else
06610       *sort_p = 0;
06611     return 1;
06612   }
06613       else if (reload_completed)
06614   setup_clocks_p = TRUE;
06615       if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
06616     || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
06617   state_reset (curr_state);
06618       else
06619   {
06620     memcpy (curr_state, prev_cycle_state, dfa_state_size);
06621     state_transition (curr_state, dfa_stop_insn);
06622     state_transition (curr_state, dfa_pre_cycle_insn);
06623     state_transition (curr_state, NULL);
06624   }
06625     }
06626   else if (reload_completed)
06627     setup_clocks_p = TRUE;
06628   if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
06629       && GET_CODE (PATTERN (insn)) != ASM_INPUT
06630       && asm_noperands (PATTERN (insn)) < 0)
06631     {
06632       enum attr_itanium_class c = ia64_safe_itanium_class (insn);
06633 
06634       if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
06635   {
06636     rtx link;
06637     int d = -1;
06638 
06639     for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
06640       if (REG_NOTE_KIND (link) == 0)
06641         {
06642     enum attr_itanium_class dep_class;
06643     rtx dep_insn = XEXP (link, 0);
06644 
06645     dep_class = ia64_safe_itanium_class (dep_insn);
06646     if ((dep_class == ITANIUM_CLASS_MMMUL
06647          || dep_class == ITANIUM_CLASS_MMSHF)
06648         && last_clock - clocks [INSN_UID (dep_insn)] < 4
06649         && (d < 0
06650       || last_clock - clocks [INSN_UID (dep_insn)] < d))
06651       d = last_clock - clocks [INSN_UID (dep_insn)];
06652         }
06653     if (d >= 0)
06654       add_cycles [INSN_UID (insn)] = 3 - d;
06655   }
06656     }
06657   return 0;
06658 }
06659 
06660 /* Implement targetm.sched.h_i_d_extended hook.
06661    Extend internal data structures.  */
06662 static void
06663 ia64_h_i_d_extended (void)
06664 {
06665   if (current_sched_info->flags & DO_SPECULATION)
06666     {
06667       int new_max_uid = get_max_uid () + 1;
06668 
06669       spec_check_no = xrecalloc (spec_check_no, new_max_uid,
06670          max_uid, sizeof (*spec_check_no));
06671       max_uid = new_max_uid;
06672     }
06673 
06674   if (stops_p != NULL) 
06675     {
06676       int new_clocks_length = get_max_uid () + 1;
06677       
06678       stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
06679       
06680       if (ia64_tune == PROCESSOR_ITANIUM)
06681   {
06682     clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
06683             sizeof (int));
06684     add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
06685           sizeof (int));
06686   }
06687       
06688       clocks_length = new_clocks_length;
06689     }
06690 }
06691 
/* Constants that help mapping 'enum machine_mode' to int (see
   ia64_mode_to_int).  */
enum SPEC_MODES
{
  /* Returned for a mode that has no index.  */
  SPEC_MODE_INVALID = -1,

  /* Smallest and largest mode index.  */
  SPEC_MODE_FIRST = 0,
  SPEC_MODE_LAST = 8,

  /* Range of the mode indexes accepted together with ZERO_EXTEND.  */
  SPEC_MODE_FOR_EXTEND_FIRST = 1,
  SPEC_MODE_FOR_EXTEND_LAST = 3
};
06701 
06702 /* Return index of the MODE.  */
06703 static int
06704 ia64_mode_to_int (enum machine_mode mode)
06705 {
06706   switch (mode)
06707     {
06708     case BImode: return 0; /* SPEC_MODE_FIRST  */
06709     case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
06710     case HImode: return 2;
06711     case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
06712     case DImode: return 4;
06713     case SFmode: return 5;
06714     case DFmode: return 6;
06715     case XFmode: return 7;
06716     case TImode:
06717       /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
06718    mentioned in itanium[12].md.  Predicate fp_register_operand also
06719    needs to be defined.  Bottom line: better disable for now.  */
06720       return SPEC_MODE_INVALID;
06721     default:     return SPEC_MODE_INVALID;
06722     }
06723 }
06724 
06725 /* Provide information about speculation capabilities.  */
06726 static void
06727 ia64_set_sched_flags (spec_info_t spec_info)
06728 {
06729   unsigned int *flags = &(current_sched_info->flags);
06730 
06731   if (*flags & SCHED_RGN
06732       || *flags & SCHED_EBB)  
06733     {
06734       int mask = 0;
06735 
06736       if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
06737     || (mflag_sched_ar_data_spec && reload_completed))
06738   {
06739     mask |= BEGIN_DATA;
06740     
06741     if ((mflag_sched_br_in_data_spec && !reload_completed)
06742         || (mflag_sched_ar_in_data_spec && reload_completed))
06743       mask |= BE_IN_DATA;
06744   }
06745       
06746       if (mflag_sched_control_spec)
06747   {
06748     mask |= BEGIN_CONTROL;
06749     
06750     if (mflag_sched_in_control_spec)
06751       mask |= BE_IN_CONTROL;
06752   }
06753 
06754       gcc_assert (*flags & USE_GLAT);
06755 
06756       if (mask)
06757   {
06758     *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;
06759     
06760     spec_info->mask = mask;
06761     spec_info->flags = 0;
06762       
06763     if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
06764       spec_info->flags |= PREFER_NON_DATA_SPEC;
06765 
06766     if ((mask & CONTROL_SPEC)
06767         && mflag_sched_prefer_non_control_spec_insns)
06768       spec_info->flags |= PREFER_NON_CONTROL_SPEC;
06769 
06770     if (mflag_sched_spec_verbose)
06771       {
06772         if (sched_verbose >= 1)
06773     spec_info->dump = sched_dump;
06774         else
06775     spec_info->dump = stderr;
06776       }
06777     else
06778       spec_info->dump = 0;
06779     
06780     if (mflag_sched_count_spec_in_critical_path)
06781       spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
06782   }
06783     }
06784 }
06785 
06786 /* Implement targetm.sched.speculate_insn hook.
06787    Check if the INSN can be TS speculative.
06788    If 'no' - return -1.
06789    If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
06790    If current pattern of the INSN already provides TS speculation, return 0.  */
06791 static int
06792 ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
06793 {  
06794   rtx pat, reg, mem, mem_reg;
06795   int mode_no, gen_p = 1;
06796   bool extend_p;
06797   
06798   gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
06799            
06800   pat = PATTERN (insn);
06801 
06802   if (GET_CODE (pat) == COND_EXEC)
06803     pat = COND_EXEC_CODE (pat);
06804 
06805   /* This should be a SET ...  */
06806   if (GET_CODE (pat) != SET)
06807     return -1;
06808 
06809   reg = SET_DEST (pat);
06810   /* ... to the general/fp register ...  */
06811   if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
06812     return -1;
06813 
06814   /* ... from the mem ...  */
06815   mem = SET_SRC (pat);
06816 
06817   /* ... that can, possibly, be a zero_extend ...  */
06818   if (GET_CODE (mem) == ZERO_EXTEND)
06819     {
06820       mem = XEXP (mem, 0);
06821       extend_p = true;      
06822     }
06823   else
06824     extend_p = false;
06825 
06826   /* ... or a speculative load.  */
06827   if (GET_CODE (mem) == UNSPEC)
06828     {
06829       int code;
06830       
06831       code = XINT (mem, 1);
06832       if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
06833   return -1;
06834 
06835       if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
06836     || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
06837     || code == UNSPEC_LDSA)
06838   gen_p = 0;
06839 
06840       mem = XVECEXP (mem, 0, 0);
06841       gcc_assert (MEM_P (mem));
06842     }
06843 
06844   /* Source should be a mem ...  */
06845   if (!MEM_P (mem))
06846     return -1;
06847 
06848   /* ... addressed by a register.  */
06849   mem_reg = XEXP (mem, 0);
06850   if (!REG_P (mem_reg))
06851     return -1;
06852      
06853   /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
06854      will always be DImode.  */
06855   mode_no = ia64_mode_to_int (GET_MODE (mem));
06856   
06857   if (mode_no == SPEC_MODE_INVALID
06858       || (extend_p
06859     && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
06860          && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
06861     return -1;
06862 
06863   extract_insn_cached (insn);
06864   gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
06865 
06866   *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
06867 
06868   return gen_p;
06869 }
06870 
06871 enum
06872   {
06873     /* Offset to reach ZERO_EXTEND patterns.  */
06874     SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
06875     /* Number of patterns for each speculation mode.  */
06876     SPEC_N = (SPEC_MODE_LAST
06877               + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
06878   };
06879 
06880 enum SPEC_GEN_LD_MAP
06881   {
06882     /* Offset to ld.a patterns.  */
06883     SPEC_GEN_A = 0 * SPEC_N,
06884     /* Offset to ld.s patterns.  */
06885     SPEC_GEN_S = 1 * SPEC_N,
06886     /* Offset to ld.sa patterns.  */
06887     SPEC_GEN_SA = 2 * SPEC_N,
06888     /* Offset to ld.sa patterns.  For this patterns corresponding ld.c will
06889        mutate to chk.s.  */
06890     SPEC_GEN_SA_FOR_S = 3 * SPEC_N
06891   };
06892 
06893 /* These offsets are used to get (4 * SPEC_N).  */
06894 enum SPEC_GEN_CHECK_OFFSET
06895   {
06896     SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
06897     SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
06898   };
06899 
06900 /* If GEN_P is true, calculate the index of needed speculation check and return
06901    speculative pattern for INSN with speculative mode TS, machine mode
06902    MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
06903    If GEN_P is false, just calculate the index of needed speculation check.  */
06904 static rtx
06905 ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
06906 {
06907   rtx pat, new_pat;
06908   int load_no;
06909   int shift = 0;
06910 
06911   static rtx (* const gen_load[]) (rtx, rtx) = {
06912     gen_movbi_advanced,
06913     gen_movqi_advanced,
06914     gen_movhi_advanced,
06915     gen_movsi_advanced,
06916     gen_movdi_advanced,
06917     gen_movsf_advanced,
06918     gen_movdf_advanced,
06919     gen_movxf_advanced,
06920     gen_movti_advanced,
06921     gen_zero_extendqidi2_advanced,
06922     gen_zero_extendhidi2_advanced,
06923     gen_zero_extendsidi2_advanced,
06924 
06925     gen_movbi_speculative,
06926     gen_movqi_speculative,
06927     gen_movhi_speculative,
06928     gen_movsi_speculative,
06929     gen_movdi_speculative,
06930     gen_movsf_speculative,
06931     gen_movdf_speculative,
06932     gen_movxf_speculative,
06933     gen_movti_speculative,
06934     gen_zero_extendqidi2_speculative,
06935     gen_zero_extendhidi2_speculative,
06936     gen_zero_extendsidi2_speculative,
06937 
06938     gen_movbi_speculative_advanced,
06939     gen_movqi_speculative_advanced,
06940     gen_movhi_speculative_advanced,
06941     gen_movsi_speculative_advanced,
06942     gen_movdi_speculative_advanced,
06943     gen_movsf_speculative_advanced,
06944     gen_movdf_speculative_advanced,
06945     gen_movxf_speculative_advanced,
06946     gen_movti_speculative_advanced,
06947     gen_zero_extendqidi2_speculative_advanced,
06948     gen_zero_extendhidi2_speculative_advanced,
06949     gen_zero_extendsidi2_speculative_advanced,
06950 
06951     gen_movbi_speculative_advanced,
06952     gen_movqi_speculative_advanced,
06953     gen_movhi_speculative_advanced,
06954     gen_movsi_speculative_advanced,
06955     gen_movdi_speculative_advanced,
06956     gen_movsf_speculative_advanced,
06957     gen_movdf_speculative_advanced,
06958     gen_movxf_speculative_advanced,
06959     gen_movti_speculative_advanced,
06960     gen_zero_extendqidi2_speculative_advanced,
06961     gen_zero_extendhidi2_speculative_advanced,
06962     gen_zero_extendsidi2_speculative_advanced
06963   };
06964 
06965   load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
06966 
06967   if (ts & BEGIN_DATA)
06968     {
06969       /* We don't need recovery because even if this is ld.sa
06970    ALAT entry will be allocated only if NAT bit is set to zero. 
06971    So it is enough to use ld.c here.  */    
06972 
06973       if (ts & BEGIN_CONTROL)
06974   {       
06975     load_no += SPEC_GEN_SA;
06976 
06977     if (!mflag_sched_ldc)
06978       shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
06979   }
06980       else
06981   {
06982     load_no += SPEC_GEN_A;
06983 
06984     if (!mflag_sched_ldc)   
06985       shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
06986   }
06987     }
06988   else if (ts & BEGIN_CONTROL)
06989     {
06990       /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
06991       if (!mflag_control_ldc)
06992   load_no += SPEC_GEN_S;
06993       else
06994   {
06995     gcc_assert (mflag_sched_ldc);
06996     load_no += SPEC_GEN_SA_FOR_S;
06997   }
06998     }
06999   else
07000     gcc_unreachable ();
07001 
07002   /* Set the desired check index.  We add '1', because zero element in this
07003      array means, that instruction with such uid is non-speculative.  */
07004   spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
07005 
07006   if (!gen_p)
07007     return 0;
07008 
07009   new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
07010              copy_rtx (recog_data.operand[1]));
07011 
07012   pat = PATTERN (insn);
07013   if (GET_CODE (pat) == COND_EXEC)
07014     new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx 
07015          (COND_EXEC_TEST (pat)), new_pat);
07016 
07017   return new_pat;
07018 }
07019 
07020 /* Offset to branchy checks.  */
07021 enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
07022 
07023 /* Return nonzero, if INSN needs branchy recovery check.  */
07024 static bool
07025 ia64_needs_block_p (rtx insn)
07026 {
07027   int check_no;
07028 
07029   check_no = spec_check_no[INSN_UID(insn)] - 1;
07030   gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
07031 
07032   return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
07033     || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
07034 }
07035 
07036 /* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
07037    If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
07038    Otherwise, generate a simple check.  */
07039 static rtx
07040 ia64_gen_check (rtx insn, rtx label, bool mutate_p)
07041 {
07042   rtx op1, pat, check_pat;
07043 
07044   static rtx (* const gen_check[]) (rtx, rtx) = {
07045     gen_movbi_clr,
07046     gen_movqi_clr,
07047     gen_movhi_clr,
07048     gen_movsi_clr,
07049     gen_movdi_clr,
07050     gen_movsf_clr,
07051     gen_movdf_clr,
07052     gen_movxf_clr,
07053     gen_movti_clr,
07054     gen_zero_extendqidi2_clr,
07055     gen_zero_extendhidi2_clr,
07056     gen_zero_extendsidi2_clr,
07057 
07058     gen_speculation_check_bi,
07059     gen_speculation_check_qi,
07060     gen_speculation_check_hi,
07061     gen_speculation_check_si,
07062     gen_speculation_check_di,
07063     gen_speculation_check_sf,
07064     gen_speculation_check_df,
07065     gen_speculation_check_xf,
07066     gen_speculation_check_ti,
07067     gen_speculation_check_di,
07068     gen_speculation_check_di,
07069     gen_speculation_check_di,
07070 
07071     gen_movbi_clr,
07072     gen_movqi_clr,
07073     gen_movhi_clr,
07074     gen_movsi_clr,
07075     gen_movdi_clr,
07076     gen_movsf_clr,
07077     gen_movdf_clr,
07078     gen_movxf_clr,
07079     gen_movti_clr,
07080     gen_zero_extendqidi2_clr,
07081     gen_zero_extendhidi2_clr,
07082     gen_zero_extendsidi2_clr,
07083 
07084     gen_movbi_clr,
07085     gen_movqi_clr,
07086     gen_movhi_clr,
07087     gen_movsi_clr,
07088     gen_movdi_clr,
07089     gen_movsf_clr,
07090     gen_movdf_clr,
07091     gen_movxf_clr,
07092     gen_movti_clr,
07093     gen_zero_extendqidi2_clr,
07094     gen_zero_extendhidi2_clr,
07095     gen_zero_extendsidi2_clr,
07096 
07097     gen_advanced_load_check_clr_bi,
07098     gen_advanced_load_check_clr_qi,
07099     gen_advanced_load_check_clr_hi,
07100     gen_advanced_load_check_clr_si,
07101     gen_advanced_load_check_clr_di,
07102     gen_advanced_load_check_clr_sf,
07103     gen_advanced_load_check_clr_df,
07104     gen_advanced_load_check_clr_xf,
07105     gen_advanced_load_check_clr_ti,
07106     gen_advanced_load_check_clr_di,
07107     gen_advanced_load_check_clr_di,
07108     gen_advanced_load_check_clr_di,
07109 
07110     /* Following checks are generated during mutation.  */
07111     gen_advanced_load_check_clr_bi,
07112     gen_advanced_load_check_clr_qi,
07113     gen_advanced_load_check_clr_hi,
07114     gen_advanced_load_check_clr_si,
07115     gen_advanced_load_check_clr_di,
07116     gen_advanced_load_check_clr_sf,
07117     gen_advanced_load_check_clr_df,
07118     gen_advanced_load_check_clr_xf,
07119     gen_advanced_load_check_clr_ti,
07120     gen_advanced_load_check_clr_di,
07121     gen_advanced_load_check_clr_di,
07122     gen_advanced_load_check_clr_di,
07123 
07124     0,0,0,0,0,0,0,0,0,0,0,0,
07125 
07126     gen_advanced_load_check_clr_bi,
07127     gen_advanced_load_check_clr_qi,
07128     gen_advanced_load_check_clr_hi,
07129     gen_advanced_load_check_clr_si,
07130     gen_advanced_load_check_clr_di,
07131     gen_advanced_load_check_clr_sf,
07132     gen_advanced_load_check_clr_df,
07133     gen_advanced_load_check_clr_xf,
07134     gen_advanced_load_check_clr_ti,
07135     gen_advanced_load_check_clr_di,
07136     gen_advanced_load_check_clr_di,
07137     gen_advanced_load_check_clr_di,
07138 
07139     gen_speculation_check_bi,
07140     gen_speculation_check_qi,
07141     gen_speculation_check_hi,
07142     gen_speculation_check_si,
07143     gen_speculation_check_di,
07144     gen_speculation_check_sf,
07145     gen_speculation_check_df,
07146     gen_speculation_check_xf,
07147     gen_speculation_check_ti,
07148     gen_speculation_check_di,
07149     gen_speculation_check_di,
07150     gen_speculation_check_di
07151   };
07152 
07153   extract_insn_cached (insn);
07154 
07155   if (label)
07156     {
07157       gcc_assert (mutate_p || ia64_needs_block_p (insn));
07158       op1 = label;
07159     }
07160   else
07161     {
07162       gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
07163       op1 = copy_rtx (recog_data.operand[1]);
07164     }
07165       
07166   if (mutate_p)
07167     /* INSN is ld.c.
07168        Find the speculation check number by searching for original
07169        speculative load in the RESOLVED_DEPS list of INSN.
07170        As long as patterns are unique for each instruction, this can be
07171        accomplished by matching ORIG_PAT fields.  */
07172     {
07173       rtx link;
07174       int check_no = 0;
07175       rtx orig_pat = ORIG_PAT (insn);
07176 
07177       for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
07178   {
07179     rtx x = XEXP (link, 0);
07180 
07181     if (ORIG_PAT (x) == orig_pat)
07182       check_no = spec_check_no[INSN_UID (x)];
07183   }
07184       gcc_assert (check_no);
07185 
07186       spec_check_no[INSN_UID (insn)] = (check_no
07187           + SPEC_GEN_CHECK_MUTATION_OFFSET);
07188     }
07189 
07190   check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
07191          (copy_rtx (recog_data.operand[0]), op1));
07192     
07193   pat = PATTERN (insn);
07194   if (GET_CODE (pat) == COND_EXEC)
07195     check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
07196            check_pat);
07197 
07198   return check_pat;
07199 }
07200 
07201 /* Return nonzero if insn X is a branchy recovery check, i.e. its pattern
07202    (minus any COND_EXEC wrapper) is a SET accepted by ia64_spec_check_src_p.  */
07203 static int
07204 ia64_spec_check_p (rtx x)
07205 {
07206   rtx body = PATTERN (x);
07207 
07208   if (GET_CODE (body) == COND_EXEC)
07209     body = COND_EXEC_CODE (body);
07210   return GET_CODE (body) == SET ? ia64_spec_check_src_p (SET_SRC (body)) : 0;
07211 }
07212 
07213 /* Return nonzero if SRC belongs to a recovery check.  */
07214 static int
07215 ia64_spec_check_src_p (rtx src)
07216 {
07217   if (GET_CODE (src) == IF_THEN_ELSE)
07218     {
07219       rtx t;
07220 
07221       t = XEXP (src, 0);
07222       if (GET_CODE (t) == NE)
07223   {
07224     t = XEXP (t, 0);      
07225