• Main Page
  • Modules
  • Data Types
  • Files

osprey-gcc/gcc/config/ia64/ia64.c

Go to the documentation of this file.
00001 /* Definitions of target machine for GNU compiler.
00002    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
00003    Free Software Foundation, Inc.
00004    Contributed by James E. Wilson <wilson@cygnus.com> and
00005       David Mosberger <davidm@hpl.hp.com>.
00006 
00007 This file is part of GCC.
00008 
00009 GCC is free software; you can redistribute it and/or modify
00010 it under the terms of the GNU General Public License as published by
00011 the Free Software Foundation; either version 2, or (at your option)
00012 any later version.
00013 
00014 GCC is distributed in the hope that it will be useful,
00015 but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 GNU General Public License for more details.
00018 
00019 You should have received a copy of the GNU General Public License
00020 along with GCC; see the file COPYING.  If not, write to
00021 the Free Software Foundation, 59 Temple Place - Suite 330,
00022 Boston, MA 02111-1307, USA.  */
00023 
00024 #include "config.h"
00025 #include "system.h"
00026 #include "coretypes.h"
00027 #include "tm.h"
00028 #include "rtl.h"
00029 #include "tree.h"
00030 #include "regs.h"
00031 #include "hard-reg-set.h"
00032 #include "real.h"
00033 #include "insn-config.h"
00034 #include "conditions.h"
00035 #include "output.h"
00036 #include "insn-attr.h"
00037 #include "flags.h"
00038 #include "recog.h"
00039 #include "expr.h"
00040 #include "optabs.h"
00041 #include "except.h"
00042 #include "function.h"
00043 #include "ggc.h"
00044 #include "basic-block.h"
00045 #include "toplev.h"
00046 #include "sched-int.h"
00047 #include "timevar.h"
00048 #include "target.h"
00049 #include "target-def.h"
00050 #include "tm_p.h"
00051 #include "hashtab.h"
00052 #include "langhooks.h"
00053 #include "cfglayout.h"
00054 #include "tree-gimple.h"
00055 
00056 /* This is used for communication between ASM_OUTPUT_LABEL and
00057    ASM_OUTPUT_LABELREF.  It starts out as 0; the macros that read and
   write it are not defined in this file.  */
00058 int ia64_asm_output_label = 0;
00059 
00060 /* Define the information needed to generate branch and scc insns.  This is
00061    stored from the compare operation.  ia64_compare_op0 and
   ia64_compare_op1 are global (non-static) so that code outside this
   file can set and read them.  */
00062 struct rtx_def * ia64_compare_op0;
00063 struct rtx_def * ia64_compare_op1;
00064 
00065 /* Register names for ia64_expand_prologue.  Entry I is the numeric name
   of general register r(32 + I), so the 96 entries cover r32 through
   r127.  */
00066 static const char * const ia64_reg_numbers[96] =
00067 { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
00068   "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
00069   "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
00070   "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
00071   "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
00072   "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
00073   "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
00074   "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
00075   "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
00076   "r104","r105","r106","r107","r108","r109","r110","r111",
00077   "r112","r113","r114","r115","r116","r117","r118","r119",
00078   "r120","r121","r122","r123","r124","r125","r126","r127"};
00079 
/* The three tables below give the symbolic input ("inN"), local ("locN")
   and output ("outN") register names.  Their sizes, 8 + 80 + 8, add up
   to the 96 entries of ia64_reg_numbers.  */

00080 /* ??? These strings could be shared with REGISTER_NAMES.  */
/* Symbolic names in0 .. in7.  */
00081 static const char * const ia64_input_reg_names[8] =
00082 { "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
00083 
00084 /* ??? These strings could be shared with REGISTER_NAMES.  */
/* Symbolic names loc0 .. loc79.  */
00085 static const char * const ia64_local_reg_names[80] =
00086 { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
00087   "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
00088   "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
00089   "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
00090   "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
00091   "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
00092   "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
00093   "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
00094   "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
00095   "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
00096 
00097 /* ??? These strings could be shared with REGISTER_NAMES.  */
/* Symbolic names out0 .. out7.  */
00098 static const char * const ia64_output_reg_names[8] =
00099 { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
00100 
00101 /* String used with the -mfixed-range= option.  */
00102 const char *ia64_fixed_range_string;
00103 
00104 /* Determines whether we use adds, addl, or movl to generate our
00105    TLS immediate offsets.  The default is 22.  */
00106 int ia64_tls_size = 22;
00107 
00108 /* String used with the -mtls-size= option.  */
00109 const char *ia64_tls_size_string;
00110 
00111 /* Which cpu are we scheduling for.  */
00112 enum processor_type ia64_tune;
00113 
00114 /* String used with the -mtune= option.  */
00115 const char *ia64_tune_string;
00116 
00117 /* Determines whether we run our final scheduling pass or not.  We always
00118    avoid the normal second scheduling pass.  */
00119 static int ia64_flag_schedule_insns2;
00120 
00121 /* Determines whether we run variable tracking in machine dependent
00122    reorganization.  */
00123 static int ia64_flag_var_tracking;
00124 
00125 /* Variables which are this size or smaller are put in the sdata/sbss
00126    sections.  */
00127 
00128 unsigned int ia64_section_threshold;
00129 
00130 /* The following variable is used by the DFA insn scheduler.  The value is
00131    TRUE if we do insn bundling instead of insn scheduling.  It starts
   out as 0 (scheduling).  */
00132 int bundling_p = 0;
00133 
00134 /* Structure to be filled in by ia64_compute_frame_size with register
00135    save masks and offsets for the current function.  The size and offset
   fields are HOST_WIDE_INT; the reg_* fields hold register numbers; the
   two trailing char fields are used as boolean flags.  */
00136 
00137 struct ia64_frame_info
00138 {
00139   HOST_WIDE_INT total_size; /* size of the stack frame, not including
00140            the caller's scratch area.  */
00141   HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
00142   HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area.  */
00143   HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
00144   HARD_REG_SET mask;    /* mask of saved registers.  */
00145   unsigned int gr_used_mask;  /* mask of registers in use as gr spill
00146            registers or long-term scratches.  */
00147   int n_spilled;    /* number of spilled registers.  */
00148   int reg_fp;     /* register for fp.  */
00149   int reg_save_b0;    /* save register for b0.  */
00150   int reg_save_pr;    /* save register for prs.  */
00151   int reg_save_ar_pfs;    /* save register for ar.pfs.  */
00152   int reg_save_ar_unat;   /* save register for ar.unat.  */
00153   int reg_save_ar_lc;   /* save register for ar.lc.  */
00154   int reg_save_gp;    /* save register for gp.  */
00155   int n_input_regs;   /* number of input registers used.  */
00156   int n_local_regs;   /* number of local registers used.  */
00157   int n_output_regs;    /* number of output registers used.  */
00158   int n_rotate_regs;    /* number of rotating registers used.  */
00159 
00160   char need_regstk;   /* true if a .regstk directive needed.  */
00161   char initialized;   /* true if the data is finalized.  */
00162 };
00163 
00164 /* Current frame information calculated by ia64_compute_frame_size.
   A single file-scope instance; being static, it is zero-initialized.  */
00165 static struct ia64_frame_info current_frame_info;
00166 
/* Forward declarations of file-local functions.  Several of them are
   referenced by the attribute table and the TARGET_* hook macros below
   before their definitions appear.  */

/* DFA scheduler callbacks, TLS helpers and frame/spill helpers.  */
00167 static int ia64_first_cycle_multipass_dfa_lookahead (void);
00168 static void ia64_dependencies_evaluation_hook (rtx, rtx);
00169 static void ia64_init_dfa_pre_cycle_insn (void);
00170 static rtx ia64_dfa_pre_cycle_insn (void);
00171 static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
00172 static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
00173 static rtx gen_tls_get_addr (void);
00174 static rtx gen_thread_pointer (void);
00175 static int find_gr_spill (int);
00176 static int next_scratch_gr_reg (void);
00177 static void mark_reg_gr_used_mask (rtx, void *);
00178 static void ia64_compute_frame_size (HOST_WIDE_INT);
00179 static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
00180 static void finish_spill_pointers (void);
00181 static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
00182 static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
00183 static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
00184 static rtx gen_movdi_x (rtx, rtx, rtx);
00185 static rtx gen_fr_spill_x (rtx, rtx, rtx);
00186 static rtx gen_fr_restore_x (rtx, rtx, rtx);
00187 
/* Argument-passing and calling-convention hooks, plus assorted helpers.  */
00188 static enum machine_mode hfa_element_mode (tree, bool);
00189 static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
00190            tree, int *, int);
00191 static bool ia64_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
00192             tree, bool);
00193 static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
00194            tree, bool);
00195 static bool ia64_function_ok_for_sibcall (tree, tree);
00196 static bool ia64_return_in_memory (tree, tree);
00197 static bool ia64_rtx_costs (rtx, int, int, int *);
00198 static void fix_range (const char *);
00199 static struct machine_function * ia64_init_machine_status (void);
00200 static void emit_insn_group_barriers (FILE *);
00201 static void emit_all_insn_group_barriers (FILE *);
00202 static void final_emit_insn_group_barriers (FILE *);
00203 static void emit_predicate_relation_info (void);
00204 static void ia64_reorg (void);
00205 static bool ia64_in_small_data_p (tree);
00206 static void process_epilogue (void);
00207 static int process_set (FILE *, rtx);
00208 
/* Builtin expanders and assembler output routines.  */
00209 static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
00210 static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
00211 static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
00212            int, tree, rtx);
00213 static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
00214 static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
00215 static bool ia64_assemble_integer (rtx, unsigned int, int);
00216 static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
00217 static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
00218 static void ia64_output_function_end_prologue (FILE *);
00219 
/* Scheduler functions; most are installed through the TARGET_SCHED_*
   macros below.  */
00220 static int ia64_issue_rate (void);
00221 static int ia64_adjust_cost (rtx, rtx, rtx, int);
00222 static void ia64_sched_init (FILE *, int, int);
00223 static void ia64_sched_finish (FILE *, int);
00224 static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
00225 static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
00226 static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
00227 static int ia64_variable_issue (FILE *, int, rtx, int);
00228 
/* Bundle-state bookkeeping.  struct bundle_state is only named here;
   its definition is not in this part of the file.  */
00229 static struct bundle_state *get_free_bundle_state (void);
00230 static void free_bundle_state (struct bundle_state *);
00231 static void initiate_bundle_states (void);
00232 static void finish_bundle_states (void);
00233 static unsigned bundle_state_hash (const void *);
00234 static int bundle_state_eq_p (const void *, const void *);
00235 static int insert_bundle_state (struct bundle_state *);
00236 static void initiate_bundle_state_table (void);
00237 static void finish_bundle_state_table (void);
00238 static int try_issue_nops (struct bundle_state *, int);
00239 static int try_issue_insn (struct bundle_state *, rtx);
00240 static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
00241 static int get_max_pos (state_t);
00242 static int get_template (state_t, int);
00243 
00244 static rtx get_next_important_insn (rtx, rtx);
00245 static void bundling (FILE *, int, rtx, rtx);
00246 
00247 static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
00248           HOST_WIDE_INT, tree);
00249 static void ia64_file_start (void);
00250 
/* Section selection and OS-specific routines.  Those marked
   ATTRIBUTE_UNUSED are presumably referenced only by some target
   configurations -- TODO confirm against the subtarget headers.  */
00251 static void ia64_select_rtx_section (enum machine_mode, rtx,
00252              unsigned HOST_WIDE_INT);
00253 static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
00254      ATTRIBUTE_UNUSED;
00255 static void ia64_rwreloc_unique_section (tree, int)
00256      ATTRIBUTE_UNUSED;
00257 static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
00258                unsigned HOST_WIDE_INT)
00259      ATTRIBUTE_UNUSED;
00260 static unsigned int ia64_section_type_flags (tree, const char *, int);
00261 static void ia64_hpux_add_extern_decl (tree decl)
00262      ATTRIBUTE_UNUSED;
00263 static void ia64_hpux_file_end (void)
00264      ATTRIBUTE_UNUSED;
00265 static void ia64_init_libfuncs (void)
00266      ATTRIBUTE_UNUSED;
00267 static void ia64_hpux_init_libfuncs (void)
00268      ATTRIBUTE_UNUSED;
00269 static void ia64_sysv4_init_libfuncs (void)
00270      ATTRIBUTE_UNUSED;
00271 static void ia64_vms_init_libfuncs (void)
00272      ATTRIBUTE_UNUSED;
00273 
/* Attribute handling and the remaining target hooks.  */
00274 static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
00275 static void ia64_encode_section_info (tree, rtx, int);
00276 static rtx ia64_struct_value_rtx (tree, int);
00277 static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
00278 static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
00279 static bool ia64_vector_mode_supported_p (enum machine_mode mode);
00280 static bool ia64_cannot_force_const_mem (rtx);
00281 
00282 /* Table of valid machine attributes.  Installed as
   TARGET_ATTRIBUTE_TABLE below.  "syscall_linkage" takes no arguments,
   requires a type and a function type, and has no handler.  "model"
   takes exactly one argument, requires a decl, and is validated by
   ia64_handle_model_attribute.  The all-NULL row terminates the table.  */
00283 static const struct attribute_spec ia64_attribute_table[] =
00284 {
00285   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
00286   { "syscall_linkage", 0, 0, false, true,  true,  NULL },
00287   { "model",         1, 1, true, false, false, ia64_handle_model_attribute },
00288   { NULL,        0, 0, false, false, false, NULL }
00289 };
00290 
00291 /* Initialize the GCC target structure.  Each hook is overridden by
   #undef-ing any previous definition and #define-ing the ia64 value;
   targetm is then initialized from TARGET_INITIALIZER at the end of
   this section (TARGET_INITIALIZER itself is defined outside this
   file, presumably in target-def.h, which is included above).  */
00292 #undef TARGET_ATTRIBUTE_TABLE
00293 #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
00294 
00295 #undef TARGET_INIT_BUILTINS
00296 #define TARGET_INIT_BUILTINS ia64_init_builtins
00297 
00298 #undef TARGET_EXPAND_BUILTIN
00299 #define TARGET_EXPAND_BUILTIN ia64_expand_builtin
00300 
/* Assembler directives for integer data: dataN for aligned and dataN.ua
   for unaligned N-byte values.  */
00301 #undef TARGET_ASM_BYTE_OP
00302 #define TARGET_ASM_BYTE_OP "\tdata1\t"
00303 #undef TARGET_ASM_ALIGNED_HI_OP
00304 #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
00305 #undef TARGET_ASM_ALIGNED_SI_OP
00306 #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
00307 #undef TARGET_ASM_ALIGNED_DI_OP
00308 #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
00309 #undef TARGET_ASM_UNALIGNED_HI_OP
00310 #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
00311 #undef TARGET_ASM_UNALIGNED_SI_OP
00312 #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
00313 #undef TARGET_ASM_UNALIGNED_DI_OP
00314 #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
00315 #undef TARGET_ASM_INTEGER
00316 #define TARGET_ASM_INTEGER ia64_assemble_integer
00317 
/* Function prologue/epilogue output.  */
00318 #undef TARGET_ASM_FUNCTION_PROLOGUE
00319 #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
00320 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
00321 #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
00322 #undef TARGET_ASM_FUNCTION_EPILOGUE
00323 #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
00324 
00325 #undef TARGET_IN_SMALL_DATA_P
00326 #define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
00327 
/* Instruction scheduler hooks.  */
00328 #undef TARGET_SCHED_ADJUST_COST
00329 #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
00330 #undef TARGET_SCHED_ISSUE_RATE
00331 #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
00332 #undef TARGET_SCHED_VARIABLE_ISSUE
00333 #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
00334 #undef TARGET_SCHED_INIT
00335 #define TARGET_SCHED_INIT ia64_sched_init
00336 #undef TARGET_SCHED_FINISH
00337 #define TARGET_SCHED_FINISH ia64_sched_finish
00338 #undef TARGET_SCHED_REORDER
00339 #define TARGET_SCHED_REORDER ia64_sched_reorder
00340 #undef TARGET_SCHED_REORDER2
00341 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2
00342 
00343 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
00344 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
00345 
00346 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
00347 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
00348 
00349 #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
00350 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
00351 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
00352 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
00353 
00354 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
00355 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
00356   ia64_first_cycle_multipass_dfa_lookahead_guard
00357 
00358 #undef TARGET_SCHED_DFA_NEW_CYCLE
00359 #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
00360 
/* Calling-convention hooks.  */
00361 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
00362 #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
00363 #undef TARGET_PASS_BY_REFERENCE
00364 #define TARGET_PASS_BY_REFERENCE ia64_pass_by_reference
00365 #undef TARGET_ARG_PARTIAL_BYTES
00366 #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
00367 
00368 #undef TARGET_ASM_OUTPUT_MI_THUNK
00369 #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
00370 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
00371 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
00372 
00373 #undef TARGET_ASM_FILE_START
00374 #define TARGET_ASM_FILE_START ia64_file_start
00375 
00376 #undef TARGET_RTX_COSTS
00377 #define TARGET_RTX_COSTS ia64_rtx_costs
00378 #undef TARGET_ADDRESS_COST
00379 #define TARGET_ADDRESS_COST hook_int_rtx_0
00380 
00381 #undef TARGET_MACHINE_DEPENDENT_REORG
00382 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
00383 
00384 #undef TARGET_ENCODE_SECTION_INFO
00385 #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
00386 
00387 #undef  TARGET_SECTION_TYPE_FLAGS
00388 #define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
00389 
00390 /* ??? ABI doesn't allow us to define this.  */
00391 #if 0
00392 #undef TARGET_PROMOTE_FUNCTION_ARGS
00393 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
00394 #endif
00395 
00396 /* ??? ABI doesn't allow us to define this.  */
00397 #if 0
00398 #undef TARGET_PROMOTE_FUNCTION_RETURN
00399 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
00400 #endif
00401 
00402 /* ??? Investigate.  */
00403 #if 0
00404 #undef TARGET_PROMOTE_PROTOTYPES
00405 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
00406 #endif
00407 
00408 #undef TARGET_STRUCT_VALUE_RTX
00409 #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
00410 #undef TARGET_RETURN_IN_MEMORY
00411 #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
00412 #undef TARGET_SETUP_INCOMING_VARARGS
00413 #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
00414 #undef TARGET_STRICT_ARGUMENT_NAMING
00415 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
00416 #undef TARGET_MUST_PASS_IN_STACK
00417 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
00418 
00419 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
00420 #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
00421 
00422 #undef TARGET_UNWIND_EMIT
00423 #define TARGET_UNWIND_EMIT process_for_unwind_directive
00424 
00425 #undef TARGET_SCALAR_MODE_SUPPORTED_P
00426 #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
00427 #undef TARGET_VECTOR_MODE_SUPPORTED_P
00428 #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
00429 
00430 /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
00431    in an order different from the specified program order.  */
00432 #undef TARGET_RELAXED_ORDERING
00433 #define TARGET_RELAXED_ORDERING true
00434 
00435 #undef TARGET_CANNOT_FORCE_CONST_MEM
00436 #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
00437 
/* The target hook vector, built from the definitions above.  */
00438 struct gcc_target targetm = TARGET_INITIALIZER;
00439 
/* Address areas a declaration can be placed in.  ia64_get_addr_area
   returns ADDR_AREA_SMALL for decls carrying a "model" attribute whose
   argument is "small" or "__small__", and ADDR_AREA_NORMAL otherwise.  */
00440 typedef enum
00441   {
00442     ADDR_AREA_NORMAL, /* normal address area */
00443     ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
00444   }
00445 ia64_addr_area;
00446 
00447 static GTY(()) tree small_ident1;
00448 static GTY(()) tree small_ident2;
00449 
00450 static void
00451 init_idents (void)
00452 {
00453   if (small_ident1 == 0)
00454     {
00455       small_ident1 = get_identifier ("small");
00456       small_ident2 = get_identifier ("__small__");
00457     }
00458 }
00459 
00460 /* Retrieve the address area that has been chosen for the given decl.  */
00461 
00462 static ia64_addr_area
00463 ia64_get_addr_area (tree decl)
00464 {
00465   tree model_attr;
00466 
00467   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
00468   if (model_attr)
00469     {
00470       tree id;
00471 
00472       init_idents ();
00473       id = TREE_VALUE (TREE_VALUE (model_attr));
00474       if (id == small_ident1 || id == small_ident2)
00475   return ADDR_AREA_SMALL;
00476     }
00477   return ADDR_AREA_NORMAL;
00478 }
00479 
00480 static tree
00481 ia64_handle_model_attribute (tree *node, tree name, tree args,
00482            int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
00483 {
00484   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
00485   ia64_addr_area area;
00486   tree arg, decl = *node;
00487 
00488   init_idents ();
00489   arg = TREE_VALUE (args);
00490   if (arg == small_ident1 || arg == small_ident2)
00491     {
00492       addr_area = ADDR_AREA_SMALL;
00493     }
00494   else
00495     {
00496       warning ("invalid argument of %qs attribute",
00497          IDENTIFIER_POINTER (name));
00498       *no_add_attrs = true;
00499     }
00500 
00501   switch (TREE_CODE (decl))
00502     {
00503     case VAR_DECL:
00504       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
00505      == FUNCTION_DECL)
00506     && !TREE_STATIC (decl))
00507   {
00508     error ("%Jan address area attribute cannot be specified for "
00509      "local variables", decl, decl);
00510     *no_add_attrs = true;
00511   }
00512       area = ia64_get_addr_area (decl);
00513       if (area != ADDR_AREA_NORMAL && addr_area != area)
00514   {
00515     error ("%Jaddress area of '%s' conflicts with previous "
00516      "declaration", decl, decl);
00517     *no_add_attrs = true;
00518   }
00519       break;
00520 
00521     case FUNCTION_DECL:
00522       error ("%Jaddress area attribute cannot be specified for functions",
00523        decl, decl);
00524       *no_add_attrs = true;
00525       break;
00526 
00527     default:
00528       warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
00529       *no_add_attrs = true;
00530       break;
00531     }
00532 
00533   return NULL_TREE;
00534 }
00535 
00536 static void
00537 ia64_encode_addr_area (tree decl, rtx symbol)
00538 {
00539   int flags;
00540 
00541   flags = SYMBOL_REF_FLAGS (symbol);
00542   switch (ia64_get_addr_area (decl))
00543     {
00544     case ADDR_AREA_NORMAL: break;
00545     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
00546     default: abort ();
00547     }
00548   SYMBOL_REF_FLAGS (symbol) = flags;
00549 }
00550 
00551 static void
00552 ia64_encode_section_info (tree decl, rtx rtl, int first)
00553 {
00554   default_encode_section_info (decl, rtl, first);
00555 
00556   /* Careful not to prod global register variables.  */
00557   if (TREE_CODE (decl) == VAR_DECL
00558       && GET_CODE (DECL_RTL (decl)) == MEM
00559       && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
00560       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
00561     ia64_encode_addr_area (decl, XEXP (rtl, 0));
00562 }
00563 
00564 /* Implement CONST_OK_FOR_LETTER_P.  */
00565 
00566 bool
00567 ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
00568 {
00569   switch (c)
00570     {
00571     case 'I':
00572       return CONST_OK_FOR_I (value);
00573     case 'J':
00574       return CONST_OK_FOR_J (value);
00575     case 'K':
00576       return CONST_OK_FOR_K (value);
00577     case 'L':
00578       return CONST_OK_FOR_L (value);
00579     case 'M':
00580       return CONST_OK_FOR_M (value);
00581     case 'N':
00582       return CONST_OK_FOR_N (value);
00583     case 'O':
00584       return CONST_OK_FOR_O (value);
00585     case 'P':
00586       return CONST_OK_FOR_P (value);
00587     default:
00588       return false;
00589     }
00590 }
00591 
00592 /* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
00593 
00594 bool
00595 ia64_const_double_ok_for_letter_p (rtx value, char c)
00596 {
00597   switch (c)
00598     {
00599     case 'G':
00600       return CONST_DOUBLE_OK_FOR_G (value);
00601     default:
00602       return false;
00603     }
00604 }
00605 
00606 /* Implement EXTRA_CONSTRAINT.  */
00607 
00608 bool
00609 ia64_extra_constraint (rtx value, char c)
00610 {
00611   switch (c)
00612     {
00613     case 'Q':
00614       /* Non-volatile memory for FP_REG loads/stores.  */
00615       return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
00616 
00617     case 'R':
00618       /* 1..4 for shladd arguments.  */
00619       return (GET_CODE (value) == CONST_INT
00620         && INTVAL (value) >= 1 && INTVAL (value) <= 4);
00621 
00622     case 'S':
00623       /* Non-post-inc memory for asms and other unsavory creatures.  */
00624       return (GET_CODE (value) == MEM
00625         && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
00626         && (reload_in_progress || memory_operand (value, VOIDmode)));
00627 
00628     case 'T':
00629       /* Symbol ref to small-address-area.  */
00630       return small_addr_symbolic_operand (value, VOIDmode);
00631 
00632     case 'U':
00633       /* Vector zero.  */
00634       return value == CONST0_RTX (GET_MODE (value));
00635 
00636     case 'W':
00637       /* An integer vector, such that conversion to an integer yields a
00638    value appropriate for an integer 'J' constraint.  */
00639       if (GET_CODE (value) == CONST_VECTOR
00640     && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
00641   {
00642     value = simplify_subreg (DImode, value, GET_MODE (value), 0);
00643     return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
00644   }
00645       return false;
00646 
00647     case 'Y':
00648       /* A V2SF vector containing elements that satisfy 'G'.  */
00649       return
00650   (GET_CODE (value) == CONST_VECTOR
00651    && GET_MODE (value) == V2SFmode
00652    && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
00653    && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
00654 
00655     default:
00656       return false;
00657     }
00658 }
00659 
00660 /* Return 1 if the operands of a move are ok.  */
00661 
00662 int
00663 ia64_move_ok (rtx dst, rtx src)
00664 {
00665   /* If we're under init_recog_no_volatile, we'll not be able to use
00666      memory_operand.  So check the code directly and don't worry about
00667      the validity of the underlying address, which should have been
00668      checked elsewhere anyway.  */
00669   if (GET_CODE (dst) != MEM)
00670     return 1;
00671   if (GET_CODE (src) == MEM)
00672     return 0;
00673   if (register_operand (src, VOIDmode))
00674     return 1;
00675 
00676   /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
00677   if (INTEGRAL_MODE_P (GET_MODE (dst)))
00678     return src == const0_rtx;
00679   else
00680     return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
00681 }
00682 
00683 int
00684 addp4_optimize_ok (rtx op1, rtx op2)
00685 {
00686   return (basereg_operand (op1, GET_MODE(op1)) !=
00687     basereg_operand (op2, GET_MODE(op2)));
00688 }
00689 
00690 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
00691    Return the length of the field, or <= 0 on failure.  */
00692 
00693 int
00694 ia64_depz_field_mask (rtx rop, rtx rshift)
00695 {
00696   unsigned HOST_WIDE_INT op = INTVAL (rop);
00697   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
00698 
00699   /* Get rid of the zero bits we're shifting in.  */
00700   op >>= shift;
00701 
00702   /* We must now have a solid block of 1's at bit 0.  */
00703   return exact_log2 (op + 1);
00704 }
00705 
00706 /* Return the TLS model to use for ADDR.  */
00707 
00708 static enum tls_model
00709 tls_symbolic_operand_type (rtx addr)
00710 {
00711   enum tls_model tls_kind = 0;
00712 
00713   if (GET_CODE (addr) == CONST)
00714     {
00715       if (GET_CODE (XEXP (addr, 0)) == PLUS
00716     && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
00717         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
00718     }
00719   else if (GET_CODE (addr) == SYMBOL_REF)
00720     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
00721 
00722   return tls_kind;
00723 }
00724 
00725 /* Return true if X is a constant that is valid for some immediate
00726    field in an instruction.  */
00727 
00728 bool
00729 ia64_legitimate_constant_p (rtx x)
00730 {
00731   switch (GET_CODE (x))
00732     {
00733     case CONST_INT:
00734     case LABEL_REF:
00735       return true;
00736 
00737     case CONST_DOUBLE:
00738       if (GET_MODE (x) == VOIDmode)
00739   return true;
00740       return CONST_DOUBLE_OK_FOR_G (x);
00741 
00742     case CONST:
00743     case SYMBOL_REF:
00744       return tls_symbolic_operand_type (x) == 0;
00745 
00746     default:
00747       return false;
00748     }
00749 }
00750 
00751 /* Don't allow TLS addresses to get spilled to memory.  */
00752 
00753 static bool
00754 ia64_cannot_force_const_mem (rtx x)
00755 {
00756   return tls_symbolic_operand_type (x) != 0;
00757 }
00758 
00759 /* Expand a symbolic constant load.  */
00760 
00761 bool
00762 ia64_expand_load_address (rtx dest, rtx src)
00763 {
00764   gcc_assert (GET_CODE (dest) == REG);
00765 
00766   /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
00767      having to pointer-extend the value afterward.  Other forms of address
00768      computation below are also more natural to compute as 64-bit quantities.
00769      If we've been given an SImode destination register, change it.  */
00770   if (GET_MODE (dest) != Pmode)
00771     dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
00772 
00773   if (TARGET_NO_PIC)
00774     return false;
00775   if (small_addr_symbolic_operand (src, VOIDmode))
00776     return false;
00777 
00778   if (TARGET_AUTO_PIC)
00779     emit_insn (gen_load_gprel64 (dest, src));
00780   else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
00781     emit_insn (gen_load_fptr (dest, src));
00782   else if (sdata_symbolic_operand (src, VOIDmode))
00783     emit_insn (gen_load_gprel (dest, src));
00784   else
00785     {
00786       HOST_WIDE_INT addend = 0;
00787       rtx tmp;
00788 
00789       /* We did split constant offsets in ia64_expand_move, and we did try
00790    to keep them split in move_operand, but we also allowed reload to
00791    rematerialize arbitrary constants rather than spill the value to
00792    the stack and reload it.  So we have to be prepared here to split
00793    them apart again.  */
00794       if (GET_CODE (src) == CONST)
00795   {
00796     HOST_WIDE_INT hi, lo;
00797 
00798     hi = INTVAL (XEXP (XEXP (src, 0), 1));
00799     lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
00800     hi = hi - lo;
00801 
00802     if (lo != 0)
00803       {
00804         addend = lo;
00805         src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
00806       }
00807   }
00808 
00809       tmp = gen_rtx_HIGH (Pmode, src);
00810       tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
00811       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00812 
00813       tmp = gen_rtx_LO_SUM (Pmode, dest, src);
00814       emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00815 
00816       if (addend)
00817   {
00818     tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
00819     emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
00820   }
00821     }
00822 
00823   return true;
00824 }
00825 
00826 static GTY(()) rtx gen_tls_tga;
00827 static rtx
00828 gen_tls_get_addr (void)
00829 {
00830   if (!gen_tls_tga)
00831     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
00832   return gen_tls_tga;
00833 }
00834 
00835 static GTY(()) rtx thread_pointer_rtx;
00836 static rtx
00837 gen_thread_pointer (void)
00838 {
00839   if (!thread_pointer_rtx)
00840     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
00841   return thread_pointer_rtx;
00842 }
00843 
00844 static rtx
00845 ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
00846        HOST_WIDE_INT addend)
00847 {
00848   rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
00849   rtx orig_op0 = op0, orig_op1 = op1;
00850   HOST_WIDE_INT addend_lo, addend_hi;
00851 
00852   addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
00853   addend_hi = addend - addend_lo;
00854 
00855   switch (tls_kind)
00856     {
00857     case TLS_MODEL_GLOBAL_DYNAMIC:
00858       start_sequence ();
00859 
00860       tga_op1 = gen_reg_rtx (Pmode);
00861       emit_insn (gen_load_dtpmod (tga_op1, op1));
00862 
00863       tga_op2 = gen_reg_rtx (Pmode);
00864       emit_insn (gen_load_dtprel (tga_op2, op1));
00865 
00866       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
00867            LCT_CONST, Pmode, 2, tga_op1,
00868            Pmode, tga_op2, Pmode);
00869 
00870       insns = get_insns ();
00871       end_sequence ();
00872 
00873       if (GET_MODE (op0) != Pmode)
00874   op0 = tga_ret;
00875       emit_libcall_block (insns, op0, tga_ret, op1);
00876       break;
00877 
00878     case TLS_MODEL_LOCAL_DYNAMIC:
00879       /* ??? This isn't the completely proper way to do local-dynamic
00880    If the call to __tls_get_addr is used only by a single symbol,
00881    then we should (somehow) move the dtprel to the second arg
00882    to avoid the extra add.  */
00883       start_sequence ();
00884 
00885       tga_op1 = gen_reg_rtx (Pmode);
00886       emit_insn (gen_load_dtpmod (tga_op1, op1));
00887 
00888       tga_op2 = const0_rtx;
00889 
00890       tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
00891            LCT_CONST, Pmode, 2, tga_op1,
00892            Pmode, tga_op2, Pmode);
00893 
00894       insns = get_insns ();
00895       end_sequence ();
00896 
00897       tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
00898         UNSPEC_LD_BASE);
00899       tmp = gen_reg_rtx (Pmode);
00900       emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
00901 
00902       if (!register_operand (op0, Pmode))
00903   op0 = gen_reg_rtx (Pmode);
00904       if (TARGET_TLS64)
00905   {
00906     emit_insn (gen_load_dtprel (op0, op1));
00907     emit_insn (gen_adddi3 (op0, tmp, op0));
00908   }
00909       else
00910   emit_insn (gen_add_dtprel (op0, op1, tmp));
00911       break;
00912 
00913     case TLS_MODEL_INITIAL_EXEC:
00914       op1 = plus_constant (op1, addend_hi);
00915       addend = addend_lo;
00916 
00917       tmp = gen_reg_rtx (Pmode);
00918       emit_insn (gen_load_tprel (tmp, op1));
00919 
00920       if (!register_operand (op0, Pmode))
00921   op0 = gen_reg_rtx (Pmode);
00922       emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
00923       break;
00924 
00925     case TLS_MODEL_LOCAL_EXEC:
00926       if (!register_operand (op0, Pmode))
00927   op0 = gen_reg_rtx (Pmode);
00928 
00929       op1 = orig_op1;
00930       addend = 0;
00931       if (TARGET_TLS64)
00932   {
00933     emit_insn (gen_load_tprel (op0, op1));
00934     emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
00935   }
00936       else
00937   emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
00938       break;
00939 
00940     default:
00941       abort ();
00942     }
00943 
00944   if (addend)
00945     op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
00946              orig_op0, 1, OPTAB_DIRECT);
00947 
00948   if (orig_op0 == op0)
00949     return NULL_RTX;
00950   if (GET_MODE (orig_op0) == Pmode)
00951     return op0;
00952   return gen_lowpart (GET_MODE (orig_op0), op0);
00953 }
00954 
00955 rtx
00956 ia64_expand_move (rtx op0, rtx op1)
00957 {
00958   enum machine_mode mode = GET_MODE (op0);
00959 
00960   if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
00961     op1 = force_reg (mode, op1);
00962 
00963   if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
00964     {
00965       HOST_WIDE_INT addend = 0;
00966       enum tls_model tls_kind;
00967       rtx sym = op1;
00968 
00969       if (GET_CODE (op1) == CONST
00970     && GET_CODE (XEXP (op1, 0)) == PLUS
00971     && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
00972   {
00973     addend = INTVAL (XEXP (XEXP (op1, 0), 1));
00974     sym = XEXP (XEXP (op1, 0), 0);
00975   }
00976 
00977       tls_kind = tls_symbolic_operand_type (sym);
00978       if (tls_kind)
00979   return ia64_expand_tls_address (tls_kind, op0, sym, addend);
00980 
00981       if (any_offset_symbol_operand (sym, mode))
00982   addend = 0;
00983       else if (aligned_offset_symbol_operand (sym, mode))
00984   {
00985     HOST_WIDE_INT addend_lo, addend_hi;
00986         
00987     addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
00988     addend_hi = addend - addend_lo;
00989 
00990     if (addend_lo != 0)
00991       {
00992         op1 = plus_constant (sym, addend_hi);
00993         addend = addend_lo;
00994       }
00995     else
00996       addend = 0;
00997   }
00998       else
00999   op1 = sym;
01000 
01001       if (reload_completed)
01002   {
01003     /* We really should have taken care of this offset earlier.  */
01004     gcc_assert (addend == 0);
01005     if (ia64_expand_load_address (op0, op1))
01006       return NULL_RTX;
01007   }
01008 
01009       if (addend)
01010   {
01011     rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
01012 
01013     emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
01014 
01015     op1 = expand_simple_binop (mode, PLUS, subtarget,
01016              GEN_INT (addend), op0, 1, OPTAB_DIRECT);
01017     if (op0 == op1)
01018       return NULL_RTX;
01019   }
01020     }
01021 
01022   return op1;
01023 }
01024 
01025 /* Split a move from OP1 to OP0 conditional on COND.  */
01026 
01027 void
01028 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
01029 {
01030   rtx insn, first = get_last_insn ();
01031 
01032   emit_move_insn (op0, op1);
01033 
01034   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
01035     if (INSN_P (insn))
01036       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
01037             PATTERN (insn));
01038 }
01039 
01040 /* Split a post-reload TImode or TFmode reference into two DImode
01041    components.  This is made extra difficult by the fact that we do
01042    not get any scratch registers to work with, because reload cannot
01043    be prevented from giving us a scratch that overlaps the register
01044    pair involved.  So instead, when addressing memory, we tweak the
01045    pointer register up and back down with POST_INCs.  Or up and not
01046    back down when we can get away with it.
01047 
01048    REVERSED is true when the loads must be done in reversed order
01049    (high word first) for correctness.  DEAD is true when the pointer
01050    dies with the second insn we generate and therefore the second
01051    address must not carry a postmodify.
01052 
01053    May return an insn which is to be emitted after the moves.  */
01054 
01055 static rtx
01056 ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
01057 {
01058   rtx fixup = 0;
01059 
01060   switch (GET_CODE (in))
01061     {
01062     case REG:
01063       out[reversed] = gen_rtx_REG (DImode, REGNO (in));
01064       out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
01065       break;
01066 
01067     case CONST_INT:
01068     case CONST_DOUBLE:
01069       /* Cannot occur reversed.  */
01070       if (reversed) abort ();
01071       
01072       if (GET_MODE (in) != TFmode)
01073   split_double (in, &out[0], &out[1]);
01074       else
01075   /* split_double does not understand how to split a TFmode
01076      quantity into a pair of DImode constants.  */
01077   {
01078     REAL_VALUE_TYPE r;
01079     unsigned HOST_WIDE_INT p[2];
01080     long l[4];  /* TFmode is 128 bits */
01081 
01082     REAL_VALUE_FROM_CONST_DOUBLE (r, in);
01083     real_to_target (l, &r, TFmode);
01084 
01085     if (FLOAT_WORDS_BIG_ENDIAN)
01086       {
01087         p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
01088         p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
01089       }
01090     else
01091       {
01092         p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
01093         p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
01094       }
01095     out[0] = GEN_INT (p[0]);
01096     out[1] = GEN_INT (p[1]);
01097   }
01098       break;
01099 
01100     case MEM:
01101       {
01102   rtx base = XEXP (in, 0);
01103   rtx offset;
01104 
01105   switch (GET_CODE (base))
01106     {
01107     case REG:
01108       if (!reversed)
01109         {
01110     out[0] = adjust_automodify_address
01111       (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01112     out[1] = adjust_automodify_address
01113       (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
01114         }
01115       else
01116         {
01117     /* Reversal requires a pre-increment, which can only
01118        be done as a separate insn.  */
01119     emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
01120     out[0] = adjust_automodify_address
01121       (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
01122     out[1] = adjust_address (in, DImode, 0);
01123         }
01124       break;
01125 
01126     case POST_INC:
01127       if (reversed || dead) abort ();
01128       /* Just do the increment in two steps.  */
01129       out[0] = adjust_automodify_address (in, DImode, 0, 0);
01130       out[1] = adjust_automodify_address (in, DImode, 0, 8);
01131       break;
01132 
01133     case POST_DEC:
01134       if (reversed || dead) abort ();
01135       /* Add 8, subtract 24.  */
01136       base = XEXP (base, 0);
01137       out[0] = adjust_automodify_address
01138         (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01139       out[1] = adjust_automodify_address
01140         (in, DImode,
01141          gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
01142          8);
01143       break;
01144 
01145     case POST_MODIFY:
01146       if (reversed || dead) abort ();
01147       /* Extract and adjust the modification.  This case is
01148          trickier than the others, because we might have an
01149          index register, or we might have a combined offset that
01150          doesn't fit a signed 9-bit displacement field.  We can
01151          assume the incoming expression is already legitimate.  */
01152       offset = XEXP (base, 1);
01153       base = XEXP (base, 0);
01154 
01155       out[0] = adjust_automodify_address
01156         (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
01157 
01158       if (GET_CODE (XEXP (offset, 1)) == REG)
01159         {
01160     /* Can't adjust the postmodify to match.  Emit the
01161        original, then a separate addition insn.  */
01162     out[1] = adjust_automodify_address (in, DImode, 0, 8);
01163     fixup = gen_adddi3 (base, base, GEN_INT (-8));
01164         }
01165       else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
01166         abort ();
01167       else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
01168         {
01169     /* Again the postmodify cannot be made to match, but
01170        in this case it's more efficient to get rid of the
01171        postmodify entirely and fix up with an add insn.  */
01172     out[1] = adjust_automodify_address (in, DImode, base, 8);
01173     fixup = gen_adddi3 (base, base,
01174             GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
01175         }
01176       else
01177         {
01178     /* Combined offset still fits in the displacement field.
01179        (We cannot overflow it at the high end.)  */
01180     out[1] = adjust_automodify_address
01181       (in, DImode,
01182        gen_rtx_POST_MODIFY (Pmode, base,
01183          gen_rtx_PLUS (Pmode, base,
01184            GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
01185        8);
01186         }
01187       break;
01188 
01189     default:
01190       abort ();
01191     }
01192   break;
01193       }
01194 
01195     default:
01196       abort ();
01197     }
01198 
01199   return fixup;
01200 }
01201 
01202 /* Split a TImode or TFmode move instruction after reload.
01203    This is used by *movtf_internal and *movti_internal.  */
01204 void
01205 ia64_split_tmode_move (rtx operands[])
01206 {
01207   rtx in[2], out[2], insn;
01208   rtx fixup[2];
01209   bool dead = false;
01210   bool reversed = false;
01211 
01212   /* It is possible for reload to decide to overwrite a pointer with
01213      the value it points to.  In that case we have to do the loads in
01214      the appropriate order so that the pointer is not destroyed too
01215      early.  Also we must not generate a postmodify for that second
01216      load, or rws_access_regno will abort.  */
01217   if (GET_CODE (operands[1]) == MEM
01218       && reg_overlap_mentioned_p (operands[0], operands[1]))
01219     {
01220       rtx base = XEXP (operands[1], 0);
01221       while (GET_CODE (base) != REG)
01222   base = XEXP (base, 0);
01223 
01224       if (REGNO (base) == REGNO (operands[0]))
01225   reversed = true;
01226       dead = true;
01227     }
01228   /* Another reason to do the moves in reversed order is if the first
01229      element of the target register pair is also the second element of
01230      the source register pair.  */
01231   if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
01232       && REGNO (operands[0]) == REGNO (operands[1]) + 1)
01233     reversed = true;
01234 
01235   fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
01236   fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
01237 
01238 #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)       \
01239   if (GET_CODE (EXP) == MEM           \
01240       && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY     \
01241     || GET_CODE (XEXP (EXP, 0)) == POST_INC     \
01242     || GET_CODE (XEXP (EXP, 0)) == POST_DEC))     \
01243     REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,      \
01244             XEXP (XEXP (EXP, 0), 0),  \
01245             REG_NOTES (INSN))
01246 
01247   insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
01248   MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
01249   MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
01250 
01251   insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
01252   MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
01253   MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
01254 
01255   if (fixup[0])
01256     emit_insn (fixup[0]);
01257   if (fixup[1])
01258     emit_insn (fixup[1]);
01259 
01260 #undef MAYBE_ADD_REG_INC_NOTE
01261 }
01262 
01263 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
01264    through memory plus an extra GR scratch register.  Except that you can
01265    either get the first from SECONDARY_MEMORY_NEEDED or the second from
01266    SECONDARY_RELOAD_CLASS, but not both.
01267 
01268    We got into problems in the first place by allowing a construct like
01269    (subreg:XF (reg:TI)), which we got from a union containing a long double.
01270    This solution attempts to prevent this situation from occurring.  When
01271    we see something like the above, we spill the inner register to memory.  */
01272 
01273 rtx
01274 spill_xfmode_operand (rtx in, int force)
01275 {
01276   if (GET_CODE (in) == SUBREG
01277       && GET_MODE (SUBREG_REG (in)) == TImode
01278       && GET_CODE (SUBREG_REG (in)) == REG)
01279     {
01280       rtx memt = assign_stack_temp (TImode, 16, 0);
01281       emit_move_insn (memt, SUBREG_REG (in));
01282       return adjust_address (memt, XFmode, 0);
01283     }
01284   else if (force && GET_CODE (in) == REG)
01285     {
01286       rtx memx = assign_stack_temp (XFmode, 16, 0);
01287       emit_move_insn (memx, in);
01288       return memx;
01289     }
01290   else
01291     return in;
01292 }
01293 
01294 /* Emit comparison instruction if necessary, returning the expression
01295    that holds the compare result in the proper mode.  */
01296 
01297 static GTY(()) rtx cmptf_libfunc;
01298 
01299 rtx
01300 ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
01301 {
01302   rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
01303   rtx cmp;
01304 
01305   /* If we have a BImode input, then we already have a compare result, and
01306      do not need to emit another comparison.  */
01307   if (GET_MODE (op0) == BImode)
01308     {
01309       if ((code == NE || code == EQ) && op1 == const0_rtx)
01310   cmp = op0;
01311       else
01312   abort ();
01313     }
01314   /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
01315      magic number as its third argument, that indicates what to do.
01316      The return value is an integer to be compared against zero.  */
01317   else if (GET_MODE (op0) == TFmode)
01318     {
01319       enum qfcmp_magic {
01320   QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect.  */
01321   QCMP_UNORD = 2,
01322   QCMP_EQ = 4,
01323   QCMP_LT = 8,
01324   QCMP_GT = 16
01325       } magic;
01326       enum rtx_code ncode;
01327       rtx ret, insns;
01328       if (!cmptf_libfunc || GET_MODE (op1) != TFmode)
01329   abort ();
01330       switch (code)
01331   {
01332     /* 1 = equal, 0 = not equal.  Equality operators do
01333        not raise FP_INVALID when given an SNaN operand.  */
01334   case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
01335   case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
01336     /* isunordered() from C99.  */
01337   case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
01338   case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
01339     /* Relational operators raise FP_INVALID when given
01340        an SNaN operand.  */
01341   case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
01342   case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
01343   case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
01344   case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
01345     /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
01346        Expanders for buneq etc. weuld have to be added to ia64.md
01347        for this to be useful.  */
01348   default: abort ();
01349   }
01350 
01351       start_sequence ();
01352 
01353       ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
01354              op0, TFmode, op1, TFmode,
01355              GEN_INT (magic), DImode);
01356       cmp = gen_reg_rtx (BImode);
01357       emit_insn (gen_rtx_SET (VOIDmode, cmp,
01358             gen_rtx_fmt_ee (ncode, BImode,
01359                 ret, const0_rtx)));
01360 
01361       insns = get_insns ();
01362       end_sequence ();
01363 
01364       emit_libcall_block (insns, cmp, cmp,
01365         gen_rtx_fmt_ee (code, BImode, op0, op1));
01366       code = NE;
01367     }
01368   else
01369     {
01370       cmp = gen_reg_rtx (BImode);
01371       emit_insn (gen_rtx_SET (VOIDmode, cmp,
01372             gen_rtx_fmt_ee (code, BImode, op0, op1)));
01373       code = NE;
01374     }
01375 
01376   return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
01377 }
01378 
01379 /* Generate an integral vector comparison.  */
01380 
01381 static bool
01382 ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
01383           rtx dest, rtx op0, rtx op1)
01384 {
01385   bool negate = false;
01386   rtx x;
01387 
01388   switch (code)
01389     {
01390     case EQ:
01391     case GT:
01392       break;
01393 
01394     case NE:
01395       code = EQ;
01396       negate = true;
01397       break;
01398 
01399     case LE:
01400       code = GT;
01401       negate = true;
01402       break;
01403 
01404     case GE:
01405       negate = true;
01406       /* FALLTHRU */
01407 
01408     case LT:
01409       x = op0;
01410       op0 = op1;
01411       op1 = x;
01412       code = GT;
01413       break;
01414 
01415     case GTU:
01416     case GEU:
01417     case LTU:
01418     case LEU:
01419       {
01420   rtx w0h, w0l, w1h, w1l, ch, cl;
01421   enum machine_mode wmode;
01422   rtx (*unpack_l) (rtx, rtx, rtx);
01423   rtx (*unpack_h) (rtx, rtx, rtx);
01424   rtx (*pack) (rtx, rtx, rtx);
01425 
01426   /* We don't have native unsigned comparisons, but we can generate
01427      them better than generic code can.  */
01428 
01429   if (mode == V2SImode)
01430     abort ();
01431   else if (mode == V8QImode)
01432     {
01433       wmode = V4HImode;
01434       pack = gen_pack2_sss;
01435       unpack_l = gen_unpack1_l;
01436       unpack_h = gen_unpack1_h;
01437     }
01438   else if (mode == V4HImode)
01439     {
01440       wmode = V2SImode;
01441       pack = gen_pack4_sss;
01442       unpack_l = gen_unpack2_l;
01443       unpack_h = gen_unpack2_h;
01444     }
01445   else
01446     abort ();
01447 
01448   /* Unpack into wider vectors, zero extending the elements.  */
01449 
01450   w0l = gen_reg_rtx (wmode);
01451   w0h = gen_reg_rtx (wmode);
01452   w1l = gen_reg_rtx (wmode);
01453   w1h = gen_reg_rtx (wmode);
01454   emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
01455   emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
01456   emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
01457   emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
01458 
01459   /* Compare in the wider mode.  */
01460 
01461   cl = gen_reg_rtx (wmode);
01462   ch = gen_reg_rtx (wmode);
01463   code = signed_condition (code);
01464   ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
01465   negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
01466 
01467   /* Repack into a single narrower vector.  */
01468 
01469   emit_insn (pack (dest, cl, ch));
01470       }
01471       return negate;
01472 
01473     default:
01474       abort ();
01475     }
01476 
01477   x = gen_rtx_fmt_ee (code, mode, op0, op1);
01478   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
01479 
01480   return negate;
01481 }
01482 
01483 static void
01484 ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
01485 {
01486   rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
01487 
01488   /* In this case, we extract the two SImode quantities and generate
01489      normal comparisons for each of them.  */
01490 
01491   op1l = gen_lowpart (SImode, operands[1]);
01492   op2l = gen_lowpart (SImode, operands[2]);
01493   op4l = gen_lowpart (SImode, operands[4]);
01494   op5l = gen_lowpart (SImode, operands[5]);
01495 
01496   op1h = gen_reg_rtx (SImode);
01497   op2h = gen_reg_rtx (SImode);
01498   op4h = gen_reg_rtx (SImode);
01499   op5h = gen_reg_rtx (SImode);
01500 
01501   emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
01502         gen_lowpart (DImode, operands[1]), GEN_INT (32)));
01503   emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
01504         gen_lowpart (DImode, operands[2]), GEN_INT (32)));
01505   emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
01506         gen_lowpart (DImode, operands[4]), GEN_INT (32)));
01507   emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
01508         gen_lowpart (DImode, operands[5]), GEN_INT (32)));
01509 
01510   bl = gen_reg_rtx (BImode);
01511   x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
01512   emit_insn (gen_rtx_SET (VOIDmode, bl, x));
01513 
01514   bh = gen_reg_rtx (BImode);
01515   x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
01516   emit_insn (gen_rtx_SET (VOIDmode, bh, x));
01517 
01518   /* With the results of the comparisons, emit conditional moves.  */
01519 
01520   dl = gen_reg_rtx (SImode);
01521   x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
01522   emit_insn (gen_rtx_SET (VOIDmode, dl, x));
01523 
01524   dh = gen_reg_rtx (SImode);
01525   x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
01526   emit_insn (gen_rtx_SET (VOIDmode, dh, x));
01527 
01528   /* Merge the two partial results back into a vector.  */
01529 
01530   x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
01531   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01532 }
01533 
01534 /* Emit an integral vector conditional move.  */
01535 
01536 void
01537 ia64_expand_vecint_cmov (rtx operands[])
01538 {
01539   enum machine_mode mode = GET_MODE (operands[0]);
01540   enum rtx_code code = GET_CODE (operands[3]);
01541   bool negate;
01542   rtx cmp, x, ot, of;
01543 
01544   /* Since we don't have unsigned V2SImode comparisons, it's more efficient
01545      to special-case them entirely.  */
01546   if (mode == V2SImode
01547       && (code == GTU || code == GEU || code == LEU || code == LTU))
01548     {
01549       ia64_expand_vcondu_v2si (code, operands);
01550       return;
01551     }
01552 
01553   cmp = gen_reg_rtx (mode);
01554   negate = ia64_expand_vecint_compare (code, mode, cmp,
01555                operands[4], operands[5]);
01556 
01557   ot = operands[1+negate];
01558   of = operands[2-negate];
01559 
01560   if (ot == CONST0_RTX (mode))
01561     {
01562       if (of == CONST0_RTX (mode))
01563   {
01564     emit_move_insn (operands[0], ot);
01565     return;
01566   }
01567 
01568       x = gen_rtx_NOT (mode, cmp);
01569       x = gen_rtx_AND (mode, x, of);
01570       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01571     }
01572   else if (of == CONST0_RTX (mode))
01573     {
01574       x = gen_rtx_AND (mode, cmp, ot);
01575       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01576     }
01577   else
01578     {
01579       rtx t, f;
01580 
01581       t = gen_reg_rtx (mode);
01582       x = gen_rtx_AND (mode, cmp, operands[1+negate]);
01583       emit_insn (gen_rtx_SET (VOIDmode, t, x));
01584 
01585       f = gen_reg_rtx (mode);
01586       x = gen_rtx_NOT (mode, cmp);
01587       x = gen_rtx_AND (mode, x, operands[2-negate]);
01588       emit_insn (gen_rtx_SET (VOIDmode, f, x));
01589 
01590       x = gen_rtx_IOR (mode, t, f);
01591       emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
01592     }
01593 }
01594 
01595 /* Emit an integral vector min or max operation.  Return true if all done.  */
01596 
01597 bool
01598 ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
01599          rtx operands[])
01600 {
01601   rtx xops[5];
01602 
01603   /* These four combinations are supported directly.  */
01604   if (mode == V8QImode && (code == UMIN || code == UMAX))
01605     return false;
01606   if (mode == V4HImode && (code == SMIN || code == SMAX))
01607     return false;
01608 
01609   /* Everything else implemented via vector comparisons.  */
01610   xops[0] = operands[0];
01611   xops[4] = xops[1] = operands[1];
01612   xops[5] = xops[2] = operands[2];
01613 
01614   switch (code)
01615     {
01616     case UMIN:
01617       code = LTU;
01618       break;
01619     case UMAX:
01620       code = GTU;
01621       break;
01622     case SMIN:
01623       code = LT;
01624       break;
01625     case SMAX:
01626       code = GT;
01627       break;
01628     default:
01629       abort ();
01630     }
01631   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
01632 
01633   ia64_expand_vecint_cmov (xops);
01634   return true;
01635 }
01636 
01637 /* Emit the appropriate sequence for a call.  */
01638 
01639 void
01640 ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
01641       int sibcall_p)
01642 {
01643   rtx insn, b0;
01644 
01645   addr = XEXP (addr, 0);
01646   addr = convert_memory_address (DImode, addr);
01647   b0 = gen_rtx_REG (DImode, R_BR (0));
01648 
01649   /* ??? Should do this for functions known to bind local too.  */
01650   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
01651     {
01652       if (sibcall_p)
01653   insn = gen_sibcall_nogp (addr);
01654       else if (! retval)
01655   insn = gen_call_nogp (addr, b0);
01656       else
01657   insn = gen_call_value_nogp (retval, addr, b0);
01658       insn = emit_call_insn (insn);
01659     }
01660   else
01661     {
01662       if (sibcall_p)
01663   insn = gen_sibcall_gp (addr);
01664       else if (! retval)
01665   insn = gen_call_gp (addr, b0);
01666       else
01667   insn = gen_call_value_gp (retval, addr, b0);
01668       insn = emit_call_insn (insn);
01669 
01670       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
01671     }
01672 
01673   if (sibcall_p)
01674     use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
01675 }
01676 
01677 void
01678 ia64_reload_gp (void)
01679 {
01680   rtx tmp;
01681 
01682   if (current_frame_info.reg_save_gp)
01683     tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
01684   else
01685     {
01686       HOST_WIDE_INT offset;
01687 
01688       offset = (current_frame_info.spill_cfa_off
01689           + current_frame_info.spill_size);
01690       if (frame_pointer_needed)
01691         {
01692           tmp = hard_frame_pointer_rtx;
01693           offset = -offset;
01694         }
01695       else
01696         {
01697           tmp = stack_pointer_rtx;
01698           offset = current_frame_info.total_size - offset;
01699         }
01700 
01701       if (CONST_OK_FOR_I (offset))
01702         emit_insn (gen_adddi3 (pic_offset_table_rtx,
01703              tmp, GEN_INT (offset)));
01704       else
01705         {
01706           emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
01707           emit_insn (gen_adddi3 (pic_offset_table_rtx,
01708                pic_offset_table_rtx, tmp));
01709         }
01710 
01711       tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
01712     }
01713 
01714   emit_move_insn (pic_offset_table_rtx, tmp);
01715 }
01716 
01717 void
01718 ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
01719      rtx scratch_b, int noreturn_p, int sibcall_p)
01720 {
01721   rtx insn;
01722   bool is_desc = false;
01723 
01724   /* If we find we're calling through a register, then we're actually
01725      calling through a descriptor, so load up the values.  */
01726   if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
01727     {
01728       rtx tmp;
01729       bool addr_dead_p;
01730 
01731       /* ??? We are currently constrained to *not* use peep2, because
01732    we can legitimately change the global lifetime of the GP
01733    (in the form of killing where previously live).  This is
01734    because a call through a descriptor doesn't use the previous
01735    value of the GP, while a direct call does, and we do not
01736    commit to either form until the split here.
01737 
01738    That said, this means that we lack precise life info for
01739    whether ADDR is dead after this call.  This is not terribly
01740    important, since we can fix things up essentially for free
01741    with the POST_DEC below, but it's nice to not use it when we
01742    can immediately tell it's not necessary.  */
01743       addr_dead_p = ((noreturn_p || sibcall_p
01744           || TEST_HARD_REG_BIT (regs_invalidated_by_call,
01745               REGNO (addr)))
01746          && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
01747 
01748       /* Load the code address into scratch_b.  */
01749       tmp = gen_rtx_POST_INC (Pmode, addr);
01750       tmp = gen_rtx_MEM (Pmode, tmp);
01751       emit_move_insn (scratch_r, tmp);
01752       emit_move_insn (scratch_b, scratch_r);
01753 
01754       /* Load the GP address.  If ADDR is not dead here, then we must
01755    revert the change made above via the POST_INCREMENT.  */
01756       if (!addr_dead_p)
01757   tmp = gen_rtx_POST_DEC (Pmode, addr);
01758       else
01759   tmp = addr;
01760       tmp = gen_rtx_MEM (Pmode, tmp);
01761       emit_move_insn (pic_offset_table_rtx, tmp);
01762 
01763       is_desc = true;
01764       addr = scratch_b;
01765     }
01766 
01767   if (sibcall_p)
01768     insn = gen_sibcall_nogp (addr);
01769   else if (retval)
01770     insn = gen_call_value_nogp (retval, addr, retaddr);
01771   else
01772     insn = gen_call_nogp (addr, retaddr);
01773   emit_call_insn (insn);
01774 
01775   if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
01776     ia64_reload_gp ();
01777 }
01778 
01779 /* Begin the assembly file.  */
01780 
01781 static void
01782 ia64_file_start (void)
01783 {
01784   /* Variable tracking should be run after all optimizations which change order
01785      of insns.  It also needs a valid CFG.  This can't be done in
01786      ia64_override_options, because flag_var_tracking is finallized after
01787      that.  */
01788   ia64_flag_var_tracking = flag_var_tracking;
01789   flag_var_tracking = 0;
01790 
01791   default_file_start ();
01792   emit_safe_across_calls ();
01793 }
01794 
01795 void
01796 emit_safe_across_calls (void)
01797 {
01798   unsigned int rs, re;
01799   int out_state;
01800 
01801   rs = 1;
01802   out_state = 0;
01803   while (1)
01804     {
01805       while (rs < 64 && call_used_regs[PR_REG (rs)])
01806   rs++;
01807       if (rs >= 64)
01808   break;
01809       for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
01810   continue;
01811       if (out_state == 0)
01812   {
01813     fputs ("\t.pred.safe_across_calls ", asm_out_file);
01814     out_state = 1;
01815   }
01816       else
01817   fputc (',', asm_out_file);
01818       if (re == rs + 1)
01819   fprintf (asm_out_file, "p%u", rs);
01820       else
01821   fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
01822       rs = re + 1;
01823     }
01824   if (out_state)
01825     fputc ('\n', asm_out_file);
01826 }
01827 
01828 /* Helper function for ia64_compute_frame_size: find an appropriate general
01829    register to spill some special register to.  SPECIAL_SPILL_MASK contains
01830    bits in GR0 to GR31 that have already been allocated by this routine.
01831    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
01832 
01833 static int
01834 find_gr_spill (int try_locals)
01835 {
01836   int regno;
01837 
01838   /* If this is a leaf function, first try an otherwise unused
01839      call-clobbered register.  */
01840   if (current_function_is_leaf)
01841     {
01842       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
01843   if (! regs_ever_live[regno]
01844       && call_used_regs[regno]
01845       && ! fixed_regs[regno]
01846       && ! global_regs[regno]
01847       && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
01848     {
01849       current_frame_info.gr_used_mask |= 1 << regno;
01850       return regno;
01851     }
01852     }
01853 
01854   if (try_locals)
01855     {
01856       regno = current_frame_info.n_local_regs;
01857       /* If there is a frame pointer, then we can't use loc79, because
01858    that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
01859    reg_name switching code in ia64_expand_prologue.  */
01860       if (regno < (80 - frame_pointer_needed))
01861   {
01862     current_frame_info.n_local_regs = regno + 1;
01863     return LOC_REG (0) + regno;
01864   }
01865     }
01866 
01867   /* Failed to find a general register to spill to.  Must use stack.  */
01868   return 0;
01869 }
01870 
01871 /* In order to make for nice schedules, we try to allocate every temporary
01872    to a different register.  We must of course stay away from call-saved,
01873    fixed, and global registers.  We must also stay away from registers
01874    allocated in current_frame_info.gr_used_mask, since those include regs
01875    used all through the prologue.
01876 
01877    Any register allocated here must be used immediately.  The idea is to
01878    aid scheduling, not to solve data flow problems.  */
01879 
01880 static int last_scratch_gr_reg;
01881 
01882 static int
01883 next_scratch_gr_reg (void)
01884 {
01885   int i, regno;
01886 
01887   for (i = 0; i < 32; ++i)
01888     {
01889       regno = (last_scratch_gr_reg + i + 1) & 31;
01890       if (call_used_regs[regno]
01891     && ! fixed_regs[regno]
01892     && ! global_regs[regno]
01893     && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
01894   {
01895     last_scratch_gr_reg = regno;
01896     return regno;
01897   }
01898     }
01899 
01900   /* There must be _something_ available.  */
01901   abort ();
01902 }
01903 
01904 /* Helper function for ia64_compute_frame_size, called through
01905    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
01906 
01907 static void
01908 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
01909 {
01910   unsigned int regno = REGNO (reg);
01911   if (regno < 32)
01912     {
01913       unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
01914       for (i = 0; i < n; ++i)
01915   current_frame_info.gr_used_mask |= 1 << (regno + i);
01916     }
01917 }
01918 
01919 /* Returns the number of bytes offset between the frame pointer and the stack
01920    pointer for the current function.  SIZE is the number of bytes of space
01921    needed for local variables.  */
01922 
01923 static void
01924 ia64_compute_frame_size (HOST_WIDE_INT size)
01925 {
01926   HOST_WIDE_INT total_size;
01927   HOST_WIDE_INT spill_size = 0;
01928   HOST_WIDE_INT extra_spill_size = 0;
01929   HOST_WIDE_INT pretend_args_size;
01930   HARD_REG_SET mask;
01931   int n_spilled = 0;
01932   int spilled_gr_p = 0;
01933   int spilled_fr_p = 0;
01934   unsigned int regno;
01935   int i;
01936 
01937   if (current_frame_info.initialized)
01938     return;
01939 
01940   memset (&current_frame_info, 0, sizeof current_frame_info);
01941   CLEAR_HARD_REG_SET (mask);
01942 
01943   /* Don't allocate scratches to the return register.  */
01944   diddle_return_value (mark_reg_gr_used_mask, NULL);
01945 
01946   /* Don't allocate scratches to the EH scratch registers.  */
01947   if (cfun->machine->ia64_eh_epilogue_sp)
01948     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
01949   if (cfun->machine->ia64_eh_epilogue_bsp)
01950     mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
01951 
01952   /* Find the size of the register stack frame.  We have only 80 local
01953      registers, because we reserve 8 for the inputs and 8 for the
01954      outputs.  */
01955 
01956   /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
01957      since we'll be adjusting that down later.  */
01958   regno = LOC_REG (78) + ! frame_pointer_needed;
01959   for (; regno >= LOC_REG (0); regno--)
01960     if (regs_ever_live[regno])
01961       break;
01962   current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
01963 
01964   /* For functions marked with the syscall_linkage attribute, we must mark
01965      all eight input registers as in use, so that locals aren't visible to
01966      the caller.  */
01967 
01968   if (cfun->machine->n_varargs > 0
01969       || lookup_attribute ("syscall_linkage",
01970          TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
01971     current_frame_info.n_input_regs = 8;
01972   else
01973     {
01974       for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
01975   if (regs_ever_live[regno])
01976     break;
01977       current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
01978     }
01979 
01980   for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
01981     if (regs_ever_live[regno])
01982       break;
01983   i = regno - OUT_REG (0) + 1;
01984 
01985   /* When -p profiling, we need one output register for the mcount argument.
01986      Likewise for -a profiling for the bb_init_func argument.  For -ax
01987      profiling, we need two output registers for the two bb_init_trace_func
01988      arguments.  */
01989   if (current_function_profile)
01990     i = MAX (i, 1);
01991   current_frame_info.n_output_regs = i;
01992 
01993   /* ??? No rotating register support yet.  */
01994   current_frame_info.n_rotate_regs = 0;
01995 
01996   /* Discover which registers need spilling, and how much room that
01997      will take.  Begin with floating point and general registers,
01998      which will always wind up on the stack.  */
01999 
02000   for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
02001     if (regs_ever_live[regno] && ! call_used_regs[regno])
02002       {
02003   SET_HARD_REG_BIT (mask, regno);
02004   spill_size += 16;
02005   n_spilled += 1;
02006   spilled_fr_p = 1;
02007       }
02008 
02009   for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
02010     if (regs_ever_live[regno] && ! call_used_regs[regno])
02011       {
02012   SET_HARD_REG_BIT (mask, regno);
02013   spill_size += 8;
02014   n_spilled += 1;
02015   spilled_gr_p = 1;
02016       }
02017 
02018   for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
02019     if (regs_ever_live[regno] && ! call_used_regs[regno])
02020       {
02021   SET_HARD_REG_BIT (mask, regno);
02022   spill_size += 8;
02023   n_spilled += 1;
02024       }
02025 
02026   /* Now come all special registers that might get saved in other
02027      general registers.  */
02028 
02029   if (frame_pointer_needed)
02030     {
02031       current_frame_info.reg_fp = find_gr_spill (1);
02032       /* If we did not get a register, then we take LOC79.  This is guaranteed
02033    to be free, even if regs_ever_live is already set, because this is
02034    HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
02035    as we don't count loc79 above.  */
02036       if (current_frame_info.reg_fp == 0)
02037   {
02038     current_frame_info.reg_fp = LOC_REG (79);
02039     current_frame_info.n_local_regs++;
02040   }
02041     }
02042 
02043   if (! current_function_is_leaf)
02044     {
02045       /* Emit a save of BR0 if we call other functions.  Do this even
02046    if this function doesn't return, as EH depends on this to be
02047    able to unwind the stack.  */
02048       SET_HARD_REG_BIT (mask, BR_REG (0));
02049 
02050       current_frame_info.reg_save_b0 = find_gr_spill (1);
02051       if (current_frame_info.reg_save_b0 == 0)
02052   {
02053     spill_size += 8;
02054     n_spilled += 1;
02055   }
02056 
02057       /* Similarly for ar.pfs.  */
02058       SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
02059       current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
02060       if (current_frame_info.reg_save_ar_pfs == 0)
02061   {
02062     extra_spill_size += 8;
02063     n_spilled += 1;
02064   }
02065 
02066       /* Similarly for gp.  Note that if we're calling setjmp, the stacked
02067    registers are clobbered, so we fall back to the stack.  */
02068       current_frame_info.reg_save_gp
02069   = (current_function_calls_setjmp ? 0 : find_gr_spill (1));
02070       if (current_frame_info.reg_save_gp == 0)
02071   {
02072     SET_HARD_REG_BIT (mask, GR_REG (1));
02073     spill_size += 8;
02074     n_spilled += 1;
02075   }
02076     }
02077   else
02078     {
02079       if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
02080   {
02081     SET_HARD_REG_BIT (mask, BR_REG (0));
02082     spill_size += 8;
02083     n_spilled += 1;
02084   }
02085 
02086       if (regs_ever_live[AR_PFS_REGNUM])
02087   {
02088     SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
02089     current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
02090     if (current_frame_info.reg_save_ar_pfs == 0)
02091       {
02092         extra_spill_size += 8;
02093         n_spilled += 1;
02094       }
02095   }
02096     }
02097 
02098   /* Unwind descriptor hackery: things are most efficient if we allocate
02099      consecutive GR save registers for RP, PFS, FP in that order. However,
02100      it is absolutely critical that FP get the only hard register that's
02101      guaranteed to be free, so we allocated it first.  If all three did
02102      happen to be allocated hard regs, and are consecutive, rearrange them
02103      into the preferred order now.  */
02104   if (current_frame_info.reg_fp != 0
02105       && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
02106       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
02107     {
02108       current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
02109       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
02110       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
02111     }
02112 
02113   /* See if we need to store the predicate register block.  */
02114   for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
02115     if (regs_ever_live[regno] && ! call_used_regs[regno])
02116       break;
02117   if (regno <= PR_REG (63))
02118     {
02119       SET_HARD_REG_BIT (mask, PR_REG (0));
02120       current_frame_info.reg_save_pr = find_gr_spill (1);
02121       if (current_frame_info.reg_save_pr == 0)
02122   {
02123     extra_spill_size += 8;
02124     n_spilled += 1;
02125   }
02126 
02127       /* ??? Mark them all as used so that register renaming and such
02128    are free to use them.  */
02129       for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
02130   regs_ever_live[regno] = 1;
02131     }
02132 
02133   /* If we're forced to use st8.spill, we're forced to save and restore
02134      ar.unat as well.  The check for existing liveness allows inline asm
02135      to touch ar.unat.  */
02136   if (spilled_gr_p || cfun->machine->n_varargs
02137       || regs_ever_live[AR_UNAT_REGNUM])
02138     {
02139       regs_ever_live[AR_UNAT_REGNUM] = 1;
02140       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
02141       current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
02142       if (current_frame_info.reg_save_ar_unat == 0)
02143   {
02144     extra_spill_size += 8;
02145     n_spilled += 1;
02146   }
02147     }
02148 
02149   if (regs_ever_live[AR_LC_REGNUM])
02150     {
02151       SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
02152       current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
02153       if (current_frame_info.reg_save_ar_lc == 0)
02154   {
02155     extra_spill_size += 8;
02156     n_spilled += 1;
02157   }
02158     }
02159 
02160   /* If we have an odd number of words of pretend arguments written to
02161      the stack, then the FR save area will be unaligned.  We round the
02162      size of this area up to keep things 16 byte aligned.  */
02163   if (spilled_fr_p)
02164     pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
02165   else
02166     pretend_args_size = current_function_pretend_args_size;
02167 
02168   total_size = (spill_size + extra_spill_size + size + pretend_args_size
02169     + current_function_outgoing_args_size);
02170   total_size = IA64_STACK_ALIGN (total_size);
02171 
02172   /* We always use the 16-byte scratch area provided by the caller, but
02173      if we are a leaf function, there's no one to which we need to provide
02174      a scratch area.  */
02175   if (current_function_is_leaf)
02176     total_size = MAX (0, total_size - 16);
02177 
02178   current_frame_info.total_size = total_size;
02179   current_frame_info.spill_cfa_off = pretend_args_size - 16;
02180   current_frame_info.spill_size = spill_size;
02181   current_frame_info.extra_spill_size = extra_spill_size;
02182   COPY_HARD_REG_SET (current_frame_info.mask, mask);
02183   current_frame_info.n_spilled = n_spilled;
02184   current_frame_info.initialized = reload_completed;
02185 }
02186 
02187 /* Compute the initial difference between the specified pair of registers.  */
02188 
02189 HOST_WIDE_INT
02190 ia64_initial_elimination_offset (int from, int to)
02191 {
02192   HOST_WIDE_INT offset;
02193 
02194   ia64_compute_frame_size (get_frame_size ());
02195   switch (from)
02196     {
02197     case FRAME_POINTER_REGNUM:
02198       if (to == HARD_FRAME_POINTER_REGNUM)
02199   {
02200     if (current_function_is_leaf)
02201       offset = -current_frame_info.total_size;
02202     else
02203       offset = -(current_frame_info.total_size
02204            - current_function_outgoing_args_size - 16);
02205   }
02206       else if (to == STACK_POINTER_REGNUM)
02207   {
02208     if (current_function_is_leaf)
02209       offset = 0;
02210     else
02211       offset = 16 + current_function_outgoing_args_size;
02212   }
02213       else
02214   abort ();
02215       break;
02216 
02217     case ARG_POINTER_REGNUM:
02218       /* Arguments start above the 16 byte save area, unless stdarg
02219    in which case we store through the 16 byte save area.  */
02220       if (to == HARD_FRAME_POINTER_REGNUM)
02221   offset = 16 - current_function_pretend_args_size;
02222       else if (to == STACK_POINTER_REGNUM)
02223   offset = (current_frame_info.total_size
02224       + 16 - current_function_pretend_args_size);
02225       else
02226   abort ();
02227       break;
02228 
02229     default:
02230       abort ();
02231     }
02232 
02233   return offset;
02234 }
02235 
02236 /* If there are more than a trivial number of register spills, we use
02237    two interleaved iterators so that we can get two memory references
02238    per insn group.
02239 
02240    In order to simplify things in the prologue and epilogue expanders,
02241    we use helper functions to fix up the memory references after the
02242    fact with the appropriate offsets to a POST_MODIFY memory mode.
02243    The following data structure tracks the state of the two iterators
02244    while insns are being emitted.  */
02245 
02246 struct spill_fill_data
02247 {
02248   rtx init_after;   /* point at which to emit initializations */
02249   rtx init_reg[2];    /* initial base register */
02250   rtx iter_reg[2];    /* the iterator registers */
02251   rtx *prev_addr[2];    /* address of last memory use */
02252   rtx prev_insn[2];   /* the insn corresponding to prev_addr */
02253   HOST_WIDE_INT prev_off[2];  /* last offset */
02254   int n_iter;     /* number of iterators in use */
02255   int next_iter;    /* next iterator to use */
02256   unsigned int save_gr_used_mask;
02257 };
02258 
02259 static struct spill_fill_data spill_fill_data;
02260 
02261 static void
02262 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
02263 {
02264   int i;
02265 
02266   spill_fill_data.init_after = get_last_insn ();
02267   spill_fill_data.init_reg[0] = init_reg;
02268   spill_fill_data.init_reg[1] = init_reg;
02269   spill_fill_data.prev_addr[0] = NULL;
02270   spill_fill_data.prev_addr[1] = NULL;
02271   spill_fill_data.prev_insn[0] = NULL;
02272   spill_fill_data.prev_insn[1] = NULL;
02273   spill_fill_data.prev_off[0] = cfa_off;
02274   spill_fill_data.prev_off[1] = cfa_off;
02275   spill_fill_data.next_iter = 0;
02276   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
02277 
02278   spill_fill_data.n_iter = 1 + (n_spills > 2);
02279   for (i = 0; i < spill_fill_data.n_iter; ++i)
02280     {
02281       int regno = next_scratch_gr_reg ();
02282       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
02283       current_frame_info.gr_used_mask |= 1 << regno;
02284     }
02285 }
02286 
02287 static void
02288 finish_spill_pointers (void)
02289 {
02290   current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
02291 }
02292 
02293 static rtx
02294 spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
02295 {
02296   int iter = spill_fill_data.next_iter;
02297   HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
02298   rtx disp_rtx = GEN_INT (disp);
02299   rtx mem;
02300 
02301   if (spill_fill_data.prev_addr[iter])
02302     {
02303       if (CONST_OK_FOR_N (disp))
02304   {
02305     *spill_fill_data.prev_addr[iter]
02306       = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
02307            gen_rtx_PLUS (DImode,
02308              spill_fill_data.iter_reg[iter],
02309              disp_rtx));
02310     REG_NOTES (spill_fill_data.prev_insn[iter])
02311       = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
02312          REG_NOTES (spill_fill_data.prev_insn[iter]));
02313   }
02314       else
02315   {
02316     /* ??? Could use register post_modify for loads.  */
02317     if (! CONST_OK_FOR_I (disp))
02318       {
02319         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02320         emit_move_insn (tmp, disp_rtx);
02321         disp_rtx = tmp;
02322       }
02323     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02324          spill_fill_data.iter_reg[iter], disp_rtx));
02325   }
02326     }
02327   /* Micro-optimization: if we've created a frame pointer, it's at
02328      CFA 0, which may allow the real iterator to be initialized lower,
02329      slightly increasing parallelism.  Also, if there are few saves
02330      it may eliminate the iterator entirely.  */
02331   else if (disp == 0
02332      && spill_fill_data.init_reg[iter] == stack_pointer_rtx
02333      && frame_pointer_needed)
02334     {
02335       mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
02336       set_mem_alias_set (mem, get_varargs_alias_set ());
02337       return mem;
02338     }
02339   else
02340     {
02341       rtx seq, insn;
02342 
02343       if (disp == 0)
02344   seq = gen_movdi (spill_fill_data.iter_reg[iter],
02345        spill_fill_data.init_reg[iter]);
02346       else
02347   {
02348     start_sequence ();
02349 
02350     if (! CONST_OK_FOR_I (disp))
02351       {
02352         rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
02353         emit_move_insn (tmp, disp_rtx);
02354         disp_rtx = tmp;
02355       }
02356 
02357     emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
02358          spill_fill_data.init_reg[iter],
02359          disp_rtx));
02360 
02361     seq = get_insns ();
02362     end_sequence ();
02363   }
02364 
02365       /* Careful for being the first insn in a sequence.  */
02366       if (spill_fill_data.init_after)
02367   insn = emit_insn_after (seq, spill_fill_data.init_after);
02368       else
02369   {
02370     rtx first = get_insns ();
02371     if (first)
02372       insn = emit_insn_before (seq, first);
02373     else
02374       insn = emit_insn (seq);
02375   }
02376       spill_fill_data.init_after = insn;
02377 
02378       /* If DISP is 0, we may or may not have a further adjustment
02379    afterward.  If we do, then the load/store insn may be modified
02380    to be a post-modify.  If we don't, then this copy may be
02381    eliminated by copyprop_hardreg_forward, which makes this
02382    insn garbage, which runs afoul of the sanity check in
02383    propagate_one_insn.  So mark this insn as legal to delete.  */
02384       if (disp == 0)
02385   REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
02386                REG_NOTES (insn));
02387     }
02388 
02389   mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
02390 
02391   /* ??? Not all of the spills are for varargs, but some of them are.
02392      The rest of the spills belong in an alias set of their own.  But
02393      it doesn't actually hurt to include them here.  */
02394   set_mem_alias_set (mem, get_varargs_alias_set ());
02395 
02396   spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
02397   spill_fill_data.prev_off[iter] = cfa_off;
02398 
02399   if (++iter >= spill_fill_data.n_iter)
02400     iter = 0;
02401   spill_fill_data.next_iter = iter;
02402 
02403   return mem;
02404 }
02405 
02406 static void
02407 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
02408     rtx frame_reg)
02409 {
02410   int iter = spill_fill_data.next_iter;
02411   rtx mem, insn;
02412 
02413   mem = spill_restore_mem (reg, cfa_off);
02414   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
02415   spill_fill_data.prev_insn[iter] = insn;
02416 
02417   if (frame_reg)
02418     {
02419       rtx base;
02420       HOST_WIDE_INT off;
02421 
02422       RTX_FRAME_RELATED_P (insn) = 1;
02423 
02424       /* Don't even pretend that the unwind code can intuit its way
02425    through a pair of interleaved post_modify iterators.  Just
02426    provide the correct answer.  */
02427 
02428       if (frame_pointer_needed)
02429   {
02430     base = hard_frame_pointer_rtx;
02431     off = - cfa_off;
02432   }
02433       else
02434   {
02435     base = stack_pointer_rtx;
02436     off = current_frame_info.total_size - cfa_off;
02437   }
02438 
02439       REG_NOTES (insn)
02440   = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02441     gen_rtx_SET (VOIDmode,
02442            gen_rtx_MEM (GET_MODE (reg),
02443             plus_constant (base, off)),
02444            frame_reg),
02445     REG_NOTES (insn));
02446     }
02447 }
02448 
02449 static void
02450 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
02451 {
02452   int iter = spill_fill_data.next_iter;
02453   rtx insn;
02454 
02455   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
02456         GEN_INT (cfa_off)));
02457   spill_fill_data.prev_insn[iter] = insn;
02458 }
02459 
02460 /* Wrapper functions that discards the CONST_INT spill offset.  These
02461    exist so that we can give gr_spill/gr_fill the offset they need and
02462    use a consistent function interface.  */
02463 
02464 static rtx
02465 gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02466 {
02467   return gen_movdi (dest, src);
02468 }
02469 
02470 static rtx
02471 gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02472 {
02473   return gen_fr_spill (dest, src);
02474 }
02475 
02476 static rtx
02477 gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
02478 {
02479   return gen_fr_restore (dest, src);
02480 }
02481 
02482 /* Called after register allocation to add any instructions needed for the
02483    prologue.  Using a prologue insn is favored compared to putting all of the
02484    instructions in output_function_prologue(), since it allows the scheduler
02485    to intermix instructions with the saves of the caller saved registers.  In
02486    some cases, it might be necessary to emit a barrier instruction as the last
02487    insn to prevent such scheduling.
02488 
02489    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
02490    so that the debug info generation code can handle them properly.
02491 
02492    The register save area is laid out like so:
02493    cfa+16
02494   [ varargs spill area ]
02495   [ fr register spill area ]
02496   [ br register spill area ]
02497   [ ar register spill area ]
02498   [ pr register spill area ]
02499   [ gr register spill area ] */
02500 
02501 /* ??? Get inefficient code when the frame size is larger than can fit in an
02502    adds instruction.  */
02503 
02504 void
02505 ia64_expand_prologue (void)
02506 {
02507   rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
02508   int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
02509   rtx reg, alt_reg;
02510 
02511   ia64_compute_frame_size (get_frame_size ());
02512   last_scratch_gr_reg = 15;
02513 
02514   /* If there is no epilogue, then we don't need some prologue insns.
02515      We need to avoid emitting the dead prologue insns, because flow
02516      will complain about them.  */
02517   if (optimize)
02518     {
02519       edge e;
02520       edge_iterator ei;
02521 
02522       FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
02523   if ((e->flags & EDGE_FAKE) == 0
02524       && (e->flags & EDGE_FALLTHRU) != 0)
02525     break;
02526       epilogue_p = (e != NULL);
02527     }
02528   else
02529     epilogue_p = 1;
02530 
02531   /* Set the local, input, and output register names.  We need to do this
02532      for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
02533      half.  If we use in/loc/out register names, then we get assembler errors
02534      in crtn.S because there is no alloc insn or regstk directive in there.  */
02535   if (! TARGET_REG_NAMES)
02536     {
02537       int inputs = current_frame_info.n_input_regs;
02538       int locals = current_frame_info.n_local_regs;
02539       int outputs = current_frame_info.n_output_regs;
02540 
02541       for (i = 0; i < inputs; i++)
02542   reg_names[IN_REG (i)] = ia64_reg_numbers[i];
02543       for (i = 0; i < locals; i++)
02544   reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
02545       for (i = 0; i < outputs; i++)
02546   reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
02547     }
02548 
02549   /* Set the frame pointer register name.  The regnum is logically loc79,
02550      but of course we'll not have allocated that many locals.  Rather than
02551      worrying about renumbering the existing rtxs, we adjust the name.  */
02552   /* ??? This code means that we can never use one local register when
02553      there is a frame pointer.  loc79 gets wasted in this case, as it is
02554      renamed to a register that will never be used.  See also the try_locals
02555      code in find_gr_spill.  */
02556   if (current_frame_info.reg_fp)
02557     {
02558       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
02559       reg_names[HARD_FRAME_POINTER_REGNUM]
02560   = reg_names[current_frame_info.reg_fp];
02561       reg_names[current_frame_info.reg_fp] = tmp;
02562     }
02563 
02564   /* We don't need an alloc instruction if we've used no outputs or locals.  */
02565   if (current_frame_info.n_local_regs == 0
02566       && current_frame_info.n_output_regs == 0
02567       && current_frame_info.n_input_regs <= current_function_args_info.int_regs
02568       && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
02569     {
02570       /* If there is no alloc, but there are input registers used, then we
02571    need a .regstk directive.  */
02572       current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
02573       ar_pfs_save_reg = NULL_RTX;
02574     }
02575   else
02576     {
02577       current_frame_info.need_regstk = 0;
02578 
02579       if (current_frame_info.reg_save_ar_pfs)
02580   regno = current_frame_info.reg_save_ar_pfs;
02581       else
02582   regno = next_scratch_gr_reg ();
02583       ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
02584 
02585       insn = emit_insn (gen_alloc (ar_pfs_save_reg,
02586            GEN_INT (current_frame_info.n_input_regs),
02587            GEN_INT (current_frame_info.n_local_regs),
02588            GEN_INT (current_frame_info.n_output_regs),
02589            GEN_INT (current_frame_info.n_rotate_regs)));
02590       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
02591     }
02592 
02593   /* Set up frame pointer, stack pointer, and spill iterators.  */
02594 
02595   n_varargs = cfun->machine->n_varargs;
02596   setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
02597       stack_pointer_rtx, 0);
02598 
02599   if (frame_pointer_needed)
02600     {
02601       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
02602       RTX_FRAME_RELATED_P (insn) = 1;
02603     }
02604 
02605   if (current_frame_info.total_size != 0)
02606     {
02607       rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
02608       rtx offset;
02609 
02610       if (CONST_OK_FOR_I (- current_frame_info.total_size))
02611   offset = frame_size_rtx;
02612       else
02613   {
02614     regno = next_scratch_gr_reg ();
02615     offset = gen_rtx_REG (DImode, regno);
02616     emit_move_insn (offset, frame_size_rtx);
02617   }
02618 
02619       insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
02620             stack_pointer_rtx, offset));
02621 
02622       if (! frame_pointer_needed)
02623   {
02624     RTX_FRAME_RELATED_P (insn) = 1;
02625     if (GET_CODE (offset) != CONST_INT)
02626       {
02627         REG_NOTES (insn)
02628     = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02629       gen_rtx_SET (VOIDmode,
02630              stack_pointer_rtx,
02631              gen_rtx_PLUS (DImode,
02632                stack_pointer_rtx,
02633                frame_size_rtx)),
02634       REG_NOTES (insn));
02635       }
02636   }
02637 
02638       /* ??? At this point we must generate a magic insn that appears to
02639    modify the stack pointer, the frame pointer, and all spill
02640    iterators.  This would allow the most scheduling freedom.  For
02641    now, just hard stop.  */
02642       emit_insn (gen_blockage ());
02643     }
02644 
02645   /* Must copy out ar.unat before doing any integer spills.  */
02646   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
02647     {
02648       if (current_frame_info.reg_save_ar_unat)
02649   ar_unat_save_reg
02650     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
02651       else
02652   {
02653     alt_regno = next_scratch_gr_reg ();
02654     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
02655     current_frame_info.gr_used_mask |= 1 << alt_regno;
02656   }
02657 
02658       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
02659       insn = emit_move_insn (ar_unat_save_reg, reg);
02660       RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
02661 
02662       /* Even if we're not going to generate an epilogue, we still
02663    need to save the register so that EH works.  */
02664       if (! epilogue_p && current_frame_info.reg_save_ar_unat)
02665   emit_insn (gen_prologue_use (ar_unat_save_reg));
02666     }
02667   else
02668     ar_unat_save_reg = NULL_RTX;
02669 
02670   /* Spill all varargs registers.  Do this before spilling any GR registers,
02671      since we want the UNAT bits for the GR registers to override the UNAT
02672      bits from varargs, which we don't care about.  */
02673 
02674   cfa_off = -16;
02675   for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
02676     {
02677       reg = gen_rtx_REG (DImode, regno);
02678       do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
02679     }
02680 
02681   /* Locate the bottom of the register save area.  */
02682   cfa_off = (current_frame_info.spill_cfa_off
02683        + current_frame_info.spill_size
02684        + current_frame_info.extra_spill_size);
02685 
02686   /* Save the predicate register block either in a register or in memory.  */
02687   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
02688     {
02689       reg = gen_rtx_REG (DImode, PR_REG (0));
02690       if (current_frame_info.reg_save_pr != 0)
02691   {
02692     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
02693     insn = emit_move_insn (alt_reg, reg);
02694 
02695     /* ??? Denote pr spill/fill by a DImode move that modifies all
02696        64 hard registers.  */
02697     RTX_FRAME_RELATED_P (insn) = 1;
02698     REG_NOTES (insn)
02699       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
02700       gen_rtx_SET (VOIDmode, alt_reg, reg),
02701       REG_NOTES (insn));
02702 
02703     /* Even if we're not going to generate an epilogue, we still
02704        need to save the register so that EH works.  */
02705     if (! epilogue_p)
02706       emit_insn (gen_prologue_use (alt_reg));
02707   }
02708       else
02709   {
02710     alt_regno = next_scratch_gr_reg ();
02711     alt_reg = gen_rtx_REG (DImode, alt_regno);
02712     insn = emit_move_insn (alt_reg, reg);
02713     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02714     cfa_off -= 8;
02715   }
02716     }
02717 
02718   /* Handle AR regs in numerical order.  All of them get special handling.  */
02719   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
02720       && current_frame_info.reg_save_ar_unat == 0)
02721     {
02722       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
02723       do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
02724       cfa_off -= 8;
02725     }
02726 
02727   /* The alloc insn already copied ar.pfs into a general register.  The
02728      only thing we have to do now is copy that register to a stack slot
02729      if we'd not allocated a local register for the job.  */
02730   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
02731       && current_frame_info.reg_save_ar_pfs == 0)
02732     {
02733       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02734       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
02735       cfa_off -= 8;
02736     }
02737 
02738   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
02739     {
02740       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
02741       if (current_frame_info.reg_save_ar_lc != 0)
02742   {
02743     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
02744     insn = emit_move_insn (alt_reg, reg);
02745     RTX_FRAME_RELATED_P (insn) = 1;
02746 
02747     /* Even if we're not going to generate an epilogue, we still
02748        need to save the register so that EH works.  */
02749     if (! epilogue_p)
02750       emit_insn (gen_prologue_use (alt_reg));
02751   }
02752       else
02753   {
02754     alt_regno = next_scratch_gr_reg ();
02755     alt_reg = gen_rtx_REG (DImode, alt_regno);
02756     emit_move_insn (alt_reg, reg);
02757     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02758     cfa_off -= 8;
02759   }
02760     }
02761 
02762   if (current_frame_info.reg_save_gp)
02763     {
02764       insn = emit_move_insn (gen_rtx_REG (DImode,
02765             current_frame_info.reg_save_gp),
02766            pic_offset_table_rtx);
02767       /* We don't know for sure yet if this is actually needed, since
02768    we've not split the PIC call patterns.  If all of the calls
02769    are indirect, and not followed by any uses of the gp, then
02770    this save is dead.  Allow it to go away.  */
02771       REG_NOTES (insn)
02772   = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
02773     }
02774 
02775   /* We should now be at the base of the gr/br/fr spill area.  */
02776   if (cfa_off != (current_frame_info.spill_cfa_off
02777       + current_frame_info.spill_size))
02778     abort ();
02779 
02780   /* Spill all general registers.  */
02781   for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
02782     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02783       {
02784   reg = gen_rtx_REG (DImode, regno);
02785   do_spill (gen_gr_spill, reg, cfa_off, reg);
02786   cfa_off -= 8;
02787       }
02788 
02789   /* Handle BR0 specially -- it may be getting stored permanently in
02790      some GR register.  */
02791   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
02792     {
02793       reg = gen_rtx_REG (DImode, BR_REG (0));
02794       if (current_frame_info.reg_save_b0 != 0)
02795   {
02796     alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
02797     insn = emit_move_insn (alt_reg, reg);
02798     RTX_FRAME_RELATED_P (insn) = 1;
02799 
02800     /* Even if we're not going to generate an epilogue, we still
02801        need to save the register so that EH works.  */
02802     if (! epilogue_p)
02803       emit_insn (gen_prologue_use (alt_reg));
02804   }
02805       else
02806   {
02807     alt_regno = next_scratch_gr_reg ();
02808     alt_reg = gen_rtx_REG (DImode, alt_regno);
02809     emit_move_insn (alt_reg, reg);
02810     do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02811     cfa_off -= 8;
02812   }
02813     }
02814 
02815   /* Spill the rest of the BR registers.  */
02816   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
02817     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02818       {
02819   alt_regno = next_scratch_gr_reg ();
02820   alt_reg = gen_rtx_REG (DImode, alt_regno);
02821   reg = gen_rtx_REG (DImode, regno);
02822   emit_move_insn (alt_reg, reg);
02823   do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
02824   cfa_off -= 8;
02825       }
02826 
02827   /* Align the frame and spill all FR registers.  */
02828   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
02829     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02830       {
02831         if (cfa_off & 15)
02832     abort ();
02833   reg = gen_rtx_REG (XFmode, regno);
02834   do_spill (gen_fr_spill_x, reg, cfa_off, reg);
02835   cfa_off -= 16;
02836       }
02837 
02838   if (cfa_off != current_frame_info.spill_cfa_off)
02839     abort ();
02840 
02841   finish_spill_pointers ();
02842 }
02843 
02844 /* Called after register allocation to add any instructions needed for the
02845    epilogue.  Using an epilogue insn is favored compared to putting all of the
02846    instructions in output_function_prologue(), since it allows the scheduler
02847    to intermix instructions with the saves of the caller saved registers.  In
02848    some cases, it might be necessary to emit a barrier instruction as the last
02849    insn to prevent such scheduling.  */
02850 
02851 void
02852 ia64_expand_epilogue (int sibcall_p)
02853 {
02854   rtx insn, reg, alt_reg, ar_unat_save_reg;
02855   int regno, alt_regno, cfa_off;
02856 
02857   ia64_compute_frame_size (get_frame_size ());
02858 
02859   /* If there is a frame pointer, then we use it instead of the stack
02860      pointer, so that the stack pointer does not need to be valid when
02861      the epilogue starts.  See EXIT_IGNORE_STACK.  */
02862   if (frame_pointer_needed)
02863     setup_spill_pointers (current_frame_info.n_spilled,
02864         hard_frame_pointer_rtx, 0);
02865   else
02866     setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
02867         current_frame_info.total_size);
02868 
02869   if (current_frame_info.total_size != 0)
02870     {
02871       /* ??? At this point we must generate a magic insn that appears to
02872          modify the spill iterators and the frame pointer.  This would
02873    allow the most scheduling freedom.  For now, just hard stop.  */
02874       emit_insn (gen_blockage ());
02875     }
02876 
02877   /* Locate the bottom of the register save area.  */
02878   cfa_off = (current_frame_info.spill_cfa_off
02879        + current_frame_info.spill_size
02880        + current_frame_info.extra_spill_size);
02881 
02882   /* Restore the predicate registers.  */
02883   if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
02884     {
02885       if (current_frame_info.reg_save_pr != 0)
02886   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
02887       else
02888   {
02889     alt_regno = next_scratch_gr_reg ();
02890     alt_reg = gen_rtx_REG (DImode, alt_regno);
02891     do_restore (gen_movdi_x, alt_reg, cfa_off);
02892     cfa_off -= 8;
02893   }
02894       reg = gen_rtx_REG (DImode, PR_REG (0));
02895       emit_move_insn (reg, alt_reg);
02896     }
02897 
02898   /* Restore the application registers.  */
02899 
02900   /* Load the saved unat from the stack, but do not restore it until
02901      after the GRs have been restored.  */
02902   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
02903     {
02904       if (current_frame_info.reg_save_ar_unat != 0)
02905         ar_unat_save_reg
02906     = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
02907       else
02908   {
02909     alt_regno = next_scratch_gr_reg ();
02910     ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
02911     current_frame_info.gr_used_mask |= 1 << alt_regno;
02912     do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
02913     cfa_off -= 8;
02914   }
02915     }
02916   else
02917     ar_unat_save_reg = NULL_RTX;
02918 
02919   if (current_frame_info.reg_save_ar_pfs != 0)
02920     {
02921       alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
02922       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02923       emit_move_insn (reg, alt_reg);
02924     }
02925   else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
02926     {
02927       alt_regno = next_scratch_gr_reg ();
02928       alt_reg = gen_rtx_REG (DImode, alt_regno);
02929       do_restore (gen_movdi_x, alt_reg, cfa_off);
02930       cfa_off -= 8;
02931       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
02932       emit_move_insn (reg, alt_reg);
02933     }
02934 
02935   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
02936     {
02937       if (current_frame_info.reg_save_ar_lc != 0)
02938   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
02939       else
02940   {
02941     alt_regno = next_scratch_gr_reg ();
02942     alt_reg = gen_rtx_REG (DImode, alt_regno);
02943     do_restore (gen_movdi_x, alt_reg, cfa_off);
02944     cfa_off -= 8;
02945   }
02946       reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
02947       emit_move_insn (reg, alt_reg);
02948     }
02949 
02950   /* We should now be at the base of the gr/br/fr spill area.  */
02951   if (cfa_off != (current_frame_info.spill_cfa_off
02952       + current_frame_info.spill_size))
02953     abort ();
02954 
02955   /* The GP may be stored on the stack in the prologue, but it's
02956      never restored in the epilogue.  Skip the stack slot.  */
02957   if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
02958     cfa_off -= 8;
02959 
02960   /* Restore all general registers.  */
02961   for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
02962     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02963       {
02964   reg = gen_rtx_REG (DImode, regno);
02965   do_restore (gen_gr_restore, reg, cfa_off);
02966   cfa_off -= 8;
02967       }
02968 
02969   /* Restore the branch registers.  Handle B0 specially, as it may
02970      have gotten stored in some GR register.  */
02971   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
02972     {
02973       if (current_frame_info.reg_save_b0 != 0)
02974   alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
02975       else
02976   {
02977     alt_regno = next_scratch_gr_reg ();
02978     alt_reg = gen_rtx_REG (DImode, alt_regno);
02979     do_restore (gen_movdi_x, alt_reg, cfa_off);
02980     cfa_off -= 8;
02981   }
02982       reg = gen_rtx_REG (DImode, BR_REG (0));
02983       emit_move_insn (reg, alt_reg);
02984     }
02985 
02986   for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
02987     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
02988       {
02989   alt_regno = next_scratch_gr_reg ();
02990   alt_reg = gen_rtx_REG (DImode, alt_regno);
02991   do_restore (gen_movdi_x, alt_reg, cfa_off);
02992   cfa_off -= 8;
02993   reg = gen_rtx_REG (DImode, regno);
02994   emit_move_insn (reg, alt_reg);
02995       }
02996 
02997   /* Restore floating point registers.  */
02998   for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
02999     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03000       {
03001         if (cfa_off & 15)
03002     abort ();
03003   reg = gen_rtx_REG (XFmode, regno);
03004   do_restore (gen_fr_restore_x, reg, cfa_off);
03005   cfa_off -= 16;
03006       }
03007 
03008   /* Restore ar.unat for real.  */
03009   if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
03010     {
03011       reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
03012       emit_move_insn (reg, ar_unat_save_reg);
03013     }
03014 
03015   if (cfa_off != current_frame_info.spill_cfa_off)
03016     abort ();
03017 
03018   finish_spill_pointers ();
03019 
03020   if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
03021     {
03022       /* ??? At this point we must generate a magic insn that appears to
03023          modify the spill iterators, the stack pointer, and the frame
03024    pointer.  This would allow the most scheduling freedom.  For now,
03025    just hard stop.  */
03026       emit_insn (gen_blockage ());
03027     }
03028 
03029   if (cfun->machine->ia64_eh_epilogue_sp)
03030     emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
03031   else if (frame_pointer_needed)
03032     {
03033       insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
03034       RTX_FRAME_RELATED_P (insn) = 1;
03035     }
03036   else if (current_frame_info.total_size)
03037     {
03038       rtx offset, frame_size_rtx;
03039 
03040       frame_size_rtx = GEN_INT (current_frame_info.total_size);
03041       if (CONST_OK_FOR_I (current_frame_info.total_size))
03042   offset = frame_size_rtx;
03043       else
03044   {
03045     regno = next_scratch_gr_reg ();
03046     offset = gen_rtx_REG (DImode, regno);
03047     emit_move_insn (offset, frame_size_rtx);
03048   }
03049 
03050       insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
03051             offset));
03052 
03053       RTX_FRAME_RELATED_P (insn) = 1;
03054       if (GET_CODE (offset) != CONST_INT)
03055   {
03056     REG_NOTES (insn)
03057       = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
03058       gen_rtx_SET (VOIDmode,
03059              stack_pointer_rtx,
03060              gen_rtx_PLUS (DImode,
03061                stack_pointer_rtx,
03062                frame_size_rtx)),
03063       REG_NOTES (insn));
03064   }
03065     }
03066 
03067   if (cfun->machine->ia64_eh_epilogue_bsp)
03068     emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
03069 
03070   if (! sibcall_p)
03071     emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
03072   else
03073     {
03074       int fp = GR_REG (2);
03075       /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
03076    first available call clobbered register.  If there was a frame_pointer
03077    register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
03078    so we have to make sure we're using the string "r2" when emitting
03079    the register name for the assembler.  */
03080       if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
03081   fp = HARD_FRAME_POINTER_REGNUM;
03082 
03083       /* We must emit an alloc to force the input registers to become output
03084    registers.  Otherwise, if the callee tries to pass its parameters
03085    through to another call without an intervening alloc, then these
03086    values get lost.  */
03087       /* ??? We don't need to preserve all input registers.  We only need to
03088    preserve those input registers used as arguments to the sibling call.
03089    It is unclear how to compute that number here.  */
03090       if (current_frame_info.n_input_regs != 0)
03091   {
03092     rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
03093     insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
03094         const0_rtx, const0_rtx,
03095         n_inputs, const0_rtx));
03096     RTX_FRAME_RELATED_P (insn) = 1;
03097   }
03098     }
03099 }
03100 
03101 /* Return 1 if br.ret can do all the work required to return from a
03102    function.  */
03103 
03104 int
03105 ia64_direct_return (void)
03106 {
03107   if (reload_completed && ! frame_pointer_needed)
03108     {
03109       ia64_compute_frame_size (get_frame_size ());
03110 
03111       return (current_frame_info.total_size == 0
03112         && current_frame_info.n_spilled == 0
03113         && current_frame_info.reg_save_b0 == 0
03114         && current_frame_info.reg_save_pr == 0
03115         && current_frame_info.reg_save_ar_pfs == 0
03116         && current_frame_info.reg_save_ar_unat == 0
03117         && current_frame_info.reg_save_ar_lc == 0);
03118     }
03119   return 0;
03120 }
03121 
03122 /* Return the magic cookie that we use to hold the return address
03123    during early compilation.  */
03124 
03125 rtx
03126 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
03127 {
03128   if (count != 0)
03129     return NULL;
03130   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
03131 }
03132 
03133 /* Split this value after reload, now that we know where the return
03134    address is saved.  */
03135 
03136 void
03137 ia64_split_return_addr_rtx (rtx dest)
03138 {
03139   rtx src;
03140 
03141   if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
03142     {
03143       if (current_frame_info.reg_save_b0 != 0)
03144   src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
03145       else
03146   {
03147     HOST_WIDE_INT off;
03148     unsigned int regno;
03149 
03150     /* Compute offset from CFA for BR0.  */
03151     /* ??? Must be kept in sync with ia64_expand_prologue.  */
03152     off = (current_frame_info.spill_cfa_off
03153      + current_frame_info.spill_size);
03154     for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
03155       if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
03156         off -= 8;
03157 
03158     /* Convert CFA offset to a register based offset.  */
03159     if (frame_pointer_needed)
03160       src = hard_frame_pointer_rtx;
03161     else
03162       {
03163         src = stack_pointer_rtx;
03164         off += current_frame_info.total_size;
03165       }
03166 
03167     /* Load address into scratch register.  */
03168     if (CONST_OK_FOR_I (off))
03169       emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
03170     else
03171       {
03172         emit_move_insn (dest, GEN_INT (off));
03173         emit_insn (gen_adddi3 (dest, src, dest));
03174       }
03175 
03176     src = gen_rtx_MEM (Pmode, dest);
03177   }
03178     }
03179   else
03180     src = gen_rtx_REG (DImode, BR_REG (0));
03181 
03182   emit_move_insn (dest, src);
03183 }
03184 
03185 int
03186 ia64_hard_regno_rename_ok (int from, int to)
03187 {
03188   /* Don't clobber any of the registers we reserved for the prologue.  */
03189   if (to == current_frame_info.reg_fp
03190       || to == current_frame_info.reg_save_b0
03191       || to == current_frame_info.reg_save_pr
03192       || to == current_frame_info.reg_save_ar_pfs
03193       || to == current_frame_info.reg_save_ar_unat
03194       || to == current_frame_info.reg_save_ar_lc)
03195     return 0;
03196 
03197   if (from == current_frame_info.reg_fp
03198       || from == current_frame_info.reg_save_b0
03199       || from == current_frame_info.reg_save_pr
03200       || from == current_frame_info.reg_save_ar_pfs
03201       || from == current_frame_info.reg_save_ar_unat
03202       || from == current_frame_info.reg_save_ar_lc)
03203     return 0;
03204 
03205   /* Don't use output registers outside the register frame.  */
03206   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
03207     return 0;
03208 
03209   /* Retain even/oddness on predicate register pairs.  */
03210   if (PR_REGNO_P (from) && PR_REGNO_P (to))
03211     return (from & 1) == (to & 1);
03212 
03213   return 1;
03214 }
03215 
03216 /* Target hook for assembling integer objects.  Handle word-sized
03217    aligned objects and detect the cases when @fptr is needed.  */
03218 
03219 static bool
03220 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
03221 {
03222   if (size == POINTER_SIZE / BITS_PER_UNIT
03223       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
03224       && GET_CODE (x) == SYMBOL_REF
03225       && SYMBOL_REF_FUNCTION_P (x))
03226     {
03227       static const char * const directive[2][2] = {
03228     /* 64-bit pointer */  /* 32-bit pointer */
03229   { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},  /* unaligned */
03230   { "\tdata8\t@fptr(",    "\tdata4\t@fptr("}  /* aligned */
03231       };
03232       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
03233       output_addr_const (asm_out_file, x);
03234       fputs (")\n", asm_out_file);
03235       return true;
03236     }
03237   return default_assemble_integer (x, size, aligned_p);
03238 }
03239 
03240 /* Emit the function prologue.  */
03241 
03242 static void
03243 ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
03244 {
03245   int mask, grsave, grsave_prev;
03246 
03247   if (current_frame_info.need_regstk)
03248     fprintf (file, "\t.regstk %d, %d, %d, %d\n",
03249        current_frame_info.n_input_regs,
03250        current_frame_info.n_local_regs,
03251        current_frame_info.n_output_regs,
03252        current_frame_info.n_rotate_regs);
03253 
03254   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03255     return;
03256 
03257   /* Emit the .prologue directive.  */
03258 
03259   mask = 0;
03260   grsave = grsave_prev = 0;
03261   if (current_frame_info.reg_save_b0 != 0)
03262     {
03263       mask |= 8;
03264       grsave = grsave_prev = current_frame_info.reg_save_b0;
03265     }
03266   if (current_frame_info.reg_save_ar_pfs != 0
03267       && (grsave_prev == 0
03268     || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
03269     {
03270       mask |= 4;
03271       if (grsave_prev == 0)
03272   grsave = current_frame_info.reg_save_ar_pfs;
03273       grsave_prev = current_frame_info.reg_save_ar_pfs;
03274     }
03275   if (current_frame_info.reg_fp != 0
03276       && (grsave_prev == 0
03277     || current_frame_info.reg_fp == grsave_prev + 1))
03278     {
03279       mask |= 2;
03280       if (grsave_prev == 0)
03281   grsave = HARD_FRAME_POINTER_REGNUM;
03282       grsave_prev = current_frame_info.reg_fp;
03283     }
03284   if (current_frame_info.reg_save_pr != 0
03285       && (grsave_prev == 0
03286     || current_frame_info.reg_save_pr == grsave_prev + 1))
03287     {
03288       mask |= 1;
03289       if (grsave_prev == 0)
03290   grsave = current_frame_info.reg_save_pr;
03291     }
03292 
03293   if (mask && TARGET_GNU_AS)
03294     fprintf (file, "\t.prologue %d, %d\n", mask,
03295        ia64_dbx_register_number (grsave));
03296   else
03297     fputs ("\t.prologue\n", file);
03298 
03299   /* Emit a .spill directive, if necessary, to relocate the base of
03300      the register spill area.  */
03301   if (current_frame_info.spill_cfa_off != -16)
03302     fprintf (file, "\t.spill %ld\n",
03303        (long) (current_frame_info.spill_cfa_off
03304          + current_frame_info.spill_size));
03305 }
03306 
03307 /* Emit the .body directive at the scheduled end of the prologue.  */
03308 
03309 static void
03310 ia64_output_function_end_prologue (FILE *file)
03311 {
03312   if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
03313     return;
03314 
03315   fputs ("\t.body\n", file);
03316 }
03317 
03318 /* Emit the function epilogue.  */
03319 
03320 static void
03321 ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
03322              HOST_WIDE_INT size ATTRIBUTE_UNUSED)
03323 {
03324   int i;
03325 
03326   if (current_frame_info.reg_fp)
03327     {
03328       const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
03329       reg_names[HARD_FRAME_POINTER_REGNUM]
03330   = reg_names[current_frame_info.reg_fp];
03331       reg_names[current_frame_info.reg_fp] = tmp;
03332     }
03333   if (! TARGET_REG_NAMES)
03334     {
03335       for (i = 0; i < current_frame_info.n_input_regs; i++)
03336   reg_names[IN_REG (i)] = ia64_input_reg_names[i];
03337       for (i = 0; i < current_frame_info.n_local_regs; i++)
03338   reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
03339       for (i = 0; i < current_frame_info.n_output_regs; i++)
03340   reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
03341     }
03342 
03343   current_frame_info.initialized = 0;
03344 }
03345 
03346 int
03347 ia64_dbx_register_number (int regno)
03348 {
03349   /* In ia64_expand_prologue we quite literally renamed the frame pointer
03350      from its home at loc79 to something inside the register frame.  We
03351      must perform the same renumbering here for the debug info.  */
03352   if (current_frame_info.reg_fp)
03353     {
03354       if (regno == HARD_FRAME_POINTER_REGNUM)
03355   regno = current_frame_info.reg_fp;
03356       else if (regno == current_frame_info.reg_fp)
03357   regno = HARD_FRAME_POINTER_REGNUM;
03358     }
03359 
03360   if (IN_REGNO_P (regno))
03361     return 32 + regno - IN_REG (0);
03362   else if (LOC_REGNO_P (regno))
03363     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
03364   else if (OUT_REGNO_P (regno))
03365     return (32 + current_frame_info.n_input_regs
03366       + current_frame_info.n_local_regs + regno - OUT_REG (0));
03367   else
03368     return regno;
03369 }
03370 
03371 void
03372 ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
03373 {
03374   rtx addr_reg, eight = GEN_INT (8);
03375 
03376   /* The Intel assembler requires that the global __ia64_trampoline symbol
03377      be declared explicitly */
03378   if (!TARGET_GNU_AS)
03379     {
03380       static bool declared_ia64_trampoline = false;
03381 
03382       if (!declared_ia64_trampoline)
03383   {
03384     declared_ia64_trampoline = true;
03385     (*targetm.asm_out.globalize_label) (asm_out_file,
03386                 "__ia64_trampoline");
03387   }
03388     }
03389 
03390   /* Make sure addresses are Pmode even if we are in ILP32 mode. */
03391   addr = convert_memory_address (Pmode, addr);
03392   fnaddr = convert_memory_address (Pmode, fnaddr);
03393   static_chain = convert_memory_address (Pmode, static_chain);
03394 
03395   /* Load up our iterator.  */
03396   addr_reg = gen_reg_rtx (Pmode);
03397   emit_move_insn (addr_reg, addr);
03398 
03399   /* The first two words are the fake descriptor:
03400      __ia64_trampoline, ADDR+16.  */
03401   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03402       gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
03403   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03404 
03405   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
03406       copy_to_reg (plus_constant (addr, 16)));
03407   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03408 
03409   /* The third word is the target descriptor.  */
03410   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
03411   emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
03412 
03413   /* The fourth word is the static chain.  */
03414   emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
03415 }
03416 
03417 /* Do any needed setup for a variadic function.  CUM has not been updated
03418    for the last named argument which has type TYPE and mode MODE.
03419 
03420    We generate the actual spill instructions during prologue generation.  */
03421 
03422 static void
03423 ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
03424            tree type, int * pretend_size,
03425            int second_time ATTRIBUTE_UNUSED)
03426 {
03427   CUMULATIVE_ARGS next_cum = *cum;
03428 
03429   /* Skip the current argument.  */
03430   ia64_function_arg_advance (&next_cum, mode, type, 1);
03431 
03432   if (next_cum.words < MAX_ARGUMENT_SLOTS)
03433     {
03434       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
03435       *pretend_size = n * UNITS_PER_WORD;
03436       cfun->machine->n_varargs = n;
03437     }
03438 }
03439 
03440 /* Check whether TYPE is a homogeneous floating point aggregate.  If
03441    it is, return the mode of the floating point type that appears
03442    in all leafs.  If it is not, return VOIDmode.
03443 
03444    An aggregate is a homogeneous floating point aggregate if all
03445    fields/elements in it have the same floating point type (e.g.,
03446    SFmode).  128-bit quad-precision floats are excluded.
03447 
03448    Variable sized aggregates should never arrive here, since we should
03449    have already decided to pass them by reference.  Top-level zero-sized
03450    aggregates are excluded because our parallels crash the middle-end.  */
03451 
03452 static enum machine_mode
03453 hfa_element_mode (tree type, bool nested)
03454 {
03455   enum machine_mode element_mode = VOIDmode;
03456   enum machine_mode mode;
03457   enum tree_code code = TREE_CODE (type);
03458   int know_element_mode = 0;
03459   tree t;
03460 
03461   if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
03462     return VOIDmode;
03463 
03464   switch (code)
03465     {
03466     case VOID_TYPE: case INTEGER_TYPE:  case ENUMERAL_TYPE:
03467     case BOOLEAN_TYPE:  case CHAR_TYPE:   case POINTER_TYPE:
03468     case OFFSET_TYPE: case REFERENCE_TYPE:  case METHOD_TYPE:
03469     case FILE_TYPE: case LANG_TYPE:   case FUNCTION_TYPE:
03470       return VOIDmode;
03471 
03472       /* Fortran complex types are supposed to be HFAs, so we need to handle
03473    gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
03474    types though.  */
03475     case COMPLEX_TYPE:
03476       if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
03477     && TYPE_MODE (type) != TCmode)
03478   return GET_MODE_INNER (TYPE_MODE (type));
03479       else
03480   return VOIDmode;
03481 
03482     case REAL_TYPE:
03483       /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
03484    mode if this is contained within an aggregate.  */
03485       if (nested && TYPE_MODE (type) != TFmode)
03486   return TYPE_MODE (type);
03487       else
03488   return VOIDmode;
03489 
03490     case ARRAY_TYPE:
03491       return hfa_element_mode (TREE_TYPE (type), 1);
03492 
03493     case RECORD_TYPE:
03494     case UNION_TYPE:
03495     case QUAL_UNION_TYPE:
03496       for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
03497   {
03498     if (TREE_CODE (t) != FIELD_DECL)
03499       continue;
03500 
03501     mode = hfa_element_mode (TREE_TYPE (t), 1);
03502     if (know_element_mode)
03503       {
03504         if (mode != element_mode)
03505     return VOIDmode;
03506       }
03507     else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
03508       return VOIDmode;
03509     else
03510       {
03511         know_element_mode = 1;
03512         element_mode = mode;
03513       }
03514   }
03515       return element_mode;
03516 
03517     default:
03518       /* If we reach here, we probably have some front-end specific type
03519    that the backend doesn't know about.  This can happen via the
03520    aggregate_value_p call in init_function_start.  All we can do is
03521    ignore unknown tree types.  */
03522       return VOIDmode;
03523     }
03524 
03525   return VOIDmode;
03526 }
03527 
03528 /* Return the number of words required to hold a quantity of TYPE and MODE
03529    when passed as an argument.  */
03530 static int
03531 ia64_function_arg_words (tree type, enum machine_mode mode)
03532 {
03533   int words;
03534 
03535   if (mode == BLKmode)
03536     words = int_size_in_bytes (type);
03537   else
03538     words = GET_MODE_SIZE (mode);
03539 
03540   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
03541 }
03542 
03543 /* Return the number of registers that should be skipped so the current
03544    argument (described by TYPE and WORDS) will be properly aligned.
03545 
03546    Integer and float arguments larger than 8 bytes start at the next
03547    even boundary.  Aggregates larger than 8 bytes start at the next
03548    even boundary if the aggregate has 16 byte alignment.  Note that
03549    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
03550    but are still to be aligned in registers.
03551 
03552    ??? The ABI does not specify how to handle aggregates with
03553    alignment from 9 to 15 bytes, or greater than 16.  We handle them
03554    all as if they had 16 byte alignment.  Such aggregates can occur
03555    only if gcc extensions are used.  */
03556 static int
03557 ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
03558 {
03559   if ((cum->words & 1) == 0)
03560     return 0;
03561 
03562   if (type
03563       && TREE_CODE (type) != INTEGER_TYPE
03564       && TREE_CODE (type) != REAL_TYPE)
03565     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
03566   else
03567     return words > 1;
03568 }
03569 
03570 /* Return rtx for register where argument is passed, or zero if it is passed
03571    on the stack.  */
03572 /* ??? 128-bit quad-precision floats are always passed in general
03573    registers.  */
03574 
03575 rtx
03576 ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
03577        int named, int incoming)
03578 {
03579   int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
03580   int words = ia64_function_arg_words (type, mode);
03581   int offset = ia64_function_arg_offset (cum, type, words);
03582   enum machine_mode hfa_mode = VOIDmode;
03583 
03584   /* If all argument slots are used, then it must go on the stack.  */
03585   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
03586     return 0;
03587 
03588   /* Check for and handle homogeneous FP aggregates.  */
03589   if (type)
03590     hfa_mode = hfa_element_mode (type, 0);
03591 
03592   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
03593      and unprototyped hfas are passed specially.  */
03594   if (hfa_mode != VOIDmode && (! cum->prototype || named))
03595     {
03596       rtx loc[16];
03597       int i = 0;
03598       int fp_regs = cum->fp_regs;
03599       int int_regs = cum->words + offset;
03600       int hfa_size = GET_MODE_SIZE (hfa_mode);
03601       int byte_size;
03602       int args_byte_size;
03603 
03604       /* If prototyped, pass it in FR regs then GR regs.
03605    If not prototyped, pass it in both FR and GR regs.
03606 
03607    If this is an SFmode aggregate, then it is possible to run out of
03608    FR regs while GR regs are still left.  In that case, we pass the
03609    remaining part in the GR regs.  */
03610 
03611       /* Fill the FP regs.  We do this always.  We stop if we reach the end
03612    of the argument, the last FP register, or the last argument slot.  */
03613 
03614       byte_size = ((mode == BLKmode)
03615        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03616       args_byte_size = int_regs * UNITS_PER_WORD;
03617       offset = 0;
03618       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
03619         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
03620   {
03621     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03622               gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
03623                     + fp_regs)),
03624               GEN_INT (offset));
03625     offset += hfa_size;
03626     args_byte_size += hfa_size;
03627     fp_regs++;
03628   }
03629 
03630       /* If no prototype, then the whole thing must go in GR regs.  */
03631       if (! cum->prototype)
03632   offset = 0;
03633       /* If this is an SFmode aggregate, then we might have some left over
03634    that needs to go in GR regs.  */
03635       else if (byte_size != offset)
03636   int_regs += offset / UNITS_PER_WORD;
03637 
03638       /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
03639 
03640       for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
03641   {
03642     enum machine_mode gr_mode = DImode;
03643     unsigned int gr_size;
03644 
03645     /* If we have an odd 4 byte hunk because we ran out of FR regs,
03646        then this goes in a GR reg left adjusted/little endian, right
03647        adjusted/big endian.  */
03648     /* ??? Currently this is handled wrong, because 4-byte hunks are
03649        always right adjusted/little endian.  */
03650     if (offset & 0x4)
03651       gr_mode = SImode;
03652     /* If we have an even 4 byte hunk because the aggregate is a
03653        multiple of 4 bytes in size, then this goes in a GR reg right
03654        adjusted/little endian.  */
03655     else if (byte_size - offset == 4)
03656       gr_mode = SImode;
03657 
03658     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03659               gen_rtx_REG (gr_mode, (basereg
03660                    + int_regs)),
03661               GEN_INT (offset));
03662 
03663     gr_size = GET_MODE_SIZE (gr_mode);
03664     offset += gr_size;
03665     if (gr_size == UNITS_PER_WORD
03666         || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
03667       int_regs++;
03668     else if (gr_size > UNITS_PER_WORD)
03669       int_regs += gr_size / UNITS_PER_WORD;
03670   }
03671       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
03672     }
03673 
03674   /* Integral and aggregates go in general registers.  If we have run out of
03675      FR registers, then FP values must also go in general registers.  This can
03676      happen when we have a SFmode HFA.  */
03677   else if (mode == TFmode || mode == TCmode
03678      || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
03679     {
03680       int byte_size = ((mode == BLKmode)
03681                        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03682       if (BYTES_BIG_ENDIAN
03683   && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
03684   && byte_size < UNITS_PER_WORD
03685   && byte_size > 0)
03686   {
03687     rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
03688             gen_rtx_REG (DImode,
03689                    (basereg + cum->words
03690               + offset)),
03691             const0_rtx);
03692     return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
03693   }
03694       else
03695   return gen_rtx_REG (mode, basereg + cum->words + offset);
03696 
03697     }
03698 
03699   /* If there is a prototype, then FP values go in a FR register when
03700      named, and in a GR register when unnamed.  */
03701   else if (cum->prototype)
03702     {
03703       if (named)
03704   return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
03705       /* In big-endian mode, an anonymous SFmode value must be represented
03706          as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
03707    the value into the high half of the general register.  */
03708       else if (BYTES_BIG_ENDIAN && mode == SFmode)
03709   return gen_rtx_PARALLEL (mode,
03710      gen_rtvec (1,
03711                    gen_rtx_EXPR_LIST (VOIDmode,
03712          gen_rtx_REG (DImode, basereg + cum->words + offset),
03713               const0_rtx)));
03714       else
03715   return gen_rtx_REG (mode, basereg + cum->words + offset);
03716     }
03717   /* If there is no prototype, then FP values go in both FR and GR
03718      registers.  */
03719   else
03720     {
03721       /* See comment above.  */
03722       enum machine_mode inner_mode =
03723   (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
03724 
03725       rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
03726               gen_rtx_REG (mode, (FR_ARG_FIRST
03727                 + cum->fp_regs)),
03728               const0_rtx);
03729       rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
03730               gen_rtx_REG (inner_mode,
03731                (basereg + cum->words
03732                 + offset)),
03733               const0_rtx);
03734 
03735       return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
03736     }
03737 }
03738 
03739 /* Return number of bytes, at the beginning of the argument, that must be
03740    put in registers.  0 is the argument is entirely in registers or entirely
03741    in memory.  */
03742 
03743 static int
03744 ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
03745       tree type, bool named ATTRIBUTE_UNUSED)
03746 {
03747   int words = ia64_function_arg_words (type, mode);
03748   int offset = ia64_function_arg_offset (cum, type, words);
03749 
03750   /* If all argument slots are used, then it must go on the stack.  */
03751   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
03752     return 0;
03753 
03754   /* It doesn't matter whether the argument goes in FR or GR regs.  If
03755      it fits within the 8 argument slots, then it goes entirely in
03756      registers.  If it extends past the last argument slot, then the rest
03757      goes on the stack.  */
03758 
03759   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
03760     return 0;
03761 
03762   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
03763 }
03764 
03765 /* Update CUM to point after this argument.  This is patterned after
03766    ia64_function_arg.  */
03767 
03768 void
03769 ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
03770          tree type, int named)
03771 {
03772   int words = ia64_function_arg_words (type, mode);
03773   int offset = ia64_function_arg_offset (cum, type, words);
03774   enum machine_mode hfa_mode = VOIDmode;
03775 
03776   /* If all arg slots are already full, then there is nothing to do.  */
03777   if (cum->words >= MAX_ARGUMENT_SLOTS)
03778     return;
03779 
03780   cum->words += words + offset;
03781 
03782   /* Check for and handle homogeneous FP aggregates.  */
03783   if (type)
03784     hfa_mode = hfa_element_mode (type, 0);
03785 
03786   /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
03787      and unprototyped hfas are passed specially.  */
03788   if (hfa_mode != VOIDmode && (! cum->prototype || named))
03789     {
03790       int fp_regs = cum->fp_regs;
03791       /* This is the original value of cum->words + offset.  */
03792       int int_regs = cum->words - words;
03793       int hfa_size = GET_MODE_SIZE (hfa_mode);
03794       int byte_size;
03795       int args_byte_size;
03796 
03797       /* If prototyped, pass it in FR regs then GR regs.
03798    If not prototyped, pass it in both FR and GR regs.
03799 
03800    If this is an SFmode aggregate, then it is possible to run out of
03801    FR regs while GR regs are still left.  In that case, we pass the
03802    remaining part in the GR regs.  */
03803 
03804       /* Fill the FP regs.  We do this always.  We stop if we reach the end
03805    of the argument, the last FP register, or the last argument slot.  */
03806 
03807       byte_size = ((mode == BLKmode)
03808        ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
03809       args_byte_size = int_regs * UNITS_PER_WORD;
03810       offset = 0;
03811       for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
03812         && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
03813   {
03814     offset += hfa_size;
03815     args_byte_size += hfa_size;
03816     fp_regs++;
03817   }
03818 
03819       cum->fp_regs = fp_regs;
03820     }
03821 
03822   /* Integral and aggregates go in general registers.  So do TFmode FP values.
03823      If we have run out of FR registers, then other FP values must also go in
03824      general registers.  This can happen when we have a SFmode HFA.  */
03825   else if (mode == TFmode || mode == TCmode
03826            || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
03827     cum->int_regs = cum->words;
03828 
03829   /* If there is a prototype, then FP values go in a FR register when
03830      named, and in a GR register when unnamed.  */
03831   else if (cum->prototype)
03832     {
03833       if (! named)
03834   cum->int_regs = cum->words;
03835       else
03836   /* ??? Complex types should not reach here.  */
03837   cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
03838     }
03839   /* If there is no prototype, then FP values go in both FR and GR
03840      registers.  */
03841   else
03842     {
03843       /* ??? Complex types should not reach here.  */
03844       cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
03845       cum->int_regs = cum->words;
03846     }
03847 }
03848 
03849 /* Arguments with alignment larger than 8 bytes start at the next even
03850    boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
03851    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
03852 
03853 int
03854 ia64_function_arg_boundary (enum machine_mode mode, tree type)
03855 {
03856 
03857   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
03858     return PARM_BOUNDARY * 2;
03859 
03860   if (type)
03861     {
03862       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
03863         return PARM_BOUNDARY * 2;
03864       else
03865         return PARM_BOUNDARY;
03866     }
03867 
03868   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
03869     return PARM_BOUNDARY * 2;
03870   else
03871     return PARM_BOUNDARY;
03872 }
03873 
03874 /* Variable sized types are passed by reference.  */
03875 /* ??? At present this is a GCC extension to the IA-64 ABI.  */
03876 
03877 static bool
03878 ia64_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
03879       enum machine_mode mode ATTRIBUTE_UNUSED,
03880       tree type, bool named ATTRIBUTE_UNUSED)
03881 {
03882   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
03883 }
03884 
03885 /* True if it is OK to do sibling call optimization for the specified
03886    call expression EXP.  DECL will be the called function, or NULL if
03887    this is an indirect call.  */
03888 static bool
03889 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
03890 {
03891   /* We can't perform a sibcall if the current function has the syscall_linkage
03892      attribute.  */
03893   if (lookup_attribute ("syscall_linkage",
03894       TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
03895     return false;
03896 
03897   /* We must always return with our current GP.  This means we can
03898      only sibcall to functions defined in the current module.  */
03899   return decl && (*targetm.binds_local_p) (decl);
03900 }
03901 
03902 
03903 /* Implement va_arg.  */
03904 
03905 static tree
03906 ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
03907 {
03908   /* Variable sized types are passed by reference.  */
03909   if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
03910     {
03911       tree ptrtype = build_pointer_type (type);
03912       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
03913       return build_va_arg_indirect_ref (addr);
03914     }
03915 
03916   /* Aggregate arguments with alignment larger than 8 bytes start at
03917      the next even boundary.  Integer and floating point arguments
03918      do so if they are larger than 8 bytes, whether or not they are
03919      also aligned larger than 8 bytes.  */
03920   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
03921       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
03922     {
03923       tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
03924           build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
03925       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
03926      build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
03927       t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
03928       gimplify_and_add (t, pre_p);
03929     }
03930 
03931   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
03932 }
03933 
03934 /* Return 1 if function return value returned in memory.  Return 0 if it is
03935    in a register.  */
03936 
03937 static bool
03938 ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
03939 {
03940   enum machine_mode mode;
03941   enum machine_mode hfa_mode;
03942   HOST_WIDE_INT byte_size;
03943 
03944   mode = TYPE_MODE (valtype);
03945   byte_size = GET_MODE_SIZE (mode);
03946   if (mode == BLKmode)
03947     {
03948       byte_size = int_size_in_bytes (valtype);
03949       if (byte_size < 0)
03950   return true;
03951     }
03952 
03953   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
03954 
03955   hfa_mode = hfa_element_mode (valtype, 0);
03956   if (hfa_mode != VOIDmode)
03957     {
03958       int hfa_size = GET_MODE_SIZE (hfa_mode);
03959 
03960       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
03961   return true;
03962       else
03963   return false;
03964     }
03965   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
03966     return true;
03967   else
03968     return false;
03969 }
03970 
03971 /* Return rtx for register that holds the function return value.  */
03972 
03973 rtx
03974 ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
03975 {
03976   enum machine_mode mode;
03977   enum machine_mode hfa_mode;
03978 
03979   mode = TYPE_MODE (valtype);
03980   hfa_mode = hfa_element_mode (valtype, 0);
03981 
03982   if (hfa_mode != VOIDmode)
03983     {
03984       rtx loc[8];
03985       int i;
03986       int hfa_size;
03987       int byte_size;
03988       int offset;
03989 
03990       hfa_size = GET_MODE_SIZE (hfa_mode);
03991       byte_size = ((mode == BLKmode)
03992        ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
03993       offset = 0;
03994       for (i = 0; offset < byte_size; i++)
03995   {
03996     loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
03997               gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
03998               GEN_INT (offset));
03999     offset += hfa_size;
04000   }
04001       return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
04002     }
04003   else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
04004     return gen_rtx_REG (mode, FR_ARG_FIRST);
04005   else
04006     {
04007       bool need_parallel = false;
04008 
04009       /* In big-endian mode, we need to manage the layout of aggregates
04010    in the registers so that we get the bits properly aligned in
04011    the highpart of the registers.  */
04012       if (BYTES_BIG_ENDIAN
04013     && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
04014   need_parallel = true;
04015 
04016       /* Something like struct S { long double x; char a[0] } is not an
04017    HFA structure, and therefore doesn't go in fp registers.  But
04018    the middle-end will give it XFmode anyway, and XFmode values
04019    don't normally fit in integer registers.  So we need to smuggle
04020    the value inside a parallel.  */
04021       else if (mode == XFmode || mode == XCmode)
04022   need_parallel = true;
04023 
04024       if (need_parallel)
04025   {
04026     rtx loc[8];
04027     int offset;
04028     int bytesize;
04029     int i;
04030 
04031     offset = 0;
04032     bytesize = int_size_in_bytes (valtype);
04033     /* An empty PARALLEL is invalid here, but the return value
04034        doesn't matter for empty structs.  */
04035     if (bytesize == 0)
04036       return gen_rtx_REG (mode, GR_RET_FIRST);
04037     for (i = 0; offset < bytesize; i++)
04038       {
04039         loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
04040             gen_rtx_REG (DImode,
04041                    GR_RET_FIRST + i),
04042             GEN_INT (offset));
04043         offset += UNITS_PER_WORD;
04044       }
04045     return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
04046   }
04047 
04048       return gen_rtx_REG (mode, GR_RET_FIRST);
04049     }
04050 }
04051 
04052 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
04053    We need to emit DTP-relative relocations.  */
04054 
04055 void
04056 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
04057 {
04058   if (size != 8)
04059     abort ();
04060   fputs ("\tdata8.ua\t@dtprel(", file);
04061   output_addr_const (file, x);
04062   fputs (")", file);
04063 }
04064 
04065 /* Print a memory address as an operand to reference that memory location.  */
04066 
04067 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
04068    also call this from ia64_print_operand for memory addresses.  */
04069 
04070 void
04071 ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
04072           rtx address ATTRIBUTE_UNUSED)
04073 {
04074 }
04075 
04076 /* Print an operand to an assembler instruction.
04077    C  Swap and print a comparison operator.
04078    D  Print an FP comparison operator.
04079    E    Print 32 - constant, for SImode shifts as extract.
04080    e    Print 64 - constant, for DImode rotates.
04081    F  A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
04082         a floating point register emitted normally.
04083    I  Invert a predicate register by adding 1.
04084    J    Select the proper predicate register for a condition.
04085    j    Select the inverse predicate register for a condition.
04086    O  Append .acq for volatile load.
04087    P  Postincrement of a MEM.
04088    Q  Append .rel for volatile store.
04089    S  Shift amount for shladd instruction.
04090    T  Print an 8-bit sign extended number (K) as a 32-bit unsigned number
04091   for Intel assembler.
04092    U  Print an 8-bit sign extended number (K) as a 64-bit unsigned number
04093   for Intel assembler.
04094    r  Print register name, or constant 0 as r0.  HP compatibility for
04095   Linux kernel.
04096    v    Print vector constant value as an 8-byte integer value.  */
04097 
04098 void
04099 ia64_print_operand (FILE * file, rtx x, int code)
04100 {
04101   const char *str;
04102 
04103   switch (code)
04104     {
04105     case 0:
04106       /* Handled below.  */
04107       break;
04108 
04109     case 'C':
04110       {
04111   enum rtx_code c = swap_condition (GET_CODE (x));
04112   fputs (GET_RTX_NAME (c), file);
04113   return;
04114       }
04115 
04116     case 'D':
04117       switch (GET_CODE (x))
04118   {
04119   case NE:
04120     str = "neq";
04121     break;
04122   case UNORDERED:
04123     str = "unord";
04124     break;
04125   case ORDERED:
04126     str = "ord";
04127     break;
04128   default:
04129     str = GET_RTX_NAME (GET_CODE (x));
04130     break;
04131   }
04132       fputs (str, file);
04133       return;
04134 
04135     case 'E':
04136       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
04137       return;
04138 
04139     case 'e':
04140       fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
04141       return;
04142 
04143     case 'F':
04144       if (x == CONST0_RTX (GET_MODE (x)))
04145   str = reg_names [FR_REG (0)];
04146       else if (x == CONST1_RTX (GET_MODE (x)))
04147   str = reg_names [FR_REG (1)];
04148       else if (GET_CODE (x) == REG)
04149   str = reg_names [REGNO (x)];
04150       else
04151   abort ();
04152       fputs (str, file);
04153       return;
04154 
04155     case 'I':
04156       fputs (reg_names [REGNO (x) + 1], file);
04157       return;
04158 
04159     case 'J':
04160     case 'j':
04161       {
04162   unsigned int regno = REGNO (XEXP (x, 0));
04163   if (GET_CODE (x) == EQ)
04164     regno += 1;
04165   if (code == 'j')
04166     regno ^= 1;
04167         fputs (reg_names [regno], file);
04168       }
04169       return;
04170 
04171     case 'O':
04172       if (MEM_VOLATILE_P (x))
04173   fputs(".acq", file);
04174       return;
04175 
04176     case 'P':
04177       {
04178   HOST_WIDE_INT value;
04179 
04180   switch (GET_CODE (XEXP (x, 0)))
04181     {
04182     default:
04183       return;
04184 
04185     case POST_MODIFY:
04186       x = XEXP (XEXP (XEXP (x, 0), 1), 1);
04187       if (GET_CODE (x) == CONST_INT)
04188         value = INTVAL (x);
04189       else if (GET_CODE (x) == REG)
04190         {
04191     fprintf (file, ", %s", reg_names[REGNO (x)]);
04192     return;
04193         }
04194       else
04195         abort ();
04196       break;
04197 
04198     case POST_INC:
04199       value = GET_MODE_SIZE (GET_MODE (x));
04200       break;
04201 
04202     case POST_DEC:
04203       value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
04204       break;
04205     }
04206 
04207   fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
04208   return;
04209       }
04210 
04211     case 'Q':
04212       if (MEM_VOLATILE_P (x))
04213   fputs(".rel", file);
04214       return;
04215 
04216     case 'S':
04217       fprintf (file, "%d", exact_log2 (INTVAL (x)));
04218       return;
04219 
04220     case 'T':
04221       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
04222   {
04223     fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
04224     return;
04225   }
04226       break;
04227 
04228     case 'U':
04229       if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
04230   {
04231     const char *prefix = "0x";
04232     if (INTVAL (x) & 0x80000000)
04233       {
04234         fprintf (file, "0xffffffff");
04235         prefix = "";
04236       }
04237     fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
04238     return;
04239   }
04240       break;
04241 
04242     case 'r':
04243       /* If this operand is the constant zero, write it as register zero.
04244    Any register, zero, or CONST_INT value is OK here.  */
04245       if (GET_CODE (x) == REG)
04246   fputs (reg_names[REGNO (x)], file);
04247       else if (x == CONST0_RTX (GET_MODE (x)))
04248   fputs ("r0", file);
04249       else if (GET_CODE (x) == CONST_INT)
04250   output_addr_const (file, x);
04251       else
04252   output_operand_lossage ("invalid %%r value");
04253       return;
04254 
04255     case 'v':
04256       gcc_assert (GET_CODE (x) == CONST_VECTOR);
04257       x = simplify_subreg (DImode, x, GET_MODE (x), 0);
04258       break;
04259 
04260     case '+':
04261       {
04262   const char *which;
04263 
04264   /* For conditional branches, returns or calls, substitute
04265      sptk, dptk, dpnt, or spnt for %s.  */
04266   x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
04267   if (x)
04268     {
04269       int pred_val = INTVAL (XEXP (x, 0));
04270 
04271       /* Guess top and bottom 10% statically predicted.  */
04272       if (pred_val < REG_BR_PROB_BASE / 50)
04273         which = ".spnt";
04274       else if (pred_val < REG_BR_PROB_BASE / 2)
04275         which = ".dpnt";
04276       else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
04277         which = ".dptk";
04278       else
04279         which = ".sptk";
04280     }
04281   else if (GET_CODE (current_output_insn) == CALL_INSN)
04282     which = ".sptk";
04283   else
04284     which = ".dptk";
04285 
04286   fputs (which, file);
04287   return;
04288       }
04289 
04290     case ',':
04291       x = current_insn_predicate;
04292       if (x)
04293   {
04294     unsigned int regno = REGNO (XEXP (x, 0));
04295     if (GET_CODE (x) == EQ)
04296       regno += 1;
04297           fprintf (file, "(%s) ", reg_names [regno]);
04298   }
04299       return;
04300 
04301     default:
04302       output_operand_lossage ("ia64_print_operand: unknown code");
04303       return;
04304     }
04305 
04306   switch (GET_CODE (x))
04307     {
04308       /* This happens for the spill/restore instructions.  */
04309     case POST_INC:
04310     case POST_DEC:
04311     case POST_MODIFY:
04312       x = XEXP (x, 0);
04313       /* ... fall through ...  */
04314 
04315     case REG:
04316       fputs (reg_names [REGNO (x)], file);
04317       break;
04318 
04319     case MEM:
04320       {
04321   rtx addr = XEXP (x, 0);
04322   if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
04323     addr = XEXP (addr, 0);
04324   fprintf (file, "[%s]", reg_names [REGNO (addr)]);
04325   break;
04326       }
04327 
04328     default:
04329       output_addr_const (file, x);
04330       break;
04331     }
04332 
04333   return;
04334 }
04335 
04336 /* Compute a (partial) cost for rtx X.  Return true if the complete
04337    cost has been computed, and false if subexpressions should be
04338    scanned.  In either case, *TOTAL contains the cost result.  */
04339 /* ??? This is incomplete.  */
04340 
04341 static bool
04342 ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
04343 {
04344   switch (code)
04345     {
04346     case CONST_INT:
04347       switch (outer_code)
04348         {
04349         case SET:
04350     *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
04351     return true;
04352         case PLUS:
04353     if (CONST_OK_FOR_I (INTVAL (x)))
04354       *total = 0;
04355     else if (CONST_OK_FOR_J (INTVAL (x)))
04356       *total = 1;
04357     else
04358       *total = COSTS_N_INSNS (1);
04359     return true;
04360         default:
04361     if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
04362       *total = 0;
04363     else
04364       *total = COSTS_N_INSNS (1);
04365     return true;
04366   }
04367 
04368     case CONST_DOUBLE:
04369       *total = COSTS_N_INSNS (1);
04370       return true;
04371 
04372     case CONST:
04373     case SYMBOL_REF:
04374     case LABEL_REF:
04375       *total = COSTS_N_INSNS (3);
04376       return true;
04377 
04378     case MULT:
04379       /* For multiplies wider than HImode, we have to go to the FPU,
04380          which normally involves copies.  Plus there's the latency
04381          of the multiply itself, and the latency of the instructions to
04382          transfer integer regs to FP regs.  */
04383       /* ??? Check for FP mode.  */
04384       if (GET_MODE_SIZE (GET_MODE (x)) > 2)
04385         *total = COSTS_N_INSNS (10);
04386       else
04387   *total = COSTS_N_INSNS (2);
04388       return true;
04389 
04390     case PLUS:
04391     case MINUS:
04392     case ASHIFT:
04393     case ASHIFTRT:
04394     case LSHIFTRT:
04395       *total = COSTS_N_INSNS (1);
04396       return true;
04397 
04398     case DIV:
04399     case UDIV:
04400     case MOD:
04401     case UMOD:
04402       /* We make divide expensive, so that divide-by-constant will be
04403          optimized to a multiply.  */
04404       *total = COSTS_N_INSNS (60);
04405       return true;
04406 
04407     default:
04408       return false;
04409     }
04410 }
04411 
04412 /* Calculate the cost of moving data from a register in class FROM to
04413    one in class TO, using MODE.  */
04414 
04415 int
04416 ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
04417        enum reg_class to)
04418 {
04419   /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
04420   if (to == ADDL_REGS)
04421     to = GR_REGS;
04422   if (from == ADDL_REGS)
04423     from = GR_REGS;
04424 
04425   /* All costs are symmetric, so reduce cases by putting the
04426      lower number class as the destination.  */
04427   if (from < to)
04428     {
04429       enum reg_class tmp = to;
04430       to = from, from = tmp;
04431     }
04432 
04433   /* Moving from FR<->GR in XFmode must be more expensive than 2,
04434      so that we get secondary memory reloads.  Between FR_REGS,
04435      we have to make this at least as expensive as MEMORY_MOVE_COST
04436      to avoid spectacularly poor register class preferencing.  */
04437   if (mode == XFmode)
04438     {
04439       if (to != GR_REGS || from != GR_REGS)
04440         return MEMORY_MOVE_COST (mode, to, 0);
04441       else
04442   return 3;
04443     }
04444 
04445   switch (to)
04446     {
04447     case PR_REGS:
04448       /* Moving between PR registers takes two insns.  */
04449       if (from == PR_REGS)
04450   return 3;
04451       /* Moving between PR and anything but GR is impossible.  */
04452       if (from != GR_REGS)
04453   return MEMORY_MOVE_COST (mode, to, 0);
04454       break;
04455 
04456     case BR_REGS:
04457       /* Moving between BR and anything but GR is impossible.  */
04458       if (from != GR_REGS && from != GR_AND_BR_REGS)
04459   return MEMORY_MOVE_COST (mode, to, 0);
04460       break;
04461 
04462     case AR_I_REGS:
04463     case AR_M_REGS:
04464       /* Moving between AR and anything but GR is impossible.  */
04465       if (from != GR_REGS)
04466   return MEMORY_MOVE_COST (mode, to, 0);
04467       break;
04468 
04469     case GR_REGS:
04470     case FR_REGS:
04471     case GR_AND_FR_REGS:
04472     case GR_AND_BR_REGS:
04473     case ALL_REGS:
04474       break;
04475 
04476     default:
04477       abort ();
04478     }
04479 
04480   return 2;
04481 }
04482 
04483 /* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
04484    to use when copying X into that class.  */
04485 
04486 enum reg_class
04487 ia64_preferred_reload_class (rtx x, enum reg_class class)
04488 {
04489   switch (class)
04490     {
04491     case FR_REGS:
04492       /* Don't allow volatile mem reloads into floating point registers.
04493    This is defined to force reload to choose the r/m case instead
04494    of the f/f case when reloading (set (reg fX) (mem/v)).  */
04495       if (MEM_P (x) && MEM_VOLATILE_P (x))
04496   return NO_REGS;
04497       
04498       /* Force all unrecognized constants into the constant pool.  */
04499       if (CONSTANT_P (x))
04500   return NO_REGS;
04501       break;
04502 
04503     case AR_M_REGS:
04504     case AR_I_REGS:
04505       if (!OBJECT_P (x))
04506   return NO_REGS;
04507       break;
04508 
04509     default:
04510       break;
04511     }
04512 
04513   return class;
04514 }
04515 
04516 /* This function returns the register class required for a secondary
04517    register when copying between one of the registers in CLASS, and X,
04518    using MODE.  A return value of NO_REGS means that no secondary register
04519    is required.  */
04520 
04521 enum reg_class
04522 ia64_secondary_reload_class (enum reg_class class,
04523            enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
04524 {
04525   int regno = -1;
04526 
04527   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
04528     regno = true_regnum (x);
04529 
04530   switch (class)
04531     {
04532     case BR_REGS:
04533     case AR_M_REGS:
04534     case AR_I_REGS:
04535       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
04536    interaction.  We end up with two pseudos with overlapping lifetimes
04537    both of which are equiv to the same constant, and both which need
04538    to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
04539    changes depending on the path length, which means the qty_first_reg
04540    check in make_regs_eqv can give different answers at different times.
04541    At some point I'll probably need a reload_indi pattern to handle
04542    this.
04543 
04544    We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
04545    wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
04546    non-general registers for good measure.  */
04547       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
04548   return GR_REGS;
04549 
04550       /* This is needed if a pseudo used as a call_operand gets spilled to a
04551    stack slot.  */
04552       if (GET_CODE (x) == MEM)
04553   return GR_REGS;
04554       break;
04555 
04556     case FR_REGS:
04557       /* Need to go through general registers to get to other class regs.  */
04558       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
04559   return GR_REGS;
04560 
04561       /* This can happen when a paradoxical subreg is an operand to the
04562    muldi3 pattern.  */
04563       /* ??? This shouldn't be necessary after instruction scheduling is
04564    enabled, because paradoxical subregs are not accepted by
04565    register_operand when INSN_SCHEDULING is defined.  Or alternatively,
04566    stop the paradoxical subreg stupidity in the *_operand functions
04567    in recog.c.  */
04568       if (GET_CODE (x) == MEM
04569     && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
04570         || GET_MODE (x) == QImode))
04571   return GR_REGS;
04572 
04573       /* This can happen because of the ior/and/etc patterns that accept FP
04574    registers as operands.  If the third operand is a constant, then it
04575    needs to be reloaded into a FP register.  */
04576       if (GET_CODE (x) == CONST_INT)
04577   return GR_REGS;
04578 
04579       /* This can happen because of register elimination in a muldi3 insn.
04580    E.g. `26107 * (unsigned long)&u'.  */
04581       if (GET_CODE (x) == PLUS)
04582   return GR_REGS;
04583       break;
04584 
04585     case PR_REGS:
04586       /* ??? This happens if we cse/gcse a BImode value across a call,
04587    and the function has a nonlocal goto.  This is because global
04588    does not allocate call crossing pseudos to hard registers when
04589    current_function_has_nonlocal_goto is true.  This is relatively
04590    common for C++ programs that use exceptions.  To reproduce,
04591    return NO_REGS and compile libstdc++.  */
04592       if (GET_CODE (x) == MEM)
04593   return GR_REGS;
04594 
04595       /* This can happen when we take a BImode subreg of a DImode value,
04596    and that DImode value winds up in some non-GR register.  */
04597       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
04598   return GR_REGS;
04599       break;
04600 
04601     default:
04602       break;
04603     }
04604 
04605   return NO_REGS;
04606 }
04607 
04608 
04609 /* Emit text to declare externally defined variables and functions, because
04610    the Intel assembler does not support undefined externals.  */
04611 
04612 void
04613 ia64_asm_output_external (FILE *file, tree decl, const char *name)
04614 {
04615   int save_referenced;
04616 
04617   /* GNU as does not need anything here, but the HP linker does need
04618      something for external functions.  */
04619 
04620   if (TARGET_GNU_AS
04621       && (!TARGET_HPUX_LD
04622     || TREE_CODE (decl) != FUNCTION_DECL
04623     || strstr (name, "__builtin_") == name))
04624     return;
04625 
04626   /* ??? The Intel assembler creates a reference that needs to be satisfied by
04627      the linker when we do this, so we need to be careful not to do this for
04628      builtin functions which have no library equivalent.  Unfortunately, we
04629      can't tell here whether or not a function will actually be called by
04630      expand_expr, so we pull in library functions even if we may not need
04631      them later.  */
04632   if (! strcmp (name, "__builtin_next_arg")
04633       || ! strcmp (name, "alloca")
04634       || ! strcmp (name, "__builtin_constant_p")
04635       || ! strcmp (name, "__builtin_args_info"))
04636     return;
04637 
04638   if (TARGET_HPUX_LD)
04639     ia64_hpux_add_extern_decl (decl);
04640   else
04641     {
04642       /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
04643          restore it.  */
04644       save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
04645       if (TREE_CODE (decl) == FUNCTION_DECL)
04646         ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
04647       (*targetm.asm_out.globalize_label) (file, name);
04648       TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
04649     }
04650 }
04651 
04652 /* Parse the -mfixed-range= option string.  */
04653 
04654 static void
04655 fix_range (const char *const_str)
04656 {
04657   int i, first, last;
04658   char *str, *dash, *comma;
04659 
04660   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
04661      REG2 are either register names or register numbers.  The effect
04662      of this option is to mark the registers in the range from REG1 to
04663      REG2 as ``fixed'' so they won't be used by the compiler.  This is
04664      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
04665 
04666   i = strlen (const_str);
04667   str = (char *) alloca (i + 1);
04668   memcpy (str, const_str, i + 1);
04669 
04670   while (1)
04671     {
04672       dash = strchr (str, '-');
04673       if (!dash)
04674   {
04675     warning ("value of -mfixed-range must have form REG1-REG2");
04676     return;
04677   }
04678       *dash = '\0';
04679 
04680       comma = strchr (dash + 1, ',');
04681       if (comma)
04682   *comma = '\0';
04683 
04684       first = decode_reg_name (str);
04685       if (first < 0)
04686   {
04687     warning ("unknown register name: %s", str);
04688     return;
04689   }
04690 
04691       last = decode_reg_name (dash + 1);
04692       if (last < 0)
04693   {
04694     warning ("unknown register name: %s", dash + 1);
04695     return;
04696   }
04697 
04698       *dash = '-';
04699 
04700       if (first > last)
04701   {
04702     warning ("%s-%s is an empty range", str, dash + 1);
04703     return;
04704   }
04705 
04706       for (i = first; i <= last; ++i)
04707   fixed_regs[i] = call_used_regs[i] = 1;
04708 
04709       if (!comma)
04710   break;
04711 
04712       *comma = ',';
04713       str = comma + 1;
04714     }
04715 }
04716 
04717 static struct machine_function *
04718 ia64_init_machine_status (void)
04719 {
04720   return ggc_alloc_cleared (sizeof (struct machine_function));
04721 }
04722 
04723 /* Handle TARGET_OPTIONS switches.  */
04724 
04725 void
04726 ia64_override_options (void)
04727 {
04728   static struct pta
04729     {
04730       const char *const name;   /* processor name or nickname.  */
04731       const enum processor_type processor;
04732     }
04733   const processor_alias_table[] =
04734     {
04735       {"itanium", PROCESSOR_ITANIUM},
04736       {"itanium1", PROCESSOR_ITANIUM},
04737       {"merced", PROCESSOR_ITANIUM},
04738       {"itanium2", PROCESSOR_ITANIUM2},
04739       {"mckinley", PROCESSOR_ITANIUM2},
04740     };
04741 
04742   int const pta_size = ARRAY_SIZE (processor_alias_table);
04743   int i;
04744 
04745   if (TARGET_AUTO_PIC)
04746     target_flags |= MASK_CONST_GP;
04747 
04748   if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
04749     {
04750       if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
04751      && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
04752   {
04753     warning ("cannot optimize floating point division for both latency and throughput");
04754     target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
04755   }
04756       else 
04757   {
04758     if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
04759       target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
04760     else
04761       target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
04762   }
04763     }
04764 
04765   if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
04766     {
04767       if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
04768      && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
04769   {
04770     warning ("cannot optimize integer division for both latency and throughput");
04771     target_flags &= ~MASK_INLINE_INT_DIV_THR;
04772   }
04773       else 
04774   {
04775     if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
04776       target_flags &= ~MASK_INLINE_INT_DIV_LAT;
04777     else
04778       target_flags &= ~MASK_INLINE_INT_DIV_THR;
04779   }
04780     }
04781 
04782   if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
04783     {
04784       if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
04785      && (target_flags_explicit & MASK_INLINE_SQRT_THR))
04786   {
04787     warning ("cannot optimize square root for both latency and throughput");
04788     target_flags &= ~MASK_INLINE_SQRT_THR;
04789   }
04790       else 
04791   {
04792     if (target_flags_explicit & MASK_INLINE_SQRT_THR)
04793       target_flags &= ~MASK_INLINE_SQRT_LAT;
04794     else
04795       target_flags &= ~MASK_INLINE_SQRT_THR;
04796   }
04797     }
04798 
04799   if (TARGET_INLINE_SQRT_LAT)
04800     {
04801       warning ("not yet implemented: latency-optimized inline square root");
04802       target_flags &= ~MASK_INLINE_SQRT_LAT;
04803     }
04804 
04805   if (ia64_fixed_range_string)
04806     fix_range (ia64_fixed_range_string);
04807 
04808   if (ia64_tls_size_string)
04809     {
04810       char *end;
04811       unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
04812       if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
04813   error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
04814       else
04815   ia64_tls_size = tmp;
04816     }
04817 
04818   if (!ia64_tune_string)
04819     ia64_tune_string = "itanium2";
04820 
04821   for (i = 0; i < pta_size; i++)
04822     if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
04823       {
04824   ia64_tune = processor_alias_table[i].processor;
04825   break;
04826       }
04827 
04828   if (i == pta_size)
04829     error ("bad value (%s) for -tune= switch", ia64_tune_string);
04830 
04831   ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
04832   flag_schedule_insns_after_reload = 0;
04833 
04834   ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
04835 
04836   init_machine_status = ia64_init_machine_status;
04837 }
04838 
04839 static enum attr_itanium_class ia64_safe_itanium_class (rtx);
04840 static enum attr_type ia64_safe_type (rtx);
04841 
04842 static enum attr_itanium_class
04843 ia64_safe_itanium_class (rtx insn)
04844 {
04845   if (recog_memoized (insn) >= 0)
04846     return get_attr_itanium_class (insn);
04847   else
04848     return ITANIUM_CLASS_UNKNOWN;
04849 }
04850 
04851 static enum attr_type
04852 ia64_safe_type (rtx insn)
04853 {
04854   if (recog_memoized (insn) >= 0)
04855     return get_attr_type (insn);
04856   else
04857     return TYPE_UNKNOWN;
04858 }
04859 
/* The routines that follow emit instruction group stop bits wherever
   they are necessary to avoid dependencies.  */

/* Some additional registers have to be tracked as far as serialization
   is concerned, so that br.call and br.ret are handled properly.  These
   registers could be made visible to gcc, but since they are never
   explicitly used in gcc generated code that seems wasteful (and it
   would make the call and return patterns needlessly complex).  The
   extra numbers are allocated consecutively, starting at
   FIRST_PSEUDO_REGISTER + 1.  */
#define REG_RP          (BR_REG (0))
#define REG_AR_CFM      (FIRST_PSEUDO_REGISTER + 1)
/* Stands for volatile asms, which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE    (REG_AR_CFM + 1)
/* First of 64 consecutive numbers, one per AR_UNAT bit.  */
#define AR_UNAT_BIT_0   (REG_VOLATILE + 1)
/* Number of entries needed in the write-state tables.  */
#define NUM_REGS        (AR_UNAT_BIT_0 + 64)
04876 
/* Per-register record of how the register has been written in the
   current instruction group.

   An unconditional write (no qualifying predicate) sets WRITE_COUNT to
   2; FIRST_PRED is then ignored.

   A write under qualifying predicate P sets WRITE_COUNT to 1 and
   FIRST_PRED to P.  If the same register is later written again under
   the complement of P (P^1), WRITE_COUNT becomes 2.

   Consequently, whenever an insn attempts to write a register whose
   WRITE_COUNT is two, an insn group barrier has to be issued first.

   WRITTEN_BY_FP is set when a predicate register is written by a
   floating-point insn.  WRITTEN_BY_AND is set when a predicate register
   was written by an AND.ORCM, WRITTEN_BY_OR when it was written by an
   OR.ANDCM.  */

struct reg_write_state
{
  unsigned write_count : 2;     /* 0, 1 or 2 as described above.  */
  unsigned first_pred : 16;     /* Predicate of the first write.  */
  unsigned written_by_fp : 1;   /* Written by a floating-point insn.  */
  unsigned written_by_and : 1;  /* Written by an AND.ORCM.  */
  unsigned written_by_or : 1;   /* Written by an OR.ANDCM.  */
};
04905 
04906 /* Cumulative info for the current instruction group.  */
04907 struct reg_write_state rws_sum[NUM_REGS];
04908 /* Info for the current instruction.  This gets copied to rws_sum after a
04909    stop bit is emitted.  */
04910 struct reg_write_state rws_insn[NUM_REGS];
04911 
04912 /* Indicates whether this is the first instruction after a stop bit,
04913    in which case we don't need another stop bit.  Without this, we hit
04914    the abort in ia64_variable_issue when scheduling an alloc.  */
04915 static int first_instruction;
04916 
/* Flags describing one register access; they are needed to compute
   RAW/WAW dependencies while the RTL of a single instruction is being
   traversed.  */
struct reg_flags
{
  unsigned is_write : 1;        /* Register is being written.  */
  unsigned is_fp : 1;           /* Register is used as part of an fp op.  */
  unsigned is_branch : 1;       /* Register is used as part of a branch.  */
  unsigned is_and : 1;          /* Register is used as part of and.orcm.  */
  unsigned is_or : 1;           /* Register is used as part of or.andcm.  */
  unsigned is_sibcall : 1;      /* Is this a sibling or normal call?  */
};
04928 
04929 static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
04930 static int rws_access_regno (int, struct reg_flags, int);
04931 static int rws_access_reg (rtx, struct reg_flags, int);
04932 static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
04933 static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
04934 static int rtx_needs_barrier (rtx, struct reg_flags, int);
04935 static void init_insn_group_barriers (void);
04936 static int group_barrier_needed_p (rtx);
04937 static int safe_group_barrier_needed_p (rtx);
04938 
04939 /* Update *RWS for REGNO, which is being written by the current instruction,
04940    with predicate PRED, and associated register flags in FLAGS.  */
04941 
04942 static void
04943 rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
04944 {
04945   if (pred)
04946     rws[regno].write_count++;
04947   else
04948     rws[regno].write_count = 2;
04949   rws[regno].written_by_fp |= flags.is_fp;
04950   /* ??? Not tracking and/or across differing predicates.  */
04951   rws[regno].written_by_and = flags.is_and;
04952   rws[regno].written_by_or = flags.is_or;
04953   rws[regno].first_pred = pred;
04954 }
04955 
04956 /* Handle an access to register REGNO of type FLAGS using predicate register
04957    PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
04958    a dependency with an earlier instruction in the same group.  */
04959 
04960 static int
04961 rws_access_regno (int regno, struct reg_flags flags, int pred)
04962 {
04963   int need_barrier = 0;
04964 
04965   if (regno >= NUM_REGS)
04966     abort ();
04967 
04968   if (! PR_REGNO_P (regno))
04969     flags.is_and = flags.is_or = 0;
04970 
04971   if (flags.is_write)
04972     {
04973       int write_count;
04974 
04975       /* One insn writes same reg multiple times?  */
04976       if (rws_insn[regno].write_count > 0)
04977   abort ();
04978 
04979       /* Update info for current instruction.  */
04980       rws_update (rws_insn, regno, flags, pred);
04981       write_count = rws_sum[regno].write_count;
04982 
04983       switch (write_count)
04984   {
04985   case 0:
04986     /* The register has not been written yet.  */
04987     rws_update (rws_sum, regno, flags, pred);
04988     break;
04989 
04990   case 1:
04991     /* The register has been written via a predicate.  If this is
04992        not a complementary predicate, then we need a barrier.  */
04993     /* ??? This assumes that P and P+1 are always complementary
04994        predicates for P even.  */
04995     if (flags.is_and && rws_sum[regno].written_by_and)
04996       ;
04997     else if (flags.is_or && rws_sum[regno].written_by_or)
04998       ;
04999     else if ((rws_sum[regno].first_pred ^ 1) != pred)
05000       need_barrier = 1;
05001     rws_update (rws_sum, regno, flags, pred);
05002     break;
05003 
05004   case 2:
05005     /* The register has been unconditionally written already.  We
05006        need a barrier.  */
05007     if (flags.is_and && rws_sum[regno].written_by_and)
05008       ;
05009     else if (flags.is_or && rws_sum[regno].written_by_or)
05010       ;
05011     else
05012       need_barrier = 1;
05013     rws_sum[regno].written_by_and = flags.is_and;
05014     rws_sum[regno].written_by_or = flags.is_or;
05015     break;
05016 
05017   default:
05018     abort ();
05019   }
05020     }
05021   else
05022     {
05023       if (flags.is_branch)
05024   {
05025     /* Branches have several RAW exceptions that allow to avoid
05026        barriers.  */
05027 
05028     if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
05029       /* RAW dependencies on branch regs are permissible as long
05030          as the writer is a non-branch instruction.  Since we
05031          never generate code that uses a branch register written
05032          by a branch instruction, handling this case is
05033          easy.  */
05034       return 0;
05035 
05036     if (REGNO_REG_CLASS (regno) == PR_REGS
05037         && ! rws_sum[regno].written_by_fp)
05038       /* The predicates of a branch are available within the
05039          same insn group as long as the predicate was written by
05040          something other than a floating-point instruction.  */
05041       return 0;
05042   }
05043 
05044       if (flags.is_and && rws_sum[regno].written_by_and)
05045   return 0;
05046       if (flags.is_or && rws_sum[regno].written_by_or)
05047   return 0;
05048 
05049       switch (rws_sum[regno].write_count)
05050   {
05051   case 0:
05052     /* The register has not been written yet.  */
05053     break;
05054 
05055   case 1:
05056     /* The register has been written via a predicate.  If this is
05057        not a complementary predicate, then we need a barrier.  */
05058     /* ??? This assumes that P and P+1 are always complementary
05059        predicates for P even.  */
05060     if ((rws_sum[regno].first_pred ^ 1) != pred)
05061       need_barrier = 1;
05062     break;
05063 
05064   case 2:
05065     /* The register has been unconditionally written already.  We
05066        need a barrier.  */
05067     need_barrier = 1;
05068     break;
05069 
05070   default:
05071     abort ();
05072   }
05073     }
05074 
05075   return need_barrier;
05076 }
05077 
05078 static int
05079 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
05080 {
05081   int regno = REGNO (reg);
05082   int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
05083 
05084   if (n == 1)
05085     return rws_access_regno (regno, flags, pred);
05086   else
05087     {
05088       int need_barrier = 0;
05089       while (--n >= 0)
05090   need_barrier |= rws_access_regno (regno + n, flags, pred);
05091       return need_barrier;
05092     }
05093 }
05094 
05095 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
05096    the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
05097 
05098 static void
05099 update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
05100 {
05101   rtx src = SET_SRC (x);
05102 
05103   *pcond = 0;
05104 
05105   switch (GET_CODE (src))
05106     {
05107     case CALL:
05108       return;
05109 
05110     case IF_THEN_ELSE:
05111       if (SET_DEST (x) == pc_rtx)
05112   /* X is a conditional branch.  */
05113   return;
05114       else
05115   {
05116     int is_complemented = 0;
05117 
05118     /* X is a conditional move.  */
05119     rtx cond = XEXP (src, 0);
05120     if (GET_CODE (cond) == EQ)
05121       is_complemented = 1;
05122     cond = XEXP (cond, 0);
05123     if (GET_CODE (cond) != REG
05124         && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
05125       abort ();
05126     *pcond = cond;
05127     if (XEXP (src, 1) == SET_DEST (x)
05128         || XEXP (src, 2) == SET_DEST (x))
05129       {
05130         /* X is a conditional move that conditionally writes the
05131      destination.  */
05132 
05133         /* We need another complement in this case.  */
05134         if (XEXP (src, 1) == SET_DEST (x))
05135     is_complemented = ! is_complemented;
05136 
05137         *ppred = REGNO (cond);
05138         if (is_complemented)
05139     ++*ppred;
05140       }
05141 
05142     /* ??? If this is a conditional write to the dest, then this
05143        instruction does not actually read one source.  This probably
05144        doesn't matter, because that source is also the dest.  */
05145     /* ??? Multiple writes to predicate registers are allowed
05146        if they are all AND type compares, or if they are all OR
05147        type compares.  We do not generate such instructions
05148        currently.  */
05149   }
05150       /* ... fall through ...  */
05151 
05152     default:
05153       if (COMPARISON_P (src)
05154     && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
05155   /* Set pflags->is_fp to 1 so that we know we're dealing
05156      with a floating point comparison when processing the
05157      destination of the SET.  */
05158   pflags->is_fp = 1;
05159 
05160       /* Discover if this is a parallel comparison.  We only handle
05161    and.orcm and or.andcm at present, since we must retain a
05162    strict inverse on the predicate pair.  */
05163       else if (GET_CODE (src) == AND)
05164   pflags->is_and = 1;
05165       else if (GET_CODE (src) == IOR)
05166   pflags->is_or = 1;
05167 
05168       break;
05169     }
05170 }
05171 
05172 /* Subroutine of rtx_needs_barrier; this function determines whether the
05173    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
05174    are as in rtx_needs_barrier.  COND is an rtx that holds the condition
05175    for this insn.  */
05176 
05177 static int
05178 set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
05179 {
05180   int need_barrier = 0;
05181   rtx dst;
05182   rtx src = SET_SRC (x);
05183 
05184   if (GET_CODE (src) == CALL)
05185     /* We don't need to worry about the result registers that
05186        get written by subroutine call.  */
05187     return rtx_needs_barrier (src, flags, pred);
05188   else if (SET_DEST (x) == pc_rtx)
05189     {
05190       /* X is a conditional branch.  */
05191       /* ??? This seems redundant, as the caller sets this bit for
05192    all JUMP_INSNs.  */
05193       flags.is_branch = 1;
05194       return rtx_needs_barrier (src, flags, pred);
05195     }
05196 
05197   need_barrier = rtx_needs_barrier (src, flags, pred);
05198 
05199   /* This instruction unconditionally uses a predicate register.  */
05200   if (cond)
05201     need_barrier |= rws_access_reg (cond, flags, 0);
05202 
05203   dst = SET_DEST (x);
05204   if (GET_CODE (dst) == ZERO_EXTRACT)
05205     {
05206       need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
05207       need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
05208       dst = XEXP (dst, 0);
05209     }
05210   return need_barrier;
05211 }
05212 
05213 /* Handle an access to rtx X of type FLAGS using predicate register
05214    PRED.  Return 1 if this access creates a dependency with an earlier
05215    instruction in the same group.  */
05216 
05217 static int
05218 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
05219 {
05220   int i, j;
05221   int is_complemented = 0;
05222   int need_barrier = 0;
05223   const char *format_ptr;
05224   struct reg_flags new_flags;
05225   rtx cond = 0;
05226 
05227   if (! x)
05228     return 0;
05229 
05230   new_flags = flags;
05231 
05232   switch (GET_CODE (x))
05233     {
05234     case SET:
05235       update_set_flags (x, &new_flags, &pred, &cond);
05236       need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
05237       if (GET_CODE (SET_SRC (x)) != CALL)
05238   {
05239     new_flags.is_write = 1;
05240     need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
05241   }
05242       break;
05243 
05244     case CALL:
05245       new_flags.is_write = 0;
05246       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
05247 
05248       /* Avoid multiple register writes, in case this is a pattern with
05249    multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
05250       if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
05251   {
05252     new_flags.is_write = 1;
05253     need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
05254     need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
05255     need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
05256   }
05257       break;
05258 
05259     case COND_EXEC:
05260       /* X is a predicated instruction.  */
05261 
05262       cond = COND_EXEC_TEST (x);
05263       if (pred)
05264   abort ();
05265       need_barrier = rtx_needs_barrier (cond, flags, 0);
05266 
05267       if (GET_CODE (cond) == EQ)
05268   is_complemented = 1;
05269       cond = XEXP (cond, 0);
05270       if (GET_CODE (cond) != REG
05271     && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
05272   abort ();
05273       pred = REGNO (cond);
05274       if (is_complemented)
05275   ++pred;
05276 
05277       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
05278       return need_barrier;
05279 
05280     case CLOBBER:
05281     case USE:
05282       /* Clobber & use are for earlier compiler-phases only.  */
05283       break;
05284 
05285     case ASM_OPERANDS:
05286     case ASM_INPUT:
05287       /* We always emit stop bits for traditional asms.  We emit stop bits
05288    for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
05289       if (GET_CODE (x) != ASM_OPERANDS
05290     || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
05291   {
05292     /* Avoid writing the register multiple times if we have multiple
05293        asm outputs.  This avoids an abort in rws_access_reg.  */
05294     if (! rws_insn[REG_VOLATILE].write_count)
05295       {
05296         new_flags.is_write = 1;
05297         rws_access_regno (REG_VOLATILE, new_flags, pred);
05298       }
05299     return 1;
05300   }
05301 
05302       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
05303    We cannot just fall through here since then we would be confused
05304    by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
05305    traditional asms unlike their normal usage.  */
05306 
05307       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
05308   if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
05309     need_barrier = 1;
05310       break;
05311 
05312     case PARALLEL:
05313       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
05314   {
05315     rtx pat = XVECEXP (x, 0, i);
05316     switch (GET_CODE (pat))
05317       {
05318       case SET:
05319         update_set_flags (pat, &new_flags, &pred, &cond);
05320         need_barrier |= set_src_needs_barrier (pat, new_flags,
05321                  pred, cond);
05322         break;
05323 
05324       case USE:
05325       case CALL:
05326       case ASM_OPERANDS:
05327         need_barrier |= rtx_needs_barrier (pat, flags, pred);
05328         break;
05329 
05330       case CLOBBER:
05331       case RETURN:
05332         break;
05333 
05334       default:
05335         gcc_unreachable ();
05336       }
05337   }
05338       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
05339   {
05340     rtx pat = XVECEXP (x, 0, i);
05341     if (GET_CODE (pat) == SET)
05342       {
05343         if (GET_CODE (SET_SRC (pat)) != CALL)
05344     {
05345       new_flags.is_write = 1;
05346       need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
05347                  pred);
05348     }
05349       }
05350     else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
05351       need_barrier |= rtx_needs_barrier (pat, flags, pred);
05352   }
05353       break;
05354 
05355     case SUBREG:
05356       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
05357       break;
05358     case REG:
05359       if (REGNO (x) == AR_UNAT_REGNUM)
05360   {
05361     for (i = 0; i < 64; ++i)
05362       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
05363   }
05364       else
05365   need_barrier = rws_access_reg (x, flags, pred);
05366       break;
05367 
05368     case MEM:
05369       /* Find the regs used in memory address computation.  */
05370       new_flags.is_write = 0;
05371       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
05372       break;
05373 
05374     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
05375     case SYMBOL_REF:  case LABEL_REF:     case CONST:
05376       break;
05377 
05378       /* Operators with side-effects.  */
05379     case POST_INC:    case POST_DEC:
05380       if (GET_CODE (XEXP (x, 0)) != REG)
05381   abort ();
05382 
05383       new_flags.is_write = 0;
05384       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
05385       new_flags.is_write = 1;
05386       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
05387       break;
05388 
05389     case POST_MODIFY:
05390       if (GET_CODE (XEXP (x, 0)) != REG)
05391   abort ();
05392 
05393       new_flags.is_write = 0;
05394       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
05395       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
05396       new_flags.is_write = 1;
05397       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
05398       break;
05399 
05400       /* Handle common unary and binary ops for efficiency.  */
05401     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
05402     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
05403     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
05404     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
05405     case NE:       case EQ:      case GE:      case GT:        case LE:
05406     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
05407       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
05408       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
05409       break;
05410 
05411     case NEG:      case NOT:          case SIGN_EXTEND:     case ZERO_EXTEND:
05412     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
05413     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
05414     case SQRT:     case FFS:    case POPCOUNT:
05415       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
05416       break;
05417 
05418     case VEC_SELECT:
05419       /* VEC_SELECT's second argument is a PARALLEL with integers that
05420    describe the elements selected.  On ia64, those integers are
05421    always constants.  Avoid walking the PARALLEL so that we don't
05422    get confused with "normal" parallels and abort.  */
05423       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
05424       break;
05425 
05426     case UNSPEC:
05427       switch (XINT (x, 1))
05428   {
05429   case UNSPEC_LTOFF_DTPMOD:
05430   case UNSPEC_LTOFF_DTPREL:
05431   case UNSPEC_DTPREL:
05432   case UNSPEC_LTOFF_TPREL:
05433   case UNSPEC_TPREL:
05434   case UNSPEC_PRED_REL_MUTEX:
05435   case UNSPEC_PIC_CALL:
05436         case UNSPEC_MF:
05437         case UNSPEC_FETCHADD_ACQ:
05438   case UNSPEC_BSP_VALUE:
05439   case UNSPEC_FLUSHRS:
05440   case UNSPEC_BUNDLE_SELECTOR:
05441           break;
05442 
05443   case UNSPEC_GR_SPILL:
05444   case UNSPEC_GR_RESTORE:
05445     {
05446       HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
05447       HOST_WIDE_INT bit = (offset >> 3) & 63;
05448 
05449       need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05450       new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
05451       need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
05452                 new_flags, pred);
05453       break;
05454     }
05455 
05456   case UNSPEC_FR_SPILL:
05457   case UNSPEC_FR_RESTORE:
05458   case UNSPEC_GETF_EXP:
05459   case UNSPEC_SETF_EXP:
05460         case UNSPEC_ADDP4:
05461   case UNSPEC_FR_SQRT_RECIP_APPROX:
05462     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05463     break;
05464 
05465   case UNSPEC_FR_RECIP_APPROX:
05466   case UNSPEC_SHRP:
05467   case UNSPEC_COPYSIGN:
05468     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
05469     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
05470     break;
05471 
05472         case UNSPEC_CMPXCHG_ACQ:
05473     need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
05474     need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
05475     break;
05476 
05477   default:
05478     abort ();
05479   }
05480       break;
05481 
05482     case UNSPEC_VOLATILE:
05483       switch (XINT (x, 1))
05484   {
05485   case UNSPECV_ALLOC:
05486     /* Alloc must always be the first instruction of a group.
05487        We force this by always returning true.  */
05488     /* ??? We might get better scheduling if we explicitly check for
05489        input/local/output register dependencies, and modify the
05490        scheduler so that alloc is always reordered to the start of
05491        the current group.  We could then eliminate all of the
05492        first_instruction code.  */
05493     rws_access_regno (AR_PFS_REGNUM, flags, pred);
05494 
05495     new_flags.is_write = 1;
05496     rws_access_regno (REG_AR_CFM, new_flags, pred);
05497     return 1;
05498 
05499   case UNSPECV_SET_BSP:
05500     need_barrier = 1;
05501           break;
05502 
05503   case UNSPECV_BLOCKAGE:
05504   case UNSPECV_INSN_GROUP_BARRIER:
05505   case UNSPECV_BREAK:
05506   case UNSPECV_PSAC_ALL:
05507   case UNSPECV_PSAC_NORMAL:
05508     return 0;
05509 
05510   default:
05511     abort ();
05512   }
05513       break;
05514 
05515     case RETURN:
05516       new_flags.is_write = 0;
05517       need_barrier  = rws_access_regno (REG_RP, flags, pred);
05518       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
05519 
05520       new_flags.is_write = 1;
05521       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
05522       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
05523       break;
05524 
05525     default:
05526       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
05527       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
05528   switch (format_ptr[i])
05529     {
05530     case '0': /* unused field */
05531     case 'i': /* integer */
05532     case 'n': /* note */
05533     case 'w': /* wide integer */
05534     case 's': /* pointer to string */
05535     case 'S': /* optional pointer to string */
05536       break;
05537 
05538     case 'e':
05539       if (rtx_needs_barrier (XEXP (x, i), flags, pred))
05540         need_barrier = 1;
05541       break;
05542 
05543     case 'E':
05544       for (j = XVECLEN (x, i) - 1; j >= 0; --j)
05545         if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
05546     need_barrier = 1;
05547       break;
05548 
05549     default:
05550       abort ();
05551     }
05552       break;
05553     }
05554   return need_barrier;
05555 }
05556 
05557 /* Clear out the state for group_barrier_needed_p at the start of a
05558    sequence of insns.  */
05559 
05560 static void
05561 init_insn_group_barriers (void)
05562 {
05563   memset (rws_sum, 0, sizeof (rws_sum));
05564   first_instruction = 1;
05565 }
05566 
05567 /* Given the current state, recorded by previous calls to this function,
05568    determine whether a group barrier (a stop bit) is necessary before INSN.
05569    Return nonzero if so.  */
05570 
05571 static int
05572 group_barrier_needed_p (rtx insn)
05573 {
05574   rtx pat;
05575   int need_barrier = 0;
05576   struct reg_flags flags;
05577 
05578   memset (&flags, 0, sizeof (flags));
05579   switch (GET_CODE (insn))
05580     {
05581     case NOTE:
05582       break;
05583 
05584     case BARRIER:
05585       /* A barrier doesn't imply an instruction group boundary.  */
05586       break;
05587 
05588     case CODE_LABEL:
05589       memset (rws_insn, 0, sizeof (rws_insn));
05590       return 1;
05591 
05592     case CALL_INSN:
05593       flags.is_branch = 1;
05594       flags.is_sibcall = SIBLING_CALL_P (insn);
05595       memset (rws_insn, 0, sizeof (rws_insn));
05596 
05597       /* Don't bundle a call following another call.  */
05598       if ((pat = prev_active_insn (insn))
05599     && GET_CODE (pat) == CALL_INSN)
05600   {
05601     need_barrier = 1;
05602     break;
05603   }
05604 
05605       need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
05606       break;
05607 
05608     case JUMP_INSN:
05609       flags.is_branch = 1;
05610 
05611       /* Don't bundle a jump following a call.  */
05612       if ((pat = prev_active_insn (insn))
05613     && GET_CODE (pat) == CALL_INSN)
05614   {
05615     need_barrier = 1;
05616     break;
05617   }
05618       /* FALLTHRU */
05619 
05620     case INSN:
05621       if (GET_CODE (PATTERN (insn)) == USE
05622     || GET_CODE (PATTERN (insn)) == CLOBBER)
05623   /* Don't care about USE and CLOBBER "insns"---those are used to
05624      indicate to the optimizer that it shouldn't get rid of
05625      certain operations.  */
05626   break;
05627 
05628       pat = PATTERN (insn);
05629 
05630       /* Ug.  Hack hacks hacked elsewhere.  */
05631       switch (recog_memoized (insn))
05632   {
05633     /* We play dependency tricks with the epilogue in order
05634        to get proper schedules.  Undo this for dv analysis.  */
05635   case CODE_FOR_epilogue_deallocate_stack:
05636   case CODE_FOR_prologue_allocate_stack:
05637     pat = XVECEXP (pat, 0, 0);
05638     break;
05639 
05640     /* The pattern we use for br.cloop confuses the code above.
05641        The second element of the vector is representative.  */
05642   case CODE_FOR_doloop_end_internal:
05643     pat = XVECEXP (pat, 0, 1);
05644     break;
05645 
05646     /* Doesn't generate code.  */
05647   case CODE_FOR_pred_rel_mutex:
05648   case CODE_FOR_prologue_use:
05649     return 0;
05650 
05651   default:
05652     break;
05653   }
05654 
05655       memset (rws_insn, 0, sizeof (rws_insn));
05656       need_barrier = rtx_needs_barrier (pat, flags, 0);
05657 
05658       /* Check to see if the previous instruction was a volatile
05659    asm.  */
05660       if (! need_barrier)
05661   need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
05662       break;
05663 
05664     default:
05665       abort ();
05666     }
05667 
05668   if (first_instruction && INSN_P (insn)
05669       && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
05670       && GET_CODE (PATTERN (insn)) != USE
05671       && GET_CODE (PATTERN (insn)) != CLOBBER)
05672     {
05673       need_barrier = 0;
05674       first_instruction = 0;
05675     }
05676 
05677   return need_barrier;
05678 }
05679 
05680 /* Like group_barrier_needed_p, but do not clobber the current state.  */
05681 
05682 static int
05683 safe_group_barrier_needed_p (rtx insn)
05684 {
05685   struct reg_write_state rws_saved[NUM_REGS];
05686   int saved_first_instruction;
05687   int t;
05688 
05689   memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
05690   saved_first_instruction = first_instruction;
05691 
05692   t = group_barrier_needed_p (insn);
05693 
05694   memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
05695   first_instruction = saved_first_instruction;
05696 
05697   return t;
05698 }
05699 
05700 /* Scan the current function and insert stop bits as necessary to
05701    eliminate dependencies.  This function assumes that a final
05702    instruction scheduling pass has been run which has already
05703    inserted most of the necessary stop bits.  This function only
05704    inserts new ones at basic block boundaries, since these are
05705    invisible to the scheduler.  */
05706 
05707 static void
05708 emit_insn_group_barriers (FILE *dump)
05709 {
05710   rtx insn;
05711   rtx last_label = 0;
05712   int insns_since_last_label = 0;
05713 
05714   init_insn_group_barriers ();
05715 
05716   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
05717     {
05718       if (GET_CODE (insn) == CODE_LABEL)
05719   {
05720     if (insns_since_last_label)
05721       last_label = insn;
05722     insns_since_last_label = 0;
05723   }
05724       else if (GET_CODE (insn) == NOTE
05725          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
05726   {
05727     if (insns_since_last_label)
05728       last_label = insn;
05729     insns_since_last_label = 0;
05730   }
05731       else if (GET_CODE (insn) == INSN
05732          && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
05733          && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
05734   {
05735     init_insn_group_barriers ();
05736     last_label = 0;
05737   }
05738       else if (INSN_P (insn))
05739   {
05740     insns_since_last_label = 1;
05741 
05742     if (group_barrier_needed_p (insn))
05743       {
05744         if (last_label)
05745     {
05746       if (dump)
05747         fprintf (dump, "Emitting stop before label %d\n",
05748            INSN_UID (last_label));
05749       emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
05750       insn = last_label;
05751 
05752       init_insn_group_barriers ();
05753       last_label = 0;
05754     }
05755       }
05756   }
05757     }
05758 }
05759 
05760 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
05761    This function has to emit all necessary group barriers.  */
05762 
05763 static void
05764 emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
05765 {
05766   rtx insn;
05767 
05768   init_insn_group_barriers ();
05769 
05770   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
05771     {
05772       if (GET_CODE (insn) == BARRIER)
05773   {
05774     rtx last = prev_active_insn (insn);
05775 
05776     if (! last)
05777       continue;
05778     if (GET_CODE (last) == JUMP_INSN
05779         && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
05780       last = prev_active_insn (last);
05781     if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
05782       emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
05783 
05784     init_insn_group_barriers ();
05785   }
05786       else if (INSN_P (insn))
05787   {
05788     if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
05789       init_insn_group_barriers ();
05790     else if (group_barrier_needed_p (insn))
05791       {
05792         emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
05793         init_insn_group_barriers ();
05794         group_barrier_needed_p (insn);
05795       }
05796   }
05797     }
05798 }
05799 
05800 
05801 
/* Instruction scheduling support.  */

#define NR_BUNDLES 10

/* Names of all available bundles, indexed by bundle number.  The
   ".bbb" and ".mbb" entries are present only when NR_BUNDLES is 10.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii", ".mmi", ".mfi", ".mmf",
#if NR_BUNDLES == 10
  ".bbb", ".mbb",
#endif
  ".mib", ".mmb", ".mfb", ".mlx"
};
05823 
05824 /* Nonzero if we should insert stop bits into the schedule.  */
05825 
05826 int ia64_final_schedule = 0;
05827 
05828 /* Codes of the corresponding queried units: */
05829 
05830 static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
05831 static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
05832 
05833 static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
05834 static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
05835 
05836 static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
05837 
05838 /* The following variable value is an insn group barrier.  */
05839 
05840 static rtx dfa_stop_insn;
05841 
05842 /* The following variable value is the last issued insn.  */
05843 
05844 static rtx last_scheduled_insn;
05845 
05846 /* The following variable value is size of the DFA state.  */
05847 
05848 static size_t dfa_state_size;
05849 
05850 /* The following variable value is pointer to a DFA state used as
05851    temporary variable.  */
05852 
05853 static state_t temp_dfa_state = NULL;
05854 
05855 /* The following variable value is DFA state after issuing the last
05856    insn.  */
05857 
05858 static state_t prev_cycle_state = NULL;
05859 
05860 /* The following array element values are TRUE if the corresponding
05861    insn requires to add stop bits before it.  */
05862 
05863 static char *stops_p;
05864 
05865 /* The following variable is used to set up the mentioned above array.  */
05866 
05867 static int stop_before_p = 0;
05868 
05869 /* The following variable value is length of the arrays `clocks' and
05870    `add_cycles'. */
05871 
05872 static int clocks_length;
05873 
05874 /* The following array element values are cycles on which the
05875    corresponding insn will be issued.  The array is used only for
05876    Itanium1.  */
05877 
05878 static int *clocks;
05879 
05880 /* The following array element values are numbers of cycles should be
05881    added to improve insn scheduling for MM_insns for Itanium1.  */
05882 
05883 static int *add_cycles;
05884 
05885 static rtx ia64_single_set (rtx);
05886 static void ia64_emit_insn_before (rtx, rtx);
05887 
05888 /* Map a bundle number to its pseudo-op.  */
05889 
05890 const char *
05891 get_bundle_name (int b)
05892 {
05893   return bundle_name[b];
05894 }
05895 
05896 
/* Return the maximum number of instructions a cpu can issue, which is
   a fixed 6 here.  */

static int
ia64_issue_rate (void)
{
  enum { max_issue = 6 };

  return max_issue;
}
05904 
05905 /* Helper function - like single_set, but look inside COND_EXEC.  */
05906 
05907 static rtx
05908 ia64_single_set (rtx insn)
05909 {
05910   rtx x = PATTERN (insn), ret;
05911   if (GET_CODE (x) == COND_EXEC)
05912     x = COND_EXEC_CODE (x);
05913   if (GET_CODE (x) == SET)
05914     return x;
05915 
05916   /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
05917      Although they are not classical single set, the second set is there just
05918      to protect it from moving past FP-relative stack accesses.  */
05919   switch (recog_memoized (insn))
05920     {
05921     case CODE_FOR_prologue_allocate_stack:
05922     case CODE_FOR_epilogue_deallocate_stack:
05923       ret = XVECEXP (x, 0, 0);
05924       break;
05925 
05926     default:
05927       ret = single_set_2 (insn, x);
05928       break;
05929     }
05930 
05931   return ret;
05932 }
05933 
05934 /* Adjust the cost of a scheduling dependency.  Return the new cost of
05935    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
05936 
05937 static int
05938 ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
05939 {
05940   enum attr_itanium_class dep_class;
05941   enum attr_itanium_class insn_class;
05942 
05943   if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
05944     return cost;
05945 
05946   insn_class = ia64_safe_itanium_class (insn);
05947   dep_class = ia64_safe_itanium_class (dep_insn);
05948   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
05949       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
05950     return 0;
05951 
05952   return cost;
05953 }
05954 
05955 /* Like emit_insn_before, but skip cycle_display notes.
05956    ??? When cycle display notes are implemented, update this.  */
05957 
05958 static void
05959 ia64_emit_insn_before (rtx insn, rtx before)
05960 {
05961   emit_insn_before (insn, before);
05962 }
05963 
05964 /* The following function marks insns who produce addresses for load
05965    and store insns.  Such insns will be placed into M slots because it
05966    decrease latency time for Itanium1 (see function
05967    `ia64_produce_address_p' and the DFA descriptions).  */
05968 
05969 static void
05970 ia64_dependencies_evaluation_hook (rtx head, rtx tail)
05971 {
05972   rtx insn, link, next, next_tail;
05973 
05974   next_tail = NEXT_INSN (tail);
05975   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
05976     if (INSN_P (insn))
05977       insn->call = 0;
05978   for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
05979     if (INSN_P (insn)
05980   && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
05981       {
05982   for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
05983     {
05984       next = XEXP (link, 0);
05985       if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
05986      || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
05987     && ia64_st_address_bypass_p (insn, next))
05988         break;
05989       else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
05990           || ia64_safe_itanium_class (next)
05991           == ITANIUM_CLASS_FLD)
05992          && ia64_ld_address_bypass_p (insn, next))
05993         break;
05994     }
05995   insn->call = link != 0;
05996       }
05997 }
05998 
05999 /* We're beginning a new block.  Initialize data structures as necessary.  */
06000 
06001 static void
06002 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
06003      int sched_verbose ATTRIBUTE_UNUSED,
06004      int max_ready ATTRIBUTE_UNUSED)
06005 {
06006 #ifdef ENABLE_CHECKING
06007   rtx insn;
06008 
06009   if (reload_completed)
06010     for (insn = NEXT_INSN (current_sched_info->prev_head);
06011    insn != current_sched_info->next_tail;
06012    insn = NEXT_INSN (insn))
06013       if (SCHED_GROUP_P (insn))
06014   abort ();
06015 #endif
06016   last_scheduled_insn = NULL_RTX;
06017   init_insn_group_barriers ();
06018 }
06019 
06020 /* We are about to being issuing insns for this clock cycle.
06021    Override the default sort algorithm to better slot instructions.  */
06022 
06023 static int
06024 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
06025       int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
06026       int reorder_type)
06027 {
06028   int n_asms;
06029   int n_ready = *pn_ready;
06030   rtx *e_ready = ready + n_ready;
06031   rtx *insnp;
06032 
06033   if (sched_verbose)
06034     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
06035 
06036   if (reorder_type == 0)
06037     {
06038       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
06039       n_asms = 0;
06040       for (insnp = ready; insnp < e_ready; insnp++)
06041   if (insnp < e_ready)
06042     {
06043       rtx insn = *insnp;
06044       enum attr_type t = ia64_safe_type (insn);
06045       if (t == TYPE_UNKNOWN)
06046         {
06047     if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06048         || asm_noperands (PATTERN (insn)) >= 0)
06049       {
06050         rtx lowest = ready[n_asms];
06051         ready[n_asms] = insn;
06052         *insnp = lowest;
06053         n_asms++;
06054       }
06055     else
06056       {
06057         rtx highest = ready[n_ready - 1];
06058         ready[n_ready - 1] = insn;
06059         *insnp = highest;
06060         return 1;
06061       }
06062         }
06063     }
06064 
06065       if (n_asms < n_ready)
06066   {
06067     /* Some normal insns to process.  Skip the asms.  */
06068     ready += n_asms;
06069     n_ready -= n_asms;
06070   }
06071       else if (n_ready > 0)
06072   return 1;
06073     }
06074 
06075   if (ia64_final_schedule)
06076     {
06077       int deleted = 0;
06078       int nr_need_stop = 0;
06079 
06080       for (insnp = ready; insnp < e_ready; insnp++)
06081   if (safe_group_barrier_needed_p (*insnp))
06082     nr_need_stop++;
06083 
06084       if (reorder_type == 1 && n_ready == nr_need_stop)
06085   return 0;
06086       if (reorder_type == 0)
06087   return 1;
06088       insnp = e_ready;
06089       /* Move down everything that needs a stop bit, preserving
06090    relative order.  */
06091       while (insnp-- > ready + deleted)
06092   while (insnp >= ready + deleted)
06093     {
06094       rtx insn = *insnp;
06095       if (! safe_group_barrier_needed_p (insn))
06096         break;
06097       memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
06098       *ready = insn;
06099       deleted++;
06100     }
06101       n_ready -= deleted;
06102       ready += deleted;
06103     }
06104 
06105   return 1;
06106 }
06107 
06108 /* We are about to being issuing insns for this clock cycle.  Override
06109    the default sort algorithm to better slot instructions.  */
06110 
06111 static int
06112 ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
06113         int clock_var)
06114 {
06115   return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
06116          pn_ready, clock_var, 0);
06117 }
06118 
06119 /* Like ia64_sched_reorder, but called after issuing each insn.
06120    Override the default sort algorithm to better slot instructions.  */
06121 
06122 static int
06123 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
06124          int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
06125          int *pn_ready, int clock_var)
06126 {
06127   if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
06128     clocks [INSN_UID (last_scheduled_insn)] = clock_var;
06129   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
06130          clock_var, 1);
06131 }
06132 
06133 /* We are about to issue INSN.  Return the number of insns left on the
06134    ready queue that can be issued this cycle.  */
06135 
06136 static int
06137 ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
06138          int sched_verbose ATTRIBUTE_UNUSED,
06139          rtx insn ATTRIBUTE_UNUSED,
06140          int can_issue_more ATTRIBUTE_UNUSED)
06141 {
06142   last_scheduled_insn = insn;
06143   memcpy (prev_cycle_state, curr_state, dfa_state_size);
06144   if (reload_completed)
06145     {
06146       if (group_barrier_needed_p (insn))
06147   abort ();
06148       if (GET_CODE (insn) == CALL_INSN)
06149   init_insn_group_barriers ();
06150       stops_p [INSN_UID (insn)] = stop_before_p;
06151       stop_before_p = 0;
06152     }
06153   return 1;
06154 }
06155 
06156 /* We are choosing insn from the ready queue.  Return nonzero if INSN
06157    can be chosen.  */
06158 
06159 static int
06160 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
06161 {
06162   if (insn == NULL_RTX || !INSN_P (insn))
06163     abort ();
06164   return (!reload_completed
06165     || !safe_group_barrier_needed_p (insn));
06166 }
06167 
06168 /* The following variable value is pseudo-insn used by the DFA insn
06169    scheduler to change the DFA state when the simulated clock is
06170    increased.  */
06171 
06172 static rtx dfa_pre_cycle_insn;
06173 
06174 /* We are about to being issuing INSN.  Return nonzero if we cannot
06175    issue it on given cycle CLOCK and return zero if we should not sort
06176    the ready queue on the next clock start.  */
06177 
06178 static int
06179 ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
06180         int clock, int *sort_p)
06181 {
06182   int setup_clocks_p = FALSE;
06183 
06184   if (insn == NULL_RTX || !INSN_P (insn))
06185     abort ();
06186   if ((reload_completed && safe_group_barrier_needed_p (insn))
06187       || (last_scheduled_insn
06188     && (GET_CODE (last_scheduled_insn) == CALL_INSN
06189         || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
06190         || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
06191     {
06192       init_insn_group_barriers ();
06193       if (verbose && dump)
06194   fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
06195      last_clock == clock ? " + cycle advance" : "");
06196       stop_before_p = 1;
06197       if (last_clock == clock)
06198   {
06199     state_transition (curr_state, dfa_stop_insn);
06200     if (TARGET_EARLY_STOP_BITS)
06201       *sort_p = (last_scheduled_insn == NULL_RTX
06202            || GET_CODE (last_scheduled_insn) != CALL_INSN);
06203     else
06204       *sort_p = 0;
06205     return 1;
06206   }
06207       else if (reload_completed)
06208   setup_clocks_p = TRUE;
06209       if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
06210     || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
06211   state_reset (curr_state);
06212       else
06213   {
06214     memcpy (curr_state, prev_cycle_state, dfa_state_size);
06215     state_transition (curr_state, dfa_stop_insn);
06216     state_transition (curr_state, dfa_pre_cycle_insn);
06217     state_transition (curr_state, NULL);
06218   }
06219     }
06220   else if (reload_completed)
06221     setup_clocks_p = TRUE;
06222   if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
06223       && GET_CODE (PATTERN (insn)) != ASM_INPUT
06224       && asm_noperands (PATTERN (insn)) < 0)
06225     {
06226       enum attr_itanium_class c = ia64_safe_itanium_class (insn);
06227 
06228       if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
06229   {
06230     rtx link;
06231     int d = -1;
06232 
06233     for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
06234       if (REG_NOTE_KIND (link) == 0)
06235         {
06236     enum attr_itanium_class dep_class;
06237     rtx dep_insn = XEXP (link, 0);
06238 
06239     dep_class = ia64_safe_itanium_class (dep_insn);
06240     if ((dep_class == ITANIUM_CLASS_MMMUL
06241          || dep_class == ITANIUM_CLASS_MMSHF)
06242         && last_clock - clocks [INSN_UID (dep_insn)] < 4
06243         && (d < 0
06244       || last_clock - clocks [INSN_UID (dep_insn)] < d))
06245       d = last_clock - clocks [INSN_UID (dep_insn)];
06246         }
06247     if (d >= 0)
06248       add_cycles [INSN_UID (insn)] = 3 - d;
06249   }
06250     }
06251   return 0;
06252 }
06253 
06254 
06255 
06256 /* The following page contains abstract data `bundle states' which are
06257    used for bundling insns (inserting nops and template generation).  */
06258 
06259 /* The following describes state of insn bundling.  */
06260 
06261 struct bundle_state
06262 {
06263   /* Unique bundle state number to identify them in the debugging
06264      output  */
06265   int unique_num;
06266   rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
06267   /* number nops before and after the insn  */
06268   short before_nops_num, after_nops_num;
06269   int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
06270                    insn */
06271   int cost;     /* cost of the state in cycles */
06272   int accumulated_insns_num; /* number of all previous insns including
06273         nops.  L is considered as 2 insns */
06274   int branch_deviation; /* deviation of previous branches from 3rd slots  */
06275   struct bundle_state *next;  /* next state with the same insn_num  */
06276   struct bundle_state *originator; /* originator (previous insn state)  */
06277   /* All bundle states are in the following chain.  */
06278   struct bundle_state *allocated_states_chain;
06279   /* The DFA State after issuing the insn and the nops.  */
06280   state_t dfa_state;
06281 };
06282 
06283 /* Map from insn number to the chain of bundle states for that insn.  */
06284 
06285 static struct bundle_state **index_to_bundle_states;
06286 
06287 /* The unique number to give to the next bundle state.  */
06288 
06289 static int bundle_states_num;
06290 
06291 /* Chain (through allocated_states_chain) of all allocated states.  */
06292 
06293 static struct bundle_state *allocated_bundle_states_chain;
06294 
06295 /* Chain (through the `next' field) of bundle states that have been
06296    allocated but are currently not in use.  */
06297 
06298 static struct bundle_state *free_bundle_state_chain;
06299 
06300 
06301 /* The following function returns a free bundle state.  */
06302 
06303 static struct bundle_state *
06304 get_free_bundle_state (void)
06305 {
06306   struct bundle_state *result;
06307 
06308   if (free_bundle_state_chain != NULL)
06309     {
06310       result = free_bundle_state_chain;
06311       free_bundle_state_chain = result->next;
06312     }
06313   else
06314     {
06315       result = xmalloc (sizeof (struct bundle_state));
06316       result->dfa_state = xmalloc (dfa_state_size);
06317       result->allocated_states_chain = allocated_bundle_states_chain;
06318       allocated_bundle_states_chain = result;
06319     }
06320   result->unique_num = bundle_states_num++;
06321   return result;
06322 
06323 }
06324 
06325 /* The following function frees given bundle state.  */
06326 
06327 static void
06328 free_bundle_state (struct bundle_state *state)
06329 {
06330   state->next = free_bundle_state_chain;
06331   free_bundle_state_chain = state;
06332 }
06333 
06334 /* Start work with abstract data `bundle states'.  */
06335 
06336 static void
06337 initiate_bundle_states (void)
06338 {
06339   bundle_states_num = 0;
06340   free_bundle_state_chain = NULL;
06341   allocated_bundle_states_chain = NULL;
06342 }
06343 
06344 /* Finish work with abstract data `bundle states'.  */
06345 
06346 static void
06347 finish_bundle_states (void)
06348 {
06349   struct bundle_state *curr_state, *next_state;
06350 
06351   for (curr_state = allocated_bundle_states_chain;
06352        curr_state != NULL;
06353        curr_state = next_state)
06354     {
06355       next_state = curr_state->allocated_states_chain;
06356       free (curr_state->dfa_state);
06357       free (curr_state);
06358     }
06359 }
06360 
06361 /* Hash table of the bundle states.  The key of a bundle state is its
06362    insn_num together with the contents of its dfa_state.  */
06363 
06364 static htab_t bundle_state_table;
06365 
06366 /* The function returns hash of BUNDLE_STATE.  */
06367 
06368 static unsigned
06369 bundle_state_hash (const void *bundle_state)
06370 {
06371   const struct bundle_state *state = (struct bundle_state *) bundle_state;
06372   unsigned result, i;
06373 
06374   for (result = i = 0; i < dfa_state_size; i++)
06375     result += (((unsigned char *) state->dfa_state) [i]
06376          << ((i % CHAR_BIT) * 3 + CHAR_BIT));
06377   return result + state->insn_num;
06378 }
06379 
06380 /* The function returns nonzero if the bundle state keys are equal.  */
06381 
06382 static int
06383 bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
06384 {
06385   const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
06386   const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
06387 
06388   return (state1->insn_num == state2->insn_num
06389     && memcmp (state1->dfa_state, state2->dfa_state,
06390          dfa_state_size) == 0);
06391 }
06392 
06393 /* The function inserts the BUNDLE_STATE into the hash table.  The
06394    function returns nonzero if the bundle has been inserted into the
06395    table.  The table contains the best bundle state with given key.  */
06396 
06397 static int
06398 insert_bundle_state (struct bundle_state *bundle_state)
06399 {
06400   void **entry_ptr;
06401 
06402   entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
06403   if (*entry_ptr == NULL)
06404     {
06405       bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
06406       index_to_bundle_states [bundle_state->insn_num] = bundle_state;
06407       *entry_ptr = (void *) bundle_state;
06408       return TRUE;
06409     }
06410   else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
06411      || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
06412          && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
06413        > bundle_state->accumulated_insns_num
06414        || (((struct bundle_state *)
06415       *entry_ptr)->accumulated_insns_num
06416            == bundle_state->accumulated_insns_num
06417            && ((struct bundle_state *)
06418          *entry_ptr)->branch_deviation
06419            > bundle_state->branch_deviation))))
06420 
06421     {
06422       struct bundle_state temp;
06423 
06424       temp = *(struct bundle_state *) *entry_ptr;
06425       *(struct bundle_state *) *entry_ptr = *bundle_state;
06426       ((struct bundle_state *) *entry_ptr)->next = temp.next;
06427       *bundle_state = temp;
06428     }
06429   return FALSE;
06430 }
06431 
06432 /* Start work with the hash table.  */
06433 
06434 static void
06435 initiate_bundle_state_table (void)
06436 {
06437   bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
06438             (htab_del) 0);
06439 }
06440 
06441 /* Finish work with the hash table.  */
06442 
06443 static void
06444 finish_bundle_state_table (void)
06445 {
06446   htab_delete (bundle_state_table);
06447 }
06448 
06449 
06450 
06451 /* The following variable is a `nop' insn used to check bundle states
06452    with different numbers of inserted nops.  */
06453 
06454 static rtx ia64_nop;
06455 
06456 /* The following function tries to issue NOPS_NUM nops for the current
06457    state without advancing processor cycle.  If it failed, the
06458    function returns FALSE and frees the current state.  */
06459 
06460 static int
06461 try_issue_nops (struct bundle_state *curr_state, int nops_num)
06462 {
06463   int i;
06464 
06465   for (i = 0; i < nops_num; i++)
06466     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
06467       {
06468   free_bundle_state (curr_state);
06469   return FALSE;
06470       }
06471   return TRUE;
06472 }
06473 
06474 /* The following function tries to issue INSN for the current
06475    state without advancing processor cycle.  If it failed, the
06476    function returns FALSE and frees the current state.  */
06477 
06478 static int
06479 try_issue_insn (struct bundle_state *curr_state, rtx insn)
06480 {
06481   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
06482     {
06483       free_bundle_state (curr_state);
06484       return FALSE;
06485     }
06486   return TRUE;
06487 }
06488 
06489 /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
06490    starting with ORIGINATOR without advancing processor cycle.  If
06491    TRY_BUNDLE_END_P is TRUE, the function also/only (if
06492    ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
06493    If it was successful, the function creates new bundle state and
06494    insert into the hash table and into `index_to_bundle_states'.  */
06495 
06496 static void
06497 issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
06498          rtx insn, int try_bundle_end_p, int only_bundle_end_p)
06499 {
06500   struct bundle_state *curr_state;
06501 
06502   curr_state = get_free_bundle_state ();
06503   memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
06504   curr_state->insn = insn;
06505   curr_state->insn_num = originator->insn_num + 1;
06506   curr_state->cost = originator->cost;
06507   curr_state->originator = originator;
06508   curr_state->before_nops_num = before_nops_num;
06509   curr_state->after_nops_num = 0;
06510   curr_state->accumulated_insns_num
06511     = originator->accumulated_insns_num + before_nops_num;
06512   curr_state->branch_deviation = originator->branch_deviation;
06513   if (insn == NULL_RTX)
06514     abort ();
06515   else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
06516     {
06517       if (GET_MODE (insn) == TImode)
06518   abort ();
06519       if (!try_issue_nops (curr_state, before_nops_num))
06520   return;
06521       if (!try_issue_insn (curr_state, insn))
06522   return;
06523       memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
06524       if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
06525     && curr_state->accumulated_insns_num % 3 != 0)
06526   {
06527     free_bundle_state (curr_state);
06528     return;
06529   }
06530     }
06531   else if (GET_MODE (insn) != TImode)
06532     {
06533       if (!try_issue_nops (curr_state, before_nops_num))
06534   return;
06535       if (!try_issue_insn (curr_state, insn))
06536   return;
06537       curr_state->accumulated_insns_num++;
06538       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06539     || asm_noperands (PATTERN (insn)) >= 0)
06540   abort ();
06541       if (ia64_safe_type (insn) == TYPE_L)
06542   curr_state->accumulated_insns_num++;
06543     }
06544   else
06545     {
06546       /* If this is an insn that must be first in a group, then don't allow
06547    nops to be emitted before it.  Currently, alloc is the only such
06548    supported instruction.  */
06549       /* ??? The bundling automatons should handle this for us, but they do
06550    not yet have support for the first_insn attribute.  */
06551       if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
06552   {
06553     free_bundle_state (curr_state);
06554     return;
06555   }
06556 
06557       state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
06558       state_transition (curr_state->dfa_state, NULL);
06559       curr_state->cost++;
06560       if (!try_issue_nops (curr_state, before_nops_num))
06561   return;
06562       if (!try_issue_insn (curr_state, insn))
06563   return;
06564       curr_state->accumulated_insns_num++;
06565       if (GET_CODE (PATTERN (insn)) == ASM_INPUT
06566     || asm_noperands (PATTERN (insn)) >= 0)
06567   {
06568     /* Finish bundle containing asm insn.  */
06569     curr_state->after_nops_num
06570       = 3 - curr_state->accumulated_insns_num % 3;
06571     curr_state->accumulated_insns_num
06572       += 3 - curr_state->accumulated_insns_num % 3;
06573   }
06574       else if (ia64_safe_type (insn) == TYPE_L)
06575   curr_state->accumulated_insns_num++;
06576     }
06577   if (ia64_safe_type (insn) == TYPE_B)
06578     curr_state->branch_deviation
06579       += 2 - (curr_state->accumulated_insns_num - 1) % 3;
06580   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
06581     {
06582       if (!only_bundle_end_p && insert_bundle_state (curr_state))
06583   {
06584     state_t dfa_state;
06585     struct bundle_state *curr_state1;
06586     struct bundle_state *allocated_states_chain;
06587 
06588     curr_state1 = get_free_bundle_state ();
06589     dfa_state = curr_state1->dfa_state;
06590     allocated_states_chain = curr_state1->allocated_states_chain;
06591     *curr_state1 = *curr_state;
06592     curr_state1->dfa_state = dfa_state;
06593     curr_state1->allocated_states_chain = allocated_states_chain;
06594     memcpy (curr_state1->dfa_state, curr_state->dfa_state,
06595       dfa_state_size);
06596     curr_state = curr_state1;
06597   }
06598       if (!try_issue_nops (curr_state,
06599          3 - curr_state->accumulated_insns_num % 3))
06600   return;
06601       curr_state->after_nops_num
06602   = 3 - curr_state->accumulated_insns_num % 3;
06603       curr_state->accumulated_insns_num
06604   += 3 - curr_state->accumulated_insns_num % 3;
06605     }
06606   if (!insert_bundle_state (curr_state))
06607     free_bundle_state (curr_state);
06608   return;
06609 }
06610 
06611 /* The following function returns position in the two window bundle
06612    for given STATE.  */
06613 
06614 static int
06615 get_max_pos (state_t state)
06616 {
06617   if (cpu_unit_reservation_p (state, pos_6))
06618     return 6;
06619   else if (cpu_unit_reservation_p (state, pos_5))
06620     return 5;
06621   else if (cpu_unit_reservation_p (state, pos_4))
06622     return 4;
06623   else if (cpu_unit_reservation_p (state, pos_3))
06624     return 3;
06625   else if (cpu_unit_reservation_p (state, pos_2))
06626     return 2;
06627   else if (cpu_unit_reservation_p (state, pos_1))
06628     return 1;
06629   else
06630     return 0;
06631 }
06632 
06633 /* The function returns code of a possible template for given position
06634    and state.  The function should be called only with 2 values of
06635    position equal to 3 or 6.  */
06636 
06637 static int
06638 get_template (state_t state, int pos)
06639 {
06640   switch (pos)
06641     {
06642     case 3:
06643       if (cpu_unit_reservation_p (state, _0mii_))
06644   return 0;
06645       else if (cpu_unit_reservation_p (state, _0mmi_))
06646   return 1;
06647       else if (cpu_unit_reservation_p (state, _0mfi_))
06648   return 2;
06649       else if (cpu_unit_reservation_p (state, _0mmf_))
06650   return 3;
06651       else if (cpu_unit_reservation_p (state, _0bbb_))
06652   return 4;
06653       else if (cpu_unit_reservation_p (state, _0mbb_))
06654   return 5;
06655       else if (cpu_unit_reservation_p (state, _0mib_))
06656   return 6;
06657       else if (cpu_unit_reservation_p (state, _0mmb_))
06658   return 7;
06659       else if (cpu_unit_reservation_p (state, _0mfb_))
06660   return 8;
06661       else if (cpu_unit_reservation_p (state, _0mlx_))
06662   return 9;
06663       else
06664   abort ();
06665     case 6:
06666       if (cpu_unit_reservation_p (state, _1mii_))
06667   return 0;
06668       else if (cpu_unit_reservation_p (state, _1mmi_))
06669   return 1;
06670       else if (cpu_unit_reservation_p (state, _1mfi_))
06671   return 2;
06672       else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
06673   return 3;
06674       else if (cpu_unit_reservation_p (state, _1bbb_))
06675   return 4;
06676       else if (cpu_unit_reservation_p (state, _1mbb_))
06677   return 5;
06678       else if (cpu_unit_reservation_p (state, _1mib_))
06679   return 6;
06680       else if (cpu_unit_reservation_p (state, _1mmb_))
06681   return 7;
06682       else if (cpu_unit_reservation_p (state, _1mfb_))
06683   return 8;
06684       else if (cpu_unit_reservation_p (state, _1mlx_))
06685   return 9;
06686       else
06687   abort ();
06688     default:
06689       abort ();
06690     }
06691 }
06692 
06693 /* The following function returns an insn important for insn bundling
06694    followed by INSN and before TAIL.  */
06695 
06696 static rtx
06697 get_next_important_insn (rtx insn, rtx tail)
06698 {
06699   for (; insn && insn != tail; insn = NEXT_INSN (insn))
06700     if (INSN_P (insn)
06701   && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
06702   && GET_CODE (PATTERN (insn)) != USE
06703   && GET_CODE (PATTERN (insn)) != CLOBBER)
06704       return insn;
06705   return NULL_RTX;
06706 }
06707 
06708 /* The following function does insn bundling.  Bundling means
06709    inserting templates and nop insns to fit insn groups into permitted
06710    templates.  Instruction scheduling uses NDFA (non-deterministic
06711    finite automata) encoding information about the templates and the
06712    inserted nops.  Nondeterminism of the automata permits following
06713    all possible insn sequences very fast.
06714 
06715    Unfortunately it is not possible to get information about inserting
06716    nop insns and used templates from the automata states.  The
06717    automata only says that we can issue an insn possibly inserting
06718    some nops before it and using some template.  Therefore insn
06719    bundling in this function is implemented by using DFA
06720    (deterministic finite automata).  We follow all possible insn
06721    sequences by inserting 0-2 nops (that is what the NDFA describes for
06722    insn scheduling) before/after each insn being bundled.  We know the
06723    start of simulated processor cycle from insn scheduling (insn
06724    starting a new cycle has TImode).
06725 
06726    Simple implementation of insn bundling would create enormous
06727    number of possible insn sequences satisfying information about new
06728    cycle ticks taken from the insn scheduling.  To make the algorithm
06729    practical we use dynamic programming.  Each decision (about
06730    inserting nops and implicitly about previous decisions) is described
06731    by structure bundle_state (see above).  If we generate the same
06732    bundle state (key is automaton state after issuing the insns and
06733    nops for it), we reuse already generated one.  As consequence we
06734    reject some decisions which cannot improve the solution and
06735    reduce memory for the algorithm.
06736 
06737    When we reach the end of EBB (extended basic block), we choose the
06738    best sequence and then, moving back in EBB, insert templates for
06739    the best alternative.  The templates are taken from querying
06740    automaton state for each insn in chosen bundle states.
06741 
06742    So the algorithm makes two (forward and backward) passes through
06743    EBB.  There is an additional forward pass through EBB for Itanium1
06744    processor.  This pass inserts more nops to make dependency between
06745    a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
06746 
06747 static void
06748 bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
06749 {
06750   struct bundle_state *curr_state, *next_state, *best_state;
06751   rtx insn, next_insn;
06752   int insn_num;
06753   int i, bundle_end_p, only_bundle_end_p, asm_p;
06754   int pos = 0, max_pos, template0, template1;
06755   rtx b;
06756   rtx nop;
06757   enum attr_type type;
06758 
06759   insn_num = 0;
06760   /* Count insns in the EBB.  */
06761   for (insn = NEXT_INSN (prev_head_insn);
06762        insn && insn != tail;
06763        insn = NEXT_INSN (insn))
06764     if (INSN_P (insn))
06765       insn_num++;
06766   if (insn_num == 0)
06767     return;
06768   bundling_p = 1;
06769   dfa_clean_insn_cache ();
06770   initiate_bundle_state_table ();
06771   index_to_bundle_states = xmalloc ((insn_num + 2)
06772             * sizeof (struct bundle_state *));
06773   /* First (forward) pass -- generation of bundle states.  */
06774   curr_state = get_free_bundle_state ();
06775   curr_state->insn = NULL;
06776   curr_state->before_nops_num = 0;
06777   curr_state->after_nops_num = 0;
06778   curr_state->insn_num = 0;
06779   curr_state->cost = 0;
06780   curr_state->accumulated_insns_num = 0;
06781   curr_state->branch_deviation = 0;
06782   curr_state->next = NULL;
06783   curr_state->originator = NULL;
06784   state_reset (curr_state->dfa_state);
06785   index_to_bundle_states [0] = curr_state;
06786   insn_num = 0;
06787   /* Shift cycle mark if it is put on insn which could be ignored.  */
06788   for (insn = NEXT_INSN (prev_head_insn);
06789        insn != tail;
06790        insn = NEXT_INSN (insn))
06791     if (INSN_P (insn)
06792   && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
06793       || GET_CODE (PATTERN (insn)) == USE
06794       || GET_CODE (PATTERN (insn)) == CLOBBER)
06795   && GET_MODE (insn) == TImode)
06796       {
06797   PUT_MODE (insn, VOIDmode);
06798   for (next_insn = NEXT_INSN (insn);
06799        next_insn != tail;
06800        next_insn = NEXT_INSN (next_insn))
06801     if (INSN_P (next_insn)
06802         && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
06803         && GET_CODE (PATTERN (next_insn)) != USE
06804         && GET_CODE (PATTERN (next_insn)) != CLOBBER)
06805       {
06806         PUT_MODE (next_insn, TImode);
06807         break;
06808       }
06809       }
06810   /* Froward pass: generation of bundle states.  */
06811   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
06812        insn != NULL_RTX;
06813        insn = next_insn)
06814     {
06815       if (!INSN_P (insn)
06816     || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
06817     || GET_CODE (PATTERN (insn)) == USE
06818     || GET_CODE (PATTERN (insn)) == CLOBBER)
06819   abort ();
06820       type = ia64_safe_type (insn);
06821       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
06822       insn_num++;
06823       index_to_bundle_states [insn_num] = NULL;
06824       for (curr_state = index_to_bundle_states [insn_num - 1];
06825      curr_state != NULL;
06826      curr_state = next_state)
06827   {
06828     pos = curr_state->accumulated_insns_num % 3;
06829     next_state = curr_state->next;
06830     /* We must fill up the current bundle in order to start a
06831        subsequent asm insn in a new bundle.  Asm insn is always
06832        placed in a separate bundle.  */
06833     only_bundle_end_p
06834       = (next_insn != NULL_RTX
06835          && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
06836          && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
06837     /* We may fill up the current bundle if it is the cycle end
06838        without a group barrier.  */
06839     bundle_end_p
06840       = (only_bundle_end_p || next_insn == NULL_RTX
06841          || (GET_MODE (next_insn) == TImode
06842        && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
06843     if (type == TYPE_F || type == TYPE_B || type == TYPE_L
06844         || type == TYPE_S
06845         /* We need to insert 2 nops for cases like M_MII.  To
06846      guarantee issuing all insns on the same cycle for
06847      Itanium 1, we need to issue 2 nops after the first M
06848      insn (MnnMII where n is a nop insn).  */
06849         || ((type == TYPE_M || type == TYPE_A)
06850       && ia64_tune == PROCESSOR_ITANIUM
06851       && !bundle_end_p && pos == 1))
06852       issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
06853          only_bundle_end_p);
06854     issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
06855              only_bundle_end_p);
06856     issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
06857              only_bundle_end_p);
06858   }
06859       if (index_to_bundle_states [insn_num] == NULL)
06860   abort ();
06861       for (curr_state = index_to_bundle_states [insn_num];
06862      curr_state != NULL;
06863      curr_state = curr_state->next)
06864   if (verbose >= 2 && dump)
06865     {
06866       /* This structure is taken from generated code of the
06867          pipeline hazard recognizer (see file insn-attrtab.c).
06868          Please don't forget to change the structure if a new
06869          automaton is added to .md file.  */
06870       struct DFA_chip
06871       {
06872         unsigned short one_automaton_state;
06873         unsigned short oneb_automaton_state;
06874         unsigned short two_automaton_state;
06875         unsigned short twob_automaton_state;
06876       };
06877 
06878       fprintf
06879         (dump,
06880          "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
06881          curr_state->unique_num,
06882          (curr_state->originator == NULL
06883     ? -1 : curr_state->originator->unique_num),
06884          curr_state->cost,
06885          curr_state->before_nops_num, curr_state->after_nops_num,
06886          curr_state->accumulated_insns_num, curr_state->branch_deviation,
06887          (ia64_tune == PROCESSOR_ITANIUM
06888     ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
06889     : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
06890          INSN_UID (insn));
06891     }
06892     }
06893   if (index_to_bundle_states [insn_num] == NULL)
06894     /* We should find a solution because the 2nd insn scheduling has
06895        found one.  */
06896     abort ();
06897   /* Find a state corresponding to the best insn sequence.  */
06898   best_state = NULL;
06899   for (curr_state = index_to_bundle_states [insn_num];
06900        curr_state != NULL;
06901        curr_state = curr_state->next)
06902     /* We are just looking at the states with fully filled up last
06903        bundle.  The first we prefer insn sequences with minimal cost
06904        then with minimal inserted nops and finally with branch insns
06905        placed in the 3rd slots.  */
06906     if (curr_state->accumulated_insns_num % 3 == 0
06907   && (best_state == NULL || best_state->cost > curr_state->cost
06908       || (best_state->cost == curr_state->cost
06909     && (curr_state->accumulated_insns_num
06910         < best_state->accumulated_insns_num
06911         || (curr_state->accumulated_insns_num
06912       == best_state->accumulated_insns_num
06913       && curr_state->branch_deviation
06914       < best_state->branch_deviation)))))
06915       best_state = curr_state;
06916   /* Second (backward) pass: adding nops and templates.  */
06917   insn_num = best_state->before_nops_num;
06918   template0 = template1 = -1;
06919   for (curr_state = best_state;
06920        curr_state->originator != NULL;
06921        curr_state = curr_state->originator)
06922     {
06923       insn = curr_state->insn;
06924       asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
06925          || asm_noperands (PATTERN (insn)) >= 0);
06926       insn_num++;
06927       if (verbose >= 2 && dump)
06928   {
06929     struct DFA_chip
06930     {
06931       unsigned short one_automaton_state;
06932       unsigned short oneb_automaton_state;
06933       unsigned short two_automaton_state;
06934       unsigned short twob_automaton_state;
06935     };
06936 
06937     fprintf
06938       (dump,
06939        "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
06940        curr_state->unique_num,
06941        (curr_state->originator == NULL
06942         ? -1 : curr_state->originator->unique_num),
06943        curr_state->cost,
06944        curr_state->before_nops_num, curr_state->after_nops_num,
06945        curr_state->accumulated_insns_num, curr_state->branch_deviation,
06946        (ia64_tune == PROCESSOR_ITANIUM
06947         ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
06948         : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
06949        INSN_UID (insn));
06950   }
06951       /* Find the position in the current bundle window.  The window can
06952    contain at most two bundles.  Two bundle window means that
06953    the processor will make two bundle rotation.  */
06954       max_pos = get_max_pos (curr_state->dfa_state);
06955       if (max_pos == 6
06956     /* The following (negative template number) means that the
06957        processor did one bundle rotation.  */
06958     || (max_pos == 3 && template0 < 0))
06959   {
06960     /* We are at the end of the window -- find template(s) for
06961        its bundle(s).  */
06962     pos = max_pos;
06963     if (max_pos == 3)
06964       template0 = get_template (curr_state->dfa_state, 3);
06965     else
06966       {
06967         template1 = get_template (curr_state->dfa_state, 3);
06968         template0 = get_template (curr_state->dfa_state, 6);
06969       }
06970   }
06971       if (max_pos > 3 && template1 < 0)
06972   /* It may happen when we have the stop inside a bundle.  */
06973   {
06974     if (pos > 3)
06975       abort ();
06976     template1 = get_template (curr_state->dfa_state, 3);
06977     pos += 3;
06978   }
06979       if (!asm_p)
06980   /* Emit nops after the current insn.  */
06981   for (i = 0; i < curr_state->after_nops_num; i++)
06982     {
06983       nop = gen_nop ();
06984       emit_insn_after (nop, insn);
06985       pos--;
06986       if (pos < 0)
06987         abort ();
06988       if (pos % 3 == 0)
06989         {
06990     /* We are at the start of a bundle: emit the template
06991        (it should be defined).  */
06992     if (template0 < 0)
06993       abort ();
06994     b = gen_bundle_selector (GEN_INT (template0));
06995     ia64_emit_insn_before (b, nop);
06996     /* If we have two bundle window, we make one bundle
06997        rotation.  Otherwise template0 will be undefined
06998        (negative value).  */
06999     template0 = template1;
07000     template1 = -1;
07001         }
07002     }
07003       /* Move the position backward in the window.  Group barrier has
07004    no slot.  Asm insn takes all bundle.  */
07005       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
07006     && GET_CODE (PATTERN (insn)) != ASM_INPUT
07007     && asm_noperands (PATTERN (insn)) < 0)
07008   pos--;
07009       /* Long insn takes 2 slots.  */
07010       if (ia64_safe_type (insn) == TYPE_L)
07011   pos--;
07012       if (pos < 0)
07013   abort ();
07014       if (pos % 3 == 0
07015     && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
07016     && GET_CODE (PATTERN (insn)) != ASM_INPUT
07017     && asm_noperands (PATTERN (insn)) < 0)
07018   {
07019     /* The current insn is at the bundle start: emit the
07020        template.  */
07021     if (template0 < 0)
07022       abort ();
07023     b = gen_bundle_selector (GEN_INT (template0));
07024     ia64_emit_insn_before (b, insn);
07025     b = PREV_INSN (insn);
07026     insn = b;
07027     /* See comment above in analogous place for emitting nops
07028        after the insn.  */
07029     template0 = template1;
07030     template1 = -1;
07031   }
07032       /* Emit nops before the current insn.  */
07033       for (i = 0; i < curr_state->before_nops_num; i++)
07034   {
07035     nop = gen_nop ();
07036     ia64_emit_insn_before (nop, insn);
07037     nop = PREV_INSN (insn);
07038     insn = nop;
07039     pos--;
07040     if (pos < 0)
07041       abort ();
07042     if (pos % 3 == 0)
07043       {
07044         /* See comment above in analogous place for emitting nops
07045      after the insn.  */
07046         if (template0 < 0)
07047     abort ();
07048         b = gen_bundle_selector (GEN_INT (template0));
07049         ia64_emit_insn_before (b, insn);
07050         b = PREV_INSN (insn);
07051         insn = b;
07052         template0 = template1;
07053         template1 = -1;
07054       }
07055   }
07056     }
07057   if (ia64_tune == PROCESSOR_ITANIUM)
07058     /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
07059        Itanium1 has a strange design, if the distance between an insn
07060        and dependent MM-insn is less than 4 then we have a 6 additional
07061        cycles stall.  So we make the distance equal to 4 cycles if it
07062        is less.  */
07063     for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
07064    insn != NULL_RTX;
07065    insn = next_insn)
07066       {
07067   if (!INSN_P (insn)
07068       || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
07069       || GET_CODE (PATTERN (insn)) == USE
07070       || GET_CODE (PATTERN (insn)) == CLOBBER)
07071     abort ();
07072   next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
07073   if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
07074     /* We found a MM-insn which needs additional cycles.  */
07075     {
07076       rtx last;
07077       int i, j, n;
07078       int pred_stop_p;
07079 
07080       /* Now we are searching for a template of the bundle in
07081          which the MM-insn is placed and the position of the
07082          insn in the bundle (0, 1, 2).  We also check whether
07083          there is a stop before the insn.  */
07084       last = prev_active_insn (insn);
07085       pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
07086       if (pred_stop_p)
07087         last = prev_active_insn (last);
07088       n = 0;
07089       for (;; last = prev_active_insn (last))
07090         if (recog_memoized (last) == CODE_FOR_bundle_selector)
07091     {
07092       template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
07093       if (template0 == 9)
07094         /* The insn is in MLX bundle.  Change the template
07095            to MFI because we will add nops before the
07096            insn.  It simplifies subsequent code a lot.  */
07097         PATTERN (last)
07098           = gen_bundle_selector (const2_rtx); /* -> MFI */
07099       break;
07100     }
07101         else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
07102            && (ia64_safe_itanium_class (last)
07103          != ITANIUM_CLASS_IGNORE))
07104     n++;
07105       /* Some check of correctness: the stop is not at the
07106          bundle start, there are no more than 3 insns in the bundle,
07107          and the MM-insn is not at the start of bundle with
07108          template MLX.  */
07109       if ((pred_stop_p && n == 0) || n > 2
07110     || (template0 == 9 && n != 0))
07111         abort ();
07112       /* Pad the bundle with nops emitted before the insn.  */
07113       for (j = 3 - n; j > 0; j --)
07114         ia64_emit_insn_before (gen_nop (), insn);
07115       /* It takes into account that we will add more N nops
07116          before the insn later -- please see code below.  */
07117       add_cycles [INSN_UID (insn)]--;
07118       if (!pred_stop_p || add_cycles [INSN_UID (insn)])
07119         ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
07120              insn);
07121       if (pred_stop_p)
07122         add_cycles [INSN_UID (insn)]--;
07123       for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
07124         {
07125     /* Insert "MII;" template.  */
07126     ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
07127                insn);
07128     ia64_emit_insn_before (gen_nop (), insn);
07129     ia64_emit_insn_before (gen_nop (), insn);
07130     if (i > 1)
07131       {
07132         /* To decrease code size, we use "MI;I;"
07133            template.  */
07134         ia64_emit_insn_before
07135           (gen_insn_group_barrier (GEN_INT (3)), insn);
07136         i--;
07137       }
07138     ia64_emit_insn_before (gen_nop ()