• Main Page
  • Modules
  • Data Types
  • Files

osprey-gcc/gcc/config/i386/i386.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2006. QLogic Corporation. All Rights Reserved.
00003  */
00004 
00005 /* Subroutines used for code generation on IA-32.
00006    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
00007    2002, 2003, 2004, 2005 Free Software Foundation, Inc.
00008 
00009 This file is part of GCC.
00010 
00011 GCC is free software; you can redistribute it and/or modify
00012 it under the terms of the GNU General Public License as published by
00013 the Free Software Foundation; either version 2, or (at your option)
00014 any later version.
00015 
00016 GCC is distributed in the hope that it will be useful,
00017 but WITHOUT ANY WARRANTY; without even the implied warranty of
00018 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019 GNU General Public License for more details.
00020 
00021 You should have received a copy of the GNU General Public License
00022 along with GCC; see the file COPYING.  If not, write to
00023 the Free Software Foundation, 59 Temple Place - Suite 330,
00024 Boston, MA 02111-1307, USA.  */
00025 
00026 #include "config.h"
00027 #include "system.h"
00028 #include "coretypes.h"
00029 #include "tm.h"
00030 #include "rtl.h"
00031 #include "tree.h"
00032 #include "tm_p.h"
00033 #include "regs.h"
00034 #include "hard-reg-set.h"
00035 #include "real.h"
00036 #include "insn-config.h"
00037 #include "conditions.h"
00038 #include "output.h"
00039 #include "insn-codes.h"
00040 #include "insn-attr.h"
00041 #include "flags.h"
00042 #include "except.h"
00043 #include "function.h"
00044 #include "recog.h"
00045 #include "expr.h"
00046 #include "optabs.h"
00047 #include "toplev.h"
00048 #include "basic-block.h"
00049 #include "ggc.h"
00050 #include "target.h"
00051 #include "target-def.h"
00052 #include "langhooks.h"
00053 #include "cgraph.h"
00054 #include "tree-gimple.h"
00055 
00056 #ifndef CHECK_STACK_LIMIT
00057 #define CHECK_STACK_LIMIT (-1)
00058 #endif
00059 
00060 /* Return the index of MODE in the multiply and divide cost tables: 0..3 for QImode, HImode, SImode and DImode, 4 for any other mode.  */
00061 #define MODE_INDEX(mode)          \
00062   ((mode) == QImode ? 0           \
00063    : (mode) == HImode ? 1         \
00064    : (mode) == SImode ? 2         \
00065    : (mode) == DImode ? 3         \
00066    : 4)
00067 
00068 /* Processor costs (relative to an add) */
00069 static const
00070 struct processor_costs size_cost = {  /* costs for tuning for size */
00071   2,          /* cost of an add instruction */
00072   3,          /* cost of a lea instruction */
00073   2,          /* variable shift costs */
00074   3,          /* constant shift costs */
00075   {3, 3, 3, 3, 5},      /* cost of starting a multiply */
00076   0,          /* cost of multiply per each bit set */
00077   {3, 3, 3, 3, 5},      /* cost of a divide/mod */
00078   3,          /* cost of movsx */
00079   3,          /* cost of movzx */
00080   0,          /* "large" insn */
00081   2,          /* MOVE_RATIO */
00082   2,          /* cost for loading QImode using movzbl */
00083   {2, 2, 2},        /* cost of loading integer registers
00084              in QImode, HImode and SImode.
00085              Relative to reg-reg move (2).  */
00086   {2, 2, 2},        /* cost of storing integer registers */
00087   2,          /* cost of reg,reg fld/fst */
00088   {2, 2, 2},        /* cost of loading fp registers
00089              in SFmode, DFmode and XFmode */
00090   {2, 2, 2},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00091   3,          /* cost of moving MMX register */
00092   {3, 3},       /* cost of loading MMX registers
00093              in SImode and DImode */
00094   {3, 3},       /* cost of storing MMX registers
00095              in SImode and DImode */
00096   3,          /* cost of moving SSE register */
00097   {3, 3, 3},        /* cost of loading SSE registers
00098              in SImode, DImode and TImode */
00099   {3, 3, 3},        /* cost of storing SSE registers
00100              in SImode, DImode and TImode */
00101   3,          /* MMX or SSE register to integer */
00102   0,          /* size of prefetch block */
00103   0,          /* number of parallel prefetches */
00104   1,          /* Branch cost */
00105   2,          /* cost of FADD and FSUB insns.  */
00106   2,          /* cost of FMUL instruction.  */
00107   2,          /* cost of FDIV instruction.  */
00108   2,          /* cost of FABS instruction.  */
00109   2,          /* cost of FCHS instruction.  */
00110   2,          /* cost of FSQRT instruction.  */
00111 };
00112 
00113 /* Processor costs (relative to an add) */
00114 static const
00115 struct processor_costs i386_cost = {  /* 386 specific costs */
00116   1,          /* cost of an add instruction */
00117   1,          /* cost of a lea instruction */
00118   3,          /* variable shift costs */
00119   2,          /* constant shift costs */
00120   {6, 6, 6, 6, 6},      /* cost of starting a multiply */
00121   1,          /* cost of multiply per each bit set */
00122   {23, 23, 23, 23, 23},     /* cost of a divide/mod */
00123   3,          /* cost of movsx */
00124   2,          /* cost of movzx */
00125   15,         /* "large" insn */
00126   3,          /* MOVE_RATIO */
00127   4,          /* cost for loading QImode using movzbl */
00128   {2, 4, 2},        /* cost of loading integer registers
00129              in QImode, HImode and SImode.
00130              Relative to reg-reg move (2).  */
00131   {2, 4, 2},        /* cost of storing integer registers */
00132   2,          /* cost of reg,reg fld/fst */
00133   {8, 8, 8},        /* cost of loading fp registers
00134              in SFmode, DFmode and XFmode */
00135   {8, 8, 8},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00136   2,          /* cost of moving MMX register */
00137   {4, 8},       /* cost of loading MMX registers
00138              in SImode and DImode */
00139   {4, 8},       /* cost of storing MMX registers
00140              in SImode and DImode */
00141   2,          /* cost of moving SSE register */
00142   {4, 8, 16},       /* cost of loading SSE registers
00143              in SImode, DImode and TImode */
00144   {4, 8, 16},       /* cost of storing SSE registers
00145              in SImode, DImode and TImode */
00146   3,          /* MMX or SSE register to integer */
00147   0,          /* size of prefetch block */
00148   0,          /* number of parallel prefetches */
00149   1,          /* Branch cost */
00150   23,         /* cost of FADD and FSUB insns.  */
00151   27,         /* cost of FMUL instruction.  */
00152   88,         /* cost of FDIV instruction.  */
00153   22,         /* cost of FABS instruction.  */
00154   24,         /* cost of FCHS instruction.  */
00155   122,          /* cost of FSQRT instruction.  */
00156 };
00157 
00158 static const
00159 struct processor_costs i486_cost = {  /* 486 specific costs */
00160   1,          /* cost of an add instruction */
00161   1,          /* cost of a lea instruction */
00162   3,          /* variable shift costs */
00163   2,          /* constant shift costs */
00164   {12, 12, 12, 12, 12},     /* cost of starting a multiply */
00165   1,          /* cost of multiply per each bit set */
00166   {40, 40, 40, 40, 40},     /* cost of a divide/mod */
00167   3,          /* cost of movsx */
00168   2,          /* cost of movzx */
00169   15,         /* "large" insn */
00170   3,          /* MOVE_RATIO */
00171   4,          /* cost for loading QImode using movzbl */
00172   {2, 4, 2},        /* cost of loading integer registers
00173              in QImode, HImode and SImode.
00174              Relative to reg-reg move (2).  */
00175   {2, 4, 2},        /* cost of storing integer registers */
00176   2,          /* cost of reg,reg fld/fst */
00177   {8, 8, 8},        /* cost of loading fp registers
00178              in SFmode, DFmode and XFmode */
00179   {8, 8, 8},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00180   2,          /* cost of moving MMX register */
00181   {4, 8},       /* cost of loading MMX registers
00182              in SImode and DImode */
00183   {4, 8},       /* cost of storing MMX registers
00184              in SImode and DImode */
00185   2,          /* cost of moving SSE register */
00186   {4, 8, 16},       /* cost of loading SSE registers
00187              in SImode, DImode and TImode */
00188   {4, 8, 16},       /* cost of storing SSE registers
00189              in SImode, DImode and TImode */
00190   3,          /* MMX or SSE register to integer */
00191   0,          /* size of prefetch block */
00192   0,          /* number of parallel prefetches */
00193   1,          /* Branch cost */
00194   8,          /* cost of FADD and FSUB insns.  */
00195   16,         /* cost of FMUL instruction.  */
00196   73,         /* cost of FDIV instruction.  */
00197   3,          /* cost of FABS instruction.  */
00198   3,          /* cost of FCHS instruction.  */
00199   83,         /* cost of FSQRT instruction.  */
00200 };
00201 
00202 static const
00203 struct processor_costs pentium_cost = {  /* Pentium specific costs */
00204   1,          /* cost of an add instruction */
00205   1,          /* cost of a lea instruction */
00206   4,          /* variable shift costs */
00207   1,          /* constant shift costs */
00208   {11, 11, 11, 11, 11},     /* cost of starting a multiply */
00209   0,          /* cost of multiply per each bit set */
00210   {25, 25, 25, 25, 25},     /* cost of a divide/mod */
00211   3,          /* cost of movsx */
00212   2,          /* cost of movzx */
00213   8,          /* "large" insn */
00214   6,          /* MOVE_RATIO */
00215   6,          /* cost for loading QImode using movzbl */
00216   {2, 4, 2},        /* cost of loading integer registers
00217              in QImode, HImode and SImode.
00218              Relative to reg-reg move (2).  */
00219   {2, 4, 2},        /* cost of storing integer registers */
00220   2,          /* cost of reg,reg fld/fst */
00221   {2, 2, 6},        /* cost of loading fp registers
00222              in SFmode, DFmode and XFmode */
00223   {4, 4, 6},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00224   8,          /* cost of moving MMX register */
00225   {8, 8},       /* cost of loading MMX registers
00226              in SImode and DImode */
00227   {8, 8},       /* cost of storing MMX registers
00228              in SImode and DImode */
00229   2,          /* cost of moving SSE register */
00230   {4, 8, 16},       /* cost of loading SSE registers
00231              in SImode, DImode and TImode */
00232   {4, 8, 16},       /* cost of storing SSE registers
00233              in SImode, DImode and TImode */
00234   3,          /* MMX or SSE register to integer */
00235   0,          /* size of prefetch block */
00236   0,          /* number of parallel prefetches */
00237   2,          /* Branch cost */
00238   3,          /* cost of FADD and FSUB insns.  */
00239   3,          /* cost of FMUL instruction.  */
00240   39,         /* cost of FDIV instruction.  */
00241   1,          /* cost of FABS instruction.  */
00242   1,          /* cost of FCHS instruction.  */
00243   70,         /* cost of FSQRT instruction.  */
00244 };
00245 
00246 static const
00247 struct processor_costs pentiumpro_cost = {  /* Pentium Pro specific costs */
00248   1,          /* cost of an add instruction */
00249   1,          /* cost of a lea instruction */
00250   1,          /* variable shift costs */
00251   1,          /* constant shift costs */
00252   {4, 4, 4, 4, 4},      /* cost of starting a multiply */
00253   0,          /* cost of multiply per each bit set */
00254   {17, 17, 17, 17, 17},     /* cost of a divide/mod */
00255   1,          /* cost of movsx */
00256   1,          /* cost of movzx */
00257   8,          /* "large" insn */
00258   6,          /* MOVE_RATIO */
00259   2,          /* cost for loading QImode using movzbl */
00260   {4, 4, 4},        /* cost of loading integer registers
00261              in QImode, HImode and SImode.
00262              Relative to reg-reg move (2).  */
00263   {2, 2, 2},        /* cost of storing integer registers */
00264   2,          /* cost of reg,reg fld/fst */
00265   {2, 2, 6},        /* cost of loading fp registers
00266              in SFmode, DFmode and XFmode */
00267   {4, 4, 6},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00268   2,          /* cost of moving MMX register */
00269   {2, 2},       /* cost of loading MMX registers
00270              in SImode and DImode */
00271   {2, 2},       /* cost of storing MMX registers
00272              in SImode and DImode */
00273   2,          /* cost of moving SSE register */
00274   {2, 2, 8},        /* cost of loading SSE registers
00275              in SImode, DImode and TImode */
00276   {2, 2, 8},        /* cost of storing SSE registers
00277              in SImode, DImode and TImode */
00278   3,          /* MMX or SSE register to integer */
00279   32,         /* size of prefetch block */
00280   6,          /* number of parallel prefetches */
00281   2,          /* Branch cost */
00282   3,          /* cost of FADD and FSUB insns.  */
00283   5,          /* cost of FMUL instruction.  */
00284   56,         /* cost of FDIV instruction.  */
00285   2,          /* cost of FABS instruction.  */
00286   2,          /* cost of FCHS instruction.  */
00287   56,         /* cost of FSQRT instruction.  */
00288 };
00289 
00290 static const
00291 struct processor_costs k6_cost = {  /* K6 specific costs */
00292   1,          /* cost of an add instruction */
00293   2,          /* cost of a lea instruction */
00294   1,          /* variable shift costs */
00295   1,          /* constant shift costs */
00296   {3, 3, 3, 3, 3},      /* cost of starting a multiply */
00297   0,          /* cost of multiply per each bit set */
00298   {18, 18, 18, 18, 18},     /* cost of a divide/mod */
00299   2,          /* cost of movsx */
00300   2,          /* cost of movzx */
00301   8,          /* "large" insn */
00302   4,          /* MOVE_RATIO */
00303   3,          /* cost for loading QImode using movzbl */
00304   {4, 5, 4},        /* cost of loading integer registers
00305              in QImode, HImode and SImode.
00306              Relative to reg-reg move (2).  */
00307   {2, 3, 2},        /* cost of storing integer registers */
00308   4,          /* cost of reg,reg fld/fst */
00309   {6, 6, 6},        /* cost of loading fp registers
00310              in SFmode, DFmode and XFmode */
00311   {4, 4, 4},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00312   2,          /* cost of moving MMX register */
00313   {2, 2},       /* cost of loading MMX registers
00314              in SImode and DImode */
00315   {2, 2},       /* cost of storing MMX registers
00316              in SImode and DImode */
00317   2,          /* cost of moving SSE register */
00318   {2, 2, 8},        /* cost of loading SSE registers
00319              in SImode, DImode and TImode */
00320   {2, 2, 8},        /* cost of storing SSE registers
00321              in SImode, DImode and TImode */
00322   6,          /* MMX or SSE register to integer */
00323   32,         /* size of prefetch block */
00324   1,          /* number of parallel prefetches */
00325   1,          /* Branch cost */
00326   2,          /* cost of FADD and FSUB insns.  */
00327   2,          /* cost of FMUL instruction.  */
00328   56,         /* cost of FDIV instruction.  */
00329   2,          /* cost of FABS instruction.  */
00330   2,          /* cost of FCHS instruction.  */
00331   56,         /* cost of FSQRT instruction.  */
00332 };
00333 
00334 static const
00335 struct processor_costs athlon_cost = {  /* Athlon specific costs */
00336   1,          /* cost of an add instruction */
00337   2,          /* cost of a lea instruction */
00338   1,          /* variable shift costs */
00339   1,          /* constant shift costs */
00340   {5, 5, 5, 5, 5},      /* cost of starting a multiply */
00341   0,          /* cost of multiply per each bit set */
00342   {18, 26, 42, 74, 74},     /* cost of a divide/mod */
00343   1,          /* cost of movsx */
00344   1,          /* cost of movzx */
00345   8,          /* "large" insn */
00346   9,          /* MOVE_RATIO */
00347   4,          /* cost for loading QImode using movzbl */
00348   {3, 4, 3},        /* cost of loading integer registers
00349              in QImode, HImode and SImode.
00350              Relative to reg-reg move (2).  */
00351   {3, 4, 3},        /* cost of storing integer registers */
00352   4,          /* cost of reg,reg fld/fst */
00353   {4, 4, 12},       /* cost of loading fp registers
00354              in SFmode, DFmode and XFmode */
00355   {6, 6, 8},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00356   2,          /* cost of moving MMX register */
00357   {4, 4},       /* cost of loading MMX registers
00358              in SImode and DImode */
00359   {4, 4},       /* cost of storing MMX registers
00360              in SImode and DImode */
00361   2,          /* cost of moving SSE register */
00362   {4, 4, 6},        /* cost of loading SSE registers
00363              in SImode, DImode and TImode */
00364   {4, 4, 5},        /* cost of storing SSE registers
00365              in SImode, DImode and TImode */
00366   5,          /* MMX or SSE register to integer */
00367   64,         /* size of prefetch block */
00368   6,          /* number of parallel prefetches */
00369   5,          /* Branch cost */
00370   4,          /* cost of FADD and FSUB insns.  */
00371   4,          /* cost of FMUL instruction.  */
00372   24,         /* cost of FDIV instruction.  */
00373   2,          /* cost of FABS instruction.  */
00374   2,          /* cost of FCHS instruction.  */
00375   35,         /* cost of FSQRT instruction.  */
00376 };
00377 
00378 static const
00379 struct processor_costs k8_cost = {  /* K8 specific costs */
00380   1,          /* cost of an add instruction */
00381   2,          /* cost of a lea instruction */
00382   1,          /* variable shift costs */
00383   1,          /* constant shift costs */
00384   {3, 4, 3, 4, 5},      /* cost of starting a multiply */
00385   0,          /* cost of multiply per each bit set */
00386   {18, 26, 42, 74, 74},     /* cost of a divide/mod */
00387   1,          /* cost of movsx */
00388   1,          /* cost of movzx */
00389   8,          /* "large" insn */
00390   9,          /* MOVE_RATIO */
00391   4,          /* cost for loading QImode using movzbl */
00392   {3, 4, 3},        /* cost of loading integer registers
00393              in QImode, HImode and SImode.
00394              Relative to reg-reg move (2).  */
00395   {3, 4, 3},        /* cost of storing integer registers */
00396   4,          /* cost of reg,reg fld/fst */
00397   {4, 4, 12},       /* cost of loading fp registers
00398              in SFmode, DFmode and XFmode */
00399   {6, 6, 8},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00400   2,          /* cost of moving MMX register */
00401   {3, 3},       /* cost of loading MMX registers
00402              in SImode and DImode */
00403   {4, 4},       /* cost of storing MMX registers
00404              in SImode and DImode */
00405   2,          /* cost of moving SSE register */
00406   {4, 3, 6},        /* cost of loading SSE registers
00407              in SImode, DImode and TImode */
00408   {4, 4, 5},        /* cost of storing SSE registers
00409              in SImode, DImode and TImode */
00410   5,          /* MMX or SSE register to integer */
00411   64,         /* size of prefetch block */
00412   6,          /* number of parallel prefetches */
00413   5,          /* Branch cost */
00414   4,          /* cost of FADD and FSUB insns.  */
00415   4,          /* cost of FMUL instruction.  */
00416   19,         /* cost of FDIV instruction.  */
00417   2,          /* cost of FABS instruction.  */
00418   2,          /* cost of FCHS instruction.  */
00419   35,         /* cost of FSQRT instruction.  */
00420 };
00421 
00422 static const
00423 struct processor_costs pentium4_cost = {  /* Pentium 4 specific costs */
00424   1,          /* cost of an add instruction */
00425   3,          /* cost of a lea instruction */
00426   4,          /* variable shift costs */
00427   4,          /* constant shift costs */
00428   {15, 15, 15, 15, 15},     /* cost of starting a multiply */
00429   0,          /* cost of multiply per each bit set */
00430   {56, 56, 56, 56, 56},     /* cost of a divide/mod */
00431   1,          /* cost of movsx */
00432   1,          /* cost of movzx */
00433   16,         /* "large" insn */
00434   6,          /* MOVE_RATIO */
00435   2,          /* cost for loading QImode using movzbl */
00436   {4, 5, 4},        /* cost of loading integer registers
00437              in QImode, HImode and SImode.
00438              Relative to reg-reg move (2).  */
00439   {2, 3, 2},        /* cost of storing integer registers */
00440   2,          /* cost of reg,reg fld/fst */
00441   {2, 2, 6},        /* cost of loading fp registers
00442              in SFmode, DFmode and XFmode */
00443   {4, 4, 6},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00444   2,          /* cost of moving MMX register */
00445   {2, 2},       /* cost of loading MMX registers
00446              in SImode and DImode */
00447   {2, 2},       /* cost of storing MMX registers
00448              in SImode and DImode */
00449   12,         /* cost of moving SSE register */
00450   {12, 12, 12},       /* cost of loading SSE registers
00451              in SImode, DImode and TImode */
00452   {2, 2, 8},        /* cost of storing SSE registers
00453              in SImode, DImode and TImode */
00454   10,         /* MMX or SSE register to integer */
00455   64,         /* size of prefetch block */
00456   6,          /* number of parallel prefetches */
00457   2,          /* Branch cost */
00458   5,          /* cost of FADD and FSUB insns.  */
00459   7,          /* cost of FMUL instruction.  */
00460   43,         /* cost of FDIV instruction.  */
00461   2,          /* cost of FABS instruction.  */
00462   2,          /* cost of FCHS instruction.  */
00463   43,         /* cost of FSQRT instruction.  */
00464 };
00465 
00466 static const
00467 struct processor_costs nocona_cost = {  /* Nocona specific costs */
00468   1,          /* cost of an add instruction */
00469   1,          /* cost of a lea instruction */
00470   1,          /* variable shift costs */
00471   1,          /* constant shift costs */
00472   {10, 10, 10, 10, 10},     /* cost of starting a multiply */
00473   0,          /* cost of multiply per each bit set */
00474   {66, 66, 66, 66, 66},     /* cost of a divide/mod */
00475   1,          /* cost of movsx */
00476   1,          /* cost of movzx */
00477   16,         /* "large" insn */
00478   9,          /* MOVE_RATIO */
00479   4,          /* cost for loading QImode using movzbl */
00480   {4, 4, 4},        /* cost of loading integer registers
00481              in QImode, HImode and SImode.
00482              Relative to reg-reg move (2).  */
00483   {4, 4, 4},        /* cost of storing integer registers */
00484   3,          /* cost of reg,reg fld/fst */
00485   {12, 12, 12},       /* cost of loading fp registers
00486              in SFmode, DFmode and XFmode */
00487   {4, 4, 4},        /* cost of storing fp registers in SFmode, DFmode and XFmode */
00488   6,          /* cost of moving MMX register */
00489   {12, 12},       /* cost of loading MMX registers
00490              in SImode and DImode */
00491   {12, 12},       /* cost of storing MMX registers
00492              in SImode and DImode */
00493   6,          /* cost of moving SSE register */
00494   {12, 12, 12},       /* cost of loading SSE registers
00495              in SImode, DImode and TImode */
00496   {12, 12, 12},       /* cost of storing SSE registers
00497              in SImode, DImode and TImode */
00498   8,          /* MMX or SSE register to integer */
00499   128,          /* size of prefetch block */
00500   8,          /* number of parallel prefetches */
00501   1,          /* Branch cost */
00502   6,          /* cost of FADD and FSUB insns.  */
00503   8,          /* cost of FMUL instruction.  */
00504   40,         /* cost of FDIV instruction.  */
00505   3,          /* cost of FABS instruction.  */
00506   3,          /* cost of FCHS instruction.  */
00507   44,         /* cost of FSQRT instruction.  */
00508 };
00509 
00510 const struct processor_costs *ix86_cost = &pentium_cost;  /* Initially points at the Pentium cost table.  */
00511 
00512 /* Processor feature/optimization bitmasks.  Each m_* mask has only the bit numbered by the corresponding PROCESSOR_* value set.  */
00513 #define m_386 (1<<PROCESSOR_I386)
00514 #define m_486 (1<<PROCESSOR_I486)
00515 #define m_PENT (1<<PROCESSOR_PENTIUM)
00516 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
00517 #define m_K6  (1<<PROCESSOR_K6)
00518 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
00519 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
00520 #define m_K8  (1<<PROCESSOR_K8)
00521 #define m_ATHLON_K8  (m_K8 | m_ATHLON)  /* either Athlon or K8 */
00522 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
00523 
00524 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
00525 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00526 const int x86_zero_extend_with_and = m_486 | m_PENT;
00527 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
00528 const int x86_double_with_add = ~m_386;
00529 const int x86_use_bit_test = m_386;
00530 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
00531 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00532 const int x86_3dnow_a = m_ATHLON_K8;
00533 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00534 /* Branch hints were put in P4 based on simulation results.  But
00535    after P4 was made, no performance benefit was observed with
00536    branch hints.  They also increase the code size.  As a result,
00537    icc never generates branch hints.  */
00538 const int x86_branch_hints = 0;
00539 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
00540 const int x86_partial_reg_stall = m_PPRO;
00541 const int x86_use_loop = m_K6;
00542 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
00543 const int x86_use_mov0 = m_K6;
00544 const int x86_use_cltd = ~(m_PENT | m_K6);
00545 const int x86_read_modify_write = ~m_PENT;
00546 const int x86_read_modify = ~(m_PENT | m_PPRO);
00547 const int x86_split_long_moves = m_PPRO;
00548 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
00549 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
00550 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
00551 const int x86_qimode_math = ~(0);
00552 const int x86_promote_qi_regs = 0;
00553 const int x86_himode_math = ~(m_PPRO);
00554 const int x86_promote_hi_regs = m_PPRO;
00555 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
00556 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
00557 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
00558 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
00559 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
00560 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00561 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00562 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
00563 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
00564 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
00565 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
00566 const int x86_shift1 = ~m_486;
00567 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00568 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
00569 /* Set for machines where the type and dependencies are resolved on SSE
00570    register parts instead of whole registers, so we may maintain just
00571    the lower part of scalar values in proper format, leaving the upper
00572    part undefined.  */
00573 const int x86_sse_split_regs = m_ATHLON_K8;
00574 const int x86_sse_typeless_stores = m_ATHLON_K8;
00575 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
00576 const int x86_use_ffreep = m_ATHLON_K8;
00577 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
00578 
00579 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
00580    integer data in xmm registers, which results in pretty abysmal code.  */
00581 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
00582 
00583 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
00584 /* Some CPU cores are not able to predict more than 4 branch instructions in
00585    a 16 byte window.  */
00586 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
00587 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
00588 const int x86_use_bt = m_ATHLON_K8;
00589 
00590 /* If the average insn count for a single function invocation is
00591    lower than this constant, emit fast (but longer) prologue and
00592    epilogue code.  */
00593 #define FAST_PROLOGUE_INSN_COUNT 20
00594 
00595 /* Names of the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
00596 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
00597 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
00598 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
00599 
00600 /* Array of the smallest class containing reg number REGNO, indexed by
00601    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
00602 
00603 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
00604 {
00605   /* ax, dx, cx, bx */
00606   AREG, DREG, CREG, BREG,
00607   /* si, di, bp, sp */
00608   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
00609   /* FP registers */
00610   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
00611   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
00612   /* arg pointer */
00613   NON_Q_REGS,
00614   /* flags, fpsr, dirflag, frame */
00615   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
00616   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, /* SSE registers */
00617   SSE_REGS, SSE_REGS,
00618   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, /* MMX registers */
00619   MMX_REGS, MMX_REGS,
00620   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, /* extended integer registers */
00621   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
00622   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, /* extended SSE registers */
00623   SSE_REGS, SSE_REGS,
00624 };
00625 
00626 /* The "default" register map used in 32bit mode, indexed by gcc register number.  */
00627 
00628 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
00629 {
00630   0, 2, 1, 3, 6, 7, 4, 5,   /* general regs */
00631   12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
00632   -1, -1, -1, -1, -1,     /* arg, flags, fpsr, dir, frame */
00633   21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
00634   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
00635   -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
00636   -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
00637 };
00638 
00639 static int const x86_64_int_parameter_registers[6] =  /* gcc register numbers, in the order listed */
00640 {
00641   5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
00642   FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
00643 };
00644 
00645 static int const x86_64_int_return_registers[4] =  /* gcc register numbers, in the order listed */
00646 {
00647   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
00648 };
00649 
00650 /* The "default" register map used in 64bit mode, indexed by gcc register number.  */
00651 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
00652 {
00653   0, 1, 2, 3, 4, 5, 6, 7,   /* general regs */
00654   33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
00655   -1, -1, -1, -1, -1,     /* arg, flags, fpsr, dir, frame */
00656   17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
00657   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
00658   8,9,10,11,12,13,14,15,    /* extended integer registers */
00659   25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
00660 };
00661 
00662 /* Define the register numbers to be used in Dwarf debugging information.
00663    The SVR4 reference port C compiler uses the following register numbers
00664    in its Dwarf output code:
00665   0 for %eax (gcc regno = 0)
00666   1 for %ecx (gcc regno = 2)
00667   2 for %edx (gcc regno = 1)
00668   3 for %ebx (gcc regno = 3)
00669   4 for %esp (gcc regno = 7)
00670   5 for %ebp (gcc regno = 6)
00671   6 for %esi (gcc regno = 4)
00672   7 for %edi (gcc regno = 5)
00673    The following three DWARF register numbers are never generated by
00674    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
00675    believes these numbers have these meanings.
00676   8  for %eip    (no gcc equivalent)
00677   9  for %eflags (gcc regno = 17)
00678   10 for %trapno (no gcc equivalent)
00679    It is not at all clear how we should number the FP stack registers
00680    for the x86 architecture.  If the version of SDB on x86/svr4 were
00681    a bit less brain dead with respect to floating-point then we would
00682    have a precedent to follow with respect to DWARF register numbers
00683    for x86 FP registers, but the SDB on x86/svr4 is so completely
00684    broken with respect to FP registers that it is hardly worth thinking
00685    of it as something to strive for compatibility with.
00686    The version of x86/svr4 SDB I have at the moment does (partially)
00687    seem to believe that DWARF register number 11 is associated with
00688    the x86 register %st(0), but that's about all.  Higher DWARF
00689    register numbers don't seem to be associated with anything in
00690    particular, and even for DWARF regno 11, SDB only seems to under-
00691    stand that it should say that a variable lives in %st(0) (when
00692    asked via an `=' command) if we said it was in DWARF regno 11,
00693    but SDB still prints garbage when asked for the value of the
00694    variable in question (via a `/' command).
00695    (Also note that the labels SDB prints for various FP stack regs
00696    when doing an `x' command are all wrong.)
00697    Note that these problems generally don't affect the native SVR4
00698    C compiler because it doesn't allow the use of -O with -g and
00699    because when it is *not* optimizing, it allocates a memory
00700    location for each floating-point variable, and the memory
00701    location is what gets described in the DWARF AT_location
00702    attribute for the variable in question.
00703    Regardless of the severe mental illness of the x86/svr4 SDB, we
00704    do something sensible here and we use the following DWARF
00705    register numbers.  Note that these are all stack-top-relative
00706    numbers.
00707   11 for %st(0) (gcc regno = 8)
00708   12 for %st(1) (gcc regno = 9)
00709   13 for %st(2) (gcc regno = 10)
00710   14 for %st(3) (gcc regno = 11)
00711   15 for %st(4) (gcc regno = 12)
00712   16 for %st(5) (gcc regno = 13)
00713   17 for %st(6) (gcc regno = 14)
00714   18 for %st(7) (gcc regno = 15)
00715 */
00716 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
00717 {
00718   0, 2, 1, 3, 6, 7, 5, 4,   /* general regs, gcc order: eax, edx, ecx, ebx, esi, edi, ebp, esp */
00719   11, 12, 13, 14, 15, 16, 17, 18, /* fp regs: %st(0)..%st(7), stack-top-relative */
00720   -1, 9, -1, -1, -1,      /* arg, flags (DWARF 9 is %eflags), fpsr, dir, frame */
00721   21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
00722   29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
00723   -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
00724   -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
00725 };
00726 
00727 /* Test and compare insns in i386.md store the information needed to
00728    generate branch and scc insns here.  */
00729 /* Both operands start out as NULL_RTX.  */
00730 rtx ix86_compare_op0 = NULL_RTX;
00731 rtx ix86_compare_op1 = NULL_RTX;
00732 
00733 #define MAX_386_STACK_LOCALS 3
00734 /* Size of the register save area: REGPARM_MAX slots of UNITS_PER_WORD each, plus SSE_REGPARM_MAX slots of 16 each.  */
00735 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
00736 
00737 /* Define the structure for the machine field in struct function.  */
00738 /* Each entry is one node of a singly linked list, chained through `next'.  */
00739 struct stack_local_entry GTY(())
00740 {
00741   unsigned short mode;  /* NOTE(review): presumably an enum machine_mode value; confirm.  */
00742   unsigned short n;     /* NOTE(review): presumably a slot index for that mode; confirm.  */
00743   rtx rtl;              /* The rtx recorded for this entry.  */
00744   struct stack_local_entry *next;  /* Next node in the list.  */
00745 };
00746 
00747 /* Structure describing stack frame layout.
00748    Stack grows downward:
00749 
00750    [arguments]
00751                 <- ARG_POINTER
00752    saved pc
00753 
00754    saved frame pointer if frame_pointer_needed
00755                 <- HARD_FRAME_POINTER
00756    [saved regs]
00757 
00758    [padding1]          \
00759             )
00760    [va_arg registers]  (
00761             > to_allocate       <- FRAME_POINTER
00762    [frame]         (
00763             )
00764    [padding2]        /
00765   */
00766 struct ix86_frame
00767 {
00768   int nregs;  /* NOTE(review): presumably the count behind [saved regs] in the diagram above; confirm.  */
00769   int padding1;  /* Corresponds to [padding1] in the layout diagram above.  */
00770   int va_arg_size;  /* Corresponds to [va_arg registers] in the diagram.  */
00771   HOST_WIDE_INT frame;  /* Corresponds to [frame] in the diagram.  */
00772   int padding2;  /* Corresponds to [padding2] in the diagram.  */
00773   int outgoing_arguments_size;
00774   int red_zone_size;
00775 
00776   HOST_WIDE_INT to_allocate;  /* Per the diagram, spans [padding1] through [padding2].  */
00777   /* The offsets relative to ARG_POINTER.  */
00778   HOST_WIDE_INT frame_pointer_offset;
00779   HOST_WIDE_INT hard_frame_pointer_offset;
00780   HOST_WIDE_INT stack_pointer_offset;
00781 
00782   /* When save_regs_using_mov is set, emit prologue using
00783      move instead of push instructions.  */
00784   bool save_regs_using_mov;
00785 };
00786 
00787 /* Used to enable/disable debugging features.  */
00788 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
00789 /* Code model option as passed by user.  */
00790 const char *ix86_cmodel_string;
00791 /* Parsed value.  */
00792 enum cmodel ix86_cmodel;
00793 /* Asm dialect.  */
00794 const char *ix86_asm_string;
00795 enum asm_dialect ix86_asm_dialect = ASM_ATT;
00796 /* TLS dialect.  */
00797 const char *ix86_tls_dialect_string;
00798 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
00799 
00800 /* Which unit we are generating floating point math for.  */
00801 enum fpmath_unit ix86_fpmath;
00802 
00803 /* Which cpu are we scheduling for.  */
00804 enum processor_type ix86_tune;
00805 /* Which instruction set architecture to use.  */
00806 enum processor_type ix86_arch;
00807 
00808 /* Strings to hold which cpu and instruction set architecture to use.  */
00809 const char *ix86_tune_string;   /* for -mtune=<xxx> */
00810 const char *ix86_arch_string;   /* for -march=<xxx> */
00811 const char *ix86_fpmath_string;   /* for -mfpmath=<xxx> */
00812 
00813 /* # of registers to use to pass arguments.  */
00814 const char *ix86_regparm_string;
00815 
00816 /* true if sse prefetch instruction is not NOOP.  */
00817 int x86_prefetch_sse;
00818 
00819 /* ix86_regparm_string as a number */
00820 int ix86_regparm;
00821 
00822 /* Alignment to use for loops and jumps:  */
00823 
00824 /* Power of two alignment for loops.  */
00825 const char *ix86_align_loops_string;
00826 
00827 /* Power of two alignment for non-loop jumps.  */
00828 const char *ix86_align_jumps_string;
00829 
00830 /* Power of two alignment for stack boundary in bytes.  */
00831 const char *ix86_preferred_stack_boundary_string;
00832 
00833 /* Preferred alignment for stack boundary in bits.  */
00834 unsigned int ix86_preferred_stack_boundary;
00835 
00836 /* Values 1-5: see jump.c */
00837 int ix86_branch_cost;
00838 const char *ix86_branch_cost_string;
00839 
00840 /* Power of two alignment for functions.  */
00841 const char *ix86_align_funcs_string;
00842 
00843 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
00844 char internal_label_prefix[16];
00845 int internal_label_prefix_len;
00846 
00847 static void output_pic_addr_const (FILE *, rtx, int);
00848 static void put_condition_code (enum rtx_code, enum machine_mode,
00849         int, int, FILE *);
00850 static const char *get_some_local_dynamic_name (void);
00851 static int get_some_local_dynamic_name_1 (rtx *, void *);
00852 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
00853 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
00854                rtx *);
00855 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
00856 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
00857                enum machine_mode);
00858 static rtx get_thread_pointer (int);
00859 static rtx legitimize_tls_address (rtx, enum tls_model, int);
00860 static void get_pc_thunk_name (char [32], unsigned int);
00861 static rtx gen_push (rtx);
00862 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
00863 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
00864 static struct machine_function * ix86_init_machine_status (void);
00865 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
00866 static int ix86_nsaved_regs (void);
00867 static void ix86_emit_save_regs (void);
00868 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
00869 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
00870 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
00871 static HOST_WIDE_INT ix86_GOT_alias_set (void);
00872 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
00873 static rtx ix86_expand_aligntest (rtx, int);
00874 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
00875 static int ix86_issue_rate (void);
00876 static int ix86_adjust_cost (rtx, rtx, rtx, int);
00877 static int ia32_multipass_dfa_lookahead (void);
00878 static void ix86_init_mmx_sse_builtins (void);
00879 static rtx x86_this_parameter (tree);
00880 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
00881          HOST_WIDE_INT, tree);
00882 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
00883 static void x86_file_start (void);
00884 static void ix86_reorg (void);
00885 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
00886 static tree ix86_build_builtin_va_list (void);
00887 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
00888            tree, int *, int);
00889 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
00890 static bool ix86_vector_mode_supported_p (enum machine_mode);
00891 
00892 static int ix86_address_cost (rtx);
00893 static bool ix86_cannot_force_const_mem (rtx);
00894 static rtx ix86_delegitimize_address (rtx);
00895 
00896 struct builtin_description;
00897 static rtx ix86_expand_sse_comi (const struct builtin_description *,
00898          tree, rtx);
00899 static rtx ix86_expand_sse_compare (const struct builtin_description *,
00900             tree, rtx);
00901 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
00902 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
00903 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
00904 static rtx ix86_expand_store_builtin (enum insn_code, tree);
00905 static rtx safe_vector_operand (rtx, enum machine_mode);
00906 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
00907 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
00908 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
00909 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
00910 static int ix86_fp_comparison_cost (enum rtx_code code);
00911 static unsigned int ix86_select_alt_pic_regnum (void);
00912 static int ix86_save_reg (unsigned int, int);
00913 static void ix86_compute_frame_layout (struct ix86_frame *);
00914 static int ix86_comp_type_attributes (tree, tree);
00915 static int ix86_function_regparm (tree, tree);
00916 const struct attribute_spec ix86_attribute_table[];
00917 static bool ix86_function_ok_for_sibcall (tree, tree);
00918 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
00919 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
00920 static int ix86_value_regno (enum machine_mode);
00921 static bool contains_128bit_aligned_vector_p (tree);
00922 static rtx ix86_struct_value_rtx (tree, int);
00923 static bool ix86_ms_bitfield_layout_p (tree);
00924 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
00925 static int extended_reg_mentioned_1 (rtx *, void *);
00926 static bool ix86_rtx_costs (rtx, int, int, int *);
00927 static int min_insn_size (rtx);
00928 static tree ix86_md_asm_clobbers (tree clobbers);
00929 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
00930 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
00931             tree, bool);
00932 static void ix86_init_builtins (void);
00933 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
00934 
00935 /* This function is only used on Solaris.  */
00936 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
00937   ATTRIBUTE_UNUSED;
00938 
00939 /* Register class used for passing given 64bit part of the argument.
00940    These represent classes as documented by the PS ABI, with the exception
00941    of SSESF, SSEDF classes, that are basically SSE class, just gcc will
00942    use SF or DFmode move instead of DImode to avoid reformatting penalties.
00943 
00944    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
00945    whenever possible (upper half does contain padding).
00946  */
00947 enum x86_64_reg_class
00948   {
00949     X86_64_NO_CLASS,
00950     X86_64_INTEGER_CLASS,
00951     X86_64_INTEGERSI_CLASS,  /* Integer class where cheaper SImode moves can be used (upper half is padding).  */
00952     X86_64_SSE_CLASS,
00953     X86_64_SSESF_CLASS,  /* SSE class moved in SFmode to avoid reformatting penalties.  */
00954     X86_64_SSEDF_CLASS,  /* SSE class moved in DFmode to avoid reformatting penalties.  */
00955     X86_64_SSEUP_CLASS,
00956     X86_64_X87_CLASS,
00957     X86_64_X87UP_CLASS,
00958     X86_64_COMPLEX_X87_CLASS,
00959     X86_64_MEMORY_CLASS
00960   };
00961 static const char * const x86_64_reg_class_name[] = {  /* Printable names, one per enum x86_64_reg_class value, in enum order.  */
00962   "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
00963   "sseup", "x87", "x87up", "cplx87", "no"  /* NOTE(review): the last slot (X86_64_MEMORY_CLASS) repeats the X86_64_NO_CLASS label; confirm this is intended.  */
00964 };
00965 
00966 #define MAX_CLASSES 4
00967 
00968 /* Table of constants used by fldpi, fldln2, etc. (five entries).  */
00969 static REAL_VALUE_TYPE ext_80387_constants_table [5];
00970 static bool ext_80387_constants_init = 0;  /* Starts out false.  NOTE(review): presumably set once init_ext_80387_constants has filled the table; confirm.  */
00971 static void init_ext_80387_constants (void);
00972 
00973 /* Initialize the GCC target structure.  */
00974 #undef TARGET_ATTRIBUTE_TABLE
00975 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
00976 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
00977 #  undef TARGET_MERGE_DECL_ATTRIBUTES
00978 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
00979 #endif
00980 
00981 #undef TARGET_COMP_TYPE_ATTRIBUTES
00982 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
00983 
00984 #undef TARGET_INIT_BUILTINS
00985 #define TARGET_INIT_BUILTINS ix86_init_builtins
00986 #undef TARGET_EXPAND_BUILTIN
00987 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
00988 
00989 #undef TARGET_ASM_FUNCTION_EPILOGUE
00990 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
00991 
00992 #undef TARGET_ASM_OPEN_PAREN
00993 #define TARGET_ASM_OPEN_PAREN ""
00994 #undef TARGET_ASM_CLOSE_PAREN
00995 #define TARGET_ASM_CLOSE_PAREN ""
00996 
00997 #undef TARGET_ASM_ALIGNED_HI_OP
00998 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
00999 #undef TARGET_ASM_ALIGNED_SI_OP
01000 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
01001 #ifdef ASM_QUAD
01002 #undef TARGET_ASM_ALIGNED_DI_OP
01003 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
01004 #endif
01005 
01006 #undef TARGET_ASM_UNALIGNED_HI_OP
01007 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
01008 #undef TARGET_ASM_UNALIGNED_SI_OP
01009 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
01010 #undef TARGET_ASM_UNALIGNED_DI_OP
01011 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
01012 
01013 #undef TARGET_SCHED_ADJUST_COST
01014 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
01015 #undef TARGET_SCHED_ISSUE_RATE
01016 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
01017 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
01018 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
01019   ia32_multipass_dfa_lookahead
01020 
01021 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
01022 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
01023 
01024 #ifdef HAVE_AS_TLS
01025 #undef TARGET_HAVE_TLS
01026 #define TARGET_HAVE_TLS true
01027 #endif
01028 #undef TARGET_CANNOT_FORCE_CONST_MEM
01029 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
01030 
01031 #undef TARGET_DELEGITIMIZE_ADDRESS
01032 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
01033 
01034 #undef TARGET_MS_BITFIELD_LAYOUT_P
01035 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
01036 
01037 #undef TARGET_ASM_OUTPUT_MI_THUNK
01038 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
01039 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
01040 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
01041 
01042 #undef TARGET_ASM_FILE_START
01043 #define TARGET_ASM_FILE_START x86_file_start
01044 
01045 #undef TARGET_RTX_COSTS
01046 #define TARGET_RTX_COSTS ix86_rtx_costs
01047 #undef TARGET_ADDRESS_COST
01048 #define TARGET_ADDRESS_COST ix86_address_cost
01049 
01050 #undef TARGET_FIXED_CONDITION_CODE_REGS
01051 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
01052 #undef TARGET_CC_MODES_COMPATIBLE
01053 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
01054 
01055 #undef TARGET_MACHINE_DEPENDENT_REORG
01056 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
01057 
01058 #undef TARGET_BUILD_BUILTIN_VA_LIST
01059 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
01060 
01061 #undef TARGET_MD_ASM_CLOBBERS
01062 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
01063 
01064 #undef TARGET_PROMOTE_PROTOTYPES
01065 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
01066 #undef TARGET_STRUCT_VALUE_RTX
01067 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
01068 #undef TARGET_SETUP_INCOMING_VARARGS
01069 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
01070 #undef TARGET_MUST_PASS_IN_STACK
01071 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
01072 #undef TARGET_PASS_BY_REFERENCE
01073 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
01074 
01075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
01076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
01077 
01078 #undef TARGET_VECTOR_MODE_SUPPORTED_P
01079 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
01080 
01081 #ifdef SUBTARGET_INSERT_ATTRIBUTES
01082 #undef TARGET_INSERT_ATTRIBUTES
01083 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
01084 #endif
01085 
01086 struct gcc_target targetm = TARGET_INITIALIZER;  /* The GCC target structure; the TARGET_* macros redefined above are this file's hook overrides.  */
01087 
01088 
01089 /* The svr4 ABI for the i386 says that records and unions are returned
01090    in memory.  */
01091 #ifndef DEFAULT_PCC_STRUCT_RETURN
01092 #define DEFAULT_PCC_STRUCT_RETURN 1
01093 #endif
01094 
01095 /* Sometimes certain combinations of command options do not make
01096    sense on a particular target machine.  You can define a macro
01097    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
01098    defined, is executed once just after all the command options have
01099    been parsed.
01100 
01101    Don't use this macro to turn on various extra optimizations for
01102    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
01103 
01104 void
01105 override_options (void)
01106 {
01107   int i;
01108   int ix86_tune_defaulted = 0;
01109 
01110   /* Comes from final.c -- no real reason to change it.  */
01111 #define MAX_CODE_ALIGN 16
01112 
01113   static struct ptt
01114     {
01115       const struct processor_costs *cost; /* Processor costs */
01116       const int target_enable;      /* Target flags to enable.  */
01117       const int target_disable;     /* Target flags to disable.  */
01118       const int align_loop;     /* Default alignments.  */
01119       const int align_loop_max_skip;
01120       const int align_jump;
01121       const int align_jump_max_skip;
01122       const int align_func;
01123     }
01124   const processor_target_table[PROCESSOR_max] =
01125     {
01126       {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
01127       {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
01128       {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
01129       {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
01130       {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
01131       {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
01132       {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
01133       {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
01134       {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
01135     };
01136 
01137   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
01138   static struct pta
01139     {
01140       const char *const name;   /* processor name or nickname.  */
01141       const enum processor_type processor;
01142       const enum pta_flags
01143   {
01144     PTA_SSE = 1,
01145     PTA_SSE2 = 2,
01146     PTA_SSE3 = 4,
01147     PTA_MMX = 8,
01148     PTA_PREFETCH_SSE = 16,
01149     PTA_3DNOW = 32,
01150     PTA_3DNOW_A = 64,
01151     PTA_64BIT = 128
01152   } flags;
01153     }
01154   const processor_alias_table[] =
01155     {
01156       {"i386", PROCESSOR_I386, 0},
01157       {"i486", PROCESSOR_I486, 0},
01158       {"i586", PROCESSOR_PENTIUM, 0},
01159       {"pentium", PROCESSOR_PENTIUM, 0},
01160       {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
01161       {"winchip-c6", PROCESSOR_I486, PTA_MMX},
01162       {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
01163       {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
01164       {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
01165       {"i686", PROCESSOR_PENTIUMPRO, 0},
01166       {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
01167       {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
01168       {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
01169       {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
01170       {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
01171       {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
01172                | PTA_MMX | PTA_PREFETCH_SSE},
01173       {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
01174                 | PTA_MMX | PTA_PREFETCH_SSE},
01175       {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
01176                 | PTA_MMX | PTA_PREFETCH_SSE},
01177       {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
01178                 | PTA_MMX | PTA_PREFETCH_SSE},
01179       {"k6", PROCESSOR_K6, PTA_MMX},
01180       {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
01181       {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
01182       {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
01183            | PTA_3DNOW_A},
01184       {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
01185            | PTA_3DNOW | PTA_3DNOW_A},
01186       {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
01187             | PTA_3DNOW_A | PTA_SSE},
01188       {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
01189               | PTA_3DNOW_A | PTA_SSE},
01190       {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
01191               | PTA_3DNOW_A | PTA_SSE},
01192       {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
01193              | PTA_SSE | PTA_SSE2 },
01194       {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
01195               | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
01196       {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
01197               | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
01198       {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
01199               | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
01200       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
01201               | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
01202     };
01203 
01204   int const pta_size = ARRAY_SIZE (processor_alias_table);
01205 
01206 #ifdef SUBTARGET_OVERRIDE_OPTIONS
01207   SUBTARGET_OVERRIDE_OPTIONS;
01208 #endif
01209 
01210   /* Set the default values for switches whose default depends on TARGET_64BIT
01211      in case they weren't overwritten by command line options.  */
01212   if (TARGET_64BIT)
01213     {
01214       if (flag_omit_frame_pointer == 2)
01215   flag_omit_frame_pointer = 1;
01216       if (flag_asynchronous_unwind_tables == 2)
01217   flag_asynchronous_unwind_tables = 1;
01218       if (flag_pcc_struct_return == 2)
01219   flag_pcc_struct_return = 0;
01220     }
01221   else
01222     {
01223       if (flag_omit_frame_pointer == 2)
01224   flag_omit_frame_pointer = 0;
01225       if (flag_asynchronous_unwind_tables == 2)
01226   flag_asynchronous_unwind_tables = 0;
01227       if (flag_pcc_struct_return == 2)
01228   flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
01229     }
01230 
01231   if (!ix86_tune_string && ix86_arch_string)
01232     ix86_tune_string = ix86_arch_string;
01233   if (!ix86_tune_string)
01234     {
01235       ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
01236       ix86_tune_defaulted = 1;
01237     }
01238   if (!ix86_arch_string)
01239     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
01240 
01241   if (ix86_cmodel_string != 0)
01242     {
01243       if (!strcmp (ix86_cmodel_string, "small"))
01244   ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
01245       else if (flag_pic)
01246   sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
01247       else if (!strcmp (ix86_cmodel_string, "32"))
01248   ix86_cmodel = CM_32;
01249       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
01250   ix86_cmodel = CM_KERNEL;
01251       else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
01252   ix86_cmodel = CM_MEDIUM;
01253       else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
01254   ix86_cmodel = CM_LARGE;
01255       else
01256   error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
01257     }
01258   else
01259     {
01260       ix86_cmodel = CM_32;
01261       if (TARGET_64BIT)
01262   ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
01263     }
01264   if (ix86_asm_string != 0)
01265     {
01266       if (!strcmp (ix86_asm_string, "intel"))
01267   ix86_asm_dialect = ASM_INTEL;
01268       else if (!strcmp (ix86_asm_string, "att"))
01269   ix86_asm_dialect = ASM_ATT;
01270       else
01271   error ("bad value (%s) for -masm= switch", ix86_asm_string);
01272     }
01273   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
01274     error ("code model %qs not supported in the %s bit mode",
01275      ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
01276   if (ix86_cmodel == CM_LARGE)
01277     sorry ("code model %<large%> not supported yet");
01278   if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
01279     sorry ("%i-bit mode not compiled in",
01280      (target_flags & MASK_64BIT) ? 64 : 32);
01281 
01282   for (i = 0; i < pta_size; i++)
01283     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
01284       {
01285   ix86_arch = processor_alias_table[i].processor;
01286   /* Default cpu tuning to the architecture.  */
01287   ix86_tune = ix86_arch;
01288   if (processor_alias_table[i].flags & PTA_MMX
01289       && !(target_flags_explicit & MASK_MMX))
01290     target_flags |= MASK_MMX;
01291   if (processor_alias_table[i].flags & PTA_3DNOW
01292       && !(target_flags_explicit & MASK_3DNOW))
01293     target_flags |= MASK_3DNOW;
01294   if (processor_alias_table[i].flags & PTA_3DNOW_A
01295       && !(target_flags_explicit & MASK_3DNOW_A))
01296     target_flags |= MASK_3DNOW_A;
01297   if (processor_alias_table[i].flags & PTA_SSE
01298       && !(target_flags_explicit & MASK_SSE))
01299     target_flags |= MASK_SSE;
01300   if (processor_alias_table[i].flags & PTA_SSE2
01301       && !(target_flags_explicit & MASK_SSE2))
01302     target_flags |= MASK_SSE2;
01303   if (processor_alias_table[i].flags & PTA_SSE3
01304       && !(target_flags_explicit & MASK_SSE3))
01305     target_flags |= MASK_SSE3;
01306   if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
01307     x86_prefetch_sse = true;
01308   if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
01309     error ("CPU you selected does not support x86-64 "
01310      "instruction set");
01311   break;
01312       }
01313 
01314   if (i == pta_size)
01315     error ("bad value (%s) for -march= switch", ix86_arch_string);
01316 
01317   for (i = 0; i < pta_size; i++)
01318     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
01319       {
01320   ix86_tune = processor_alias_table[i].processor;
01321   if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
01322     {
01323       if (ix86_tune_defaulted)
01324         {
01325     ix86_tune_string = "x86-64";
01326     for (i = 0; i < pta_size; i++)
01327       if (! strcmp (ix86_tune_string,
01328         processor_alias_table[i].name))
01329         break;
01330     ix86_tune = processor_alias_table[i].processor;
01331         }
01332       else
01333         error ("CPU you selected does not support x86-64 "
01334          "instruction set");
01335     }
01336         /* Intel CPUs have always interpreted SSE prefetch instructions as
01337      NOPs; so, we can enable SSE prefetch instructions even when
01338      -mtune (rather than -march) points us to a processor that has them.
01339      However, the VIA C3 gives a SIGILL, so we only do that for i686 and
01340      higher processors.  */
01341   if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
01342     x86_prefetch_sse = true;
01343   break;
01344       }
01345   if (i == pta_size)
01346     error ("bad value (%s) for -mtune= switch", ix86_tune_string);
01347 
01348   if (optimize_size)
01349     ix86_cost = &size_cost;
01350   else
01351     ix86_cost = processor_target_table[ix86_tune].cost;
01352   target_flags |= processor_target_table[ix86_tune].target_enable;
01353   target_flags &= ~processor_target_table[ix86_tune].target_disable;
01354 
01355   /* Arrange to set up i386_stack_locals for all functions.  */
01356   init_machine_status = ix86_init_machine_status;
01357 
01358   /* Validate -mregparm= value.  */
01359   if (ix86_regparm_string)
01360     {
01361       i = atoi (ix86_regparm_string);
01362       if (i < 0 || i > REGPARM_MAX)
01363   error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
01364       else
01365   ix86_regparm = i;
01366     }
01367   else
01368    if (TARGET_64BIT)
01369      ix86_regparm = REGPARM_MAX;
01370 
01371   /* If the user has provided any of the -malign-* options,
01372      warn and use that value only if -falign-* is not set.
01373      Remove this code in GCC 3.2 or later.  */
01374   if (ix86_align_loops_string)
01375     {
01376       warning ("-malign-loops is obsolete, use -falign-loops");
01377       if (align_loops == 0)
01378   {
01379     i = atoi (ix86_align_loops_string);
01380     if (i < 0 || i > MAX_CODE_ALIGN)
01381       error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
01382     else
01383       align_loops = 1 << i;
01384   }
01385     }
01386 
01387   if (ix86_align_jumps_string)
01388     {
01389       warning ("-malign-jumps is obsolete, use -falign-jumps");
01390       if (align_jumps == 0)
01391   {
01392     i = atoi (ix86_align_jumps_string);
01393     if (i < 0 || i > MAX_CODE_ALIGN)
01394       error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
01395     else
01396       align_jumps = 1 << i;
01397   }
01398     }
01399 
01400   if (ix86_align_funcs_string)
01401     {
01402       warning ("-malign-functions is obsolete, use -falign-functions");
01403       if (align_functions == 0)
01404   {
01405     i = atoi (ix86_align_funcs_string);
01406     if (i < 0 || i > MAX_CODE_ALIGN)
01407       error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
01408     else
01409       align_functions = 1 << i;
01410   }
01411     }
01412 
01413   /* Default align_* from the processor table.  */
01414   if (align_loops == 0)
01415     {
01416       align_loops = processor_target_table[ix86_tune].align_loop;
01417       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
01418     }
01419   if (align_jumps == 0)
01420     {
01421       align_jumps = processor_target_table[ix86_tune].align_jump;
01422       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
01423     }
01424   if (align_functions == 0)
01425     {
01426       align_functions = processor_target_table[ix86_tune].align_func;
01427     }
01428 
01429   /* Validate -mpreferred-stack-boundary= value, or provide default.
01430      The default of 128 bits is for Pentium III's SSE __m128, but we
01431      don't want additional code to keep the stack aligned when
01432      optimizing for code size.  */
01433   ix86_preferred_stack_boundary = (optimize_size
01434            ? TARGET_64BIT ? 128 : 32
01435            : 128);
01436   if (ix86_preferred_stack_boundary_string)
01437     {
01438       i = atoi (ix86_preferred_stack_boundary_string);
01439       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
01440   error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
01441          TARGET_64BIT ? 4 : 2);
01442       else
01443   ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
01444     }
01445 
01446   /* Validate -mbranch-cost= value, or provide default.  */
01447   ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
01448   if (ix86_branch_cost_string)
01449     {
01450       i = atoi (ix86_branch_cost_string);
01451       if (i < 0 || i > 5)
01452   error ("-mbranch-cost=%d is not between 0 and 5", i);
01453       else
01454   ix86_branch_cost = i;
01455     }
01456 
01457   if (ix86_tls_dialect_string)
01458     {
01459       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
01460   ix86_tls_dialect = TLS_DIALECT_GNU;
01461       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
01462   ix86_tls_dialect = TLS_DIALECT_SUN;
01463       else
01464   error ("bad value (%s) for -mtls-dialect= switch",
01465          ix86_tls_dialect_string);
01466     }
01467 
01468   /* Keep nonleaf frame pointers.  */
01469   if (flag_omit_frame_pointer)
01470     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
01471   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
01472     flag_omit_frame_pointer = 1;
01473 
01474   /* If we're doing fast math, we don't care about comparison order
01475      wrt NaNs.  This lets us use a shorter comparison sequence.  */
01476   if (flag_unsafe_math_optimizations)
01477     target_flags &= ~MASK_IEEE_FP;
01478 
01479   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
01480      since the insns won't need emulation.  */
01481   if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
01482     target_flags &= ~MASK_NO_FANCY_MATH_387;
01483 
01484   /* Likewise, if the target doesn't have a 387, or we've specified
01485      software floating point, don't use 387 inline intrinsics.  */
01486   if (!TARGET_80387)
01487     target_flags |= MASK_NO_FANCY_MATH_387;
01488 
01489   /* Turn on SSE2 builtins for -msse3.  */
01490   if (TARGET_SSE3)
01491     target_flags |= MASK_SSE2;
01492 
01493   /* Turn on SSE builtins for -msse2.  */
01494   if (TARGET_SSE2)
01495     target_flags |= MASK_SSE;
01496 
01497   /* Turn on MMX builtins for -msse.  */
01498   if (TARGET_SSE)
01499     {
01500       target_flags |= MASK_MMX & ~target_flags_explicit;
01501       x86_prefetch_sse = true;
01502     }
01503 
01504   /* Turn on MMX builtins for 3Dnow.  */
01505   if (TARGET_3DNOW)
01506     target_flags |= MASK_MMX;
01507 
01508   if (TARGET_64BIT)
01509     {
01510       if (TARGET_ALIGN_DOUBLE)
01511   error ("-malign-double makes no sense in the 64bit mode");
01512       if (TARGET_RTD)
01513   error ("-mrtd calling convention not supported in the 64bit mode");
01514 
01515       /* Enable by default the SSE and MMX builtins.  Do allow the user to
01516    explicitly disable any of these.  In particular, disabling SSE and
01517    MMX for kernel code is extremely useful.  */
01518       target_flags
01519   |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
01520       & ~target_flags_explicit);
01521 
01522       if (TARGET_SSE)
01523   ix86_fpmath = FPMATH_SSE;
01524      }
01525   else
01526     {
01527       ix86_fpmath = FPMATH_387;
01528       /* i386 ABI does not specify red zone.  It still makes sense to use it
01529          when programmer takes care to stack from being destroyed.  */
01530       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
01531         target_flags |= MASK_NO_RED_ZONE;
01532     }
01533 
01534   if (ix86_fpmath_string != 0)
01535     {
01536       if (! strcmp (ix86_fpmath_string, "387"))
01537   ix86_fpmath = FPMATH_387;
01538       else if (! strcmp (ix86_fpmath_string, "sse"))
01539   {
01540     if (!TARGET_SSE)
01541       {
01542         warning ("SSE instruction set disabled, using 387 arithmetics");
01543         ix86_fpmath = FPMATH_387;
01544       }
01545     else
01546       ix86_fpmath = FPMATH_SSE;
01547   }
01548       else if (! strcmp (ix86_fpmath_string, "387,sse")
01549          || ! strcmp (ix86_fpmath_string, "sse,387"))
01550   {
01551     if (!TARGET_SSE)
01552       {
01553         warning ("SSE instruction set disabled, using 387 arithmetics");
01554         ix86_fpmath = FPMATH_387;
01555       }
01556     else if (!TARGET_80387)
01557       {
01558         warning ("387 instruction set disabled, using SSE arithmetics");
01559         ix86_fpmath = FPMATH_SSE;
01560       }
01561     else
01562       ix86_fpmath = FPMATH_SSE | FPMATH_387;
01563   }
01564       else
01565   error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
01566     }
01567 
01568   /* If the i387 is disabled, then do not return values in it. */
01569   if (!TARGET_80387)
01570     target_flags &= ~MASK_FLOAT_RETURNS;
01571 
01572   if ((x86_accumulate_outgoing_args & TUNEMASK)
01573       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
01574       && !optimize_size)
01575     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
01576 
01577   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
01578   {
01579     char *p;
01580     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
01581     p = strchr (internal_label_prefix, 'X');
01582     internal_label_prefix_len = p - internal_label_prefix;
01583     *p = '\0';
01584   }
01585 
01586   /* When scheduling description is not available, disable scheduler pass
01587      so it won't slow down the compilation and make x87 code slower.  */
01588   if (!TARGET_SCHEDULE)
01589     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
01590 }
01591 
01592 void
01593 optimization_options (int level, int size ATTRIBUTE_UNUSED)
01594 {
01595   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
01596      make the problem with not enough registers even worse.  */
01597 #ifdef INSN_SCHEDULING
01598   if (level > 1)
01599     flag_schedule_insns = 0;
01600 #endif
01601 
01602   /* The default values of these switches depend on the TARGET_64BIT
01603      that is not known at this moment.  Mark these values with 2 and
01604      let user the to override these.  In case there is no command line option
01605      specifying them, we will set the defaults in override_options.  */
01606   if (optimize >= 1)
01607     flag_omit_frame_pointer = 2;
01608   flag_pcc_struct_return = 2;
01609   flag_asynchronous_unwind_tables = 2;
01610 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
01611   SUBTARGET_OPTIMIZATION_OPTIONS;
01612 #endif
01613 }
01614 
01615 /* Table of valid machine attributes.  */
01616 const struct attribute_spec ix86_attribute_table[] =
01617 {
01618   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
01619   /* Stdcall attribute says callee is responsible for popping arguments
01620      if they are not variable.  */
01621   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
01622   /* Fastcall attribute says callee is responsible for popping arguments
01623      if they are not variable.  */
01624   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
01625   /* Cdecl attribute says the callee is a normal C declaration */
01626   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
01627   /* Regparm attribute specifies how many integer arguments are to be
01628      passed in registers.  */
01629   { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
01630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
01631   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
01632   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
01633   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
01634 #endif
01635   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
01636   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
01637 #ifdef SUBTARGET_ATTRIBUTE_TABLE
01638   SUBTARGET_ATTRIBUTE_TABLE,
01639 #endif
01640   { NULL,        0, 0, false, false, false, NULL }
01641 };
01642 
01643 /* Decide whether we can make a sibling call to a function.  DECL is the
01644    declaration of the function being targeted by the call and EXP is the
01645    CALL_EXPR representing the call.  */
01646 
01647 static bool
01648 ix86_function_ok_for_sibcall (tree decl, tree exp)
01649 {
01650   /* If we are generating position-independent code, we cannot sibcall
01651      optimize any indirect call, or a direct call to a global function,
01652      as the PLT requires %ebx be live.  */
01653   if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
01654     return false;
01655 
01656   /* If we are returning floats on the 80387 register stack, we cannot
01657      make a sibcall from a function that doesn't return a float to a
01658      function that does or, conversely, from a function that does return
01659      a float to a function that doesn't; the necessary stack adjustment
01660      would not be executed.  */
01661   if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
01662       != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
01663     return false;
01664 
01665   /* If this call is indirect, we'll need to be able to use a call-clobbered
01666      register for the address of the target function.  Make sure that all
01667      such registers are not used for passing parameters.  */
01668   if (!decl && !TARGET_64BIT)
01669     {
01670       tree type;
01671 
01672       /* We're looking at the CALL_EXPR, we need the type of the function.  */
01673       type = TREE_OPERAND (exp, 0);   /* pointer expression */
01674       type = TREE_TYPE (type);      /* pointer type */
01675       type = TREE_TYPE (type);      /* function type */
01676 
01677       if (ix86_function_regparm (type, NULL) >= 3)
01678   {
01679     /* ??? Need to count the actual number of registers to be used,
01680        not the possible number of registers.  Fix later.  */
01681     return false;
01682   }
01683     }
01684 
01685 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
01686   /* Dllimport'd functions are also called indirectly.  */
01687   if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
01688       && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
01689     return false;
01690 #endif
01691 
01692   /* Otherwise okay.  That also includes certain types of indirect calls.  */
01693   return true;
01694 }
01695 
01696 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
01697    arguments as in struct attribute_spec.handler.  */
01698 static tree
01699 ix86_handle_cdecl_attribute (tree *node, tree name,
01700            tree args ATTRIBUTE_UNUSED,
01701            int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
01702 {
01703   if (TREE_CODE (*node) != FUNCTION_TYPE
01704       && TREE_CODE (*node) != METHOD_TYPE
01705       && TREE_CODE (*node) != FIELD_DECL
01706       && TREE_CODE (*node) != TYPE_DECL)
01707     {
01708       warning ("%qs attribute only applies to functions",
01709          IDENTIFIER_POINTER (name));
01710       *no_add_attrs = true;
01711     }
01712   else
01713     {
01714       if (is_attribute_p ("fastcall", name))
01715         {
01716           if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
01717             {
01718               error ("fastcall and stdcall attributes are not compatible");
01719             }
01720            else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
01721             {
01722               error ("fastcall and regparm attributes are not compatible");
01723             }
01724         }
01725       else if (is_attribute_p ("stdcall", name))
01726         {
01727           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
01728             {
01729               error ("fastcall and stdcall attributes are not compatible");
01730             }
01731         }
01732     }
01733 
01734   if (TARGET_64BIT)
01735     {
01736       warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
01737       *no_add_attrs = true;
01738     }
01739 
01740   return NULL_TREE;
01741 }
01742 
01743 /* Handle a "regparm" attribute;
01744    arguments as in struct attribute_spec.handler.  */
01745 static tree
01746 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
01747              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
01748 {
01749   if (TREE_CODE (*node) != FUNCTION_TYPE
01750       && TREE_CODE (*node) != METHOD_TYPE
01751       && TREE_CODE (*node) != FIELD_DECL
01752       && TREE_CODE (*node) != TYPE_DECL)
01753     {
01754       warning ("%qs attribute only applies to functions",
01755          IDENTIFIER_POINTER (name));
01756       *no_add_attrs = true;
01757     }
01758   else
01759     {
01760       tree cst;
01761 
01762       cst = TREE_VALUE (args);
01763       if (TREE_CODE (cst) != INTEGER_CST)
01764   {
01765     warning ("%qs attribute requires an integer constant argument",
01766        IDENTIFIER_POINTER (name));
01767     *no_add_attrs = true;
01768   }
01769       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
01770   {
01771     warning ("argument to %qs attribute larger than %d",
01772        IDENTIFIER_POINTER (name), REGPARM_MAX);
01773     *no_add_attrs = true;
01774   }
01775 
01776       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
01777   {
01778     error ("fastcall and regparm attributes are not compatible");
01779   }
01780     }
01781 
01782   return NULL_TREE;
01783 }
01784 
01785 /* Return 0 if the attributes for two types are incompatible, 1 if they
01786    are compatible, and 2 if they are nearly compatible (which causes a
01787    warning to be generated).  */
01788 
01789 static int
01790 ix86_comp_type_attributes (tree type1, tree type2)
01791 {
01792   /* Check for mismatch of non-default calling convention.  */
01793   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
01794 
01795   if (TREE_CODE (type1) != FUNCTION_TYPE)
01796     return 1;
01797 
01798   /*  Check for mismatched fastcall types */
01799   if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
01800       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
01801     return 0;
01802 
01803   /* Check for mismatched return types (cdecl vs stdcall).  */
01804   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
01805       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
01806     return 0;
01807   if (ix86_function_regparm (type1, NULL)
01808       != ix86_function_regparm (type2, NULL))
01809     return 0;
01810   return 1;
01811 }
01812 
01813 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
01814    DECL may be NULL when calling function indirectly
01815    or considering a libcall.  */
01816 
01817 static int
01818 ix86_function_regparm (tree type, tree decl)
01819 {
01820   tree attr;
01821   int regparm = ix86_regparm;
01822   bool user_convention = false;
01823 
01824   if (!TARGET_64BIT)
01825     {
01826       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
01827       if (attr)
01828   {
01829     regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
01830     user_convention = true;
01831   }
01832 
01833       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
01834   {
01835     regparm = 2;
01836     user_convention = true;
01837   }
01838 
01839       /* Use register calling convention for local functions when possible.  */
01840       if (!TARGET_64BIT && !user_convention && decl
01841     && flag_unit_at_a_time && !profile_flag)
01842   {
01843     struct cgraph_local_info *i = cgraph_local_info (decl);
01844     if (i && i->local)
01845       {
01846         int local_regparm, globals = 0, regno;
01847 
01848         /* Make sure no regparm register is taken by a global register
01849      variable.  */
01850         for (local_regparm = 0; local_regparm < 3; local_regparm++)
01851     if (global_regs[local_regparm])
01852       break;
01853         /* We can't use regparm(3) for nested functions as these use
01854      static chain pointer in third argument.  */
01855         if (local_regparm == 3
01856       && DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
01857     local_regparm = 2;
01858         /* Each global register variable increases register preassure,
01859      so the more global reg vars there are, the smaller regparm
01860      optimization use, unless requested by the user explicitly.  */
01861         for (regno = 0; regno < 6; regno++)
01862     if (global_regs[regno])
01863       globals++;
01864         local_regparm
01865     = globals < local_regparm ? local_regparm - globals : 0;
01866 
01867         if (local_regparm > regparm)
01868     regparm = local_regparm;
01869       }
01870   }
01871     }
01872   return regparm;
01873 }
01874 
01875 /* Return true if EAX is live at the start of the function.  Used by
01876    ix86_expand_prologue to determine if we need special help before
01877    calling allocate_stack_worker.  */
01878 
01879 static bool
01880 ix86_eax_live_at_start_p (void)
01881 {
01882   /* Cheat.  Don't bother working forward from ix86_function_regparm
01883      to the function type to whether an actual argument is located in
01884      eax.  Instead just look at cfg info, which is still close enough
01885      to correct at this point.  This gives false positives for broken
01886      functions that might use uninitialized data that happens to be
01887      allocated in eax, but who cares?  */
01888   return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
01889 }
01890 
01891 /* Value is the number of bytes of arguments automatically
01892    popped when returning from a subroutine call.
01893    FUNDECL is the declaration node of the function (as a tree),
01894    FUNTYPE is the data type of the function (as a tree),
01895    or for a library call it is an identifier node for the subroutine name.
01896    SIZE is the number of bytes of arguments passed on the stack.
01897 
01898    On the 80386, the RTD insn may be used to pop them if the number
01899      of args is fixed, but if the number is variable then the caller
01900      must pop them all.  RTD can't be used for library calls now
01901      because the library is compiled with the Unix compiler.
01902    Use of RTD is a selectable option, since it is incompatible with
01903    standard Unix calling sequences.  If the option is not selected,
01904    the caller must always pop the args.
01905 
01906    The attribute stdcall is equivalent to RTD on a per module basis.  */
01907 
01908 int
01909 ix86_return_pops_args (tree fundecl, tree funtype, int size)
01910 {
01911   int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
01912 
01913   /* Cdecl functions override -mrtd, and never pop the stack.  */
01914   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
01915 
01916     /* Stdcall and fastcall functions will pop the stack if not
01917        variable args.  */
01918     if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
01919         || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
01920       rtd = 1;
01921 
01922     if (rtd
01923         && (TYPE_ARG_TYPES (funtype) == NULL_TREE
01924       || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
01925     == void_type_node)))
01926       return size;
01927   }
01928 
01929   /* Lose any fake structure return argument if it is passed on the stack.  */
01930   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
01931       && !TARGET_64BIT
01932       && !KEEP_AGGREGATE_RETURN_POINTER)
01933     {
01934       int nregs = ix86_function_regparm (funtype, fundecl);
01935 
01936       if (!nregs)
01937   return GET_MODE_SIZE (Pmode);
01938     }
01939 
01940   return 0;
01941 }
01942 
01943 /* Argument support functions.  */
01944 
01945 /* Return true when register may be used to pass function parameters.  */
01946 bool
01947 ix86_function_arg_regno_p (int regno)
01948 {
01949   int i;
01950   if (!TARGET_64BIT)
01951     return (regno < REGPARM_MAX
01952       || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
01953   if (SSE_REGNO_P (regno) && TARGET_SSE)
01954     return true;
01955   /* RAX is used as hidden argument to va_arg functions.  */
01956   if (!regno)
01957     return true;
01958   for (i = 0; i < REGPARM_MAX; i++)
01959     if (regno == x86_64_int_parameter_registers[i])
01960       return true;
01961   return false;
01962 }
01963 
01964 /* Return if we do not know how to pass TYPE solely in registers.  */
01965 
01966 static bool
01967 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
01968 {
01969   if (must_pass_in_stack_var_size_or_pad (mode, type))
01970     return true;
01971 
01972   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
01973      The layout_type routine is crafty and tries to trick us into passing
01974      currently unsupported vector types on the stack by using TImode.  */
01975   return (!TARGET_64BIT && mode == TImode
01976     && type && TREE_CODE (type) != VECTOR_TYPE);
01977 }
01978 
01979 /* Initialize a variable CUM of type CUMULATIVE_ARGS
01980    for a call to a function whose data type is FNTYPE.
01981    For a library call, FNTYPE is 0.  */
01982 
01983 void
01984 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
01985           tree fntype,  /* tree ptr for function decl */
01986           rtx libname,  /* SYMBOL_REF of library name or 0 */
01987           tree fndecl)
01988 {
01989   static CUMULATIVE_ARGS zero_cum;
01990   tree param, next_param;
01991 
01992   if (TARGET_DEBUG_ARG)
01993     {
01994       fprintf (stderr, "\ninit_cumulative_args (");
01995       if (fntype)
01996   fprintf (stderr, "fntype code = %s, ret code = %s",
01997      tree_code_name[(int) TREE_CODE (fntype)],
01998      tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
01999       else
02000   fprintf (stderr, "no fntype");
02001 
02002       if (libname)
02003   fprintf (stderr, ", libname = %s", XSTR (libname, 0));
02004     }
02005 
02006   *cum = zero_cum;
02007 
02008   /* Set up the number of registers to use for passing arguments.  */
02009   if (fntype)
02010     cum->nregs = ix86_function_regparm (fntype, fndecl);
02011   else
02012     cum->nregs = ix86_regparm;
02013   if (TARGET_SSE)
02014     cum->sse_nregs = SSE_REGPARM_MAX;
02015   if (TARGET_MMX)
02016     cum->mmx_nregs = MMX_REGPARM_MAX;
02017   cum->warn_sse = true;
02018   cum->warn_mmx = true;
02019   cum->maybe_vaarg = false;
02020 
02021   /* Use ecx and edx registers if function has fastcall attribute */
02022   if (fntype && !TARGET_64BIT)
02023     {
02024       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
02025   {
02026     cum->nregs = 2;
02027     cum->fastcall = 1;
02028   }
02029     }
02030 
02031   /* Determine if this function has variable arguments.  This is
02032      indicated by the last argument being 'void_type_mode' if there
02033      are no variable arguments.  If there are variable arguments, then
02034      we won't pass anything in registers in 32-bit mode. */
02035 
02036   if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
02037     {
02038       for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
02039      param != 0; param = next_param)
02040   {
02041     next_param = TREE_CHAIN (param);
02042     if (next_param == 0 && TREE_VALUE (param) != void_type_node)
02043       {
02044         if (!TARGET_64BIT)
02045     {
02046       cum->nregs = 0;
02047       cum->sse_nregs = 0;
02048       cum->mmx_nregs = 0;
02049       cum->warn_sse = 0;
02050       cum->warn_mmx = 0;
02051       cum->fastcall = 0;
02052     }
02053         cum->maybe_vaarg = true;
02054       }
02055   }
02056     }
02057   if ((!fntype && !libname)
02058       || (fntype && !TYPE_ARG_TYPES (fntype)))
02059     cum->maybe_vaarg = 1;
02060 
02061   if (TARGET_DEBUG_ARG)
02062     fprintf (stderr, ", nregs=%d )\n", cum->nregs);
02063 
02064   return;
02065 }
02066 
02067 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
02068    But in the case of vector types, it is some vector mode.
02069 
02070    When we have only some of our vector isa extensions enabled, then there
02071    are some modes for which vector_mode_supported_p is false.  For these
02072    modes, the generic vector support in gcc will choose some non-vector mode
02073    in order to implement the type.  By computing the natural mode, we'll 
02074    select the proper ABI location for the operand and not depend on whatever
02075    the middle-end decides to do with these vector types.  */
02076 
02077 static enum machine_mode
02078 type_natural_mode (tree type)
02079 {
02080   enum machine_mode mode = TYPE_MODE (type);
02081 
02082   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
02083     {
02084       HOST_WIDE_INT size = int_size_in_bytes (type);
02085       if ((size == 8 || size == 16)
02086     /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
02087     && TYPE_VECTOR_SUBPARTS (type) > 1)
02088   {
02089     enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
02090 
02091     if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
02092       mode = MIN_MODE_VECTOR_FLOAT;
02093     else
02094       mode = MIN_MODE_VECTOR_INT;
02095 
02096     /* Get the mode which has this inner mode and number of units.  */
02097     for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
02098       if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
02099     && GET_MODE_INNER (mode) == innermode)
02100         return mode;
02101 
02102     abort ();
02103   }
02104     }
02105 
02106   return mode;
02107 }
02108 
02109 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
02110    this may not agree with the mode that the type system has chosen for the
02111    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
02112    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
02113 
02114 static rtx
02115 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
02116          unsigned int regno)
02117 {
02118   rtx tmp;
02119 
02120   if (orig_mode != BLKmode)
02121     tmp = gen_rtx_REG (orig_mode, regno);
02122   else
02123     {
02124       tmp = gen_rtx_REG (mode, regno);
02125       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
02126       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
02127     }
02128 
02129   return tmp;
02130 }
02131 
02132 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
02133    of this code is to classify each 8bytes of incoming argument by the register
02134    class and assign registers accordingly.  */
02135 
02136 /* Return the union class of CLASS1 and CLASS2.
02137    See the x86-64 PS ABI for details.  */
02138 
02139 static enum x86_64_reg_class
02140 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
02141 {
02142   /* Rule #1: If both classes are equal, this is the resulting class.  */
02143   if (class1 == class2)
02144     return class1;
02145 
02146   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
02147      the other class.  */
02148   if (class1 == X86_64_NO_CLASS)
02149     return class2;
02150   if (class2 == X86_64_NO_CLASS)
02151     return class1;
02152 
02153   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
02154   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
02155     return X86_64_MEMORY_CLASS;
02156 
02157   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
02158   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
02159       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
02160     return X86_64_INTEGERSI_CLASS;
02161   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
02162       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
02163     return X86_64_INTEGER_CLASS;
02164 
02165   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
02166      MEMORY is used.  */
02167   if (class1 == X86_64_X87_CLASS
02168       || class1 == X86_64_X87UP_CLASS
02169       || class1 == X86_64_COMPLEX_X87_CLASS
02170       || class2 == X86_64_X87_CLASS
02171       || class2 == X86_64_X87UP_CLASS
02172       || class2 == X86_64_COMPLEX_X87_CLASS)
02173     return X86_64_MEMORY_CLASS;
02174 
02175   /* Rule #6: Otherwise class SSE is used.  */
02176   return X86_64_SSE_CLASS;
02177 }
02178 
02179 /* Classify the argument of type TYPE and mode MODE.
02180    CLASSES will be filled by the register class used to pass each word
02181    of the operand.  The number of words is returned.  In case the parameter
02182    should be passed in memory, 0 is returned. As a special case for zero
02183    sized containers, classes[0] will be NO_CLASS and 1 is returned.
02184 
02185    BIT_OFFSET is used internally for handling records and specifies offset
02186    of the offset in bits modulo 256 to avoid overflow cases.
02187 
02188    See the x86-64 PS ABI for details.
02189 */
02190 
02191 static int
02192 classify_argument (enum machine_mode mode, tree type,
02193        enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
02194 {
02195   HOST_WIDE_INT bytes =
02196     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
02197   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
02198 
02199   /* Variable sized entities are always passed/returned in memory.  */
02200   if (bytes < 0)
02201     return 0;
02202 
02203   if (mode != VOIDmode
02204       && targetm.calls.must_pass_in_stack (mode, type))
02205     return 0;
02206 
02207   if (type && AGGREGATE_TYPE_P (type))
02208     {
02209       int i;
02210       tree field;
02211       enum x86_64_reg_class subclasses[MAX_CLASSES];
02212 
02213       /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
02214       if (bytes > 16)
02215   return 0;
02216 
02217       for (i = 0; i < words; i++)
02218   classes[i] = X86_64_NO_CLASS;
02219 
02220       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
02221    signalize memory class, so handle it as special case.  */
02222       if (!words)
02223   {
02224     classes[0] = X86_64_NO_CLASS;
02225     return 1;
02226   }
02227 
02228       /* Classify each field of record and merge classes.  */
02229       if (TREE_CODE (type) == RECORD_TYPE)
02230   {
02231     /* For classes first merge in the field of the subclasses.  */
02232     if (TYPE_BINFO (type))
02233       {
02234         tree binfo, base_binfo;
02235         int basenum;
02236 
02237         for (binfo = TYPE_BINFO (type), basenum = 0;
02238        BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
02239     {
02240        int num;
02241        int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
02242        tree type = BINFO_TYPE (base_binfo);
02243 
02244        num = classify_argument (TYPE_MODE (type),
02245               type, subclasses,
02246               (offset + bit_offset) % 256);
02247        if (!num)
02248          return 0;
02249        for (i = 0; i < num; i++)
02250          {
02251            int pos = (offset + (bit_offset % 64)) / 8 / 8;
02252            classes[i + pos] =
02253        merge_classes (subclasses[i], classes[i + pos]);
02254          }
02255     }
02256       }
02257     /* And now merge the fields of structure.  */
02258     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
02259       {
02260         if (TREE_CODE (field) == FIELD_DECL)
02261     {
02262       int num;
02263 
02264       /* Bitfields are always classified as integer.  Handle them
02265          early, since later code would consider them to be
02266          misaligned integers.  */
02267       if (DECL_BIT_FIELD (field))
02268         {
02269           for (i = int_bit_position (field) / 8 / 8;
02270          i < (int_bit_position (field)
02271               + tree_low_cst (DECL_SIZE (field), 0)
02272         + 63) / 8 / 8; i++)
02273       classes[i] =
02274         merge_classes (X86_64_INTEGER_CLASS,
02275            classes[i]);
02276         }
02277       else
02278         {
02279           num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
02280                  TREE_TYPE (field), subclasses,
02281                  (int_bit_position (field)
02282             + bit_offset) % 256);
02283           if (!num)
02284       return 0;
02285           for (i = 0; i < num; i++)
02286       {
02287         int pos =
02288           (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
02289         classes[i + pos] =
02290           merge_classes (subclasses[i], classes[i + pos]);
02291       }
02292         }
02293     }
02294       }
02295   }
02296       /* Arrays are handled as small records.  */
02297       else if (TREE_CODE (type) == ARRAY_TYPE)
02298   {
02299     int num;
02300     num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
02301            TREE_TYPE (type), subclasses, bit_offset);
02302     if (!num)
02303       return 0;
02304 
02305     /* The partial classes are now full classes.  */
02306     if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
02307       subclasses[0] = X86_64_SSE_CLASS;
02308     if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
02309       subclasses[0] = X86_64_INTEGER_CLASS;
02310 
02311     for (i = 0; i < words; i++)
02312       classes[i] = subclasses[i % num];
02313   }
02314       /* Unions are similar to RECORD_TYPE but offset is always 0.  */
02315       else if (TREE_CODE (type) == UNION_TYPE
02316          || TREE_CODE (type) == QUAL_UNION_TYPE)
02317   {
02318     /* For classes first merge in the field of the subclasses.  */
02319     if (TYPE_BINFO (type))
02320       {
02321         tree binfo, base_binfo;
02322         int basenum;
02323 
02324         for (binfo = TYPE_BINFO (type), basenum = 0;
02325        BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
02326     {
02327        int num;
02328        int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
02329        tree type = BINFO_TYPE (base_binfo);
02330 
02331        num = classify_argument (TYPE_MODE (type),
02332               type, subclasses,
02333               (offset + (bit_offset % 64)) % 256);
02334        if (!num)
02335          return 0;
02336        for (i = 0; i < num; i++)
02337          {
02338            int pos = (offset + (bit_offset % 64)) / 8 / 8;
02339            classes[i + pos] =
02340        merge_classes (subclasses[i], classes[i + pos]);
02341          }
02342     }
02343       }
02344     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
02345       {
02346         if (TREE_CODE (field) == FIELD_DECL)
02347     {
02348       int num;
02349       num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
02350              TREE_TYPE (field), subclasses,
02351              bit_offset);
02352       if (!num)
02353         return 0;
02354       for (i = 0; i < num; i++)
02355         classes[i] = merge_classes (subclasses[i], classes[i]);
02356     }
02357       }
02358   }
02359       else
02360   abort ();
02361 
02362       /* Final merger cleanup.  */
02363       for (i = 0; i < words; i++)
02364   {
02365     /* If one class is MEMORY, everything should be passed in
02366        memory.  */
02367     if (classes[i] == X86_64_MEMORY_CLASS)
02368       return 0;
02369 
02370     /* The X86_64_SSEUP_CLASS should be always preceded by
02371        X86_64_SSE_CLASS.  */
02372     if (classes[i] == X86_64_SSEUP_CLASS
02373         && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
02374       classes[i] = X86_64_SSE_CLASS;
02375 
02376     /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
02377     if (classes[i] == X86_64_X87UP_CLASS
02378         && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
02379       classes[i] = X86_64_SSE_CLASS;
02380   }
02381       return words;
02382     }
02383 
02384   /* Compute alignment needed.  We align all types to natural boundaries with
02385      exception of XFmode that is aligned to 64bits.  */
02386   if (mode != VOIDmode && mode != BLKmode)
02387     {
02388       int mode_alignment = GET_MODE_BITSIZE (mode);
02389 
02390       if (mode == XFmode)
02391   mode_alignment = 128;
02392       else if (mode == XCmode)
02393   mode_alignment = 256;
02394       if (COMPLEX_MODE_P (mode))
02395   mode_alignment /= 2;
02396       /* Misaligned fields are always returned in memory.  */
02397       if (bit_offset % mode_alignment)
02398   return 0;
02399     }
02400 
02401   /* for V1xx modes, just use the base mode */
02402   if (VECTOR_MODE_P (mode)
02403       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
02404     mode = GET_MODE_INNER (mode);
02405 
02406   /* Classification of atomic types.  */
02407   switch (mode)
02408     {
02409     case DImode:
02410     case SImode:
02411     case HImode:
02412     case QImode:
02413     case CSImode:
02414     case CHImode:
02415     case CQImode:
02416       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
02417   classes[0] = X86_64_INTEGERSI_CLASS;
02418       else
02419   classes[0] = X86_64_INTEGER_CLASS;
02420       return 1;
02421     case CDImode:
02422     case TImode:
02423       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
02424       return 2;
02425     case CTImode:
02426       return 0;
02427     case SFmode:
02428       if (!(bit_offset % 64))
02429   classes[0] = X86_64_SSESF_CLASS;
02430       else
02431   classes[0] = X86_64_SSE_CLASS;
02432       return 1;
02433     case DFmode:
02434       classes[0] = X86_64_SSEDF_CLASS;
02435       return 1;
02436     case XFmode:
02437       classes[0] = X86_64_X87_CLASS;
02438       classes[1] = X86_64_X87UP_CLASS;
02439       return 2;
02440     case TFmode:
02441       classes[0] = X86_64_SSE_CLASS;
02442       classes[1] = X86_64_SSEUP_CLASS;
02443       return 2;
02444     case SCmode:
02445       classes[0] = X86_64_SSE_CLASS;
02446       return 1;
02447     case DCmode:
02448       classes[0] = X86_64_SSEDF_CLASS;
02449       classes[1] = X86_64_SSEDF_CLASS;
02450       return 2;
02451     case XCmode:
02452       classes[0] = X86_64_COMPLEX_X87_CLASS;
02453       return 1;
02454     case TCmode:
02455       /* This modes is larger than 16 bytes.  */
02456       return 0;
02457     case V4SFmode:
02458     case V4SImode:
02459     case V16QImode:
02460     case V8HImode:
02461     case V2DFmode:
02462     case V2DImode:
02463       classes[0] = X86_64_SSE_CLASS;
02464       classes[1] = X86_64_SSEUP_CLASS;
02465       return 2;
02466     case V2SFmode:
02467     case V2SImode:
02468     case V4HImode:
02469     case V8QImode:
02470       classes[0] = X86_64_SSE_CLASS;
02471       return 1;
02472     case BLKmode:
02473     case VOIDmode:
02474       return 0;
02475     default:
02476       if (VECTOR_MODE_P (mode))
02477   {
02478     if (bytes > 16)
02479       return 0;
02480     if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
02481       {
02482         if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
02483     classes[0] = X86_64_INTEGERSI_CLASS;
02484         else
02485     classes[0] = X86_64_INTEGER_CLASS;
02486         classes[1] = X86_64_INTEGER_CLASS;
02487         return 1 + (bytes > 8);
02488       }
02489   }
02490       abort ();
02491     }
02492 }
02493 
02494 /* Examine the argument and return set number of register required in each
02495    class.  Return 0 iff parameter should be passed in memory.  */
02496 static int
02497 examine_argument (enum machine_mode mode, tree type, int in_return,
02498       int *int_nregs, int *sse_nregs)
02499 {
02500   enum x86_64_reg_class class[MAX_CLASSES];
02501   int n = classify_argument (mode, type, class, 0);
02502 
02503   *int_nregs = 0;
02504   *sse_nregs = 0;
02505   if (!n)
02506     return 0;
02507   for (n--; n >= 0; n--)
02508     switch (class[n])
02509       {
02510       case X86_64_INTEGER_CLASS:
02511       case X86_64_INTEGERSI_CLASS:
02512   (*int_nregs)++;
02513   break;
02514       case X86_64_SSE_CLASS:
02515       case X86_64_SSESF_CLASS:
02516       case X86_64_SSEDF_CLASS:
02517   (*sse_nregs)++;
02518   break;
02519       case X86_64_NO_CLASS:
02520       case X86_64_SSEUP_CLASS:
02521   break;
02522       case X86_64_X87_CLASS:
02523       case X86_64_X87UP_CLASS:
02524   if (!in_return)
02525     return 0;
02526   break;
02527       case X86_64_COMPLEX_X87_CLASS:
02528   return in_return ? 2 : 0;
02529       case X86_64_MEMORY_CLASS:
02530   abort ();
02531       }
02532   return 1;
02533 }
02534 
02535 /* Construct container for the argument used by GCC interface.  See
02536    FUNCTION_ARG for the detailed description.  */
02537 
02538 static rtx
02539 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
02540          tree type, int in_return, int nintregs, int nsseregs,
02541          const int *intreg, int sse_regno)
02542 {
02543   enum machine_mode tmpmode;
02544   int bytes =
02545     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
02546   enum x86_64_reg_class class[MAX_CLASSES];
02547   int n;
02548   int i;
02549   int nexps = 0;
02550   int needed_sseregs, needed_intregs;
02551   rtx exp[MAX_CLASSES];
02552   rtx ret;
02553 
02554   n = classify_argument (mode, type, class, 0);
02555   if (TARGET_DEBUG_ARG)
02556     {
02557       if (!n)
02558   fprintf (stderr, "Memory class\n");
02559       else
02560   {
02561     fprintf (stderr, "Classes:");
02562     for (i = 0; i < n; i++)
02563       {
02564         fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
02565       }
02566      fprintf (stderr, "\n");
02567   }
02568     }
02569   if (!n)
02570     return NULL;
02571   if (!examine_argument (mode, type, in_return, &needed_intregs,
02572        &needed_sseregs))
02573     return NULL;
02574   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
02575     return NULL;
02576 
02577   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
02578      some less clueful developer tries to use floating-point anyway.  */
02579   if (needed_sseregs && !TARGET_SSE)
02580     {
02581       static bool issued_error;
02582       if (!issued_error)
02583   {
02584     issued_error = true;
02585     if (in_return)
02586       error ("SSE register return with SSE disabled");
02587     else
02588       error ("SSE register argument with SSE disabled");
02589   }
02590       return NULL;
02591     }
02592 
02593   /* First construct simple cases.  Avoid SCmode, since we want to use
02594      single register to pass this type.  */
02595   if (n == 1 && mode != SCmode)
02596     switch (class[0])
02597       {
02598       case X86_64_INTEGER_CLASS:
02599       case X86_64_INTEGERSI_CLASS:
02600   return gen_rtx_REG (mode, intreg[0]);
02601       case X86_64_SSE_CLASS:
02602       case X86_64_SSESF_CLASS:
02603       case X86_64_SSEDF_CLASS:
02604   return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
02605       case X86_64_X87_CLASS:
02606       case X86_64_COMPLEX_X87_CLASS:
02607   return gen_rtx_REG (mode, FIRST_STACK_REG);
02608       case X86_64_NO_CLASS:
02609   /* Zero sized array, struct or class.  */
02610   return NULL;
02611       default:
02612   abort ();
02613       }
02614   if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
02615       && mode != BLKmode)
02616     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
02617   if (n == 2
02618       && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
02619     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
02620   if (n == 2 && class[0] == X86_64_INTEGER_CLASS
02621       && class[1] == X86_64_INTEGER_CLASS
02622       && (mode == CDImode || mode == TImode || mode == TFmode)
02623       && intreg[0] + 1 == intreg[1])
02624     return gen_rtx_REG (mode, intreg[0]);
02625 
02626   /* Otherwise figure out the entries of the PARALLEL.  */
02627   for (i = 0; i < n; i++)
02628     {
02629       switch (class[i])
02630         {
02631     case X86_64_NO_CLASS:
02632       break;
02633     case X86_64_INTEGER_CLASS:
02634     case X86_64_INTEGERSI_CLASS:
02635       /* Merge TImodes on aligned occasions here too.  */
02636       if (i * 8 + 8 > bytes)
02637         tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
02638       else if (class[i] == X86_64_INTEGERSI_CLASS)
02639         tmpmode = SImode;
02640       else
02641         tmpmode = DImode;
02642       /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
02643       if (tmpmode == BLKmode)
02644         tmpmode = DImode;
02645       exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
02646                  gen_rtx_REG (tmpmode, *intreg),
02647                  GEN_INT (i*8));
02648       intreg++;
02649       break;
02650     case X86_64_SSESF_CLASS:
02651       exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
02652                  gen_rtx_REG (SFmode,
02653                   SSE_REGNO (sse_regno)),
02654                  GEN_INT (i*8));
02655       sse_regno++;
02656       break;
02657     case X86_64_SSEDF_CLASS:
02658       exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
02659                  gen_rtx_REG (DFmode,
02660                   SSE_REGNO (sse_regno)),
02661                  GEN_INT (i*8));
02662       sse_regno++;
02663       break;
02664     case X86_64_SSE_CLASS:
02665       if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
02666         tmpmode = TImode;
02667       else
02668         tmpmode = DImode;
02669       exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
02670                  gen_rtx_REG (tmpmode,
02671                   SSE_REGNO (sse_regno)),
02672                  GEN_INT (i*8));
02673       if (tmpmode == TImode)
02674         i++;
02675       sse_regno++;
02676       break;
02677     default:
02678       abort ();
02679   }
02680     }
02681 
02682   /* Empty aligned struct, union or class.  */
02683   if (nexps == 0)
02684     return NULL;
02685 
02686   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
02687   for (i = 0; i < nexps; i++)
02688     XVECEXP (ret, 0, i) = exp [i];
02689   return ret;
02690 }
02691 
02692 /* Update the data in CUM to advance over an argument
02693    of mode MODE and data type TYPE.
02694    (TYPE is null for libcalls where that information may not be available.)  */
02695 
02696 void
02697 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
02698           tree type, int named)
02699 {
02700   int bytes =
02701     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
02702   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
02703 
02704   if (type)
02705     mode = type_natural_mode (type);
02706 
02707   if (TARGET_DEBUG_ARG)
02708     fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
02709        "mode=%s, named=%d)\n\n",
02710        words, cum->words, cum->nregs, cum->sse_nregs,
02711        GET_MODE_NAME (mode), named);
02712 
02713   if (TARGET_64BIT)
02714     {
02715       int int_nregs, sse_nregs;
02716       if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
02717   cum->words += words;
02718       else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
02719   {
02720     cum->nregs -= int_nregs;
02721     cum->sse_nregs -= sse_nregs;
02722     cum->regno += int_nregs;
02723     cum->sse_regno += sse_nregs;
02724   }
02725       else
02726   cum->words += words;
02727     }
02728   else
02729     {
02730       switch (mode)
02731   {
02732   default:
02733     break;
02734 
02735   case BLKmode:
02736     if (bytes < 0)
02737       break;
02738     /* FALLTHRU */
02739 
02740   case DImode:
02741   case SImode:
02742   case HImode:
02743   case QImode:
02744     cum->words += words;
02745     cum->nregs -= words;
02746     cum->regno += words;
02747 
02748     if (cum->nregs <= 0)
02749       {
02750         cum->nregs = 0;
02751         cum->regno = 0;
02752       }
02753     break;
02754 
02755   case TImode:
02756   case V16QImode:
02757   case V8HImode:
02758   case V4SImode:
02759   case V2DImode:
02760   case V4SFmode:
02761   case V2DFmode:
02762     if (!type || !AGGREGATE_TYPE_P (type))
02763       {
02764         cum->sse_words += words;
02765         cum->sse_nregs -= 1;
02766         cum->sse_regno += 1;
02767         if (cum->sse_nregs <= 0)
02768     {
02769       cum->sse_nregs = 0;
02770       cum->sse_regno = 0;
02771     }
02772       }
02773     break;
02774 
02775   case V8QImode:
02776   case V4HImode:
02777   case V2SImode:
02778   case V2SFmode:
02779     if (!type || !AGGREGATE_TYPE_P (type))
02780       {
02781         cum->mmx_words += words;
02782         cum->mmx_nregs -= 1;
02783         cum->mmx_regno += 1;
02784         if (cum->mmx_nregs <= 0)
02785     {
02786       cum->mmx_nregs = 0;
02787       cum->mmx_regno = 0;
02788     }
02789       }
02790     break;
02791   }
02792     }
02793 }
02794 
02795 /* Define where to put the arguments to a function.
02796    Value is zero to push the argument on the stack,
02797    or a hard register in which to store the argument.
02798 
02799    MODE is the argument's machine mode.
02800    TYPE is the data type of the argument (as a tree).
02801     This is null for libcalls where that information may
02802     not be available.
02803    CUM is a variable of type CUMULATIVE_ARGS which gives info about
02804     the preceding args and about the function being called.
02805    NAMED is nonzero if this argument is a named parameter
02806     (otherwise it is an extra parameter matching an ellipsis).  */
02807 
02808 rtx
02809 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
02810         tree type, int named)
02811 {
02812   enum machine_mode mode = orig_mode;
02813   rtx ret = NULL_RTX;
02814   int bytes =
02815     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
02816   int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
02817   static bool warnedsse, warnedmmx;
02818 
02819   /* To simplify the code below, represent vector types with a vector mode
02820      even if MMX/SSE are not active.  */
02821   if (type && TREE_CODE (type) == VECTOR_TYPE)
02822     mode = type_natural_mode (type);
02823 
02824   /* Handle a hidden AL argument containing number of registers for varargs
02825      x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
02826      any AL settings.  */
02827   if (mode == VOIDmode)
02828     {
02829       if (TARGET_64BIT)
02830   return GEN_INT (cum->maybe_vaarg
02831       ? (cum->sse_nregs < 0
02832          ? SSE_REGPARM_MAX
02833          : cum->sse_regno)
02834       : -1);
02835       else
02836   return constm1_rtx;
02837     }
02838   if (TARGET_64BIT)
02839     ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
02840              cum->sse_nregs,
02841              &x86_64_int_parameter_registers [cum->regno],
02842              cum->sse_regno);
02843   else
02844     switch (mode)
02845       {
02846   /* For now, pass fp/complex values on the stack.  */
02847       default:
02848   break;
02849 
02850       case BLKmode:
02851   if (bytes < 0)
02852     break;
02853   /* FALLTHRU */
02854       case DImode:
02855       case SImode:
02856       case HImode:
02857       case QImode:
02858   if (words <= cum->nregs)
02859     {
02860       int regno = cum->regno;
02861 
02862       /* Fastcall allocates the first two DWORD (SImode) or
02863          smaller arguments to ECX and EDX.  */
02864       if (cum->fastcall)
02865         {
02866           if (mode == BLKmode || mode == DImode)
02867             break;
02868 
02869           /* ECX not EAX is the first allocated register.  */
02870           if (regno == 0)
02871       regno = 2;
02872         }
02873       ret = gen_rtx_REG (mode, regno);
02874     }
02875   break;
02876       case TImode:
02877       case V16QImode:
02878       case V8HImode:
02879       case V4SImode:
02880       case V2DImode:
02881       case V4SFmode:
02882       case V2DFmode:
02883   if (!type || !AGGREGATE_TYPE_P (type))
02884     {
02885       if (!TARGET_SSE && !warnedsse && cum->warn_sse)
02886         {
02887     warnedsse = true;
02888     warning ("SSE vector argument without SSE enabled "
02889        "changes the ABI");
02890         }
02891       if (cum->sse_nregs)
02892         ret = gen_reg_or_parallel (mode, orig_mode,
02893            cum->sse_regno + FIRST_SSE_REG);
02894     }
02895   break;
02896       case V8QImode:
02897       case V4HImode:
02898       case V2SImode:
02899       case V2SFmode:
02900   if (!type || !AGGREGATE_TYPE_P (type))
02901     {
02902       if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
02903         {
02904     warnedmmx = true;
02905     warning ("MMX vector argument without MMX enabled "
02906        "changes the ABI");
02907         }
02908       if (cum->mmx_nregs)
02909         ret = gen_reg_or_parallel (mode, orig_mode,
02910            cum->mmx_regno + FIRST_MMX_REG);
02911     }
02912   break;
02913       }
02914 
02915   if (TARGET_DEBUG_ARG)
02916     {
02917       fprintf (stderr,
02918          "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
02919          words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
02920 
02921       if (ret)
02922   print_simple_rtl (stderr, ret);
02923       else
02924   fprintf (stderr, ", stack");
02925 
02926       fprintf (stderr, " )\n");
02927     }
02928 
02929   return ret;
02930 }
02931 
02932 /* A C expression that indicates when an argument must be passed by
02933    reference.  If nonzero for an argument, a copy of that argument is
02934    made in memory and a pointer to the argument is passed instead of
02935    the argument itself.  The pointer is passed in whatever way is
02936    appropriate for passing a pointer to that type.  */
02937 
02938 static bool
02939 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
02940       enum machine_mode mode ATTRIBUTE_UNUSED,
02941       tree type, bool named ATTRIBUTE_UNUSED)
02942 {
02943   if (!TARGET_64BIT)
02944     return 0;
02945 
02946   if (type && int_size_in_bytes (type) == -1)
02947     {
02948       if (TARGET_DEBUG_ARG)
02949   fprintf (stderr, "function_arg_pass_by_reference\n");
02950       return 1;
02951     }
02952 
02953   return 0;
02954 }
02955 
02956 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
02957    ABI.  Only called if TARGET_SSE.  */
02958 static bool
02959 contains_128bit_aligned_vector_p (tree type)
02960 {
02961   enum machine_mode mode = TYPE_MODE (type);
02962   if (SSE_REG_MODE_P (mode)
02963       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
02964     return true;
02965   if (TYPE_ALIGN (type) < 128)
02966     return false;
02967 
02968   if (AGGREGATE_TYPE_P (type))
02969     {
02970       /* Walk the aggregates recursively.  */
02971       if (TREE_CODE (type) == RECORD_TYPE
02972     || TREE_CODE (type) == UNION_TYPE
02973     || TREE_CODE (type) == QUAL_UNION_TYPE)
02974   {
02975     tree field;
02976 
02977     if (TYPE_BINFO (type))
02978       {
02979         tree binfo, base_binfo;
02980         int i;
02981 
02982         for (binfo = TYPE_BINFO (type), i = 0;
02983        BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
02984     if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
02985       return true;
02986       }
02987     /* And now merge the fields of structure.  */
02988     for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
02989       {
02990         if (TREE_CODE (field) == FIELD_DECL
02991       && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
02992     return true;
02993       }
02994   }
02995       /* Just for use if some languages passes arrays by value.  */
02996       else if (TREE_CODE (type) == ARRAY_TYPE)
02997   {
02998     if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
02999       return true;
03000   }
03001       else
03002   abort ();
03003     }
03004   return false;
03005 }
03006 
03007 /* Gives the alignment boundary, in bits, of an argument with the
03008    specified mode and type.  */
03009 
03010 int
03011 ix86_function_arg_boundary (enum machine_mode mode, tree type)
03012 {
03013   int align;
03014   if (type)
03015     align = TYPE_ALIGN (type);
03016   else
03017     align = GET_MODE_ALIGNMENT (mode);
03018   if (align < PARM_BOUNDARY)
03019     align = PARM_BOUNDARY;
03020   if (!TARGET_64BIT)
03021     {
03022       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
03023    make an exception for SSE modes since these require 128bit
03024    alignment.
03025 
03026    The handling here differs from field_alignment.  ICC aligns MMX
03027    arguments to 4 byte boundaries, while structure fields are aligned
03028    to 8 byte boundaries.  */
03029       if (!TARGET_SSE)
03030   align = PARM_BOUNDARY;
03031       else if (!type)
03032   {
03033     if (!SSE_REG_MODE_P (mode))
03034       align = PARM_BOUNDARY;
03035   }
03036       else
03037   {
03038     if (!contains_128bit_aligned_vector_p (type))
03039       align = PARM_BOUNDARY;
03040   }
03041     }
03042   if (align > 128)
03043     align = 128;
03044   return align;
03045 }
03046 
03047 /* Return true if N is a possible register number of function value.  */
03048 bool
03049 ix86_function_value_regno_p (int regno)
03050 {
03051   if (!TARGET_64BIT)
03052     {
03053       return ((regno) == 0
03054         || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
03055         || ((regno) == FIRST_SSE_REG && TARGET_SSE));
03056     }
03057   return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
03058     || ((regno) == FIRST_SSE_REG && TARGET_SSE)
03059     || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
03060 }
03061 
03062 /* Define how to find the value returned by a function.
03063    VALTYPE is the data type of the value (as a tree).
03064    If the precise function being called is known, FUNC is its FUNCTION_DECL;
03065    otherwise, FUNC is 0.  */
03066 rtx
03067 ix86_function_value (tree valtype)
03068 {
03069   enum machine_mode natmode = type_natural_mode (valtype);
03070 
03071   if (TARGET_64BIT)
03072     {
03073       rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
03074              1, REGPARM_MAX, SSE_REGPARM_MAX,
03075              x86_64_int_return_registers, 0);
03076       /* For zero sized structures, construct_container return NULL, but we
03077    need to keep rest of compiler happy by returning meaningful value.  */
03078       if (!ret)
03079   ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
03080       return ret;
03081     }
03082   else
03083     return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
03084 }
03085 
03086 /* Return false iff type is returned in memory.  */
03087 int
03088 ix86_return_in_memory (tree type)
03089 {
03090   int needed_intregs, needed_sseregs, size;
03091   enum machine_mode mode = type_natural_mode (type);
03092 
03093   if (TARGET_64BIT)
03094     return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
03095 
03096   if (mode == BLKmode)
03097     return 1;
03098 
03099   size = int_size_in_bytes (type);
03100 
03101   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
03102     return 0;
03103 
03104   if (VECTOR_MODE_P (mode) || mode == TImode)
03105     {
03106       /* User-created vectors small enough to fit in EAX.  */
03107       if (size < 8)
03108   return 0;
03109 
03110       /* MMX/3dNow values are returned on the stack, since we've
03111    got to EMMS/FEMMS before returning.  */
03112       if (size == 8)
03113   return 1;
03114 
03115       /* SSE values are returned in XMM0, except when it doesn't exist.  */
03116       if (size == 16)
03117   return (TARGET_SSE ? 0 : 1);
03118     }
03119 
03120   if (mode == XFmode)
03121     return 0;
03122 
03123   if (size > 12)
03124     return 1;
03125   return 0;
03126 }
03127 
03128 /* When returning SSE vector types, we have a choice of either
03129      (1) being abi incompatible with a -march switch, or
03130      (2) generating an error.
03131    Given no good solution, I think the safest thing is one warning.
03132    The user won't be able to use -Werror, but....
03133 
03134    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
03135    called in response to actually generating a caller or callee that
03136    uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
03137    via aggregate_value_p for general type probing from tree-ssa.  */
03138 
03139 static rtx
03140 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
03141 {
03142   static bool warned;
03143 
03144   if (!TARGET_SSE && type && !warned)
03145     {
03146       /* Look at the return type of the function, not the function type.  */
03147       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
03148 
03149       if (mode == TImode
03150     || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
03151   {
03152     warned = true;
03153     warning ("SSE vector return without SSE enabled changes the ABI");
03154   }
03155     }
03156 
03157   return NULL;
03158 }
03159 
03160 /* Define how to find the value returned by a library function
03161    assuming the value has mode MODE.  */
03162 rtx
03163 ix86_libcall_value (enum machine_mode mode)
03164 {
03165   if (TARGET_64BIT)
03166     {
03167       switch (mode)
03168   {
03169   case SFmode:
03170   case SCmode:
03171   case DFmode:
03172   case DCmode:
03173   case TFmode:
03174     return gen_rtx_REG (mode, FIRST_SSE_REG);
03175   case XFmode:
03176   case XCmode:
03177     return gen_rtx_REG (mode, FIRST_FLOAT_REG);
03178   case TCmode:
03179     return NULL;
03180   default:
03181     return gen_rtx_REG (mode, 0);
03182   }
03183     }
03184   else
03185     return gen_rtx_REG (mode, ix86_value_regno (mode));
03186 }
03187 
03188 /* Given a mode, return the register to use for a return value.  */
03189 
03190 static int
03191 ix86_value_regno (enum machine_mode mode)
03192 {
03193   /* Floating point return values in %st(0).  */
03194   if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
03195     return FIRST_FLOAT_REG;
03196   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
03197      we prevent this case when sse is not available.  */
03198   if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
03199     return FIRST_SSE_REG;
03200   /* Everything else in %eax.  */
03201   return 0;
03202 }
03203 
03204 /* Create the va_list data type.  */
03205 
03206 static tree
03207 ix86_build_builtin_va_list (void)
03208 {
03209   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
03210 
03211   /* For i386 we use plain pointer to argument area.  */
03212   if (!TARGET_64BIT)
03213     return build_pointer_type (char_type_node);
03214 
03215   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
03216   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
03217 
03218   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
03219           unsigned_type_node);
03220   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
03221           unsigned_type_node);
03222   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
03223           ptr_type_node);
03224   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
03225           ptr_type_node);
03226 
03227   DECL_FIELD_CONTEXT (f_gpr) = record;
03228   DECL_FIELD_CONTEXT (f_fpr) = record;
03229   DECL_FIELD_CONTEXT (f_ovf) = record;
03230   DECL_FIELD_CONTEXT (f_sav) = record;
03231 
03232   TREE_CHAIN (record) = type_decl;
03233   TYPE_NAME (record) = type_decl;
03234   TYPE_FIELDS (record) = f_gpr;
03235   TREE_CHAIN (f_gpr) = f_fpr;
03236   TREE_CHAIN (f_fpr) = f_ovf;
03237   TREE_CHAIN (f_ovf) = f_sav;
03238 
03239   layout_type (record);
03240 
03241   /* The correct type is an array type of one element.  */
03242   return build_array_type (record, build_index_type (size_zero_node));
03243 }
03244 
03245 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Does nothing unless
03246    TARGET_64BIT.  Otherwise it flags the function as needing a varargs register save area and emits stores of the integer argument registers from next_cum.regno onward and, when next_cum.sse_nregs is nonzero, of the SSE argument registers.  */
03247 static void
03248 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
03249            tree type, int *pretend_size ATTRIBUTE_UNUSED,
03250            int no_rtl)
03251 {
03252   CUMULATIVE_ARGS next_cum;
03253   rtx save_area = NULL_RTX, mem;
03254   rtx label;
03255   rtx label_ref;
03256   rtx tmp_reg;
03257   rtx nsse_reg;
03258   int set;
03259   tree fntype;
03260   int stdarg_p;
03261   int i;
03262   /* Nothing to do for non-64-bit targets.  */
03263   if (!TARGET_64BIT)
03264     return;
03265 
03266   /* Indicate to allocate space on the stack for varargs save area.  */
03267   ix86_save_varrargs_registers = 1;
03268 
03269   cfun->stack_alignment_needed = 128; /* Presumably in bits: ix86_compute_frame_layout divides this field by BITS_PER_UNIT.  */
03270   /* STDARG_P is set when the argument type list exists and its last entry is not void_type_node.  */
03271   fntype = TREE_TYPE (current_function_decl);
03272   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
03273         && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
03274       != void_type_node));
03275 
03276   /* For varargs, we do not want to skip the dummy va_dcl argument.
03277      For stdargs, we do want to skip the last named argument.  */
03278   next_cum = *cum;
03279   if (stdarg_p)
03280     function_arg_advance (&next_cum, mode, type, 1);
03281   /* NOTE(review): when NO_RTL is nonzero SAVE_AREA stays NULL_RTX, yet the code below still hands it to plus_constant and emits insns -- confirm callers never pass a nonzero NO_RTL here.  */
03282   if (!no_rtl)
03283     save_area = frame_pointer_rtx;
03284 
03285   set = get_varargs_alias_set ();
03286   /* Store each integer argument register from next_cum.regno up to ix86_regparm into slot I (UNITS_PER_WORD bytes each) of the save area.  */
03287   for (i = next_cum.regno; i < ix86_regparm; i++)
03288     {
03289       mem = gen_rtx_MEM (Pmode,
03290        plus_constant (save_area, i * UNITS_PER_WORD));
03291       set_mem_alias_set (mem, set);
03292       emit_move_insn (mem, gen_rtx_REG (Pmode,
03293           x86_64_int_parameter_registers[i]));
03294     }
03295 
03296   if (next_cum.sse_nregs)
03297     {
03298       /* Now emit code to save SSE registers.  The AX parameter contains number
03299    of SSE parameter registers used to call this function.  We use
03300    sse_prologue_save insn template that produces computed jump across
03301    SSE saves.  We need some preparation work to get this working.  */
03302 
03303       label = gen_label_rtx ();
03304       label_ref = gen_rtx_LABEL_REF (Pmode, label);
03305 
03306       /* Compute address to jump to :
03307          label - 5*eax + nnamed_sse_arguments*5.  NOTE(review): the code below scales both terms by 4, not 5, so this formula looks stale -- confirm against the sse_prologue_save pattern.  */
03308       tmp_reg = gen_reg_rtx (Pmode);
03309       nsse_reg = gen_reg_rtx (Pmode);
03310       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
03311       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
03312             gen_rtx_MULT (Pmode, nsse_reg,
03313               GEN_INT (4))));
03314       if (next_cum.sse_regno)
03315   emit_move_insn
03316     (nsse_reg,
03317      gen_rtx_CONST (DImode,
03318         gen_rtx_PLUS (DImode,
03319           label_ref,
03320           GEN_INT (next_cum.sse_regno * 4))));
03321       else
03322   emit_move_insn (nsse_reg, label_ref);
03323       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
03324 
03325       /* Compute address of memory block we save into.  We always use pointer
03326    pointing 127 bytes after first byte to store - this is needed to keep
03327    instruction size limited by 4 bytes.  */
03328       tmp_reg = gen_reg_rtx (Pmode);
03329       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
03330             plus_constant (save_area,
03331                8 * REGPARM_MAX + 127)));
03332       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
03333       set_mem_alias_set (mem, set);
03334       set_mem_align (mem, BITS_PER_WORD);
03335 
03336       /* And finally do the dirty job!  */
03337       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
03338           GEN_INT (next_cum.sse_regno), label));
03339     }
03340 
03341 }
03342 
03343 /* Implement va_start.  */
03344 
03345 void
03346 ix86_va_start (tree valist, rtx nextarg)
03347 {
03348   HOST_WIDE_INT words, n_gpr, n_fpr;
03349   tree f_gpr, f_fpr, f_ovf, f_sav;
03350   tree gpr, fpr, ovf, sav, t;
03351 
03352   /* Only 64bit target needs something special.  */
03353   if (!TARGET_64BIT)
03354     {
03355       std_expand_builtin_va_start (valist, nextarg);
03356       return;
03357     }
03358 
03359   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
03360   f_fpr = TREE_CHAIN (f_gpr);
03361   f_ovf = TREE_CHAIN (f_fpr);
03362   f_sav = TREE_CHAIN (f_ovf);
03363 
03364   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
03365   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
03366   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
03367   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
03368   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
03369 
03370   /* Count number of gp and fp argument registers used.  */
03371   words = current_function_args_info.words;
03372   n_gpr = current_function_args_info.regno;
03373   n_fpr = current_function_args_info.sse_regno;
03374 
03375   if (TARGET_DEBUG_ARG)
03376     fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
03377        (int) words, (int) n_gpr, (int) n_fpr);
03378 
03379   t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
03380        build_int_cst (NULL_TREE, n_gpr * 8));
03381   TREE_SIDE_EFFECTS (t) = 1;
03382   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
03383 
03384   t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
03385        build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
03386   TREE_SIDE_EFFECTS (t) = 1;
03387   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
03388 
03389   /* Find the overflow area.  */
03390   t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
03391   if (words != 0)
03392     t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
03393          build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
03394   t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
03395   TREE_SIDE_EFFECTS (t) = 1;
03396   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
03397 
03398   /* Find the register save area.
03399      Prologue of the function save it right above stack frame.  */
03400   t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
03401   t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
03402   TREE_SIDE_EFFECTS (t) = 1;
03403   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
03404 }
03405 
03406 /* Implement va_arg.  Returns a tree dereferencing the address of the next
03407    argument of TYPE fetched through VALIST; the statements computing that address are appended to *PRE_P.  Non-64-bit targets defer to std_gimplify_va_arg_expr.  */
03408 tree
03409 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
03410 {
03411   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
03412   tree f_gpr, f_fpr, f_ovf, f_sav;
03413   tree gpr, fpr, ovf, sav, t;
03414   int size, rsize;
03415   tree lab_false, lab_over = NULL_TREE;
03416   tree addr, t2;
03417   rtx container;
03418   int indirect_p = 0;
03419   tree ptrtype;
03420   enum machine_mode nat_mode;
03421 
03422   /* Only 64bit target needs something special.  */
03423   if (!TARGET_64BIT)
03424     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
03425   /* Fetch the four fields of the va_list record, in chain order.  */
03426   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
03427   f_fpr = TREE_CHAIN (f_gpr);
03428   f_ovf = TREE_CHAIN (f_fpr);
03429   f_sav = TREE_CHAIN (f_ovf);
03430 
03431   valist = build_va_arg_indirect_ref (valist);
03432   gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
03433   fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
03434   ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
03435   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
03436   /* An argument passed by reference is fetched as a pointer and dereferenced one extra time at the end.  */
03437   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
03438   if (indirect_p)
03439     type = build_pointer_type (type);
03440   size = int_size_in_bytes (type);
03441   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* SIZE rounded up to whole words.  */
03442   /* When CONTAINER comes back null, only the overflow-area code below is generated.  */
03443   nat_mode = type_natural_mode (type);
03444   container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
03445            REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
03446 
03447   /* Pull the value out of the saved registers.  */
03448 
03449   addr = create_tmp_var (ptr_type_node, "addr");
03450   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
03451 
03452   if (container)
03453     {
03454       int needed_intregs, needed_sseregs;
03455       bool need_temp;
03456       tree int_addr, sse_addr;
03457 
03458       lab_false = create_artificial_label ();
03459       lab_over = create_artificial_label ();
03460 
03461       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
03462       /* A temporary is needed up front when CONTAINER is not a plain REG and the type is aligned beyond 64 bits while using integer registers, or beyond 128 bits.  */
03463       need_temp = (!REG_P (container)
03464        && ((needed_intregs && TYPE_ALIGN (type) > 64)
03465            || TYPE_ALIGN (type) > 128));
03466 
03467       /* In case we are passing structure, verify that it is consecutive block
03468          on the register save area.  If not we need to do moves.  */
03469       if (!need_temp && !REG_P (container))
03470   {
03471     /* Verify that all registers are strictly consecutive  */
03472     if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
03473       {
03474         int i;
03475 
03476         for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
03477     {
03478       rtx slot = XVECEXP (container, 0, i);
03479       if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
03480           || INTVAL (XEXP (slot, 1)) != i * 16)
03481         need_temp = 1;
03482     }
03483       }
03484     else
03485       {
03486         int i;
03487 
03488         for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
03489     {
03490       rtx slot = XVECEXP (container, 0, i);
03491       if (REGNO (XEXP (slot, 0)) != (unsigned int) i
03492           || INTVAL (XEXP (slot, 1)) != i * 8)
03493         need_temp = 1;
03494     }
03495       }
03496   }
03497       if (!need_temp)
03498   {
03499     int_addr = addr;
03500     sse_addr = addr;
03501   }
03502       else
03503   {
03504     int_addr = create_tmp_var (ptr_type_node, "int_addr");
03505     DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
03506     sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
03507     DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
03508   }
03509 
03510       /* First ensure that we fit completely in registers.  If gpr or fpr has already reached the threshold computed below, jump to lab_false.  */
03511       if (needed_intregs)
03512   {
03513     t = build_int_cst (TREE_TYPE (gpr),
03514            (REGPARM_MAX - needed_intregs + 1) * 8);
03515     t = build2 (GE_EXPR, boolean_type_node, gpr, t);
03516     t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
03517     t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
03518     gimplify_and_add (t, pre_p);
03519   }
03520       if (needed_sseregs)
03521   {
03522     t = build_int_cst (TREE_TYPE (fpr),
03523            (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
03524            + REGPARM_MAX * 8);
03525     t = build2 (GE_EXPR, boolean_type_node, fpr, t);
03526     t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
03527     t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
03528     gimplify_and_add (t, pre_p);
03529   }
03530 
03531       /* Compute index to start of area used for integer regs.  */
03532       if (needed_intregs)
03533   {
03534     /* int_addr = gpr + sav; */
03535     t = fold_convert (ptr_type_node, gpr);
03536     t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
03537     t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
03538     gimplify_and_add (t, pre_p);
03539   }
03540       if (needed_sseregs)
03541   {
03542     /* sse_addr = fpr + sav; */
03543     t = fold_convert (ptr_type_node, fpr);
03544     t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
03545     t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
03546     gimplify_and_add (t, pre_p);
03547   }
03548       if (need_temp)
03549   {
03550     int i;
03551     tree temp = create_tmp_var (type, "va_arg_tmp");
03552 
03553     /* addr = &temp; */
03554     t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
03555     t = build2 (MODIFY_EXPR, void_type_node, addr, t);
03556     gimplify_and_add (t, pre_p);
03557     /* Copy each piece of CONTAINER from its save-area slot to its offset inside the temporary.  */
03558     for (i = 0; i < XVECLEN (container, 0); i++)
03559       {
03560         rtx slot = XVECEXP (container, 0, i);
03561         rtx reg = XEXP (slot, 0);
03562         enum machine_mode mode = GET_MODE (reg);
03563         tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
03564         tree addr_type = build_pointer_type (piece_type);
03565         tree src_addr, src;
03566         int src_offset;
03567         tree dest_addr, dest;
03568 
03569         if (SSE_REGNO_P (REGNO (reg)))
03570     {
03571       src_addr = sse_addr;
03572       src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
03573     }
03574         else
03575     {
03576       src_addr = int_addr;
03577       src_offset = REGNO (reg) * 8;
03578     }
03579         src_addr = fold_convert (addr_type, src_addr);
03580         src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
03581                size_int (src_offset)));
03582         src = build_va_arg_indirect_ref (src_addr);
03583 
03584         dest_addr = fold_convert (addr_type, addr);
03585         dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
03586           size_int (INTVAL (XEXP (slot, 1)))));
03587         dest = build_va_arg_indirect_ref (dest_addr);
03588 
03589         t = build2 (MODIFY_EXPR, void_type_node, dest, src);
03590         gimplify_and_add (t, pre_p);
03591       }
03592   }
03593       /* Advance gpr and fpr past the registers just consumed (8 bytes per integer register, 16 per SSE register).  */
03594       if (needed_intregs)
03595   {
03596     t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
03597           build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
03598     t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
03599     gimplify_and_add (t, pre_p);
03600   }
03601       if (needed_sseregs)
03602   {
03603     t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
03604           build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
03605     t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
03606     gimplify_and_add (t, pre_p);
03607   }
03608       /* The register path is complete: jump over the overflow-area code, which begins at lab_false.  */
03609       t = build1 (GOTO_EXPR, void_type_node, lab_over);
03610       gimplify_and_add (t, pre_p);
03611 
03612       t = build1 (LABEL_EXPR, void_type_node, lab_false);
03613       append_to_statement_list (t, pre_p);
03614     }
03615 
03616   /* ... otherwise out of the overflow area.  */
03617 
03618   /* Care for on-stack alignment if needed.  */
03619   if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
03620     t = ovf;
03621   else
03622     {
03623       HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
03624       t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
03625      build_int_cst (TREE_TYPE (ovf), align - 1));
03626       t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
03627      build_int_cst (TREE_TYPE (t), -align));
03628     }
03629   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
03630   /* addr = the (possibly aligned) overflow pointer.  */
03631   t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
03632   gimplify_and_add (t2, pre_p);
03633   /* ovf = that pointer advanced by RSIZE words.  */
03634   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
03635         build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
03636   t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
03637   gimplify_and_add (t, pre_p);
03638 
03639   if (container)
03640     {
03641       t = build1 (LABEL_EXPR, void_type_node, lab_over);
03642       append_to_statement_list (t, pre_p);
03643     }
03644   /* Both paths leave the argument's address in ADDR; convert it to a pointer to TYPE.  */
03645   ptrtype = build_pointer_type (type);
03646   addr = fold_convert (ptrtype, addr);
03647 
03648   if (indirect_p)
03649     addr = build_va_arg_indirect_ref (addr);
03650   return build_va_arg_indirect_ref (addr);
03651 }
03652 
03653 /* Return nonzero if OPNUM's MEM should be matched
03654    in movabs* patterns.  */
03655 
03656 int
03657 ix86_check_movabs (rtx insn, int opnum)
03658 {
03659   rtx set, mem;
03660 
03661   set = PATTERN (insn);
03662   if (GET_CODE (set) == PARALLEL)
03663     set = XVECEXP (set, 0, 0);
03664   if (GET_CODE (set) != SET)
03665     abort ();
03666   mem = XEXP (set, opnum);
03667   while (GET_CODE (mem) == SUBREG)
03668     mem = SUBREG_REG (mem);
03669   if (GET_CODE (mem) != MEM)
03670     abort ();
03671   return (volatile_ok || !MEM_VOLATILE_P (mem));
03672 }
03673 
03674 /* Initialize the table of extra 80387 mathematical constants.  */
03675 
03676 static void
03677 init_ext_80387_constants (void)
03678 {
03679   static const char * cst[5] =
03680   {
03681     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
03682     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
03683     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
03684     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
03685     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
03686   };
03687   int i;
03688 
03689   for (i = 0; i < 5; i++)
03690     {
03691       real_from_string (&ext_80387_constants_table[i], cst[i]);
03692       /* Ensure each constant is rounded to XFmode precision.  */
03693       real_convert (&ext_80387_constants_table[i],
03694         XFmode, &ext_80387_constants_table[i]);
03695     }
03696 
03697   ext_80387_constants_init = 1;
03698 }
03699 
03700 /* Return true if the constant is something that can be loaded with
03701    a special instruction.  */
03702 
03703 int
03704 standard_80387_constant_p (rtx x)
03705 {
03706   if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
03707     return -1;
03708 
03709   if (x == CONST0_RTX (GET_MODE (x)))
03710     return 1;
03711   if (x == CONST1_RTX (GET_MODE (x)))
03712     return 2;
03713 
03714   /* For XFmode constants, try to find a special 80387 instruction when
03715      optimizing for size or on those CPUs that benefit from them.  */
03716   if (GET_MODE (x) == XFmode
03717       && (optimize_size || x86_ext_80387_constants & TUNEMASK))
03718     {
03719       REAL_VALUE_TYPE r;
03720       int i;
03721 
03722       if (! ext_80387_constants_init)
03723   init_ext_80387_constants ();
03724 
03725       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
03726       for (i = 0; i < 5; i++)
03727         if (real_identical (&r, &ext_80387_constants_table[i]))
03728     return i + 3;
03729     }
03730 
03731   return 0;
03732 }
03733 
03734 /* Return the opcode of the special instruction to be used to load
03735    the constant X.  */
03736 
03737 const char *
03738 standard_80387_constant_opcode (rtx x)
03739 {
03740   switch (standard_80387_constant_p (x))
03741     {
03742     case 1:
03743       return "fldz";
03744     case 2:
03745       return "fld1";
03746     case 3:
03747       return "fldlg2";
03748     case 4:
03749       return "fldln2";
03750     case 5:
03751       return "fldl2e";
03752     case 6:
03753       return "fldl2t";
03754     case 7:
03755       return "fldpi";
03756     }
03757   abort ();
03758 }
03759 
03760 /* Return the CONST_DOUBLE representing the 80387 constant that is
03761    loaded by the specified special instruction.  The argument IDX
03762    matches the return value from standard_80387_constant_p.  */
03763 
03764 rtx
03765 standard_80387_constant_rtx (int idx)
03766 {
03767   int i;
03768 
03769   if (! ext_80387_constants_init)
03770     init_ext_80387_constants ();
03771 
03772   switch (idx)
03773     {
03774     case 3:
03775     case 4:
03776     case 5:
03777     case 6:
03778     case 7:
03779       i = idx - 3;
03780       break;
03781 
03782     default:
03783       abort ();
03784     }
03785 
03786   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
03787                XFmode);
03788 }
03789 
03790 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
03791  */
03792 int
03793 standard_sse_constant_p (rtx x)
03794 {
03795   if (x == const0_rtx)
03796     return 1;
03797   return (x == CONST0_RTX (GET_MODE (x)));
03798 }
03799 
03800 /* Returns 1 if OP contains a symbol reference */
03801 
03802 int
03803 symbolic_reference_mentioned_p (rtx op)
03804 {
03805   const char *fmt;
03806   int i;
03807 
03808   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
03809     return 1;
03810 
03811   fmt = GET_RTX_FORMAT (GET_CODE (op));
03812   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
03813     {
03814       if (fmt[i] == 'E')
03815   {
03816     int j;
03817 
03818     for (j = XVECLEN (op, i) - 1; j >= 0; j--)
03819       if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
03820         return 1;
03821   }
03822 
03823       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
03824   return 1;
03825     }
03826 
03827   return 0;
03828 }
03829 
03830 /* Return 1 if it is appropriate to emit `ret' instructions in the
03831    body of a function.  Do this only if the epilogue is simple, needing a
03832    couple of insns.  Prior to reloading, we can't tell how many registers
03833    must be saved, so return 0 then.  Return 0 if there is no frame
03834    marker to de-allocate.  */
03835 
03836 int
03837 ix86_can_use_return_insn_p (void)
03838 {
03839   struct ix86_frame frame;
03840 
03841   if (! reload_completed || frame_pointer_needed)
03842     return 0;
03843 
03844   /* Don't allow more than 32 pop, since that's all we can do
03845      with one instruction.  */
03846   if (current_function_pops_args
03847       && current_function_args_size >= 32768)
03848     return 0;
03849 
03850   ix86_compute_frame_layout (&frame);
03851   return frame.to_allocate == 0 && frame.nregs == 0;
03852 }
03853 
03854 /* Value should be nonzero if functions must have frame pointers.
03855    Zero means the frame pointer need not be set up (and parms may
03856    be accessed via the stack pointer) in functions that seem suitable.  */
03857 
03858 int
03859 ix86_frame_pointer_required (void)
03860 {
03861   /* If we accessed previous frames, then the generated code expects
03862      to be able to access the saved ebp value in our frame.  */
03863   if (cfun->machine->accesses_prev_frame)
03864     return 1;
03865 
03866   /* Several x86 os'es need a frame pointer for other reasons,
03867      usually pertaining to setjmp.  */
03868   if (SUBTARGET_FRAME_POINTER_REQUIRED)
03869     return 1;
03870 
03871   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
03872      the frame pointer by default.  Turn it back on now if we've not
03873      got a leaf function.  */
03874   if (TARGET_OMIT_LEAF_FRAME_POINTER
03875       && (!current_function_is_leaf))
03876     return 1;
03877 
03878   if (current_function_profile)
03879     return 1;
03880 
03881   return 0;
03882 }
03883 
03884 /* Record that the current function accesses previous call frames.  */
03885 
03886 void
03887 ix86_setup_frame_addresses (void)
03888 {
03889   cfun->machine->accesses_prev_frame = 1;
03890 }
03891 
/* USE_HIDDEN_LINKONCE is 1 only when both HAVE_GAS_HIDDEN and
   SUPPORTS_ONE_ONLY are defined, and 0 otherwise.  */
#if !defined(HAVE_GAS_HIDDEN) || !defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 0
#else
# define USE_HIDDEN_LINKONCE 1
#endif

/* Bit mask, indexed by register number, of the pc thunks that
   output_set_got has referenced and ix86_file_end must emit.  */
static int pic_labels_used;
03899 
03900 /* Fills in the label name that should be used for a pc thunk for
03901    the given register.  */
03902 
03903 static void
03904 get_pc_thunk_name (char name[32], unsigned int regno)
03905 {
03906   if (USE_HIDDEN_LINKONCE)
03907     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
03908   else
03909     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
03910 }
03911 
03912 
03913 /* This function generates code for -fpic that loads %ebx with
03914    the return address of the caller and then returns.  */
03915 
03916 void
03917 ix86_file_end (void)
03918 {
03919   rtx xops[2];
03920   int regno;
03921 
03922   for (regno = 0; regno < 8; ++regno)
03923     {
03924       char name[32];
03925 
03926       if (! ((pic_labels_used >> regno) & 1))
03927   continue;
03928 
03929       get_pc_thunk_name (name, regno);
03930 
03931       if (USE_HIDDEN_LINKONCE)
03932   {
03933     tree decl;
03934 
03935     decl = build_decl (FUNCTION_DECL, get_identifier (name),
03936            error_mark_node);
03937     TREE_PUBLIC (decl) = 1;
03938     TREE_STATIC (decl) = 1;
03939     DECL_ONE_ONLY (decl) = 1;
03940 
03941     (*targetm.asm_out.unique_section) (decl, 0);
03942     named_section (decl, NULL, 0);
03943 
03944     (*targetm.asm_out.globalize_label) (asm_out_file, name);
03945     fputs ("\t.hidden\t", asm_out_file);
03946     assemble_name (asm_out_file, name);
03947     fputc ('\n', asm_out_file);
03948     ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
03949   }
03950       else
03951   {
03952     text_section ();
03953     ASM_OUTPUT_LABEL (asm_out_file, name);
03954   }
03955 
03956       xops[0] = gen_rtx_REG (SImode, regno);
03957       xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
03958       output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
03959       output_asm_insn ("ret", xops);
03960     }
03961 
03962   if (NEED_INDICATE_EXEC_STACK)
03963     file_end_indicate_exec_stack ();
03964 }
03965 
03966 /* Emit code for the SET_GOT patterns.  */
03967 
03968 const char *
03969 output_set_got (rtx dest)
03970 {
03971   rtx xops[3];
03972 
03973   xops[0] = dest;
03974   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
03975 
03976   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
03977     {
03978       xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
03979 
03980       if (!flag_pic)
03981   output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
03982       else
03983   output_asm_insn ("call\t%a2", xops);
03984 
03985 #if TARGET_MACHO
03986       /* Output the "canonical" label name ("Lxx$pb") here too.  This
03987          is what will be referred to by the Mach-O PIC subsystem.  */
03988       ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
03989 #endif
03990       (*targetm.asm_out.internal_label) (asm_out_file, "L",
03991          CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
03992 
03993       if (flag_pic)
03994   output_asm_insn ("pop{l}\t%0", xops);
03995     }
03996   else
03997     {
03998       char name[32];
03999       get_pc_thunk_name (name, REGNO (dest));
04000       pic_labels_used |= 1 << REGNO (dest);
04001 
04002       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
04003       xops[2] = gen_rtx_MEM (QImode, xops[2]);
04004       output_asm_insn ("call\t%X2", xops);
04005     }
04006 
04007   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
04008     output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
04009   else if (!TARGET_MACHO)
04010     output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
04011 
04012   return "";
04013 }
04014 
04015 /* Generate an "push" pattern for input ARG.  */
04016 
04017 static rtx
04018 gen_push (rtx arg)
04019 {
04020   return gen_rtx_SET (VOIDmode,
04021           gen_rtx_MEM (Pmode,
04022            gen_rtx_PRE_DEC (Pmode,
04023                 stack_pointer_rtx)),
04024           arg);
04025 }
04026 
04027 /* Return >= 0 if there is an unused call-clobbered register available
04028    for the entire function.  */
04029 
04030 static unsigned int
04031 ix86_select_alt_pic_regnum (void)
04032 {
04033   if (current_function_is_leaf && !current_function_profile)
04034     {
04035       int i;
04036       for (i = 2; i >= 0; --i)
04037         if (!regs_ever_live[i])
04038     return i;
04039     }
04040 
04041   return INVALID_REGNUM;
04042 }
04043 
04044 /* Return 1 if we need to save REGNO.  */
04045 static int
04046 ix86_save_reg (unsigned int regno, int maybe_eh_return)
04047 {
04048   if (pic_offset_table_rtx
04049       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
04050       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
04051     || current_function_profile
04052     || current_function_calls_eh_return
04053     || current_function_uses_const_pool))
04054     {
04055       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
04056   return 0;
04057       return 1;
04058     }
04059 
04060   if (current_function_calls_eh_return && maybe_eh_return)
04061     {
04062       unsigned i;
04063       for (i = 0; ; i++)
04064   {
04065     unsigned test = EH_RETURN_DATA_REGNO (i);
04066     if (test == INVALID_REGNUM)
04067       break;
04068     if (test == regno)
04069       return 1;
04070   }
04071     }
04072 
04073   return (regs_ever_live[regno]
04074     && !call_used_regs[regno]
04075     && !fixed_regs[regno]
04076     && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
04077 }
04078 
04079 /* Return number of registers to be saved on the stack.  */
04080 
04081 static int
04082 ix86_nsaved_regs (void)
04083 {
04084   int nregs = 0;
04085   int regno;
04086 
04087   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
04088     if (ix86_save_reg (regno, true))
04089       nregs++;
04090   return nregs;
04091 }
04092 
04093 /* Return the offset between two registers, one to be eliminated, and the other
04094    its replacement, at the start of a routine.  */
04095 
04096 HOST_WIDE_INT
04097 ix86_initial_elimination_offset (int from, int to)
04098 {
04099   struct ix86_frame frame;
04100   ix86_compute_frame_layout (&frame);
04101 
04102   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
04103     return frame.hard_frame_pointer_offset;
04104   else if (from == FRAME_POINTER_REGNUM
04105      && to == HARD_FRAME_POINTER_REGNUM)
04106     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
04107   else
04108     {
04109       if (to != STACK_POINTER_REGNUM)
04110   abort ();
04111       else if (from == ARG_POINTER_REGNUM)
04112   return frame.stack_pointer_offset;
04113       else if (from != FRAME_POINTER_REGNUM)
04114   abort ();
04115       else
04116   return frame.stack_pointer_offset - frame.frame_pointer_offset;
04117     }
04118 }
04119 
04120 /* Fill structure ix86_frame about frame of currently computed function.
   FRAME receives nregs, save_regs_using_mov, va_arg_size, padding1,
   padding2, outgoing_arguments_size, to_allocate, red_zone_size and the
   hard_frame_pointer, frame_pointer and stack_pointer offsets.

   As a side effect, when not optimizing for size and the number of saved
   registers differs from the one recorded in cfun->machine, the cached
   use_fast_prologue_epilogue decision in cfun->machine is recomputed.  */
04121 
04122 static void
04123 ix86_compute_frame_layout (struct ix86_frame *frame)
04124 {
04125   HOST_WIDE_INT total_size;
04126   unsigned int stack_alignment_needed;
04127   HOST_WIDE_INT offset;
04128   unsigned int preferred_alignment;
04129   HOST_WIDE_INT size = get_frame_size ();
04130 
04131   frame->nregs = ix86_nsaved_regs ();
04132   total_size = size;
04133 
  /* Both alignments are converted from bits to bytes.  */
04134   stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
04135   preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
04136 
04137   /* During reload iteration the amount of registers saved can change.
04138      Recompute the value as needed.  Do not recompute when amount of registers
04139      didn't change as reload does multiple calls to the function and does not
04140      expect the decision to change within single iteration.  */
04141   if (!optimize_size
04142       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
04143     {
04144       int count = frame->nregs;
04145 
04146       cfun->machine->use_fast_prologue_epilogue_nregs = count;
04147       /* The fast prologue uses move instead of push to save registers.  This
04148          is significantly longer, but also executes faster as modern hardware
04149          can execute the moves in parallel, but can't do that for push/pop.
04150 
04151    Be careful about choosing what prologue to emit:  When function takes
04152    many instructions to execute we may use slow version as well as in
04153    case function is known to be outside hot spot (this is known with
04154    feedback only).  Weight the size of function by number of registers
04155    to save as it is cheap to use one or two push instructions but very
04156    slow to use many of them.  */
04157       if (count)
04158   count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
04159       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
04160     || (flag_branch_probabilities
04161         && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
04162         cfun->machine->use_fast_prologue_epilogue = false;
04163       else
04164         cfun->machine->use_fast_prologue_epilogue
04165      = !expensive_function_p (count);
04166     }
04167   if (TARGET_PROLOGUE_USING_MOVE
04168       && cfun->machine->use_fast_prologue_epilogue)
04169     frame->save_regs_using_mov = true;
04170   else
04171     frame->save_regs_using_mov = false;
04172 
04173 
04174   /* Skip return address and saved base pointer.  */
04175   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
04176 
04177   frame->hard_frame_pointer_offset = offset;
04178 
04179   /* Do some sanity checking of stack_alignment_needed and
04180      preferred_alignment, since i386 port is the only using those features
04181      that may break easily.  */
04182 
04183   if (size && !stack_alignment_needed)
04184     abort ();
04185   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
04186     abort ();
04187   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
04188     abort ();
04189   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
04190     abort ();
04191 
  /* Never align to less than STACK_BOUNDARY.  */
04192   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
04193     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
04194 
04195   /* Register save area */
04196   offset += frame->nregs * UNITS_PER_WORD;
04197 
04198   /* Va-arg area */
04199   if (ix86_save_varrargs_registers)
04200     {
04201       offset += X86_64_VARARGS_SIZE;
04202       frame->va_arg_size = X86_64_VARARGS_SIZE;
04203     }
04204   else
04205     frame->va_arg_size = 0;
04206 
04207   /* Align start of frame for local function.  */
04208   frame->padding1 = ((offset + stack_alignment_needed - 1)
04209          & -stack_alignment_needed) - offset;
04210 
04211   offset += frame->padding1;
04212 
04213   /* Frame pointer points here.  */
04214   frame->frame_pointer_offset = offset;
04215 
04216   offset += size;
04217 
04218   /* Add outgoing arguments area.  Can be skipped if we eliminated
04219      all the function calls as dead code.
04220      Skipping is however impossible when function calls alloca.  Alloca
04221      expander assumes that last current_function_outgoing_args_size
04222      of stack frame are unused.  */
04223   if (ACCUMULATE_OUTGOING_ARGS
04224       && (!current_function_is_leaf || current_function_calls_alloca))
04225     {
04226       offset += current_function_outgoing_args_size;
04227       frame->outgoing_arguments_size = current_function_outgoing_args_size;
04228     }
04229   else
04230     frame->outgoing_arguments_size = 0;
04231 
04232   /* Align stack boundary.  Only needed if we're calling another function
04233      or using alloca.  */
04234   if (!current_function_is_leaf || current_function_calls_alloca)
04235     frame->padding2 = ((offset + preferred_alignment - 1)
04236            & -preferred_alignment) - offset;
04237   else
04238     frame->padding2 = 0;
04239 
04240   offset += frame->padding2;
04241 
04242   /* We've reached end of stack frame.  */
04243   frame->stack_pointer_offset = offset;
04244 
04245   /* Size prologue needs to allocate.  */
04246   frame->to_allocate =
04247     (size + frame->padding1 + frame->padding2
04248      + frame->outgoing_arguments_size + frame->va_arg_size);
04249 
  /* Do not save with moves when nothing is allocated and at most one
     register is saved, or when a 64-bit frame is 0x80000000 bytes or
     larger.  */
04250   if ((!frame->to_allocate && frame->nregs <= 1)
04251       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
04252     frame->save_regs_using_mov = false;
04253 
  /* With TARGET_RED_ZONE, a leaf function whose stack pointer is unchanging
     takes up to RED_ZONE_SIZE - RED_ZONE_RESERVE bytes out of the
     allocation; the register save area is included when registers are
     saved with moves.  */
04254   if (TARGET_RED_ZONE && current_function_sp_is_unchanging
04255       && current_function_is_leaf)
04256     {
04257       frame->red_zone_size = frame->to_allocate;
04258       if (frame->save_regs_using_mov)
04259   frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
04260       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
04261   frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
04262     }
04263   else
04264     frame->red_zone_size = 0;
04265   frame->to_allocate -= frame->red_zone_size;
04266   frame->stack_pointer_offset -= frame->red_zone_size;
  /* Disabled debugging dump of the computed layout.  */
04267 #if 0
04268   fprintf (stderr, "nregs: %i\n", frame->nregs);
04269   fprintf (stderr, "size: %i\n", size);
04270   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
04271   fprintf (stderr, "padding1: %i\n", frame->padding1);
04272   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
04273   fprintf (stderr, "padding2: %i\n", frame->padding2);
04274   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
04275   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
04276   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
04277   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
04278      frame->hard_frame_pointer_offset);
04279   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
04280 #endif
04281 }
04282 
04283 /* Emit code to save registers in the prologue.  */
04284 
04285 static void
04286 ix86_emit_save_regs (void)
04287 {
04288   int regno;
04289   rtx insn;
04290 
04291   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
04292     if (ix86_save_reg (regno, true))
04293       {
04294   insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
04295   RTX_FRAME_RELATED_P (insn) = 1;
04296       }
04297 }
04298 
04299 /* Emit code to save registers using MOV insns.  First register
04300    is restored from POINTER + OFFSET.  */
04301 static void
04302 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
04303 {
04304   int regno;
04305   rtx insn;
04306 
04307   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
04308     if (ix86_save_reg (regno, true))
04309       {
04310   insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
04311                  Pmode, offset),
04312              gen_rtx_REG (Pmode, regno));
04313   RTX_FRAME_RELATED_P (insn) = 1;
04314   offset += UNITS_PER_WORD;
04315       }
04316 }
04317 
04318 /* Expand prologue or epilogue stack adjustment.
04319    The pattern exist to put a dependency on all ebp-based memory accesses.
04320    STYLE should be negative if instructions should be marked as frame related,
04321    zero if %r11 register is live and cannot be freely used and positive
04322    otherwise.  */
04323 
04324 static void
04325 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
04326 {
04327   rtx insn;
04328 
04329   if (! TARGET_64BIT)
04330     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
04331   else if (x86_64_immediate_operand (offset, DImode))
04332     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
04333   else
04334     {
04335       rtx r11;
04336       /* r11 is used by indirect sibcall return as well, set before the
04337    epilogue and used after the epilogue.  ATM indirect sibcall
04338    shouldn't be used together with huge frame sizes in one
04339    function because of the frame_size check in sibcall.c.  */
04340       if (style == 0)
04341   abort ();
04342       r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
04343       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
04344       if (style < 0)
04345   RTX_FRAME_RELATED_P (insn) = 1;
04346       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
04347                      offset));
04348     }
04349   if (style < 0)
04350     RTX_FRAME_RELATED_P (insn) = 1;
04351 }
04352 
04353 /* Expand the prologue into a bunch of separate insns.
   In order: set up the hard frame pointer when one is needed, save the
   registers (with pushes, or with moves when frame.save_regs_using_mov),
   allocate the frame (through allocate_stack_worker when TARGET_STACK_PROBE
   is set and the size reaches CHECK_STACK_LIMIT), load the PIC register
   when it is used, and emit a blockage when profiling.  */
04354 
04355 void
04356 ix86_expand_prologue (void)
04357 {
04358   rtx insn;
04359   bool pic_reg_used;
04360   struct ix86_frame frame;
04361   HOST_WIDE_INT allocate;
04362 
04363   ix86_compute_frame_layout (&frame);
04364 
04365   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
04366      slower on all targets.  Also sdb doesn't like it.  */
04367 
04368   if (frame_pointer_needed)
04369     {
04370       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
04371       RTX_FRAME_RELATED_P (insn) = 1;
04372 
04373       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
04374       RTX_FRAME_RELATED_P (insn) = 1;
04375     }
04376 
04377   allocate = frame.to_allocate;
04378 
  /* Pushes allocate their own slots; when saving with moves the register
     save area is added to the amount allocated below.  */
04379   if (!frame.save_regs_using_mov)
04380     ix86_emit_save_regs ();
04381   else
04382     allocate += frame.nregs * UNITS_PER_WORD;
04383 
04384   /* When using red zone we may start register saving before allocating
04385      the stack frame saving one cycle of the prologue.  */
04386   if (TARGET_RED_ZONE && frame.save_regs_using_mov)
04387     ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
04388            : stack_pointer_rtx,
04389            -frame.nregs * UNITS_PER_WORD);
04390 
04391   if (allocate == 0)
04392     ;
04393   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
04394     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
04395              GEN_INT (-allocate), -1);
04396   else
04397     {
04398       /* Only valid for Win32.  */
04399       rtx eax = gen_rtx_REG (SImode, 0);
04400       bool eax_live = ix86_eax_live_at_start_p ();
04401       rtx t;
04402 
04403       if (TARGET_64BIT)
04404         abort ();
04405 
      /* The worker takes the size in eax.  If eax is live on entry it is
         pushed first, and the amount left to allocate shrinks by the 4
         bytes of that push.  */
04406       if (eax_live)
04407   {
04408     emit_insn (gen_push (eax));
04409     allocate -= 4;
04410   }
04411 
04412       emit_move_insn (eax, GEN_INT (allocate));
04413 
04414       insn = emit_insn (gen_allocate_stack_worker (eax));
04415       RTX_FRAME_RELATED_P (insn) = 1;
      /* Attach the equivalent stack pointer adjustment as a
         REG_FRAME_RELATED_EXPR note on the worker insn.  */
04416       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
04417       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
04418       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
04419               t, REG_NOTES (insn));
04420 
      /* Reload the value of eax that was pushed above.  */
04421       if (eax_live)
04422   {
04423     if (frame_pointer_needed)
04424       t = plus_constant (hard_frame_pointer_rtx,
04425              allocate
04426              - frame.to_allocate
04427              - frame.nregs * UNITS_PER_WORD);
04428     else
04429       t = plus_constant (stack_pointer_rtx, allocate);
04430     emit_move_insn (eax, gen_rtx_MEM (SImode, t));
04431   }
04432     }
04433 
  /* Without a red zone the moves are emitted only after the frame has been
     allocated.  */
04434   if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
04435     {
04436       if (!frame_pointer_needed || !frame.to_allocate)
04437         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
04438       else
04439         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
04440                -frame.nregs * UNITS_PER_WORD);
04441     }
04442 
04443   pic_reg_used = false;
04444   if (pic_offset_table_rtx
04445       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
04446     || current_function_profile))
04447     {
04448       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
04449 
      /* Retarget the PIC register rtx when an alternate register was
         selected.  */
04450       if (alt_pic_reg_used != INVALID_REGNUM)
04451   REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
04452 
04453       pic_reg_used = true;
04454     }
04455 
04456   if (pic_reg_used)
04457     {
04458       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
04459 
04460       /* Even with accurate pre-reload life analysis, we can wind up
04461    deleting all references to the pic register after reload.
04462    Consider if cross-jumping unifies two sides of a branch
04463    controlled by a comparison vs the only read from a global.
04464    In which case, allow the set_got to be deleted, though we're
04465    too late to do anything about the ebx save in the prologue.  */
04466       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
04467     }
04468 
04469   /* Prevent function calls from being scheduled before the call to mcount.
04470      In the pic_reg_used case, make sure that the got load isn't deleted.  */
04471   if (current_function_profile)
04472     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
04473 }
04474 
04475 /* Emit code to restore saved registers using MOV insns.  First register
04476    is restored from POINTER + OFFSET.  */
04477 static void
04478 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
04479           int maybe_eh_return)
04480 {
04481   int regno;
04482   rtx base_address = gen_rtx_MEM (Pmode, pointer);
04483 
04484   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
04485     if (ix86_save_reg (regno, maybe_eh_return))
04486       {
04487   /* Ensure that adjust_address won't be forced to produce pointer
04488      out of range allowed by x86-64 instruction set.  */
04489   if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
04490     {
04491       rtx r11;
04492 
04493       r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
04494       emit_move_insn (r11, GEN_INT (offset));
04495       emit_insn (gen_adddi3 (r11, r11, pointer));
04496       base_address = gen_rtx_MEM (Pmode, r11);
04497       offset = 0;
04498     }
04499   emit_move_insn (gen_rtx_REG (Pmode, regno),
04500       adjust_address (base_address, Pmode, offset));
04501   offset += UNITS_PER_WORD;
04502       }
04503 }
04504 
04505 /* Restore function stack, frame, and registers.
   STYLE selects the kind of epilogue: 0 is a sibcall epilogue (no return
   instruction is emitted), 2 is an eh_return epilogue.  STYLE is also
   passed on to pro_epilogue_adjust_stack.  */
04506 
04507 void
04508 ix86_expand_epilogue (int style)
04509 {
04510   int regno;
  /* The stack pointer is usable for addressing unless a frame pointer is
     needed and the stack pointer changes within the function.  */
04511   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
04512   struct ix86_frame frame;
04513   HOST_WIDE_INT offset;
04514 
04515   ix86_compute_frame_layout (&frame);
04516 
04517   /* Calculate start of saved registers relative to ebp.  Special care
04518      must be taken for the normal return case of a function using
04519      eh_return: the eax and edx registers are marked as saved, but not
04520      restored along this path.  */
04521   offset = frame.nregs;
04522   if (current_function_calls_eh_return && style != 2)
04523     offset -= 2;
04524   offset *= -UNITS_PER_WORD;
04525 
04526   /* If we're only restoring one register and sp is not valid then
04527      using a move instruction to restore the register since it's
04528      less work than reloading sp and popping the register.
04529 
04530      The default code result in stack adjustment using add/lea instruction,
04531      while this code results in LEAVE instruction (or discrete equivalent),
04532      so it is profitable in some other cases as well.  Especially when there
04533      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
04534      and there is exactly one register to pop. This heuristic may need some
04535      tuning in future.  */
04536   if ((!sp_valid && frame.nregs <= 1)
04537       || (TARGET_EPILOGUE_USING_MOVE
04538     && cfun->machine->use_fast_prologue_epilogue
04539     && (frame.nregs > 1 || frame.to_allocate))
04540       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
04541       || (frame_pointer_needed && TARGET_USE_LEAVE
04542     && cfun->machine->use_fast_prologue_epilogue
04543     && frame.nregs == 1)
04544       || current_function_calls_eh_return)
04545     {
04546       /* Restore registers.  We can use ebp or esp to address the memory
04547    locations.  If both are available, default to ebp, since offsets
04548    are known to be small.  Only exception is esp pointing directly to the
04549    end of block of saved registers, where we may simplify addressing
04550    mode.  */
04551 
04552       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
04553   ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
04554             frame.to_allocate, style == 2);
04555       else
04556   ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
04557             offset, style == 2);
04558 
04559       /* eh_return epilogues need %ecx added to the stack pointer.  */
04560       if (style == 2)
04561   {
04562     rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
04563 
04564     if (frame_pointer_needed)
04565       {
        /* sa = hard frame pointer + sa + UNITS_PER_WORD, then reload the
           hard frame pointer from the slot it points to and set the stack
           pointer from sa.  */
04566         tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
04567         tmp = plus_constant (tmp, UNITS_PER_WORD);
04568         emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
04569 
04570         tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
04571         emit_move_insn (hard_frame_pointer_rtx, tmp);
04572 
04573         pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
04574            const0_rtx, style);
04575       }
04576     else
04577       {
        /* Drop the frame, the register save area and sa in one
           addition.  */
04578         tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
04579         tmp = plus_constant (tmp, (frame.to_allocate
04580                                          + frame.nregs * UNITS_PER_WORD));
04581         emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
04582       }
04583   }
04584       else if (!frame_pointer_needed)
04585   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
04586            GEN_INT (frame.to_allocate
04587               + frame.nregs * UNITS_PER_WORD),
04588            style);
04589       /* If not an i386, mov & pop is faster than "leave".  */
04590       else if (TARGET_USE_LEAVE || optimize_size
04591          || !cfun->machine->use_fast_prologue_epilogue)
04592   emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
04593       else
04594   {
04595     pro_epilogue_adjust_stack (stack_pointer_rtx,
04596              hard_frame_pointer_rtx,
04597              const0_rtx, style);
04598     if (TARGET_64BIT)
04599       emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
04600     else
04601       emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
04602   }
04603     }
04604   else
04605     {
04606       /* First step is to deallocate the stack frame so that we can
04607    pop the registers.  */
04608       if (!sp_valid)
04609   {
04610     if (!frame_pointer_needed)
04611       abort ();
04612     pro_epilogue_adjust_stack (stack_pointer_rtx,
04613              hard_frame_pointer_rtx,
04614              GEN_INT (offset), style);
04615   }
04616       else if (frame.to_allocate)
04617   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
04618            GEN_INT (frame.to_allocate), style);
04619 
      /* Pop the saved registers in ascending register number order.  */
04620       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
04621   if (ix86_save_reg (regno, false))
04622     {
04623       if (TARGET_64BIT)
04624         emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
04625       else
04626         emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
04627     }
04628       if (frame_pointer_needed)
04629   {
04630     /* Leave results in shorter dependency chains on CPUs that are
04631        able to grok it fast.  */
04632     if (TARGET_USE_LEAVE)
04633       emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
04634     else if (TARGET_64BIT)
04635       emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
04636     else
04637       emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
04638   }
04639     }
04640 
04641   /* Sibcall epilogues don't want a return instruction.  */
04642   if (style == 0)
04643     return;
04644 
04645   if (current_function_pops_args && current_function_args_size)
04646     {
04647       rtx popc = GEN_INT (current_function_pops_args);
04648 
04649       /* i386 can only pop 64K bytes.  If asked to pop more, pop
04650    return address, do explicit add, and jump indirectly to the
04651    caller.  */
04652 
04653       if (current_function_pops_args >= 65536)
04654   {
04655     rtx ecx = gen_rtx_REG (SImode, 2);
04656 
04657     /* There is no "pascal" calling convention in 64bit ABI.  */
04658     if (TARGET_64BIT)
04659       abort ();
04660 
04661     emit_insn (gen_popsi1 (ecx));
04662     emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
04663     emit_jump_insn (gen_return_indirect_internal (ecx));
04664   }
04665       else
04666   emit_jump_insn (gen_return_pop_internal (popc));
04667     }
04668   else
04669     emit_jump_insn (gen_return_internal ());
04670 }
04671 
04672 /* Reset from the function's potential modifications.  */
04673 
04674 static void
04675 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
04676              HOST_WIDE_INT size ATTRIBUTE_UNUSED)
04677 {
04678   if (pic_offset_table_rtx)
04679     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
04680 }
04681 
04682 /* Extract the parts of an RTL expression that is a valid memory address
04683    for an instruction.  Return 0 if the structure of the address is
04684    grossly off.  Return -1 if the address contains ASHIFT, so it is not
04685    strictly valid, but still used for computing length of lea instruction.
   Otherwise return 1.  On a nonzero return the base, index, disp, scale
   and seg fields of *OUT have been filled in; *OUT is not written when 0
   is returned.  */
04686 
04687 int
04688 ix86_decompose_address (rtx addr, struct ix86_address *out)
04689 {
04690   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
04691   rtx base_reg, index_reg;
04692   HOST_WIDE_INT scale = 1;
04693   rtx scale_rtx = NULL_RTX;
04694   int retval = 1;
04695   enum ix86_address_seg seg = SEG_DEFAULT;
04696 
04697   if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
04698     base = addr;
04699   else if (GET_CODE (addr) == PLUS)
04700     {
04701       rtx addends[4], op;
04702       int n = 0, i;
04703 
      /* Flatten the left-nested chain of PLUS into addends[]; an address
         with more than four addends is rejected.  */
04704       op = addr;
04705       do
04706   {
04707     if (n >= 4)
04708       return 0;
04709     addends[n++] = XEXP (op, 1);
04710     op = XEXP (op, 0);
04711   }
04712       while (GET_CODE (op) == PLUS);
04713       if (n >= 4)
04714   return 0;
04715       addends[n] = op;
04716 
      /* Classify the addends, starting with the innermost left operand.
         At most one index*scale, one displacement and two registers (base
         first, then index) are accepted.  */
04717       for (i = n; i >= 0; --i)
04718   {
04719     op = addends[i];
04720     switch (GET_CODE (op))
04721       {
04722       case MULT:
04723         if (index)
04724     return 0;
04725         index = XEXP (op, 0);
04726         scale_rtx = XEXP (op, 1);
04727         break;
04728 
04729       case UNSPEC:
        /* The only UNSPEC accepted is UNSPEC_TP, which selects a segment
           (fs for 64-bit, gs otherwise) and may appear once.  */
04730         if (XINT (op, 1) == UNSPEC_TP
04731             && TARGET_TLS_DIRECT_SEG_REFS
04732             && seg == SEG_DEFAULT)
04733     seg = TARGET_64BIT ? SEG_FS : SEG_GS;
04734         else
04735     return 0;
04736         break;
04737 
04738       case REG:
04739       case SUBREG:
04740         if (!base)
04741     base = op;
04742         else if (!index)
04743     index = op;
04744         else
04745     return 0;
04746         break;
04747 
04748       case CONST:
04749       case CONST_INT:
04750       case SYMBOL_REF:
04751       case LABEL_REF:
04752         if (disp)
04753     return 0;
04754         disp = op;
04755         break;
04756 
04757       default:
04758         return 0;
04759       }
04760   }
04761     }
04762   else if (GET_CODE (addr) == MULT)
04763     {
04764       index = XEXP (addr, 0);   /* index*scale */
04765       scale_rtx = XEXP (addr, 1);
04766     }
04767   else if (GET_CODE (addr) == ASHIFT)
04768     {
04769       rtx tmp;
04770 
04771       /* We're called for lea too, which implements ashift on occasion.  */
04772       index = XEXP (addr, 0);
04773       tmp = XEXP (addr, 1);
04774       if (GET_CODE (tmp) != CONST_INT)
04775   return 0;
      /* Only shift counts 0..3 are accepted; they become scales 1, 2, 4
         and 8.  */
04776       scale = INTVAL (tmp);
04777       if ((unsigned HOST_WIDE_INT) scale > 3)
04778   return 0;
04779       scale = 1 << scale;
04780       retval = -1;
04781     }
04782   else
04783     disp = addr;      /* displacement */
04784 
04785   /* Extract the integral value of scale.  */
04786   if (scale_rtx)
04787     {
04788       if (GET_CODE (scale_rtx) != CONST_INT)
04789   return 0;
04790       scale = INTVAL (scale_rtx);
04791     }
04792 
  /* Look through a SUBREG to the register underneath for the checks
     below; OUT still receives the original base and index.  */
04793   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
04794   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
04795 
04796   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
04797   if (base_reg && index_reg && scale == 1
04798       && (index_reg == arg_pointer_rtx
04799     || index_reg == frame_pointer_rtx
04800     || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
04801     {
      /* Swap base and index so that the special register becomes the
         base.  */
04802       rtx tmp;
04803       tmp = base, base = index, index = tmp;
04804       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
04805     }
04806 
04807   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
04808   if ((base_reg == hard_frame_pointer_rtx
04809        || base_reg == frame_pointer_rtx
04810        || base_reg == arg_pointer_rtx) && !disp)
04811     disp = const0_rtx;
04812 
04813   /* Special case: on K6, [%esi] makes the instruction vector decoded.
04814      Avoid this by transforming to [%esi+0].  */
04815   if (ix86_tune == PROCESSOR_K6 && !optimize_size
04816       && base_reg && !index_reg && !disp
04817       && REG_P (base_reg)
04818       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
04819     disp = const0_rtx;
04820 
04821   /* Special case: encode reg+reg instead of reg*2.
     NOTE(review): the `scale &&' test is redundant with `scale == 2'.  */
04822   if (!base && index && scale && scale == 2)
04823     base = index, base_reg = index_reg, scale = 1;
04824 
04825   /* Special case: scaling cannot be encoded without base or displacement.  */
04826   if (!base && !disp && index && scale != 1)
04827     disp = const0_rtx;
04828 
04829   out->base = base;
04830   out->index = index;
04831   out->disp = disp;
04832   out->scale = scale;
04833   out->seg = seg;
04834 
04835   return retval;
04836 }
04837 
04838 /* Return cost of the memory address x.
04839    For i386, it is better to use a complex address than let gcc copy
04840    the address into a reg and make a new pseudo.  But not if the address
04841    requires to two regs - that would mean more pseudos with longer
04842    lifetimes.  */
04843 static int
04844 ix86_address_cost (rtx x)
04845 {
04846   struct ix86_address parts;
04847   int cost = 1;
04848 
04849   if (!ix86_decompose_address (x, &parts))
04850     abort ();
04851 
04852   if (parts.base && GET_CODE (parts.base) == SUBREG)
04853     parts.base = SUBREG_REG (parts.base);
04854   if (parts.index && GET_CODE (parts.index) == SUBREG)
04855     parts.index = SUBREG_REG (parts.index);
04856 
04857   /* More complex memory references are better.  */
04858   if (parts.disp && parts.disp != const0_rtx)
04859     cost--;
04860   if (parts.seg != SEG_DEFAULT)
04861     cost--;
04862 
04863   /* Attempt to minimize number of registers in the address.  */
04864   if ((parts.base
04865        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
04866       || (parts.index
04867     && (!REG_P (parts.index)
04868         || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
04869     cost++;
04870 
04871   if (parts.base
04872       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
04873       && parts.index
04874       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
04875       && parts.base != parts.index)
04876     cost++;
04877 
04878   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
04879      since it's predecode logic can't detect the length of instructions
04880      and it degenerates to vector decoded.  Increase cost of such
04881      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
04882      to split such addresses or even refuse such addresses at all.
04883 
04884      Following addressing modes are affected:
04885       [base+scale*index]
04886       [scale*index+disp]
04887       [base+index]
04888 
04889      The first and last case  may be avoidable by explicitly coding the zero in
04890      memory address, but I don't have AMD-K6 machine handy to check this
04891      theory.  */
04892 
04893   if (TARGET_K6
04894       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
04895     || (parts.disp && !parts.base && parts.index && parts.scale != 1)
04896     || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
04897     cost += 10;
04898 
04899   return cost;
04900 }
04901 
04902 /* If X is a machine specific address (i.e. a symbol or label being
04903    referenced as a displacement from the GOT implemented using an
04904    UNSPEC), then return the base term.  Otherwise return X.  */
04905 
04906 rtx
04907 ix86_find_base_term (rtx x)
04908 {
04909   rtx term;
04910 
04911   if (TARGET_64BIT)
04912     {
04913       if (GET_CODE (x) != CONST)
04914   return x;
04915       term = XEXP (x, 0);
04916       if (GET_CODE (term) == PLUS
04917     && (GET_CODE (XEXP (term, 1)) == CONST_INT
04918         || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
04919   term = XEXP (term, 0);
04920       if (GET_CODE (term) != UNSPEC
04921     || XINT (term, 1) != UNSPEC_GOTPCREL)
04922   return x;
04923 
04924       term = XVECEXP (term, 0, 0);
04925 
04926       if (GET_CODE (term) != SYMBOL_REF
04927     && GET_CODE (term) != LABEL_REF)
04928   return x;
04929 
04930       return term;
04931     }
04932 
04933   term = ix86_delegitimize_address (x);
04934 
04935   if (GET_CODE (term) != SYMBOL_REF
04936       && GET_CODE (term) != LABEL_REF)
04937     return x;
04938 
04939   return term;
04940 }
04941 
04942 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
04943    this is used for to form addresses to local data when -fPIC is in
04944    use.  */
04945 
04946 static bool
04947 darwin_local_data_pic (rtx disp)
04948 {
04949   if (GET_CODE (disp) == MINUS)
04950     {
04951       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
04952           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
04953         if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
04954           {
04955             const char *sym_name = XSTR (XEXP (disp, 1), 0);
04956             if (! strcmp (sym_name, "<pic base>"))
04957               return true;
04958           }
04959     }
04960 
04961   return false;
04962 }
04963 
04964 /* Determine if a given RTX is a valid constant.  We already know this
04965    satisfies CONSTANT_P.  */
04966 
04967 bool
04968 legitimate_constant_p (rtx x)
04969 {
04970   switch (GET_CODE (x))
04971     {
04972     case CONST:
04973       x = XEXP (x, 0);
04974 
04975       if (GET_CODE (x) == PLUS)
04976   {
04977     if (GET_CODE (XEXP (x, 1)) != CONST_INT)
04978       return false;
04979     x = XEXP (x, 0);
04980   }
04981 
04982       if (TARGET_MACHO && darwin_local_data_pic (x))
04983   return true;
04984 
04985       /* Only some unspecs are valid as "constants".  */
04986       if (GET_CODE (x) == UNSPEC)
04987   switch (XINT (x, 1))
04988     {
04989     case UNSPEC_TPOFF:
04990     case UNSPEC_NTPOFF:
04991       return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
04992     case UNSPEC_DTPOFF:
04993       return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
04994     default:
04995       return false;
04996     }
04997 
04998       /* We must have drilled down to a symbol.  */
04999       if (!symbolic_operand (x, Pmode))
05000   return false;
05001       /* FALLTHRU */
05002 
05003     case SYMBOL_REF:
05004       /* TLS symbols are never valid.  */
05005       if (tls_symbolic_operand (x, Pmode))
05006   return false;
05007       break;
05008 
05009     default:
05010       break;
05011     }
05012 
05013   /* Otherwise we handle everything else in the move patterns.  */
05014   return true;
05015 }
05016 
05017 /* Determine if it's legal to put X into the constant pool.  This
05018    is not possible for the address of thread-local symbols, which
05019    is checked above.  */
05020 
05021 static bool
05022 ix86_cannot_force_const_mem (rtx x)
05023 {
05024   return !legitimate_constant_p (x);
05025 }
05026 
05027 /* Determine if a given RTX is a valid constant address.  */
05028 
05029 bool
05030 constant_address_p (rtx x)
05031 {
05032   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
05033 }
05034 
05035 /* Nonzero if the constant value X is a legitimate general operand
05036    when generating PIC code.  It is given that flag_pic is on and
05037    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
05038 
05039 bool
05040 legitimate_pic_operand_p (rtx x)
05041 {
05042   rtx inner;
05043 
05044   switch (GET_CODE (x))
05045     {
05046     case CONST:
05047       inner = XEXP (x, 0);
05048 
05049       /* Only some unspecs are valid as "constants".  */
05050       if (GET_CODE (inner) == UNSPEC)
05051   switch (XINT (inner, 1))
05052     {
05053     case UNSPEC_TPOFF:
05054       return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
05055     default:
05056       return false;
05057     }
05058       /* FALLTHRU */
05059 
05060     case SYMBOL_REF:
05061     case LABEL_REF:
05062       return legitimate_pic_address_disp_p (x);
05063 
05064     default:
05065       return true;
05066     }
05067 }
05068 
05069 /* Determine if a given CONST RTX is a valid memory displacement
05070    in PIC mode.  */
05071 
05072 int
05073 legitimate_pic_address_disp_p (rtx disp)
05074 {
05075   bool saw_plus;
05076 
05077   /* In 64bit mode we can allow direct addresses of symbols and labels
05078      when they are not dynamic symbols.  */
05079   if (TARGET_64BIT)
05080     {
05081       /* TLS references should always be enclosed in UNSPEC.  */
05082       if (tls_symbolic_operand (disp, GET_MODE (disp)))
05083   return 0;
05084       if (GET_CODE (disp) == SYMBOL_REF
05085     && ix86_cmodel == CM_SMALL_PIC
05086     && SYMBOL_REF_LOCAL_P (disp))
05087   return 1;
05088       if (GET_CODE (disp) == LABEL_REF)
05089   return 1;
05090       if (GET_CODE (disp) == CONST
05091     && GET_CODE (XEXP (disp, 0)) == PLUS)
05092   {
05093     rtx op0 = XEXP (XEXP (disp, 0), 0);
05094     rtx op1 = XEXP (XEXP (disp, 0), 1);
05095 
05096     /* TLS references should always be enclosed in UNSPEC.  */
05097     if (tls_symbolic_operand (op0, GET_MODE (op0)))
05098       return 0;
05099     if (((GET_CODE (op0) == SYMBOL_REF
05100     && ix86_cmodel == CM_SMALL_PIC
05101     && SYMBOL_REF_LOCAL_P (op0))
05102          || GET_CODE (op0) == LABEL_REF)
05103         && GET_CODE (op1) == CONST_INT
05104         && INTVAL (op1) < 16*1024*1024
05105         && INTVAL (op1) >= -16*1024*1024)
05106       return 1;
05107   }
05108     }
05109   if (GET_CODE (disp) != CONST)
05110     return 0;
05111   disp = XEXP (disp, 0);
05112 
05113   if (TARGET_64BIT)
05114     {
05115       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
05116          of GOT tables.  We should not need these anyway.  */
05117       if (GET_CODE (disp) != UNSPEC
05118     || XINT (disp, 1) != UNSPEC_GOTPCREL)
05119   return 0;
05120 
05121       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
05122     && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
05123   return 0;
05124       return 1;
05125     }
05126 
05127   saw_plus = false;
05128   if (GET_CODE (disp) == PLUS)
05129     {
05130       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
05131   return 0;
05132       disp = XEXP (disp, 0);
05133       saw_plus = true;
05134     }
05135 
05136   if (TARGET_MACHO && darwin_local_data_pic (disp))
05137     return 1;
05138 
05139   if (GET_CODE (disp) != UNSPEC)
05140     return 0;
05141 
05142   switch (XINT (disp, 1))
05143     {
05144     case UNSPEC_GOT:
05145       if (saw_plus)
05146   return false;
05147       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
05148     case UNSPEC_GOTOFF:
05149       if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
05150     || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
05151         return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
05152       return false;
05153     case UNSPEC_GOTTPOFF:
05154     case UNSPEC_GOTNTPOFF:
05155     case UNSPEC_INDNTPOFF:
05156       if (saw_plus)
05157   return false;
05158       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
05159     case UNSPEC_NTPOFF:
05160       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
05161     case UNSPEC_DTPOFF:
05162       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
05163     }
05164 
05165   return 0;
05166 }
05167 
05168 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
05169    memory address for an instruction.  The MODE argument is the machine mode
05170    for the MEM expression that wants to use this address.
05171 
05172    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
05173    convert common non-canonical forms to canonical form so that they will
05174    be recognized.  */
05175 
05176 int
05177 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
05178 {
05179   struct ix86_address parts;
05180   rtx base, index, disp;
05181   HOST_WIDE_INT scale;
05182   const char *reason = NULL;
05183   rtx reason_rtx = NULL_RTX;
05184 
05185   if (TARGET_DEBUG_ADDR)
05186     {
05187       fprintf (stderr,
05188          "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
05189          GET_MODE_NAME (mode), strict);
05190       debug_rtx (addr);
05191     }
05192 
05193   if (ix86_decompose_address (addr, &parts) <= 0)
05194     {
05195       reason = "decomposition failed";
05196       goto report_error;
05197     }
05198 
05199   base = parts.base;
05200   index = parts.index;
05201   disp = parts.disp;
05202   scale = parts.scale;
05203 
05204   /* Validate base register.
05205 
05206      Don't allow SUBREG's that span more than a word here.  It can lead to spill
05207      failures when the base is one word out of a two word structure, which is
05208      represented internally as a DImode int.  */
05209 
05210   if (base)
05211     {
05212       rtx reg;
05213       reason_rtx = base;
05214   
05215       if (REG_P (base))
05216     reg = base;
05217       else if (GET_CODE (base) == SUBREG
05218          && REG_P (SUBREG_REG (base))
05219          && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
05220       <= UNITS_PER_WORD)
05221     reg = SUBREG_REG (base);
05222       else
05223   {
05224     reason = "base is not a register";
05225     goto report_error;
05226   }
05227 
05228       if (GET_MODE (base) != Pmode)
05229   {
05230     reason = "base is not in Pmode";
05231     goto report_error;
05232   }
05233 
05234       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
05235     || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
05236   {
05237     reason = "base is not valid";
05238     goto report_error;
05239   }
05240     }
05241 
05242   /* Validate index register.
05243 
05244      Don't allow SUBREG's that span more than a word here -- same as above.  */
05245 
05246   if (index)
05247     {
05248       rtx reg;
05249       reason_rtx = index;
05250 
05251       if (REG_P (index))
05252     reg = index;
05253       else if (GET_CODE (index) == SUBREG
05254          && REG_P (SUBREG_REG (index))
05255          && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
05256       <= UNITS_PER_WORD)
05257     reg = SUBREG_REG (index);
05258       else
05259   {
05260     reason = "index is not a register";
05261     goto report_error;
05262   }
05263 
05264       if (GET_MODE (index) != Pmode)
05265   {
05266     reason = "index is not in Pmode";
05267     goto report_error;
05268   }
05269 
05270       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
05271     || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
05272   {
05273     reason = "index is not valid";
05274     goto report_error;
05275   }
05276     }
05277 
05278   /* Validate scale factor.  */
05279   if (scale != 1)
05280     {
05281       reason_rtx = GEN_INT (scale);
05282       if (!index)
05283   {
05284     reason = "scale without index";
05285     goto report_error;
05286   }
05287 
05288       if (scale != 2 && scale != 4 && scale != 8)
05289   {
05290     reason = "scale is not a valid multiplier";
05291     goto report_error;
05292   }
05293     }
05294 
05295   /* Validate displacement.  */
05296   if (disp)
05297     {
05298       reason_rtx = disp;
05299 
05300       if (GET_CODE (disp) == CONST
05301     && GET_CODE (XEXP (disp, 0)) == UNSPEC)
05302   switch (XINT (XEXP (disp, 0), 1))
05303     {
05304     case UNSPEC_GOT:
05305     case UNSPEC_GOTOFF:
05306     case UNSPEC_GOTPCREL:
05307       if (!flag_pic)
05308         abort ();
05309       goto is_legitimate_pic;
05310 
05311     case UNSPEC_GOTTPOFF:
05312     case UNSPEC_GOTNTPOFF:
05313     case UNSPEC_INDNTPOFF:
05314     case UNSPEC_NTPOFF:
05315     case UNSPEC_DTPOFF:
05316       break;
05317 
05318     default:
05319       reason = "invalid address unspec";
05320       goto report_error;
05321     }
05322 
05323       else if (flag_pic && (SYMBOLIC_CONST (disp)
05324 #if TARGET_MACHO
05325           && !machopic_operand_p (disp)
05326 #endif
05327           ))
05328   {
05329   is_legitimate_pic:
05330     if (TARGET_64BIT && (index || base))
05331       {
05332         /* foo@dtpoff(%rX) is ok.  */
05333         if (GET_CODE (disp) != CONST
05334       || GET_CODE (XEXP (disp, 0)) != PLUS
05335       || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
05336       || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
05337       || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
05338           && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
05339     {
05340       reason = "non-constant pic memory reference";
05341       goto report_error;
05342     }
05343       }
05344     else if (! legitimate_pic_address_disp_p (disp))
05345       {
05346         reason = "displacement is an invalid pic construct";
05347         goto report_error;
05348       }
05349 
05350           /* This code used to verify that a symbolic pic displacement
05351        includes the pic_offset_table_rtx register.
05352 
05353        While this is good idea, unfortunately these constructs may
05354        be created by "adds using lea" optimization for incorrect
05355        code like:
05356 
05357        int a;
05358        int foo(int i)
05359          {
05360            return *(&a+i);
05361          }
05362 
05363        This code is nonsensical, but results in addressing
05364        GOT table with pic_offset_table_rtx base.  We can't
05365        just refuse it easily, since it gets matched by
05366        "addsi3" pattern, that later gets split to lea in the
05367        case output register differs from input.  While this
05368        can be handled by separate addsi pattern for this case
05369        that never results in lea, this seems to be easier and
05370        correct fix for crash to disable this test.  */
05371   }
05372       else if (GET_CODE (disp) != LABEL_REF
05373          && GET_CODE (disp) != CONST_INT
05374          && (GET_CODE (disp) != CONST
05375        || !legitimate_constant_p (disp))
05376          && (GET_CODE (disp) != SYMBOL_REF
05377        || !legitimate_constant_p (disp)))
05378   {
05379     reason = "displacement is not constant";
05380     goto report_error;
05381   }
05382       else if (TARGET_64BIT
05383          && !x86_64_immediate_operand (disp, VOIDmode))
05384   {
05385     reason = "displacement is out of range";
05386     goto report_error;
05387   }
05388     }
05389 
05390   /* Everything looks valid.  */
05391   if (TARGET_DEBUG_ADDR)
05392     fprintf (stderr, "Success.\n");
05393   return TRUE;
05394 
05395  report_error:
05396   if (TARGET_DEBUG_ADDR)
05397     {
05398       fprintf (stderr, "Error: %s\n", reason);
05399       debug_rtx (reason_rtx);
05400     }
05401   return FALSE;
05402 }
05403 
05404 /* Return an unique alias set for the GOT.  */
05405 
05406 static HOST_WIDE_INT
05407 ix86_GOT_alias_set (void)
05408 {
05409   static HOST_WIDE_INT set = -1;
05410   if (set == -1)
05411     set = new_alias_set ();
05412   return set;
05413 }
05414 
05415 /* Return a legitimate reference for ORIG (an address) using the
05416    register REG.  If REG is 0, a new pseudo is generated.
05417 
05418    There are two types of references that must be handled:
05419 
05420    1. Global data references must load the address from the GOT, via
05421       the PIC reg.  An insn is emitted to do this load, and the reg is
05422       returned.
05423 
05424    2. Static data references, constant pool addresses, and code labels
05425       compute the address as an offset from the GOT, whose base is in
05426       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
05427       differentiate them from global data objects.  The returned
05428       address is the PIC reg + an unspec constant.
05429 
05430    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
05431    reg also appears in the address.  */
05432 
05433 static rtx
05434 legitimize_pic_address (rtx orig, rtx reg)
05435 {
05436   rtx addr = orig;
05437   rtx new = orig;
05438   rtx base;
05439 
05440 #if TARGET_MACHO
05441   if (reg == 0)
05442     reg = gen_reg_rtx (Pmode);
05443   /* Use the generic Mach-O PIC machinery.  */
05444   return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
05445 #endif
05446 
05447   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
05448     new = addr;
05449   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
05450     {
05451       /* This symbol may be referenced via a displacement from the PIC
05452    base address (@GOTOFF).  */
05453 
05454       if (reload_in_progress)
05455   regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
05456       if (GET_CODE (addr) == CONST)
05457   addr = XEXP (addr, 0);
05458       if (GET_CODE (addr) == PLUS)
05459     {
05460             new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
05461       new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
05462     }
05463   else
05464           new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
05465       new = gen_rtx_CONST (Pmode, new);
05466       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
05467 
05468       if (reg != 0)
05469   {
05470     emit_move_insn (reg, new);
05471     new = reg;
05472   }
05473     }
05474   else if (GET_CODE (addr) == SYMBOL_REF)
05475     {
05476       if (TARGET_64BIT)
05477   {
05478     new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
05479     new = gen_rtx_CONST (Pmode, new);
05480     new = gen_const_mem (Pmode, new);
05481     set_mem_alias_set (new, ix86_GOT_alias_set ());
05482 
05483     if (reg == 0)
05484       reg = gen_reg_rtx (Pmode);
05485     /* Use directly gen_movsi, otherwise the address is loaded
05486        into register for CSE.  We don't want to CSE this addresses,
05487        instead we CSE addresses from the GOT table, so skip this.  */
05488     emit_insn (gen_movsi (reg, new));
05489     new = reg;
05490   }
05491       else
05492   {
05493     /* This symbol must be referenced via a load from the
05494        Global Offset Table (@GOT).  */
05495 
05496     if (reload_in_progress)
05497       regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
05498     new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
05499     new = gen_rtx_CONST (Pmode, new);
05500     new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
05501     new = gen_const_mem (Pmode, new);
05502     set_mem_alias_set (new, ix86_GOT_alias_set ());
05503 
05504     if (reg == 0)
05505       reg = gen_reg_rtx (Pmode);
05506     emit_move_insn (reg, new);
05507     new = reg;
05508   }
05509     }
05510   else
05511     {
05512       if (GET_CODE (addr) == CONST)
05513   {
05514     addr = XEXP (addr, 0);
05515 
05516     /* We must match stuff we generate before.  Assume the only
05517        unspecs that can get here are ours.  Not that we could do
05518        anything with them anyway....  */
05519     if (GET_CODE (addr) == UNSPEC
05520         || (GET_CODE (addr) == PLUS
05521       && GET_CODE (XEXP (addr, 0)) == UNSPEC))
05522       return orig;
05523     if (GET_CODE (addr) != PLUS)
05524       abort ();
05525   }
05526       if (GET_CODE (addr) == PLUS)
05527   {
05528     rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
05529 
05530     /* Check first to see if this is a constant offset from a @GOTOFF
05531        symbol reference.  */
05532     if (local_symbolic_operand (op0, Pmode)
05533         && GET_CODE (op1) == CONST_INT)
05534       {
05535         if (!TARGET_64BIT)
05536     {
05537       if (reload_in_progress)
05538         regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
05539       new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
05540           UNSPEC_GOTOFF);
05541       new = gen_rtx_PLUS (Pmode, new, op1);
05542       new = gen_rtx_CONST (Pmode, new);
05543       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
05544 
05545       if (reg != 0)
05546         {
05547           emit_move_insn (reg, new);
05548           new = reg;
05549         }
05550     }
05551         else
05552     {
05553       if (INTVAL (op1) < -16*1024*1024
05554           || INTVAL (op1) >= 16*1024*1024)
05555         new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
05556     }
05557       }
05558     else
05559       {
05560         base = legitimize_pic_address (XEXP (addr, 0), reg);
05561         new  = legitimize_pic_address (XEXP (addr, 1),
05562                base == reg ? NULL_RTX : reg);
05563 
05564         if (GET_CODE (new) == CONST_INT)
05565     new = plus_constant (base, INTVAL (new));
05566         else
05567     {
05568       if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
05569         {
05570           base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
05571           new = XEXP (new, 1);
05572         }
05573       new = gen_rtx_PLUS (Pmode, base, new);
05574     }
05575       }
05576   }
05577     }
05578   return new;
05579 }
05580 
05581 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
05582 
05583 static rtx
05584 get_thread_pointer (int to_reg)
05585 {
05586   rtx tp, reg, insn;
05587 
05588   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
05589   if (!to_reg)
05590     return tp;
05591 
05592   reg = gen_reg_rtx (Pmode);
05593   insn = gen_rtx_SET (VOIDmode, reg, tp);
05594   insn = emit_insn (insn);
05595 
05596   return reg;
05597 }
05598 
05599 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
05600    false if we expect this to be used for a memory address and true if
05601    we expect to load the address into a register.  */
05602 
05603 static rtx
05604 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
05605 {
05606   rtx dest, base, off, pic;
05607   int type;
05608 
05609   switch (model)
05610     {
05611     case TLS_MODEL_GLOBAL_DYNAMIC:
05612       dest = gen_reg_rtx (Pmode);
05613       if (TARGET_64BIT)
05614   {
05615     rtx rax = gen_rtx_REG (Pmode, 0), insns;
05616 
05617     start_sequence ();
05618     emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
05619     insns = get_insns ();
05620     end_sequence ();
05621 
05622     emit_libcall_block (insns, dest, rax, x);
05623   }
05624       else
05625   emit_insn (gen_tls_global_dynamic_32 (dest, x));
05626       break;
05627 
05628     case TLS_MODEL_LOCAL_DYNAMIC:
05629       base = gen_reg_rtx (Pmode);
05630       if (TARGET_64BIT)
05631   {
05632     rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
05633 
05634     start_sequence ();
05635     emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
05636     insns = get_insns ();
05637     end_sequence ();
05638 
05639     note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
05640     note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
05641     emit_libcall_block (insns, base, rax, note);
05642   }
05643       else
05644   emit_insn (gen_tls_local_dynamic_base_32 (base));
05645 
05646       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
05647       off = gen_rtx_CONST (Pmode, off);
05648 
05649       return gen_rtx_PLUS (Pmode, base, off);
05650 
05651     case TLS_MODEL_INITIAL_EXEC:
05652       if (TARGET_64BIT)
05653   {
05654     pic = NULL;
05655     type = UNSPEC_GOTNTPOFF;
05656   }
05657       else if (flag_pic)
05658   {
05659     if (reload_in_progress)
05660       regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
05661     pic = pic_offset_table_rtx;
05662     type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
05663   }
05664       else if (!TARGET_GNU_TLS)
05665   {
05666     pic = gen_reg_rtx (Pmode);
05667     emit_insn (gen_set_got (pic));
05668     type = UNSPEC_GOTTPOFF;
05669   }
05670       else
05671   {
05672     pic = NULL;
05673     type = UNSPEC_INDNTPOFF;
05674   }
05675 
05676       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
05677       off = gen_rtx_CONST (Pmode, off);
05678       if (pic)
05679   off = gen_rtx_PLUS (Pmode, pic, off);
05680       off = gen_const_mem (Pmode, off);
05681       set_mem_alias_set (off, ix86_GOT_alias_set ());
05682 
05683       if (TARGET_64BIT || TARGET_GNU_TLS)
05684   {
05685           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
05686     off = force_reg (Pmode, off);
05687     return gen_rtx_PLUS (Pmode, base, off);
05688   }
05689       else
05690   {
05691     base = get_thread_pointer (true);
05692     dest = gen_reg_rtx (Pmode);
05693     emit_insn (gen_subsi3 (dest, base, off));
05694   }
05695       break;
05696 
05697     case TLS_MODEL_LOCAL_EXEC:
05698       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
05699           (TARGET_64BIT || TARGET_GNU_TLS)
05700           ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
05701       off = gen_rtx_CONST (Pmode, off);
05702 
05703       if (TARGET_64BIT || TARGET_GNU_TLS)
05704   {
05705     base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
05706     return gen_rtx_PLUS (Pmode, base, off);
05707   }
05708       else
05709   {
05710     base = get_thread_pointer (true);
05711     dest = gen_reg_rtx (Pmode);
05712     emit_insn (gen_subsi3 (dest, base, off));
05713   }
05714       break;
05715 
05716     default:
05717       abort ();
05718     }
05719 
05720   return dest;
05721 }
05722 
05723 /* Try machine-dependent ways of modifying an illegitimate address
05724    to be legitimate.  If we find one, return the new, valid address.
05725    This macro is used in only one place: `memory_address' in explow.c.
05726 
05727    OLDX is the address as it was before break_out_memory_refs was called.
05728    In some cases it is useful to look at this to decide what needs to be done.
05729 
05730    MODE and WIN are passed so that this macro can use
05731    GO_IF_LEGITIMATE_ADDRESS.
05732 
05733    It is always safe for this macro to do nothing.  It exists to recognize
05734    opportunities to optimize the output.
05735 
05736    For the 80386, we handle X+REG by loading X into a register R and
05737    using R+REG.  R will go in a general reg and indexing will be used.
05738    However, if REG is a broken-out memory address or multiplication,
05739    nothing needs to be done because REG can certainly go in a general reg.
05740 
05741    When -fpic is used, special handling is needed for symbolic references.
05742    See comments by legitimize_pic_address in i386.c for details.  */
05743 
05744 rtx
05745 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
05746 {
05747   int changed = 0;
05748   unsigned log;
05749 
05750   if (TARGET_DEBUG_ADDR)
05751     {
05752       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
05753          GET_MODE_NAME (mode));
05754       debug_rtx (x);
05755     }
05756 
05757   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
05758   if (log)
05759     return legitimize_tls_address (x, log, false);
05760   if (GET_CODE (x) == CONST
05761       && GET_CODE (XEXP (x, 0)) == PLUS
05762       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
05763       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
05764     {
05765       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
05766       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
05767     }
05768 
05769   if (flag_pic && SYMBOLIC_CONST (x))
05770     return legitimize_pic_address (x, 0);
05771 
05772   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
05773   if (GET_CODE (x) == ASHIFT
05774       && GET_CODE (XEXP (x, 1)) == CONST_INT
05775       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
05776     {
05777       changed = 1;
05778       log = INTVAL (XEXP (x, 1));
05779       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
05780       GEN_INT (1 << log));
05781     }
05782 
05783   if (GET_CODE (x) == PLUS)
05784     {
05785       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
05786 
05787       if (GET_CODE (XEXP (x, 0)) == ASHIFT
05788     && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
05789     && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
05790   {
05791     changed = 1;
05792     log = INTVAL (XEXP (XEXP (x, 0), 1));
05793     XEXP (x, 0) = gen_rtx_MULT (Pmode,
05794               force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
05795               GEN_INT (1 << log));
05796   }
05797 
05798       if (GET_CODE (XEXP (x, 1)) == ASHIFT
05799     && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
05800     && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
05801   {
05802     changed = 1;
05803     log = INTVAL (XEXP (XEXP (x, 1), 1));
05804     XEXP (x, 1) = gen_rtx_MULT (Pmode,
05805               force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
05806               GEN_INT (1 << log));
05807   }
05808 
05809       /* Put multiply first if it isn't already.  */
05810       if (GET_CODE (XEXP (x, 1)) == MULT)
05811   {
05812     rtx tmp = XEXP (x, 0);
05813     XEXP (x, 0) = XEXP (x, 1);
05814     XEXP (x, 1) = tmp;
05815     changed = 1;
05816   }
05817 
05818       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
05819    into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
05820    created by virtual register instantiation, register elimination, and
05821    similar optimizations.  */
05822       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
05823   {
05824     changed = 1;
05825     x = gen_rtx_PLUS (Pmode,
05826           gen_rtx_PLUS (Pmode, XEXP (x, 0),
05827             XEXP (XEXP (x, 1), 0)),
05828           XEXP (XEXP (x, 1), 1));
05829   }
05830 
05831       /* Canonicalize
05832    (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
05833    into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
05834       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
05835          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
05836          && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
05837          && CONSTANT_P (XEXP (x, 1)))
05838   {
05839     rtx constant;
05840     rtx other = NULL_RTX;
05841 
05842     if (GET_CODE (XEXP (x, 1)) == CONST_INT)
05843       {
05844         constant = XEXP (x, 1);
05845         other = XEXP (XEXP (XEXP (x, 0), 1), 1);
05846       }
05847     else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
05848       {
05849         constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
05850         other = XEXP (x, 1);
05851       }
05852     else
05853       constant = 0;
05854 
05855     if (constant)
05856       {
05857         changed = 1;
05858         x = gen_rtx_PLUS (Pmode,
05859         gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
05860                 XEXP (XEXP (XEXP (x, 0), 1), 0)),
05861         plus_constant (other, INTVAL (constant)));
05862       }
05863   }
05864 
05865       if (changed && legitimate_address_p (mode, x, FALSE))
05866   return x;
05867 
05868       if (GET_CODE (XEXP (x, 0)) == MULT)
05869   {
05870     changed = 1;
05871     XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
05872   }
05873 
05874       if (GET_CODE (XEXP (x, 1)) == MULT)
05875   {
05876     changed = 1;
05877     XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
05878   }
05879 
05880       if (changed
05881     && GET_CODE (XEXP (x, 1)) == REG
05882     && GET_CODE (XEXP (x, 0)) == REG)
05883   return x;
05884 
05885       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
05886   {
05887     changed = 1;
05888     x = legitimize_pic_address (x, 0);
05889   }
05890 
05891       if (changed && legitimate_address_p (mode, x, FALSE))
05892   return x;
05893 
05894       if (GET_CODE (XEXP (x, 0)) == REG)
05895   {
05896     rtx temp = gen_reg_rtx (Pmode);
05897     rtx val  = force_operand (XEXP (x, 1), temp);
05898     if (val != temp)
05899       emit_move_insn (temp, val);
05900 
05901     XEXP (x, 1) = temp;
05902     return x;
05903   }
05904 
05905       else if (GET_CODE (XEXP (x, 1)) == REG)
05906   {
05907     rtx temp = gen_reg_rtx (Pmode);
05908     rtx val  = force_operand (XEXP (x, 0), temp);
05909     if (val != temp)
05910       emit_move_insn (temp, val);
05911 
05912     XEXP (x, 0) = temp;
05913     return x;
05914   }
05915     }
05916 
05917   return x;
05918 }
05919 
05920 /* Print an integer constant expression in assembler syntax.  Addition
05921    and subtraction are the only arithmetic that may appear in these
05922    expressions.  FILE is the stdio stream to write to, X is the rtx, and
05923    CODE is the operand print code from the output string.  */
05924 
05925 static void
05926 output_pic_addr_const (FILE *file, rtx x, int code)
05927 {
05928   char buf[256];
05929 
05930   switch (GET_CODE (x))
05931     {
05932     case PC:
05933       if (flag_pic)
05934   putc ('.', file);
05935       else
05936   abort ();
05937       break;
05938 
05939     case SYMBOL_REF:
05940      /* Mark the decl as referenced so that cgraph will output the function.  */
05941      if (SYMBOL_REF_DECL (x))
05942        mark_decl_referenced (SYMBOL_REF_DECL (x));
05943 
05944       assemble_name (file, XSTR (x, 0));
05945       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
05946   fputs ("@PLT", file);
05947       break;
05948 
05949     case LABEL_REF:
05950       x = XEXP (x, 0);
05951       /* FALLTHRU */
05952     case CODE_LABEL:
05953       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
05954       assemble_name (asm_out_file, buf);
05955       break;
05956 
05957     case CONST_INT:
05958       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
05959       break;
05960 
05961     case CONST:
05962       /* This used to output parentheses around the expression,
05963    but that does not work on the 386 (either ATT or BSD assembler).  */
05964       output_pic_addr_const (file, XEXP (x, 0), code);
05965       break;
05966 
05967     case CONST_DOUBLE:
05968       if (GET_MODE (x) == VOIDmode)
05969   {
05970     /* We can use %d if the number is <32 bits and positive.  */
05971     if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
05972       fprintf (file, "0x%lx%08lx",
05973          (unsigned long) CONST_DOUBLE_HIGH (x),
05974          (unsigned long) CONST_DOUBLE_LOW (x));
05975     else
05976       fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
05977   }
05978       else
05979   /* We can't handle floating point constants;
05980      PRINT_OPERAND must handle them.  */
05981   output_operand_lossage ("floating constant misused");
05982       break;
05983 
05984     case PLUS:
05985       /* Some assemblers need integer constants to appear first.  */
05986       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
05987   {
05988     output_pic_addr_const (file, XEXP (x, 0), code);
05989     putc ('+', file);
05990     output_pic_addr_const (file, XEXP (x, 1), code);
05991   }
05992       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
05993   {
05994     output_pic_addr_const (file, XEXP (x, 1), code);
05995     putc ('+', file);
05996     output_pic_addr_const (file, XEXP (x, 0), code);
05997   }
05998       else
05999   abort ();
06000       break;
06001 
06002     case MINUS:
06003       if (!TARGET_MACHO)
06004   putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
06005       output_pic_addr_const (file, XEXP (x, 0), code);
06006       putc ('-', file);
06007       output_pic_addr_const (file, XEXP (x, 1), code);
06008       if (!TARGET_MACHO)
06009   putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
06010       break;
06011 
06012      case UNSPEC:
06013        if (XVECLEN (x, 0) != 1)
06014    abort ();
06015        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
06016        switch (XINT (x, 1))
06017   {
06018   case UNSPEC_GOT:
06019     fputs ("@GOT", file);
06020     break;
06021   case UNSPEC_GOTOFF:
06022     fputs ("@GOTOFF", file);
06023     break;
06024   case UNSPEC_GOTPCREL:
06025     fputs ("@GOTPCREL(%rip)", file);
06026     break;
06027   case UNSPEC_GOTTPOFF:
06028     /* FIXME: This might be @TPOFF in Sun ld too.  */
06029     fputs ("@GOTTPOFF", file);
06030     break;
06031   case UNSPEC_TPOFF:
06032     fputs ("@TPOFF", file);
06033     break;
06034   case UNSPEC_NTPOFF:
06035     if (TARGET_64BIT)
06036       fputs ("@TPOFF", file);
06037     else
06038       fputs ("@NTPOFF", file);
06039     break;
06040   case UNSPEC_DTPOFF:
06041     fputs ("@DTPOFF", file);
06042     break;
06043   case UNSPEC_GOTNTPOFF:
06044     if (TARGET_64BIT)
06045       fputs ("@GOTTPOFF(%rip)", file);
06046     else
06047       fputs ("@GOTNTPOFF", file);
06048     break;
06049   case UNSPEC_INDNTPOFF:
06050     fputs ("@INDNTPOFF", file);
06051     break;
06052   default:
06053     output_operand_lossage ("invalid UNSPEC as operand");
06054     break;
06055   }
06056        break;
06057 
06058     default:
06059       output_operand_lossage ("invalid expression as operand");
06060     }
06061 }
06062 
06063 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
06064    We need to emit DTP-relative relocations.  */
06065 
06066 void
06067 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
06068 {
06069   fputs (ASM_LONG, file);
06070   output_addr_const (file, x);
06071   fputs ("@DTPOFF", file);
06072   switch (size)
06073     {
06074     case 4:
06075       break;
06076     case 8:
06077       fputs (", 0", file);
06078       break;
06079     default:
06080       abort ();
06081    }
06082 }
06083 
06084 /* In the name of slightly smaller debug output, and to cater to
06085    general assembler losage, recognize PIC+GOTOFF and turn it back
06086    into a direct symbol reference.  */
06087 
06088 static rtx
06089 ix86_delegitimize_address (rtx orig_x)
06090 {
06091   rtx x = orig_x, y;
06092 
06093   if (GET_CODE (x) == MEM)
06094     x = XEXP (x, 0);
06095 
06096   if (TARGET_64BIT)
06097     {
06098       if (GET_CODE (x) != CONST
06099     || GET_CODE (XEXP (x, 0)) != UNSPEC
06100     || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
06101     || GET_CODE (orig_x) != MEM)
06102   return orig_x;
06103       return XVECEXP (XEXP (x, 0), 0, 0);
06104     }
06105 
06106   if (GET_CODE (x) != PLUS
06107       || GET_CODE (XEXP (x, 1)) != CONST)
06108     return orig_x;
06109 
06110   if (GET_CODE (XEXP (x, 0)) == REG
06111       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
06112     /* %ebx + GOT/GOTOFF */
06113     y = NULL;
06114   else if (GET_CODE (XEXP (x, 0)) == PLUS)
06115     {
06116       /* %ebx + %reg * scale + GOT/GOTOFF */
06117       y = XEXP (x, 0);
06118       if (GET_CODE (XEXP (y, 0)) == REG
06119     && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
06120   y = XEXP (y, 1);
06121       else if (GET_CODE (XEXP (y, 1)) == REG
06122          && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
06123   y = XEXP (y, 0);
06124       else
06125   return orig_x;
06126       if (GET_CODE (y) != REG
06127     && GET_CODE (y) != MULT
06128     && GET_CODE (y) != ASHIFT)
06129   return orig_x;
06130     }
06131   else
06132     return orig_x;
06133 
06134   x = XEXP (XEXP (x, 1), 0);
06135   if (GET_CODE (x) == UNSPEC
06136       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
06137     || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
06138     {
06139       if (y)
06140   return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
06141       return XVECEXP (x, 0, 0);
06142     }
06143 
06144   if (GET_CODE (x) == PLUS
06145       && GET_CODE (XEXP (x, 0)) == UNSPEC
06146       && GET_CODE (XEXP (x, 1)) == CONST_INT
06147       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
06148     || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
06149         && GET_CODE (orig_x) != MEM)))
06150     {
06151       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
06152       if (y)
06153   return gen_rtx_PLUS (Pmode, y, x);
06154       return x;
06155     }
06156 
06157   return orig_x;
06158 }
06159 
06160 static void
06161 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
06162         int fp, FILE *file)
06163 {
06164   const char *suffix;
06165 
06166   if (mode == CCFPmode || mode == CCFPUmode)
06167     {
06168       enum rtx_code second_code, bypass_code;
06169       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
06170       if (bypass_code != UNKNOWN || second_code != UNKNOWN)
06171   abort ();
06172       code = ix86_fp_compare_code_to_integer (code);
06173       mode = CCmode;
06174     }
06175   if (reverse)
06176     code = reverse_condition (code);
06177 
06178   switch (code)
06179     {
06180     case EQ:
06181       suffix = "e";
06182       break;
06183     case NE:
06184       suffix = "ne";
06185       break;
06186     case GT:
06187       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
06188   abort ();
06189       suffix = "g";
06190       break;
06191     case GTU:
06192       /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
06193    Those same assemblers have the same but opposite losage on cmov.  */
06194       if (mode != CCmode)
06195   abort ();
06196       suffix = fp ? "nbe" : "a";
06197       break;
06198     case LT:
06199       if (mode == CCNOmode || mode == CCGOCmode)
06200   suffix = "s";
06201       else if (mode == CCmode || mode == CCGCmode)
06202   suffix = "l";
06203       else
06204   abort ();
06205       break;
06206     case LTU:
06207       if (mode != CCmode)
06208   abort ();
06209       suffix = "b";
06210       break;
06211     case GE:
06212       if (mode == CCNOmode || mode == CCGOCmode)
06213   suffix = "ns";
06214       else if (mode == CCmode || mode == CCGCmode)
06215   suffix = "ge";
06216       else
06217   abort ();
06218       break;
06219     case GEU:
06220       /* ??? As above.  */
06221       if (mode != CCmode)
06222   abort ();
06223       suffix = fp ? "nb" : "ae";
06224       break;
06225     case LE:
06226       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
06227   abort ();
06228       suffix = "le";
06229       break;
06230     case LEU:
06231       if (mode != CCmode)
06232   abort ();
06233       suffix = "be";
06234       break;
06235     case UNORDERED:
06236       suffix = fp ? "u" : "p";
06237       break;
06238     case ORDERED:
06239       suffix = fp ? "nu" : "np";
06240       break;
06241     default:
06242       abort ();
06243     }
06244   fputs (suffix, file);
06245 }
06246 
06247 /* Print the name of register X to FILE based on its machine mode and number.
06248    If CODE is 'w', pretend the mode is HImode.
06249    If CODE is 'b', pretend the mode is QImode.
06250    If CODE is 'k', pretend the mode is SImode.
06251    If CODE is 'q', pretend the mode is DImode.
06252    If CODE is 'h', pretend the reg is the `high' byte register.
06253    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
06254 
06255 void
06256 print_reg (rtx x, int code, FILE *file)
06257 {
06258   if (REGNO (x) == ARG_POINTER_REGNUM
06259       || REGNO (x) == FRAME_POINTER_REGNUM
06260       || REGNO (x) == FLAGS_REG
06261       || REGNO (x) == FPSR_REG)
06262     abort ();
06263 
06264   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
06265     putc ('%', file);
06266 
06267   if (code == 'w' || MMX_REG_P (x))
06268     code = 2;
06269   else if (code == 'b')
06270     code = 1;
06271   else if (code == 'k')
06272     code = 4;
06273   else if (code == 'q')
06274     code = 8;
06275   else if (code == 'y')
06276     code = 3;
06277   else if (code == 'h')
06278     code = 0;
06279   else
06280     code = GET_MODE_SIZE (GET_MODE (x));
06281 
06282   /* Irritatingly, AMD extended registers use different naming convention
06283      from the normal registers.  */
06284   if (REX_INT_REG_P (x))
06285     {
06286       if (!TARGET_64BIT)
06287   abort ();
06288       switch (code)
06289   {
06290     case 0:
06291       error ("extended registers have no high halves");
06292       break;
06293     case 1:
06294       fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
06295       break;
06296     case 2:
06297       fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
06298       break;
06299     case 4:
06300       fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
06301       break;
06302     case 8:
06303       fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
06304       break;
06305     default:
06306       error ("unsupported operand size for extended register");
06307       break;
06308   }
06309       return;
06310     }
06311   switch (code)
06312     {
06313     case 3:
06314       if (STACK_TOP_P (x))
06315   {
06316     fputs ("st(0)", file);
06317     break;
06318   }
06319       /* FALLTHRU */
06320     case 8:
06321     case 4:
06322     case 12:
06323       if (! ANY_FP_REG_P (x))
06324   putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
06325       /* FALLTHRU */
06326     case 16:
06327     case 2:
06328     normal:
06329       fputs (hi_reg_name[REGNO (x)], file);
06330       break;
06331     case 1:
06332       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
06333   goto normal;
06334       fputs (qi_reg_name[REGNO (x)], file);
06335       break;
06336     case 0:
06337       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
06338   goto normal;
06339       fputs (qi_high_reg_name[REGNO (x)], file);
06340       break;
06341     default:
06342       abort ();
06343     }
06344 }
06345 
06346 /* Locate some local-dynamic symbol still in use by this function
06347    so that we can print its name in some tls_local_dynamic_base
06348    pattern.  */
06349 
06350 static const char *
06351 get_some_local_dynamic_name (void)
06352 {
06353   rtx insn;
06354 
06355   if (cfun->machine->some_ld_name)
06356     return cfun->machine->some_ld_name;
06357 
06358   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
06359     if (INSN_P (insn)
06360   && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
06361       return cfun->machine->some_ld_name;
06362 
06363   abort ();
06364 }
06365 
06366 static int
06367 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
06368 {
06369   rtx x = *px;
06370 
06371   if (GET_CODE (x) == SYMBOL_REF
06372       && local_dynamic_symbolic_operand (x, Pmode))
06373     {
06374       cfun->machine->some_ld_name = XSTR (x, 0);
06375       return 1;
06376     }
06377 
06378   return 0;
06379 }
06380 
06381 /* Meaning of CODE:
06382    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
06383    C -- print opcode suffix for set/cmov insn.
06384    c -- like C, but print reversed condition
06385    F,f -- likewise, but for floating-point.
06386    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
06387         otherwise nothing
06388    R -- print the prefix for register names.
06389    z -- print the opcode suffix for the size of the current operand.
06390    * -- print a star (in certain assembler syntax)
06391    A -- print an absolute memory reference.
06392    w -- print the operand as if it's a "word" (HImode) even if it isn't.
06393    s -- print a shift double count, followed by the assemblers argument
06394   delimiter.
06395    b -- print the QImode name of the register for the indicated operand.
06396   %b0 would print %al if operands[0] is reg 0.
06397    w --  likewise, print the HImode name of the register.
06398    k --  likewise, print the SImode name of the register.
06399    q --  likewise, print the DImode name of the register.
06400    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
06401    y -- print "st(0)" instead of "st" as a register.
06402    D -- print condition for SSE cmp instruction.
06403    P -- if PIC, print an @PLT suffix.
06404    X -- don't print any sort of PIC '@' suffix for a symbol.
06405    & -- print some in-use local-dynamic symbol name.
06406    H -- print a memory address offset by 8; used for sse high-parts
06407  */
06408 
06409 void
06410 print_operand (FILE *file, rtx x, int code)
06411 {
06412   if (code)
06413     {
06414       switch (code)
06415   {
06416   case '*':
06417     if (ASSEMBLER_DIALECT == ASM_ATT)
06418       putc ('*', file);
06419     return;
06420 
06421   case '&':
06422     assemble_name (file, get_some_local_dynamic_name ());
06423     return;
06424 
06425   case 'A':
06426     if (ASSEMBLER_DIALECT == ASM_ATT)
06427       putc ('*', file);
06428     else if (ASSEMBLER_DIALECT == ASM_INTEL)
06429       {
06430         /* Intel syntax. For absolute addresses, registers should not
06431      be surrounded by braces.  */
06432         if (GET_CODE (x) != REG)
06433     {
06434       putc ('[', file);
06435       PRINT_OPERAND (file, x, 0);
06436       putc (']', file);
06437       return;
06438     }
06439       }
06440     else
06441       abort ();
06442 
06443     PRINT_OPERAND (file, x, 0);
06444     return;
06445 
06446 
06447   case 'L':
06448     if (ASSEMBLER_DIALECT == ASM_ATT)
06449       putc ('l', file);
06450     return;
06451 
06452   case 'W':
06453     if (ASSEMBLER_DIALECT == ASM_ATT)
06454       putc ('w', file);
06455     return;
06456 
06457   case 'B':
06458     if (ASSEMBLER_DIALECT == ASM_ATT)
06459       putc ('b', file);
06460     return;
06461 
06462   case 'Q':
06463     if (ASSEMBLER_DIALECT == ASM_ATT)
06464       putc ('l', file);
06465     return;
06466 
06467   case 'S':
06468     if (ASSEMBLER_DIALECT == ASM_ATT)
06469       putc ('s', file);
06470     return;
06471 
06472   case 'T':
06473     if (ASSEMBLER_DIALECT == ASM_ATT)
06474       putc ('t', file);
06475     return;
06476 
06477   case 'z':
06478     /* 387 opcodes don't get size suffixes if the operands are
06479        registers.  */
06480     if (STACK_REG_P (x))
06481       return;
06482 
06483     /* Likewise if using Intel opcodes.  */
06484     if (ASSEMBLER_DIALECT == ASM_INTEL)
06485       return;
06486 
06487     /* This is the size of op from size of operand.  */
06488     switch (GET_MODE_SIZE (GET_MODE (x)))
06489       {
06490       case 2:
06491 #ifdef HAVE_GAS_FILDS_FISTS
06492         putc ('s', file);
06493 #endif
06494         return;
06495 
06496       case 4:
06497         if (GET_MODE (x) == SFmode)
06498     {
06499       putc ('s', file);
06500       return;
06501     }
06502         else
06503     putc ('l', file);
06504         return;
06505 
06506       case 12:
06507       case 16:
06508         putc ('t', file);
06509         return;
06510 
06511       case 8:
06512         if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
06513     {
06514 #ifdef GAS_MNEMONICS
06515       putc ('q', file);
06516 #else
06517       putc ('l', file);
06518       putc ('l', file);
06519 #endif
06520     }
06521         else
06522           putc ('l', file);
06523         return;
06524 
06525       default:
06526         abort ();
06527       }
06528 
06529   case 'b':
06530   case 'w':
06531   case 'k':
06532   case 'q':
06533   case 'h':
06534   case 'y':
06535   case 'X':
06536   case 'P':
06537     break;
06538 
06539   case 's':
06540     if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
06541       {
06542         PRINT_OPERAND (file, x, 0);
06543         putc (',', file);
06544       }
06545     return;
06546 
06547   case 'D':
06548     /* Little bit of braindamage here.  The SSE compare instructions
06549        does use completely different names for the comparisons that the
06550        fp conditional moves.  */
06551     switch (GET_CODE (x))
06552       {
06553       case EQ:
06554       case UNEQ:
06555         fputs ("eq", file);
06556         break;
06557       case LT:
06558       case UNLT:
06559         fputs ("lt", file);
06560         break;
06561       case LE:
06562       case UNLE:
06563         fputs ("le", file);
06564         break;
06565       case UNORDERED:
06566         fputs ("unord", file);
06567         break;
06568       case NE:
06569       case LTGT:
06570         fputs ("neq", file);
06571         break;
06572       case UNGE:
06573       case GE:
06574         fputs ("nlt", file);
06575         break;
06576       case UNGT:
06577       case GT:
06578         fputs ("nle", file);
06579         break;
06580       case ORDERED:
06581         fputs ("ord", file);
06582         break;
06583       default:
06584         abort ();
06585         break;
06586       }
06587     return;
06588   case 'O':
06589 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
06590     if (ASSEMBLER_DIALECT == ASM_ATT)
06591       {
06592         switch (GET_MODE (x))
06593     {
06594     case HImode: putc ('w', file); break;
06595     case SImode:
06596     case SFmode: putc ('l', file); break;
06597     case DImode:
06598     case DFmode: putc ('q', file); break;
06599     default: abort ();
06600     }
06601         putc ('.', file);
06602       }
06603 #endif
06604     return;
06605   case 'C':
06606     put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
06607     return;
06608   case 'F':
06609 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
06610     if (ASSEMBLER_DIALECT == ASM_ATT)
06611       putc ('.', file);
06612 #endif
06613     put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
06614     return;
06615 
06616     /* Like above, but reverse condition */
06617   case 'c':
06618     /* Check to see if argument to %c is really a constant
06619        and not a condition code which needs to be reversed.  */
06620     if (!COMPARISON_P (x))
06621     {
06622       output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
06623        return;
06624     }
06625     put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
06626     return;
06627   case 'f':
06628 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
06629     if (ASSEMBLER_DIALECT == ASM_ATT)
06630       putc ('.', file);
06631 #endif
06632     put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
06633     return;
06634 
06635   case 'H':
06636     /* It doesn't actually matter what mode we use here, as we're
06637        only going to use this for printing.  */
06638     x = adjust_address_nv (x, DImode, 8);
06639     break;
06640 
06641   case '+':
06642     {
06643       rtx x;
06644 
06645       if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
06646         return;
06647 
06648       x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
06649       if (x)
06650         {
06651     int pred_val = INTVAL (XEXP (x, 0));
06652 
06653     if (pred_val < REG_BR_PROB_BASE * 45 / 100
06654         || pred_val > REG_BR_PROB_BASE * 55 / 100)
06655       {
06656         int taken = pred_val > REG_BR_PROB_BASE / 2;
06657         int cputaken = final_forward_branch_p (current_output_insn) == 0;
06658 
06659         /* Emit hints only in the case default branch prediction
06660            heuristics would fail.  */
06661         if (taken != cputaken)
06662           {
06663       /* We use 3e (DS) prefix for taken branches and
06664          2e (CS) prefix for not taken branches.  */
06665       if (taken)
06666         fputs ("ds ; ", file);
06667       else
06668         fputs ("cs ; ", file);
06669           }
06670       }
06671         }
06672       return;
06673     }
06674   default:
06675       output_operand_lossage ("invalid operand code '%c'", code);
06676   }
06677     }
06678 
06679   if (GET_CODE (x) == REG)
06680     print_reg (x, code, file);
06681 
06682   else if (GET_CODE (x) == MEM)
06683     {
06684       /* No `byte ptr' prefix for call instructions.  */
06685       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
06686   {
06687     const char * size;
06688     switch (GET_MODE_SIZE (GET_MODE (x)))
06689       {
06690       case 1: size = "BYTE"; break;
06691       case 2: size = "WORD"; break;
06692       case 4: size = "DWORD"; break;
06693       case 8: size = "QWORD"; break;
06694       case 12: size = "XWORD"; break;
06695       case 16: size = "XMMWORD"; break;
06696       default:
06697         abort ();
06698       }
06699 
06700     /* Check for explicit size override (codes 'b', 'w' and 'k')  */
06701     if (code == 'b')
06702       size = "BYTE";
06703     else if (code == 'w')
06704       size = "WORD";
06705     else if (code == 'k')
06706       size = "DWORD";
06707 
06708     fputs (size, file);
06709     fputs (" PTR ", file);
06710   }
06711 
06712       x = XEXP (x, 0);
06713       /* Avoid (%rip) for call operands.  */
06714       if (CONSTANT_ADDRESS_P (x) && code == 'P'
06715          && GET_CODE (x) != CONST_INT)
06716   output_addr_const (file, x);
06717       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
06718   output_operand_lossage ("invalid constraints for operand");
06719       else
06720   output_address (x);
06721     }
06722 
06723   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
06724     {
06725       REAL_VALUE_TYPE r;
06726       long l;
06727 
06728       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
06729       REAL_VALUE_TO_TARGET_SINGLE (r, l);
06730 
06731       if (ASSEMBLER_DIALECT == ASM_ATT)
06732   putc ('$', file);
06733       fprintf (file, "0x%08lx", l);
06734     }
06735 
06736   /* These float cases don't actually occur as immediate operands.  */
06737   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
06738     {
06739       char dstr[30];
06740 
06741       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
06742       fprintf (file, "%s", dstr);
06743     }
06744 
06745   else if (GET_CODE (x) == CONST_DOUBLE
06746      && GET_MODE (x) == XFmode)
06747     {
06748       char dstr[30];
06749 
06750       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
06751       fprintf (file, "%s", dstr);
06752     }
06753 
06754   else
06755     {
06756       /* We have patterns that allow zero sets of memory, for instance.
06757    In 64-bit mode, we should probably support all 8-byte vectors,
06758    since we can in fact encode that into an immediate.  */
06759       if (GET_CODE (x) == CONST_VECTOR)
06760   {
06761     if (x == CONST0_RTX (GET_MODE (x)))
06762       x = const0_rtx;
06763     else
06764       abort ();
06765   }
06766 
06767       if (code != 'P')
06768   {
06769     if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
06770       {
06771         if (ASSEMBLER_DIALECT == ASM_ATT)
06772     putc ('$', file);
06773       }
06774     else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
06775        || GET_CODE (x) == LABEL_REF)
06776       {
06777         if (ASSEMBLER_DIALECT == ASM_ATT)
06778     putc ('$', file);
06779         else
06780     fputs ("OFFSET FLAT:", file);
06781       }
06782   }
06783       if (GET_CODE (x) == CONST_INT)
06784   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
06785       else if (flag_pic)
06786   output_pic_addr_const (file, x, code);
06787       else
06788   output_addr_const (file, x);
06789     }
06790 }
06791 
06792 /* Print a memory operand whose address is ADDR.  */
06793 
06794 void
06795 print_operand_address (FILE *file, rtx addr)
06796 {
06797   struct ix86_address parts;
06798   rtx base, index, disp;
06799   int scale;
06800 
06801   if (! ix86_decompose_address (addr, &parts))
06802     abort ();
06803 
06804   base = parts.base;
06805   index = parts.index;
06806   disp = parts.disp;
06807   scale = parts.scale;
06808 
06809   switch (parts.seg)
06810     {
06811     case SEG_DEFAULT:
06812       break;
06813     case SEG_FS:
06814     case SEG_GS:
06815       if (USER_LABEL_PREFIX[0] == 0)
06816   putc ('%', file);
06817       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
06818       break;
06819     default:
06820       abort ();
06821     }
06822 
06823   if (!base && !index)
06824     {
06825       /* Displacement only requires special attention.  */
06826 
06827       if (GET_CODE (disp) == CONST_INT)
06828   {
06829     if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
06830       {
06831         if (USER_LABEL_PREFIX[0] == 0)
06832     putc ('%', file);
06833         fputs ("ds:", file);
06834       }
06835     fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
06836   }
06837       else if (flag_pic)
06838   output_pic_addr_const (file, disp, 0);
06839       else
06840   output_addr_const (file, disp);
06841 
06842       /* Use one byte shorter RIP relative addressing for 64bit mode.  */
06843       if (TARGET_64BIT
06844     && ((GET_CODE (disp) == SYMBOL_REF
06845          && ! tls_symbolic_operand (disp, GET_MODE (disp)))
06846         || GET_CODE (disp) == LABEL_REF
06847         || (GET_CODE (disp) == CONST
06848       && GET_CODE (XEXP (disp, 0)) == PLUS
06849       && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
06850           || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
06851       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
06852   fputs ("(%rip)", file);
06853     }
06854   else
06855     {
06856       if (ASSEMBLER_DIALECT == ASM_ATT)
06857   {
06858     if (disp)
06859       {
06860         if (flag_pic)
06861     output_pic_addr_const (file, disp, 0);
06862         else if (GET_CODE (disp) == LABEL_REF)
06863     output_asm_label (disp);
06864         else
06865     output_addr_const (file, disp);
06866       }
06867 
06868     putc ('(', file);
06869     if (base)
06870       print_reg (base, 0, file);
06871     if (index)
06872       {
06873         putc (',', file);
06874         print_reg (index, 0, file);
06875         if (scale != 1)
06876     fprintf (file, ",%d", scale);
06877       }
06878     putc (')', file);
06879   }
06880       else
06881   {
06882     rtx offset = NULL_RTX;
06883 
06884     if (disp)
06885       {
06886         /* Pull out the offset of a symbol; print any symbol itself.  */
06887         if (GET_CODE (disp) == CONST
06888       && GET_CODE (XEXP (disp, 0)) == PLUS
06889       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
06890     {
06891       offset = XEXP (XEXP (disp, 0), 1);
06892       disp = gen_rtx_CONST (VOIDmode,
06893           XEXP (XEXP (disp, 0), 0));
06894     }
06895 
06896         if (flag_pic)
06897     output_pic_addr_const (file, disp, 0);
06898         else if (GET_CODE (disp) == LABEL_REF)
06899     output_asm_label (disp);
06900         else if (GET_CODE (disp) == CONST_INT)
06901     offset = disp;
06902         else
06903     output_addr_const (file, disp);
06904       }
06905 
06906     putc ('[', file);
06907     if (base)
06908       {
06909         print_reg (base, 0, file);
06910         if (offset)
06911     {
06912       if (INTVAL (offset) >= 0)
06913         putc ('+', file);
06914       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
06915     }
06916       }
06917     else if (offset)
06918       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
06919     else
06920       putc ('0', file);
06921 
06922     if (index)
06923       {
06924         putc ('+', file);
06925         print_reg (index, 0, file);
06926         if (scale != 1)
06927     fprintf (file, "*%d", scale);
06928       }
06929     putc (']', file);
06930   }
06931     }
06932 }
06933 
06934 bool
06935 output_addr_const_extra (FILE *file, rtx x)
06936 {
06937   rtx op;
06938 
06939   if (GET_CODE (x) != UNSPEC)
06940     return false;
06941 
06942   op = XVECEXP (x, 0, 0);
06943   switch (XINT (x, 1))
06944     {
06945     case UNSPEC_GOTTPOFF:
06946       output_addr_const (file, op);
06947       /* FIXME: This might be @TPOFF in Sun ld.  */
06948       fputs ("@GOTTPOFF", file);
06949       break;
06950     case UNSPEC_TPOFF:
06951       output_addr_const (file, op);
06952       fputs ("@TPOFF", file);
06953       break;
06954     case UNSPEC_NTPOFF:
06955       output_addr_const (file, op);
06956       if (TARGET_64BIT)
06957   fputs ("@TPOFF", file);
06958       else
06959   fputs ("@NTPOFF", file);
06960       break;
06961     case