• Main Page
  • Modules
  • Data Types
  • Files

osprey/be/cg/x8664/expand.cxx

Go to the documentation of this file.
00001 /*
00002  *  Copyright (C) 2007, 2008 PathScale, LLC. All Rights Reserved.
00003  */
00004 
00005 /*
00006  *  Copyright (C) 2006, 2007. QLogic Corporation. All Rights Reserved.
00007  */
00008 
00009 /*
00010  * Copyright 2003, 2004, 2005, 2006 PathScale, Inc.  All Rights Reserved.
00011  */
00012 
00013 /*
00014 
00015   Copyright (C) 2000, 2001 Silicon Graphics, Inc.  All Rights Reserved.
00016 
00017   This program is free software; you can redistribute it and/or modify it
00018   under the terms of version 2 of the GNU General Public License as
00019   published by the Free Software Foundation.
00020 
00021   This program is distributed in the hope that it would be useful, but
00022   WITHOUT ANY WARRANTY; without even the implied warranty of
00023   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00024 
00025   Further, this software is distributed without any warranty that it is
00026   free of the rightful claim of any third person regarding infringement 
00027   or the like.  Any license provided herein, whether implied or 
00028   otherwise, applies only to this software file.  Patent licenses, if 
00029   any, provided herein do not apply to combinations of this program with 
00030   other software, or any other product whatsoever.  
00031 
00032   You should have received a copy of the GNU General Public License along
00033   with this program; if not, write the Free Software Foundation, Inc., 59
00034   Temple Place - Suite 330, Boston MA 02111-1307, USA.
00035 
00036   Contact information:  Silicon Graphics, Inc., 1600 Amphitheatre Pky,
00037   Mountain View, CA 94043, or:
00038 
00039   http://www.sgi.com
00040 
00041   For further information regarding this notice, see:
00042 
00043   http://oss.sgi.com/projects/GenInfo/NoticeExplan
00044 
00045 */
00046 
00047 
00048 /* ====================================================================
00049  * ====================================================================
00050  *
00051  * Module: expand.c
00052  * $Revision: 1.363 $
00053  * $Date: 05/12/01 12:16:40-08:00 $
00054  * $Author: cfang@dunite.internal.keyresearch.com $
00055  * $Source: be/cg/x8664/SCCS/s.expand.cxx $
00056  *
00057  * Description:
00058  *
00059  * This file contains the internals of code expansion. Its interface
00060  * is 'Exp_OP', which takes an OP, expands it into a list of OPs which
00061  * are appended to the oplist passed in.
00062  *
00063  * It handles all the macro expansions, special handling during 
00064  * expansion and all the nitty gritty stuff that goes along with it.
00065  *
00066  * ====================================================================
00067  * ====================================================================
00068  */
00069 
00070 #include <stdint.h>
00071 #include "defs.h"
00072 #include "config.h"
00073 #include "erglob.h"
00074 #include "ercg.h"
00075 #include "glob.h"
00076 #include "tracing.h"
00077 #include "util.h"
00078 
00079 #include "tn.h"
00080 #include "cg_flags.h"
00081 #include "bb.h"
00082 #include "symtab.h"
00083 #include "opcode.h"
00084 #include "const.h"  /* needed to manipulate target/host consts */
00085 #include "targ_const.h" /* needed to manipulate target/host consts */
00086 #include "op.h"
00087 #include "data_layout.h"
00088 #include "stblock.h"
00089 #include "cgexp.h"
00090 #include "cgexp_internals.h"
00091 #include "w2op.h"
00092 #include "label_util.h"
00093 #include "cgtarget.h"
00094 #include "whirl2ops.h"
00095 #include "targ_sim.h"   /* To generate stores of param registers in builtin_apply_args */
00096 #include "targ_const_private.h"
00097 #include "config_opt.h" /* For Force_IEEE_Comparisons */
00098 #include "intrn_info.h" // for INTRN_rt_name
00099 #ifdef KEY
00100 #include "ebo.h"
00101 #endif
00102 
00103 BOOL Reuse_Temp_TNs = FALSE;  /* NOTE(review): not referenced in the visible part of this file -- confirm users */
00104 
00105 BOOL Trace_Exp2 = FALSE;      /* extra cgexp trace */
00106 
00107 /* Dup_TN won't dup a dedicated tn, but for our purposes we
00108  * could just re-use the dedicated tn.  We don't want to re-use a
00109  * symbolic tn, or it will mess up live ranges. */
00110 /* DOESN'T WORK:  re-using the dedicated tn causes problems in Create_lvs
00111  * because it creates a use of a parm reg at the call-site, so it looks
00112  * like the parm-reg is incoming at the call.  This probably should work,
00113  * but for now we use the other routine, which creates a real dup tn. */
00114 #define DUP_TN(tn)  Dup_TN_Even_If_Dedicated(tn)
00115 
00116 static BOOL Target_Support_Cmov()
00117 {
00118   if (Is_Target_32bit() &&
00119       (Target == TARGET_anyx86 ||
00120        Target == TARGET_pentium4 ||
00121        Target == TARGET_xeon ||
00122        Target == TARGET_athlon) )
00123     return FALSE;
00124   else
00125     return TRUE;
00126 }
00127 
00128 static TN_MAP _TN_Pair_table = NULL;  /* lower-half TN -> higher-half TN; created in Expand_Start, deleted in Expand_Finish (32-bit targets only) */
00129 
      /* Forward declarations of helpers defined later in this file. */
00130 static TN *Exp_Fetch_and_Add (TN *addr, TN *opnd1, TYPE_ID mtype, OPS *ops);
00131 static void Store_To_Temp_Stack(TYPE_ID desc, TN *src, const char *sym_name, TN **mem_base_tn,
00132         TN **mem_ofst_tn, OPS *ops);
00133 
00134 void
00135 Expand_Cmov (TOP top, TN *result, TN *src, TN *rflags, OPS *ops, TN *result2,
00136        TN *src2)
00137 {
00138   // OSP, laijx
00139   if ( ! Target_Support_Cmov() ) {
00140     // Processor doesn't support cmov.  Emit conditional branch followed by
00141     // mov.
00142 
00143     TN *tmp_result = result;
00144     TN *tmp_result2 = result2;
00145 
00146     if (TN_is_dedicated(result)) {
00147       tmp_result = Build_TN_Like(result);
00148     }
00149     if (result2 != NULL &&
00150   TN_is_dedicated(result2)) {
00151       tmp_result2 = Build_TN_Like(result2);
00152     }
00153 
00154     // Determine the branch instruction from the cmov.
00155     TOP br_top;
00156     switch (top) {
00157       case TOP_cmovb: br_top = TOP_jae; break;
00158       case TOP_cmovae:  br_top = TOP_jb;  break;
00159       case TOP_cmovp: br_top = TOP_jnp; break;
00160       case TOP_cmovnp:  br_top = TOP_jp;  break;
00161       case TOP_cmove: br_top = TOP_jne; break;
00162       case TOP_cmovne:  br_top = TOP_je;  break;
00163       case TOP_cmovbe:  br_top = TOP_ja;  break;
00164       case TOP_cmova: br_top = TOP_jbe; break;
00165       case TOP_cmovl: br_top = TOP_jge; break;
00166       case TOP_cmovge:  br_top = TOP_jl;  break;
00167       case TOP_cmovle:  br_top = TOP_jg;  break;
00168       case TOP_cmovg: br_top = TOP_jle; break;
00169       case TOP_cmovs: br_top = TOP_jns; break;
00170       case TOP_cmovns:  br_top = TOP_js;  break;
00171       case TOP_fcmovb:  br_top = TOP_jae; break;
00172       case TOP_fcmovbe: br_top = TOP_ja;  break;
00173       case TOP_fcmovnb: br_top = TOP_jb;  break;
00174       case TOP_fcmovnbe: br_top = TOP_jbe;  break;
00175       case TOP_fcmove:  br_top = TOP_jne; break;
00176       case TOP_fcmovne: br_top = TOP_je;  break;
00177       case TOP_fcmovu:  br_top = TOP_jnp; break;  // br if PF=0
00178       case TOP_fcmovnu: br_top = TOP_jp;  break;  // br if Pf=1
00179       default:    FmtAssert(FALSE, ("Expand_Cmov: unexpected OP code"));
00180     }
00181     BB *bb_entry = Cur_BB;
00182     BB *bb_then = Gen_And_Append_BB(bb_entry);
00183     BB *bb_exit = Gen_And_Append_BB(bb_then);
00184 
00185     const LABEL_IDX bb_exit_label = Gen_Label_For_BB(bb_exit);
00186 
00187     BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR, 1);
00188     WN_kid0(BB_branch_wn(bb_entry)) = NULL;
00189     WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
00190 
00191     // Build bb_entry.
00192     {
00193       if (result != tmp_result) {
00194   Exp_COPY(tmp_result, result, ops);
00195       }
00196       if (result2 != NULL &&
00197     result2 != tmp_result) {
00198   Exp_COPY(tmp_result2, result2, ops);
00199       }
00200       Build_OP(br_top, rflags, Gen_Label_TN(bb_exit_label, 0), ops);
00201       if (&New_OPs != ops)
00202         OPS_Append_Ops(&New_OPs, ops);
00203       Process_New_OPs();
00204       BB_Append_Ops(bb_entry, &New_OPs);
00205       OPS_Init(&New_OPs);
00206       OPS_Init(ops);
00207     }
00208 
00209     // Build bb_then.
00210     {
00211       OPS *bb_then_ops = &New_OPs;
00212       Exp_COPY(tmp_result, src, bb_then_ops);
00213       if (result2 != NULL)
00214         Exp_COPY(tmp_result2, src2, bb_then_ops);
00215       total_bb_insts = 0;
00216       Last_Processed_OP = NULL;
00217       Process_New_OPs();
00218       BB_Append_Ops(bb_then, bb_then_ops);
00219       OPS_Init(bb_then_ops);
00220     }
00221 
00222     Cur_BB = bb_exit;
00223 
00224     if (result != tmp_result)
00225       Exp_COPY(result, tmp_result, ops);
00226     if (result2 != tmp_result2)
00227       Exp_COPY(result2, tmp_result2, ops);
00228   } else {
00229     // Processor supports cmov.
00230     Build_OP(top, result, src, rflags, ops);
00231     Set_OP_cond_def_kind(OPS_last(ops), OP_ALWAYS_COND_DEF);
00232     if (result2 != NULL) {
00233       Is_True(src2 != NULL, ("Expand_Cmov: invalid src2"));
00234       Build_OP(top, result2, src2, rflags, ops);
00235       Set_OP_cond_def_kind(OPS_last(ops), OP_ALWAYS_COND_DEF);
00236     }
00237   }
00238 }
00239 
00240 void Expand_Start()
00241 {
00242   if( !Is_Target_32bit() )
00243     return;
00244 
00245   FmtAssert( _TN_Pair_table == NULL, ("TN_Pair_table is not NULL") );
00246   _TN_Pair_table = TN_MAP_Create();
00247 }
00248 
00249 
00250 // Always use the lower part as key.
00251 TN* Get_TN_Pair( TN* key )
00252 {
00253   TN* pair = NULL;
00254 
00255   if( Is_Target_32bit() &&
00256       TN_is_register( key ) ){
00257     pair = (TN*)TN_MAP_Get( _TN_Pair_table, key );
00258   }
00259 
00260   return pair;
00261 }
00262 
00263 
00264 void Create_TN_Pair( TN* key, TN* pair )
00265 {
00266   Is_True( Get_TN_Pair( key ) == NULL, ("Add_TN_Pair: higher 32-bit is missing") );
00267   TN_MAP_Set( _TN_Pair_table, key, pair );    
00268 }
00269 
00270 
00271 /* Always use the lower part as key.
00272    Notice that literal TNs will not have pairs.
00273  */
00274 TN* Create_TN_Pair( TN* key, TYPE_ID mtype )
00275 {
00276   FmtAssert( TN_is_register(key), ("TN is not a register type") );
00277 
00278   if( mtype == MTYPE_I8 )
00279     mtype = MTYPE_I4;
00280   else if( mtype == MTYPE_U8 )
00281     mtype = MTYPE_U4;
00282 
00283   TN* pair = Get_TN_Pair( key );
00284 
00285   if( pair == NULL ){
00286     Set_TN_size( key, MTYPE_byte_size(mtype) );
00287     /* We don't know what <pair> will be later. So don't use
00288        Dup_TN that will carry the homing info of <key>.
00289      */
00290     pair = Build_TN_Like( key );
00291     TN_MAP_Set( _TN_Pair_table, key, pair );
00292   }
00293 
00294   if( TN_register(key) != REGISTER_UNDEFINED ){
00295     Is_True( TN_register(pair) != REGISTER_UNDEFINED, ("pair TN is async") );
00296   }
00297 
00298   return pair;
00299 }
00300 
00301 
00302 void Expand_Finish()
00303 {
00304   if( !Is_Target_32bit() )
00305     return;
00306 
00307   FmtAssert( _TN_Pair_table != NULL, ("TN_Pair_table is NULL") );
00308   TN_MAP_Delete( _TN_Pair_table );
00309   _TN_Pair_table = NULL;
00310 }
00311 
00312 static TN* Gen_Const_Symbol_TN( INT64 int_val,
00313         double float_val,
00314         TYPE_ID mtype,
00315         TN_RELOCS relocs = TN_RELOC_NONE )
00316 {
00317   FmtAssert( !MTYPE_is_quad(mtype), ("Quad const is not supported") );
00318   FmtAssert( !MTYPE_is_vector(mtype), ("Vector const is not supported") );
00319 
00320   const TCON tcon = MTYPE_is_integral(mtype)
00321     ? Host_To_Targ( mtype, int_val ) : Host_To_Targ_Float( mtype, float_val );
00322 
00323   ST* sym = New_Const_Sym( Enter_tcon(tcon),  Be_Type_Tbl( TCON_ty(tcon) ) );
00324 
00325   ST* base_sym = NULL;
00326   INT64 base_ofst = 0;
00327 
00328   Allocate_Object(sym);
00329   Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
00330 
00331   return Gen_Symbol_TN( base_sym, base_ofst, relocs );
00332 }
00333 
00334 
00335 /*  <result> = <cmp_kid1> <compare> <cmp_kid2> ? <true_tn> : <false_tn>
      *
      *  Emulates a select on a 64-bit integer comparison with 32-bit compares
      *  and branches.  The result is first loaded with <true_tn>; control then
      *  either branches to bb_exit (condition holds) or reaches bb_non_set,
      *  which overwrites the result with <false_tn>.
      *
      *  Four BBs are appended after the current one, in this order:
      *    bb_cmp_hi   high-half compare that can decide "false"
      *                (left without ops for OPR_NE)
      *    bb_cmp_lo   low-half compare; a taken branch means "true"
      *    bb_non_set  result = <false_tn>
      *    bb_exit     becomes Cur_BB on return
      *  The current BB (bb_entry) receives the ops accumulated so far plus,
      *  unless <compare> is OPR_EQ, a high-half compare that can decide "true".
      *
      *  <cmp_opcode> must be TOP_cmp64, TOP_cmpi64, TOP_test64 or TOP_testi64;
      *  it is replaced by its 32-bit counterpart.  <result> must not be the
      *  same TN as <false_tn>.
00336  */
00337 static void Expand_Split_Select( TN* dest, OPERATOR compare, TOP cmp_opcode,
00338          TN* cmp_kid1, TN* cmp_kid2, TYPE_ID cmp_type,
00339          TN* true_tn, TN* false_tn, TYPE_ID select_type,
00340          OPS* ops )
00341 {
00342   TN* result = dest;
00343 
        // A dedicated <dest> is shadowed by a temporary, which is copied back
        // into <dest> at the end; if <dest> has a pair, the temporary gets one
        // too, initialised to 0.
00344   if( TN_is_dedicated(dest) ){
00345     result = Build_TN_Like( dest );
00346 
00347     if( Get_TN_Pair(dest) != NULL ){
00348       TN* result_hi = Create_TN_Pair( result, MTYPE_I8 );
00349       Build_OP( TOP_ldc32, result_hi, Gen_Literal_TN(0,4), ops );
00350     }
00351   }
00352 
00353   FmtAssert( result != false_tn, ("result and false_tn are identical") );
00354 
        // Assume the condition holds; bb_non_set undoes this otherwise.
00355   Expand_Copy( result, true_tn, select_type, ops );
00356 
        // Narrow the 64-bit compare opcode to its 32-bit form.
00357   switch( cmp_opcode ){
00358   case TOP_cmp64:   cmp_opcode = TOP_cmp32;   break;
00359   case TOP_cmpi64:  cmp_opcode = TOP_cmpi32;  break;
00360   case TOP_test64:  cmp_opcode = TOP_test32;  break;
00361   case TOP_testi64: cmp_opcode = TOP_testi32; break;
00362   default:
00363     FmtAssert( false, ("Expand_Split_Select: Unknown compare opcode") );
00364   }
00365 
00366   TN* cmp_kid1_hi = Get_TN_Pair( cmp_kid1 );
00367   TN* cmp_kid2_hi = Get_TN_Pair( cmp_kid2 );
00368   TN* rflags = Rflags_TN();
00369 
        // An operand without a recorded pair: a literal supplies its own upper
        // 32 bits; anything else is given a fresh TN loaded with 0.
00370   if( cmp_kid2_hi == NULL ){
00371     if( TN_has_value(cmp_kid2) ){
00372       const INT64 val = TN_value(cmp_kid2);
00373       cmp_kid2_hi = Gen_Literal_TN( val >> 32, 4 );
00374 
00375     } else {
00376       DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00377          TN_number(cmp_kid2) );
00378       cmp_kid2_hi = Build_TN_Like( cmp_kid2 );
00379       Build_OP( TOP_ldc32, cmp_kid2_hi, Gen_Literal_TN(0,4), ops );    
00380     }
00381   }
00382 
00383   if( cmp_kid1_hi == NULL ){
00384     if( TN_has_value(cmp_kid1) ){
00385       const INT64 val = TN_value(cmp_kid1);
00386       cmp_kid1_hi = Gen_Literal_TN( val >> 32, 4 );
00387 
00388     } else {
00389       DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00390          TN_number(cmp_kid1) );
00391       cmp_kid1_hi = Build_TN_Like( cmp_kid1 );
00392       Build_OP( TOP_ldc32, cmp_kid1_hi, Gen_Literal_TN(0,4), ops );    
00393     }
00394   }
00395 
00396   BB* bb_entry  = Cur_BB;
00397   BB* bb_cmp_hi = Gen_And_Append_BB( bb_entry );
00398   BB* bb_cmp_lo = Gen_And_Append_BB( bb_cmp_hi );
00399   BB* bb_non_set = Gen_And_Append_BB( bb_cmp_lo );
00400   const LABEL_IDX bb_non_set_label = Gen_Label_For_BB( bb_non_set );
00401   BB* bb_exit    = Gen_And_Append_BB( bb_non_set );
00402   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
00403 
        // Attach a TRUEBR whirl node (with no condition kid) to each branching
        // BB, naming the label that BB's branch targets.
00404   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
00405   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
00406   WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
00407 
00408   BB_branch_wn(bb_cmp_hi) = WN_Create(OPC_TRUEBR,1);
00409   WN_kid0(BB_branch_wn(bb_cmp_hi)) = NULL;
00410   WN_label_number(BB_branch_wn(bb_cmp_hi)) = bb_non_set_label;
00411 
00412   BB_branch_wn(bb_cmp_lo) = WN_Create(OPC_TRUEBR,1);
00413   WN_kid0(BB_branch_wn(bb_cmp_lo)) = NULL;
00414   WN_label_number(BB_branch_wn(bb_cmp_lo)) = bb_exit_label;
00415 
00416   // Compare the higher 32-bit here: a strict ordering (or, for OPR_NE,
        // any difference) in the high halves already proves the condition
        // true, so branch straight to bb_exit.  Nothing can be decided this
        // way for OPR_EQ.
00417   {
00418     if( compare != OPR_EQ ){
00419       TOP jmp = TOP_UNDEFINED;
00420       switch( compare ){
00421       case OPR_GT:
00422       case OPR_GE:  jmp = MTYPE_is_signed(cmp_type) ? TOP_jg : TOP_ja;   break;
00423       case OPR_LT:
00424       case OPR_LE:  jmp = MTYPE_is_signed(cmp_type) ? TOP_jl : TOP_jb;   break;
00425       case OPR_NE:  jmp = TOP_jne; break;
00426       }
00427 
00428       Build_OP( cmp_opcode, rflags, cmp_kid1_hi, cmp_kid2_hi, ops );
00429       Build_OP( jmp, rflags, Gen_Label_TN( bb_exit_label, 0 ), ops );
00430     }
00431 
          // Move everything accumulated so far into bb_entry and reset both
          // op lists.
00432     if( ops != &New_OPs )
00433       OPS_Append_Ops( &New_OPs, ops );
00434 
00435     Process_New_OPs();
00436     BB_Append_Ops( bb_entry, &New_OPs );
00437     OPS_Init( &New_OPs );
00438     OPS_Init( ops );
00439   }
00440 
00441   // Compare the higher 32-bit again: the opposite strict ordering (or,
        // for OPR_EQ, any difference) proves the condition false, so branch
        // to bb_non_set.  Not needed for OPR_NE.
00442   if( compare != OPR_NE ){
00443     OPS* bb_cmp_hi_ops = &New_OPs;
00444     TOP jmp = TOP_UNDEFINED;
00445 
00446     switch( compare ){
00447     case OPR_GT:
00448     case OPR_GE: jmp = MTYPE_is_signed(cmp_type) ? TOP_jl : TOP_jb;  break;
00449     case OPR_LE:
00450     case OPR_LT: jmp = MTYPE_is_signed(cmp_type) ? TOP_jg : TOP_ja;  break;
00451     case OPR_EQ: jmp = TOP_jne; break;
00452     }
00453 
00454     Build_OP( cmp_opcode, rflags, cmp_kid1_hi, cmp_kid2_hi, bb_cmp_hi_ops );
00455     Build_OP( jmp, rflags, Gen_Label_TN( bb_non_set_label, 0 ), bb_cmp_hi_ops );
00456 
00457     total_bb_insts = 0;
00458     Last_Processed_OP = NULL;
00459     Process_New_OPs();
00460     BB_Append_Ops( bb_cmp_hi, bb_cmp_hi_ops );
00461     OPS_Init( bb_cmp_hi_ops );
00462   }
00463 
00464   // Compare the lower 32-bit, given the same higher 32-bit.
        // The unsigned condition codes are used here whatever <cmp_type> is.
00465   {
00466     OPS* bb_cmp_lo_ops = &New_OPs;
00467     TOP jmp = TOP_UNDEFINED;
00468 
00469     switch( compare ){
00470     case OPR_GT:  jmp = TOP_ja;  break;
00471     case OPR_GE:  jmp = TOP_jae; break;
00472     case OPR_LT:  jmp = TOP_jb;  break;
00473     case OPR_LE:  jmp = TOP_jbe; break;
00474     case OPR_NE:  jmp = TOP_jne; break;
00475     case OPR_EQ:  jmp = TOP_je;  break;
00476     }
00477 
00478     Build_OP( cmp_opcode, rflags, cmp_kid1, cmp_kid2, bb_cmp_lo_ops );
00479     Build_OP( jmp, rflags, Gen_Label_TN( bb_exit_label, 0 ), bb_cmp_lo_ops );
00480 
00481     total_bb_insts = 0;
00482     Last_Processed_OP = NULL;
00483     Process_New_OPs();
00484     BB_Append_Ops( bb_cmp_lo, bb_cmp_lo_ops );
00485     OPS_Init( bb_cmp_lo_ops );
00486   }
00487 
00488   // Now we reach a false condition: replace the preloaded <true_tn>
        // value with <false_tn>.
00489   {
00490     OPS* bb_non_set_ops = &New_OPs;
00491 
00492     Expand_Copy( result, false_tn, select_type, bb_non_set_ops );
00493 
00494     total_bb_insts = 0;
00495     Last_Processed_OP = NULL;
00496     Process_New_OPs();
00497     BB_Append_Ops( bb_non_set, bb_non_set_ops );
00498     OPS_Init( bb_non_set_ops );
00499   }
00500 
        // Subsequent ops (including the copy-back below) go into bb_exit.
00501   Cur_BB = bb_exit;
00502 
00503   if( result != dest ){
00504     Expand_Copy( dest, result, select_type, ops );
00505   }
00506 }
00507 
00508 
      /* Emulate a 64-bit integer compare-and-set with 32-bit compares and
       * branches: <result> = (<src1> <cond> <src2>) ? 1 : 0, where <cond> is
       * given by <set_opcode> (a TOP_setXX opcode).
       *
       * The result is first loaded with 1; control then either branches to
       * bb_exit (condition holds) or reaches bb_non_set, which loads 0.
       *
       * Four BBs are appended after the current one, in this order:
       *   bb_cmp_hi   high-half compare that can decide "false"
       *               (left without ops for TOP_setne)
       *   bb_cmp_lo   low-half compare; a taken branch means "true"
       *   bb_non_set  result = 0
       *   bb_exit     becomes Cur_BB on return
       * The current BB (bb_entry) receives the ops accumulated so far plus,
       * unless <set_opcode> is TOP_sete, a high-half compare that can decide
       * "true".
       *
       * <cmp_opcode> must be TOP_cmp64, TOP_cmpi64, TOP_test64 or TOP_testi64;
       * it is replaced by its 32-bit counterpart.  <src1_lo>/<src2_lo> are the
       * lower halves (or literals); the upper halves come from the TN pair
       * table, from the literal value, or default to 0.
       */
00509 static void  Expand_Split_Int_Cmp( TOP cmp_opcode, TN* src1_lo, TN* src2_lo,
00510            TOP set_opcode, TN* result,
00511            TYPE_ID mtype,  OPS* ops )
00512 {
00513   TN* tmp_result = result;
00514 
        // A dedicated <result> is shadowed by a temporary and copied back at
        // the end.
00515   if( TN_is_dedicated( result ) ){
00516     tmp_result = Build_TN_Like( result );
00517   }
00518 
        // The result is written before the compares are emitted, so a source
        // that is the same TN as the result is first saved in a fresh TN.
00519   if( tmp_result == src1_lo ){
00520     TN* tmp = Build_TN_Like(src1_lo);
00521     Expand_Copy( tmp, src1_lo, mtype, ops );
00522     src1_lo = tmp;
00523   }
00524 
00525   if( tmp_result == src2_lo ){
00526     TN* tmp = Build_TN_Like(src2_lo);
00527     Expand_Copy( tmp, src2_lo, mtype, ops );
00528     src2_lo = tmp;
00529   }
00530 
        // Assume the condition holds; bb_non_set undoes this otherwise.
00531   Exp_Immediate( tmp_result, Gen_Literal_TN(1,4), FALSE, ops );
00532 
        // Narrow the 64-bit compare opcode to its 32-bit form.
00533   switch( cmp_opcode ){
00534   case TOP_cmp64:   cmp_opcode = TOP_cmp32;   break;
00535   case TOP_cmpi64:  cmp_opcode = TOP_cmpi32;  break;
00536   case TOP_test64:  cmp_opcode = TOP_test32;  break;
00537   case TOP_testi64: cmp_opcode = TOP_testi32; break;
00538   default:
00539     FmtAssert( false, ("Expand_Split_Int_Cmp: Unknown compare opcode") );
00540   }
00541 
00542   TN* src1_hi = Get_TN_Pair( src1_lo );
00543   TN* src2_hi = Get_TN_Pair( src2_lo );
00544   TN* rflags = Rflags_TN();
00545 
        // An operand without a recorded pair: a literal supplies its own upper
        // 32 bits; anything else is given a fresh TN loaded with 0.
00546   if( src1_hi == NULL ){
00547     if( TN_has_value(src1_lo) ){
00548       const INT64 val = TN_value( src1_lo ) >> 32;
00549       src1_hi = Gen_Literal_TN( val, 4 );
00550 
00551     } else {
00552       DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00553          TN_number(src1_lo) );
00554       src1_hi = Build_TN_Like( src1_lo );
00555       Build_OP( TOP_ldc32, src1_hi, Gen_Literal_TN(0,4), ops );    
00556     }
00557   }
00558 
00559   if( src2_hi == NULL ){
00560     if( TN_has_value(src2_lo) ){
00561       const INT64 val = TN_value( src2_lo ) >> 32;
00562       src2_hi = Gen_Literal_TN( val, 4 );
00563 
00564     } else {
00565       DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00566          TN_number(src2_lo) );
00567       src2_hi = Build_TN_Like( src2_lo );
00568       Build_OP( TOP_ldc32, src2_hi, Gen_Literal_TN(0,4), ops );    
00569     }
00570   }
00571 
00572   BB* bb_entry  = Cur_BB;
00573   BB* bb_cmp_hi = Gen_And_Append_BB( bb_entry );
00574   BB* bb_cmp_lo = Gen_And_Append_BB( bb_cmp_hi );
00575   BB* bb_non_set = Gen_And_Append_BB( bb_cmp_lo );
00576   const LABEL_IDX bb_non_set_label = Gen_Label_For_BB( bb_non_set );
00577   BB* bb_exit    = Gen_And_Append_BB( bb_non_set );
00578   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
00579 
        // Attach a TRUEBR whirl node (with no condition kid) to each branching
        // BB, naming the label that BB's branch targets.
00580   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
00581   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
00582   WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
00583 
00584   BB_branch_wn(bb_cmp_hi) = WN_Create(OPC_TRUEBR,1);
00585   WN_kid0(BB_branch_wn(bb_cmp_hi)) = NULL;
00586   WN_label_number(BB_branch_wn(bb_cmp_hi)) = bb_non_set_label;
00587 
00588   BB_branch_wn(bb_cmp_lo) = WN_Create(OPC_TRUEBR,1);
00589   WN_kid0(BB_branch_wn(bb_cmp_lo)) = NULL;
00590   WN_label_number(BB_branch_wn(bb_cmp_lo)) = bb_exit_label;
00591 
00592   // Compare the higher 32-bit here: a strict ordering (or, for setne,
        // any difference) in the high halves already proves the condition
        // true, so branch straight to bb_exit.  Nothing can be decided this
        // way for sete.
00593   {
00594     if( set_opcode != TOP_sete ){
00595       TOP jmp = TOP_UNDEFINED;
00596       switch( set_opcode ){
00597       case TOP_setg:
00598       case TOP_setge: jmp = TOP_jg;  break;
00599       case TOP_seta:
00600       case TOP_setae: jmp = TOP_ja;  break;
00601       case TOP_setl:
00602       case TOP_setle: jmp = TOP_jl;  break;
00603       case TOP_setb:
00604       case TOP_setbe: jmp = TOP_jb;  break;
00605       case TOP_setne: jmp = TOP_jne; break;
00606       }
00607 
00608       Build_OP( cmp_opcode, rflags, src1_hi, src2_hi, ops );
00609       Build_OP( jmp, rflags, Gen_Label_TN( bb_exit_label, 0 ), ops );
00610     }
00611 
          // Move everything accumulated so far into bb_entry and reset both
          // op lists.
00612     if( &New_OPs != ops )
00613       OPS_Append_Ops( &New_OPs, ops );
00614 
00615     Process_New_OPs();
00616     BB_Append_Ops( bb_entry, &New_OPs );
00617     OPS_Init( &New_OPs );
00618     OPS_Init( ops );
00619   }
00620 
00621   // Compare the higher 32-bit again: the opposite strict ordering (or,
        // for sete, any difference) proves the condition false, so branch to
        // bb_non_set.  Not needed for setne.
00622   if( set_opcode != TOP_setne ){
00623     OPS* bb_cmp_hi_ops = &New_OPs;
00624     TOP jmp = TOP_UNDEFINED;
00625 
00626     switch( set_opcode ){
00627     case TOP_setg:
00628     case TOP_setge: jmp = TOP_jl;  break;
00629     case TOP_seta:
00630     case TOP_setae: jmp = TOP_jb;  break;
00631     case TOP_setl:
00632     case TOP_setle: jmp = TOP_jg;  break;
00633     case TOP_setb:
00634     case TOP_setbe: jmp = TOP_ja;  break;
00635     case TOP_sete:  jmp = TOP_jne; break;
00636     }
00637 
00638     Build_OP( cmp_opcode, rflags, src1_hi, src2_hi, bb_cmp_hi_ops );
00639     Build_OP( jmp, rflags, Gen_Label_TN( bb_non_set_label, 0 ), bb_cmp_hi_ops );
00640 
00641     total_bb_insts = 0;
00642     Last_Processed_OP = NULL;
00643     Process_New_OPs();
00644     BB_Append_Ops( bb_cmp_hi, bb_cmp_hi_ops );
00645     OPS_Init( bb_cmp_hi_ops );
00646   }
00647 
00648   // Compare the lower 32-bit, given the same higher 32-bit.
        // Signed and unsigned set opcodes map to the same unsigned branch here.
00649   {
00650     OPS* bb_cmp_lo_ops = &New_OPs;
00651     TOP jmp = TOP_UNDEFINED;
00652 
00653     switch( set_opcode ){
00654     case TOP_setg:
00655     case TOP_seta:  jmp = TOP_ja;  break;
00656     case TOP_setge:
00657     case TOP_setae: jmp = TOP_jae; break;
00658     case TOP_setl:
00659     case TOP_setb:  jmp = TOP_jb;  break;
00660     case TOP_setle:
00661     case TOP_setbe: jmp = TOP_jbe; break;
00662     case TOP_setne: jmp = TOP_jne; break;
00663     case TOP_sete:  jmp = TOP_je;  break;
00664     }
00665 
00666     Build_OP( cmp_opcode, rflags, src1_lo, src2_lo, bb_cmp_lo_ops );
00667     Build_OP( jmp, rflags, Gen_Label_TN( bb_exit_label, 0 ), bb_cmp_lo_ops );
00668 
00669     total_bb_insts = 0;
00670     Last_Processed_OP = NULL;
00671     Process_New_OPs();
00672     BB_Append_Ops( bb_cmp_lo, bb_cmp_lo_ops );
00673     OPS_Init( bb_cmp_lo_ops );
00674   }
00675 
00676   // Now we reach a false condition: replace the preloaded 1 with 0.
00677   {
00678     OPS* bb_non_set_ops = &New_OPs;
00679 
00680     Build_OP( TOP_ldc32, tmp_result, Gen_Literal_TN(0,4), bb_non_set_ops );
00681 
00682     total_bb_insts = 0;
00683     Last_Processed_OP = NULL;
00684     Process_New_OPs();
00685     BB_Append_Ops( bb_non_set, bb_non_set_ops );
00686     OPS_Init( bb_non_set_ops );
00687   }
00688 
        // Subsequent ops (copy-back and high-half clear) go into bb_exit.
00689   Cur_BB = bb_exit;
00690 
        // For a dedicated <result>, copy the temporary back; the upper half is
        // cleared only if <result> already has a pair.
00691   if( result != tmp_result ){
00692     Exp_COPY( result, tmp_result, ops );
00693     if( Get_TN_Pair( result ) == NULL )
00694       return;
00695   }
00696 
        // The value is 0 or 1, so the upper 32 bits of the result are 0.
00697   TN* result_hi = Create_TN_Pair( result, MTYPE_I8 );
00698   Build_OP( TOP_ldc32, result_hi, Gen_Literal_TN(0,4), ops );
00699 }
00700 
00701 
00702 static void Expand_Split_Cvtl( TYPE_ID mtype, TOP top, TN* result, TN* src, OPS* ops )
00703 {
00704   TN* result_hi = Create_TN_Pair( result, mtype );
00705 
00706   switch( top ){
00707   case TOP_movsbq:
00708     Build_OP( TOP_movsbl, result, src, ops );
00709     Build_OP( TOP_sari32,  result_hi, src, Gen_Literal_TN(31,4), ops );
00710     break;
00711 
00712   case TOP_movswq:
00713     Build_OP( TOP_movswl, result, src, ops );
00714     Build_OP( TOP_sari32,  result_hi, src, Gen_Literal_TN(31,4), ops );
00715     break;
00716 
00717   case TOP_movslq:
00718     Build_OP( TOP_mov32, result, src, ops );
00719     Build_OP( TOP_sari32, result_hi, src, Gen_Literal_TN(31,4), ops );
00720     break;
00721 
00722   case TOP_movzbq:
00723     Build_OP( TOP_movzbl, result, src, ops );
00724     Build_OP( TOP_ldc32,  result_hi, Gen_Literal_TN(0,4), ops );
00725     break;
00726 
00727   case TOP_movzwq:
00728     Build_OP( TOP_movzwl, result, src, ops );
00729     Build_OP( TOP_ldc32,  result_hi, Gen_Literal_TN(0,4), ops );
00730     break;
00731 
00732   case TOP_mov32:
00733     Build_OP( TOP_mov32, result, src, ops );
00734     Build_OP( TOP_ldc32, result_hi, Gen_Literal_TN(0,4), ops );
00735     break;
00736 
00737   default:
00738     FmtAssert( FALSE,
00739          ("Expand_Split_Cvtl: Unsupported operation (%s)", TOP_Name(top)) );
00740   }
00741 }
00742 
00743 
00744 /* Use two or three 32-bit operations to emulate a 64-bit
00745    unary operation.
00746 */
00747 void Expand_Split_UOP( OPERATOR opr, TYPE_ID mtype,
00748            TN* result, TN* src,
00749            OPS* ops )
00750 {
00751   TOP top = TOP_UNDEFINED;
00752   TN* result_h = Create_TN_Pair( result, mtype );
00753   TN* src_h = TN_has_value(src) ? NULL : Get_TN_Pair(src);
00754 
00755   if( TN_has_value(src) ){
00756     if( MTYPE_signed( mtype ) ){
00757       const INT64 val = TN_value( src );
00758       src   = Gen_Literal_TN( ( val << 32 ) >> 32, 4 );
00759       src_h = Gen_Literal_TN( ( val >> 32 ), 4 );
00760     } else {
00761       const UINT64 val = TN_value( src );
00762       src   = Gen_Literal_TN( ( val << 32 ) >> 32, 4 );
00763       src_h = Gen_Literal_TN( ( val >> 32 ), 4 );
00764     }
00765   }
00766 
00767   if ( src_h == NULL && opr != OPR_INTCONST ) {
00768     DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00769        TN_number(src) );
00770     src_h = Build_TN_Like( src );
00771     Build_OP( TOP_ldc32, src_h, Gen_Literal_TN(0,4), ops );
00772   }
00773 
00774   switch( opr ){
00775   case OPR_BNOT:
00776     top = TOP_not32;
00777     break;
00778   case OPR_INTCONST:
00779     top = TOP_ldc32;
00780     break;
00781   case OPR_NEG:
00782     {
00783       Build_OP( TOP_neg32, result, src, ops );
00784       TN* tmp_src = Build_TN_Like( src );
00785       Build_OP( TOP_adci32, tmp_src, src_h, Gen_Literal_TN(0,4), ops );
00786       Set_OP_cond_def_kind( OPS_last(ops), OP_ALWAYS_COND_DEF );
00787       Build_OP( TOP_neg32, result_h, tmp_src, ops );
00788       return;
00789     }
00790     break;
00791   case OPR_LDA:
00792     top = TOP_mov32;
00793     break;
00794   default:
00795     FmtAssert( FALSE, ("Expand_Split_UOP: unknown operator") );
00796   }
00797 
00798   Build_OP( top, result,   src,   ops );
00799   Build_OP( top, result_h, src_h, ops );
00800 }
00801 
00802 
00803 /* Use two 32-bit binary operations to emulate a 64-bit
00804    binary operation.
00805 */
00806 static void Expand_Split_BOP( OPERATOR opr, TYPE_ID mtype,
00807             TN* result, TN* src1, TN* src2,
00808             OPS* ops )
00809 {
00810   TN* result_h = Create_TN_Pair( result, mtype );
00811   TN* src1_h = Get_TN_Pair(src1);
00812   TN* src2_h = TN_has_value(src2) ? NULL : Get_TN_Pair(src2);
00813 
00814   if( TN_has_value(src2) ){
00815     const INT64 val = TN_value(src2);
00816     src2   = Gen_Literal_TN( ( val << 32 ) >> 32, 4 );
00817     src2_h = Gen_Literal_TN( val >> 32, 4 );
00818   }
00819 
00820   if( src2_h == NULL ){
00821     DevWarn( "The higher 32-bit of TN%d is treated as 0\n",
00822        TN_number(src2) );
00823     src2_h  = Build_TN_Like( src2 );
00824     Build_OP( TOP_ldc32, src2_h, Gen_Literal_TN(0,4), ops );    
00825   }
00826 
00827   TOP top = TOP_UNDEFINED, top_h = TOP_UNDEFINED;
00828 
00829   if( src1_h == NULL ){
00830     DevWarn( "The higher 32-bit of TN%d is treated as 0\n", TN_number(src1) );
00831     src1_h = Build_TN_Like( src1 );
00832     Build_OP( TOP_ldc32,  src1_h, Gen_Literal_TN(0,4), ops );    
00833   }
00834 
00835   switch( opr ){
00836   case OPR_ADD:
00837     top   = TN_has_value(src2) ? TOP_addi32 : TOP_add32;
00838     top_h = TN_has_value(src2) ? TOP_adci32 : TOP_adc32;
00839     break;
00840   case OPR_SUB:
00841     top   = TN_has_value(src2) ? TOP_subi32 : TOP_sub32;
00842     top_h = TN_has_value(src2) ? TOP_sbbi32 : TOP_sbb32;
00843     break;
00844   case OPR_BAND:
00845     top_h = top = TN_has_value(src2) ? TOP_andi32 : TOP_and32;
00846     break;
00847   case OPR_BXOR:
00848     top_h = top = TN_has_value(src2) ? TOP_xori32 : TOP_xor32;
00849     break;
00850   case OPR_BIOR:
00851     top_h = top = TN_has_value(src2) ? TOP_ori32 : TOP_or32;
00852     break;
00853   default:
00854     FmtAssert( false, ("Expand_Split_BOP: Unknown operator") );
00855   }
00856 
00857   Build_OP( top,   result,   src1,   src2,   ops );
00858   Build_OP( top_h, result_h, src1_h, src2_h, ops );
00859 }
00860 
00861 
00862 static void Expand_Split_Multiply( TN* result, TN* src1, TN* src2, OPS* ops )
00863 {
00864   TN* result_hi = Create_TN_Pair( result, MTYPE_I8 );
00865   TN* src1_hi = Get_TN_Pair( src1 );
00866   TN* src2_hi = Get_TN_Pair( src2 );
00867 
00868   TN* tmp = NULL;
00869   if( src1_hi != NULL ){
00870     tmp = Build_TN_Like( result );
00871     Build_OP( TOP_imul32, tmp,  src1_hi, src2, ops );
00872   }
00873 
00874   if( src2_hi == NULL ){
00875     DevWarn( "The higher 32-bit of TN%d is treated as 0\n", TN_number(src2) );
00876     src2_hi = Build_TN_Like( src2 );
00877     Build_OP( TOP_ldc32, src2_hi, Gen_Literal_TN(0,4), ops );    
00878   }
00879 
00880   TN* tmp1 = Build_TN_Like( result );
00881   Build_OP( TOP_imul32, tmp1, src2_hi, src1, ops );
00882 
00883   TN* tmp2 = NULL;
00884   if( tmp != NULL ){
00885     tmp2 = Build_TN_Like( result );
00886     Build_OP( TOP_add32, tmp2, tmp, tmp1, ops );
00887   } else
00888     tmp2 = tmp1;
00889 
00890   TN* tmp_hi = Build_TN_Like( result );
00891   Build_OP( TOP_mul32, result, tmp_hi, src1, src2, ops );
00892   Build_OP( TOP_add32, result_hi, tmp_hi, tmp2, ops );    
00893 }
00894 
00895 
00896 static void Expand_Split_Shift( SHIFT_DIRECTION shift_dir,
00897         TN* result, TN* src_lo, TN* shift, OPS* ops )
00898 {
00899   TN* src_hi = Get_TN_Pair( src_lo );
00900 
00901   if( src_hi == NULL ){
00902     if( TN_has_value( src_lo ) ){
00903       const INT64 val = TN_value(src_lo);
00904       src_hi = Gen_Literal_TN( val >> 32, 4 );
00905 
00906     } else {
00907       DevWarn( "The higher 32-bit of TN%d is treated as 0\n", TN_number(src_lo) );
00908       src_hi = Build_TN_Like( src_lo );
00909       Build_OP( TOP_ldc32, src_hi, Gen_Literal_TN(0,4), ops );
00910     }
00911   }
00912 
00913   // Handle case where <shift> has value.
00914   if( TN_has_value( shift ) ){
00915     const INT64 shift_amt = TN_value( shift );
00916     TN* result_hi = Create_TN_Pair( result, MTYPE_I8 );
00917 
00918     if( shift_amt >= 32 ){
00919       switch( shift_dir ){
00920       case shift_left:
00921   Build_OP( TOP_shli32, result_hi, src_lo, Gen_Literal_TN(shift_amt-32,4), ops );
00922   Build_OP( TOP_ldc32,  result,    Gen_Literal_TN(0,4), ops );
00923   break;
00924 
00925       case shift_aright:
00926   Build_OP( TOP_sari32, result,    src_hi, Gen_Literal_TN(shift_amt-32,4), ops );
00927   Build_OP( TOP_sari32, result_hi, src_hi, Gen_Literal_TN(31,4), ops );
00928   break;
00929 
00930       case shift_lright:
00931   Build_OP( TOP_shri32, result,    src_hi, Gen_Literal_TN(shift_amt-32,4), ops );
00932   Build_OP( TOP_ldc32,  result_hi, Gen_Literal_TN(0,4), ops );
00933   break;
00934       }
00935 
00936     } else {
00937       // for shift_amt < 32
00938       switch( shift_dir ){
00939       case shift_left:
00940   Build_OP( TOP_shldi32, result_hi, src_hi, src_lo, shift, ops );
00941   Build_OP( TOP_shli32,   result,    src_lo, shift,  ops );
00942   break;
00943 
00944       case shift_aright:
00945   Build_OP( TOP_shrdi32, result,    src_lo, src_hi, shift, ops );
00946   Build_OP( TOP_sari32,  result_hi, src_hi, shift,  ops );
00947   break;
00948 
00949       case shift_lright:
00950   if( src_hi == NULL ){
00951     Build_OP( TOP_shri32, result,    src_lo, shift, ops );
00952     Build_OP( TOP_ldc32,  result_hi, Gen_Literal_TN(0,4), ops );
00953   } else {
00954     Build_OP( TOP_shrdi32, result,    src_lo, src_hi, shift, ops );
00955     Build_OP( TOP_shri32,  result_hi, src_hi, shift,  ops );
00956   }
00957   break;
00958       }
00959     }
00960 
00961   } else {
00962     // Handle case where <shift> is a variable.
00963     TN* tmp_result = result;
00964     TN* tmp_result_hi = Create_TN_Pair( tmp_result, MTYPE_I8 );
00965 
00966     if( TN_is_dedicated( result ) ){
00967       tmp_result = Build_TN_Like( result );
00968       tmp_result_hi = Create_TN_Pair( tmp_result, MTYPE_I8 );
00969     }
00970 
00971     // Under m32, the x86 shift instruction can shift at most 31 bits (the
00972     // upper bits of the shift count are ignored).  Construct 64-bit shift from
00973     // 32-bit shifts.
00974 
00975     BB* bb_entry = Cur_BB;
00976     BB* bb_then = Gen_And_Append_BB( bb_entry );  // for shift_cnt > 31
00977 
00978     BB* bb_exit  = Gen_And_Append_BB( bb_then );
00979     const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
00980 
00981     BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
00982     WN_kid0(BB_branch_wn(bb_entry)) = NULL;
00983     WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
00984 
00985     // Build bb_entry
00986     {
00987       switch( shift_dir ){
00988       case shift_left:
00989   Build_OP( TOP_shld32, tmp_result_hi, src_hi, src_lo, shift, ops );
00990   Build_OP( TOP_shl32,  tmp_result,    src_lo, shift, ops );
00991   break;
00992 
00993       case shift_aright:
00994   Build_OP( TOP_shrd32, tmp_result,    src_lo, src_hi, shift, ops );
00995   Build_OP( TOP_sar32,  tmp_result_hi, src_hi, shift, ops );
00996   break;
00997 
00998       case shift_lright:
00999   Build_OP( TOP_shrd32, tmp_result,    src_lo, src_hi, shift, ops );
01000   Build_OP( TOP_shr32,  tmp_result_hi, src_hi, shift, ops );
01001   break;
01002       }
01003 
01004       // Go to the then block if the shift count is in the range [32,63].  If
01005       // it is < 32, the shrd/shr combo already produces the correct result.
01006       // If it is > 63, it is treated as modulo 64.  This means go to the then
01007       // block only if (32 & shift count) is 1.  Bug 9687.
01008 
01009       TN *rflags = Rflags_TN();
01010       Build_OP(TOP_testi32, rflags, shift, Gen_Literal_TN(32, 4), ops);
01011       Build_OP(TOP_je, rflags, Gen_Label_TN(bb_exit_label, 0), ops);
01012 
01013       if( &New_OPs != ops )
01014   OPS_Append_Ops( &New_OPs, ops );
01015       Process_New_OPs();
01016       BB_Append_Ops( bb_entry, &New_OPs );
01017       OPS_Init( &New_OPs );
01018       OPS_Init( ops );
01019     }
01020 
01021     // Build bb_then here.
01022     {
01023       OPS* bb_then_ops = &New_OPs;
01024 
01025       switch( shift_dir ){
01026       case shift_left:
01027   Exp_COPY( tmp_result_hi, tmp_result, bb_then_ops );
01028   Build_OP( TOP_ldc32, tmp_result, Gen_Literal_TN(0,4), bb_then_ops );
01029   break;
01030 
01031       case shift_aright:
01032   Exp_COPY( tmp_result, tmp_result_hi, bb_then_ops );
01033   Build_OP( TOP_sari32, tmp_result_hi, tmp_result_hi,
01034       Gen_Literal_TN(31,4), bb_then_ops );
01035   break;
01036 
01037       case shift_lright:
01038   Exp_COPY( tmp_result, tmp_result_hi, bb_then_ops );
01039   Build_OP( TOP_ldc32,  tmp_result_hi, Gen_Literal_TN(0,4), bb_then_ops );  
01040   break;
01041       }
01042       
01043       total_bb_insts = 0;
01044       Last_Processed_OP = NULL;
01045       Process_New_OPs();
01046       BB_Append_Ops( bb_then, bb_then_ops );
01047       OPS_Init( bb_then_ops );
01048     }
01049 
01050     Cur_BB = bb_exit;
01051     if( result != tmp_result )
01052       Expand_Split_UOP( OPR_LDA, MTYPE_I8, result, tmp_result, ops );
01053   }
01054 }
01055 
01056 
01057 static void Expand_Split_Abs( TN* dest, TN* src, TYPE_ID mtype, OPS* ops )
01058 {
01059   TN* result = dest;
01060 
01061   if( TN_is_dedicated(dest) ){
01062     result = Build_TN_Like( dest );
01063   }
01064 
01065   TN* src_hi = Get_TN_Pair( src );
01066 
01067   FmtAssert( src_hi != NULL,
01068        ("Expand_Split_Abs: the higher 32-bit of source is NULL") );
01069 
01070   BB* bb_entry = Cur_BB;
01071   BB* bb_then = Gen_And_Append_BB( bb_entry );
01072   BB* bb_exit = Gen_And_Append_BB( bb_then );
01073 
01074   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
01075 
01076   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
01077   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
01078   WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
01079 
01080   // build bb_entry
01081   {
01082     Expand_Split_UOP( OPR_LDA, mtype, result, src, ops );
01083 
01084     Exp_OP3v( OPC_TRUEBR,
01085         NULL,
01086         Gen_Label_TN( bb_exit_label, 0 ),
01087         src_hi,
01088         Gen_Literal_TN(0,4),
01089         V_BR_I4GE,
01090         ops );
01091 
01092     if( &New_OPs != ops )
01093       OPS_Append_Ops( &New_OPs, ops );
01094     Process_New_OPs();
01095     BB_Append_Ops( bb_entry, &New_OPs );
01096     OPS_Init( &New_OPs );
01097     OPS_Init( ops );    
01098   }
01099 
01100   // Build bb_then here if src_hi < 0
01101   {
01102     OPS* bb_then_ops = &New_OPs;
01103     Expand_Split_UOP( OPR_NEG, mtype, result, src, bb_then_ops );
01104 
01105     total_bb_insts = 0;
01106     Last_Processed_OP = NULL;
01107     Process_New_OPs();
01108     BB_Append_Ops( bb_then, bb_then_ops );
01109     OPS_Init( bb_then_ops );    
01110   }
01111 
01112   Cur_BB = bb_exit;
01113 
01114   if( result != dest ){
01115     Expand_Split_UOP( OPR_LDA, MTYPE_I8, dest, result, ops );
01116   }
01117 }
01118 
01119 
01120 // If safezero is TRUE, then be careful to map 0 --> MTYPE_bit_size(mtype).
01121 static void
01122 Expand_Split_Leading_Zeros( TN* dest, TN* src, TYPE_ID mtype,
01123           BOOL safezero, OPS* ops )
01124 {
01125   // TN1 :- ldc32 (0x7f)
01126   // TN2 :- bsr32 TN_src_low ;
01127   // TN2 :- cmove TN1 (%rflags)
01128   // TN3 :- xori32 TN2 (0x20)
01129   // TN4 :- bsr32 TN_src_high;
01130   // TN4 :- cmove TN3 (%rflags)
01131   // TN5 :- xori32 TN4 (0x1f)
01132   if ( mtype != MTYPE_I8 && mtype != MTYPE_U8 )
01133     Fail_FmtAssertion("Expand_Split_Leading_Zeros: unexpected mtype");
01134   if ( TN_has_value(src) ) {  // I don't think this will ever happen.
01135     src = Expand_Immediate_Into_Register( src, TRUE, ops );
01136   }
01137   TN *src_hi = Get_TN_Pair(src);
01138   FmtAssert( src_hi != NULL, ("Expand_Split_Leading_Zeros: the "
01139             "higher 32-bit of source is NULL") );
01140   TN *tmp1, *rflags = Rflags_TN();
01141   if (safezero) {
01142     tmp1 = Build_TN_Of_Mtype( MTYPE_I4 );
01143     Exp_Immediate( tmp1, Gen_Literal_TN(127, 4), TRUE, ops );
01144   }
01145   TN *tmp2 = Build_TN_Of_Mtype( MTYPE_I4 );
01146   Build_OP( TOP_bsr32, tmp2, src, ops );
01147   if (safezero) {
01148     Expand_Cmov( TOP_cmove, tmp2, tmp1, rflags, ops );
01149   }
01150   TN *tmp3 = Build_TN_Of_Mtype( MTYPE_I4 );
01151   Expand_Binary_Xor( tmp3, tmp2, Gen_Literal_TN(32, 4), MTYPE_I4, ops );
01152   TN *tmp4 = Build_TN_Of_Mtype( MTYPE_I4 );
01153   Build_OP( TOP_bsr32, tmp4, src_hi, ops );
01154   Expand_Cmov( TOP_cmove, tmp4, tmp3, rflags, ops );
01155   Expand_Binary_Xor( dest, tmp4, Gen_Literal_TN(31, 4), MTYPE_I4, ops );
01156 }
01157 
01158 
01159 void
01160 Expand_Copy (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
01161 {
01162   const BOOL is_128bit = (MTYPE_size_reg(mtype) == 128);
01163 
01164   if( MTYPE_is_quad( mtype ) ){
01165     Build_OP( TOP_fmov, result, src, ops );
01166 
01167   } else if( MTYPE_is_float(mtype) ){
01168     if( Is_Target_SSE2() )
01169       Build_OP( is_128bit ? TOP_movdq: 
01170     (mtype == MTYPE_F8 ? TOP_movsd : TOP_movss), result, src, ops );
01171     else
01172       Build_OP( TOP_fmov, result, src, ops );
01173 
01174   } else {
01175     if( OP_NEED_PAIR( mtype ) ){
01176       Expand_Split_UOP( OPR_LDA, mtype, result, src, ops );
01177       Set_OP_copy( OP_prev(OPS_last(ops)) );
01178 
01179     } else {
01180       Build_OP( MTYPE_is_size_double(mtype) ? TOP_mov64: TOP_mov32, 
01181     result, src, ops);
01182     }
01183   }
01184 
01185   Set_OP_copy (OPS_last(ops));
01186 }
01187 
01188 //
01189 //  Helper routine to do proper sign extension
01190 //
01191 static void
01192 Fixup_32_Bit_Op(TN *result,TN *src, TYPE_ID dest_type, OPS *ops)
01193 {
01194   if (dest_type == MTYPE_I8 || dest_type == MTYPE_U8) {
01195     Expand_Copy(result,src,dest_type,ops);
01196   } else {
01197     Expand_Convert_Length (result, src, Gen_Literal_TN(MTYPE_size_reg(dest_type), 4),
01198          dest_type, MTYPE_is_signed(dest_type),ops);
01199   }
01200 }
01201 
01202 
01203 /* ====================================================================
01204  *
01205  * Expand_Convert_Length
01206  *
01207  * ====================================================================
01208  */
01209 void Expand_Convert_Length ( TN *dest, TN *src, TN *length_tn, TYPE_ID mtype,
01210            BOOL signed_extension, OPS *ops )
01211 {
01212   FmtAssert (! MTYPE_float(mtype),
01213        ("Expand_Convert_Length: illegal data type\n"));
01214   FmtAssert (TN_has_value(length_tn),
01215        ("Expand_Convert_Length: non-constant length\n"));
01216   const UINT64 val = TN_value(length_tn);
01217   const BOOL is_64bit = MTYPE_is_size_double(mtype);
01218 
01219   TOP new_opcode = TOP_UNDEFINED;
01220 
01221   if( val != 8 && val != 16  && val != 32 ){
01222     // Bug046
01223     if( signed_extension || val >= 32 ){
01224       TN* tmp1 = Build_TN_Like( dest );
01225       TN* tmp2 = Build_TN_Like( dest );
01226       const int shift_amt = is_64bit ? 64 - val : 32 - val;
01227 
01228       if ( val < 32 )
01229         Build_OP( TOP_andi32, tmp1, src, Gen_Literal_TN((1<<val)-1, 4), ops );
01230       else {
01231   TN* tmp3 = Build_TN_Like( dest );
01232   if( Is_Target_32bit() && is_64bit && Get_TN_Pair(tmp3) == 0 )
01233     (void *) Create_TN_Pair( tmp3, mtype );
01234   Exp_Immediate( tmp3, Gen_Literal_TN ((1LL<<val)-1LL, 8), FALSE, ops );
01235   Expand_Binary_And(tmp1, src, tmp3, mtype, ops);
01236       }
01237       Expand_Shift( tmp2, tmp1, Gen_Literal_TN( shift_amt, 4 ),
01238         mtype, shift_left, ops );
01239       Expand_Shift( dest, tmp2, Gen_Literal_TN( shift_amt, 4 ),
01240         mtype, shift_aright, ops );
01241       
01242     } else {
01243       Build_OP( TOP_andi32, dest, src, Gen_Literal_TN((1<<val)-1, 4), ops );
01244     }
01245 
01246     return;
01247 
01248   } else if( val == 8 ){
01249     if( signed_extension ){
01250       new_opcode = is_64bit ? TOP_movsbq : TOP_movsbl;
01251     } else {
01252       new_opcode = is_64bit ? TOP_movzbq : TOP_movzbl;
01253     }
01254 
01255   } else if( val == 16 ){
01256     if( signed_extension ){
01257       new_opcode = is_64bit ? TOP_movswq : TOP_movswl;
01258     } else {
01259       new_opcode = is_64bit ? TOP_movzwq : TOP_movzwl;
01260     }
01261 
01262   } else if( val == 32 ){
01263     if( is_64bit ) {
01264       if (signed_extension)
01265         new_opcode = TOP_movslq;
01266       else {
01267   if( OP_NEED_PAIR(mtype) ){
01268     Expand_Split_Cvtl( mtype, TOP_mov32, dest, src, ops );
01269   } else {
01270     /* Fix bug#1363.
01271        We are doing the OPC_U8U4CVT here.
01272      */
01273     Build_OP( TOP_movzlq, dest, src, ops);
01274   }
01275         return;
01276       }
01277     }
01278     else if( MTYPE_bit_size(mtype) == 32 ){
01279       // Bug 4117 - use a move here without setting the copy bits 
01280       // (that is, don't call Expand_Copy).
01281       Build_OP( TOP_mov32, dest, src, ops);
01282       return;
01283     }
01284   }
01285 
01286   FmtAssert( new_opcode != TOP_UNDEFINED,
01287        ("Expand_Convert_Length: new opcode is undefined") );
01288 
01289   if( OP_NEED_PAIR(mtype) )
01290     Expand_Split_Cvtl( mtype, new_opcode, dest, src, ops );
01291   else
01292     Build_OP( new_opcode, dest, src, ops );
01293 }
01294 
01295 static void Exp_Immediate (TN *dest, TN *src, OPS *ops)
01296 {
01297   INT64 val = 0;
01298   TN* tmp = Build_TN_Like(dest);
01299 
01300   if ( TN_has_value(src) ) {
01301     val = TN_value(src);
01302 
01303   } else  if ( TN_is_symbol(src) ) {
01304     ST *base;
01305     Base_Symbol_And_Offset_For_Addressing (TN_var(src), TN_offset(src), &base, &val);
01306 
01307   } else
01308     FmtAssert(FALSE,("unexpected constant in Exp_Immediate"));
01309 
01310   if( Is_Target_32bit()     &&
01311       /* TN_is_dedicated(dest) && */
01312 #ifdef KEY // bug 14228
01313       ! (TN_is_symbol(src) && ST_sym_class(TN_var(src)) == CLASS_NAME) &&
01314 #endif
01315       Get_TN_Pair(dest) != NULL ){
01316     Expand_Split_UOP( OPR_INTCONST, MTYPE_I8, dest, src, ops );
01317     
01318   } else if (TN_size(dest) == 8) {
01319     if( OP_NEED_PAIR( MTYPE_I8 ) ){
01320       Expand_Split_UOP( OPR_INTCONST, MTYPE_I8, dest, src, ops );
01321     } else {
01322       Build_OP (TOP_ldc64, dest, src, ops);
01323     }
01324 
01325   } else if (ISA_LC_Value_In_Class (val, LC_simm32)) {
01326     Build_OP (TOP_ldc32, dest, src, ops);
01327 
01328   } else if (ISA_LC_Value_In_Class (val, LC_uimm32)) {
01329     Build_OP (TOP_ldc32, dest, src, ops);
01330 
01331   } else if (val >= INT32_MIN && val <= INT32_MAX) {
01332     Build_OP (TOP_ldc32, dest, src, ops);
01333 
01334   } else if ((UINT64)val <= UINT32_MAX) {
01335     Build_OP (TOP_ldc32, dest, src, ops);
01336 
01337   } else if ((UINT64)val > UINT32_MAX) {
01338     if( Is_Target_32bit() ){
01339       // The upper 32-bit is dead.
01340       Build_OP( TOP_ldc32, dest, Gen_Literal_TN( (val & 0xffffffff), 4 ), ops );
01341     } else
01342       Build_OP (TOP_ldc64, dest, src, ops);
01343 
01344   } else
01345     FmtAssert( FALSE, ("UNIMPLEMENTED") );
01346 }
01347 
01348 void
01349 Exp_Immediate (TN *dest, TN *src, BOOL is_signed, OPS *ops)
01350 {
01351   Expand_Immediate(dest, src, is_signed, ops);
01352 }
01353 
01354 /* 
01355  * Expand Immediate value.
01356  */
01357 void
01358 Expand_Immediate (TN *dest, TN *src, BOOL is_signed, OPS *ops)
01359 {
01360   FmtAssert((TN_is_constant(src)),
01361       ("unexpected non-constant in Expand_Immediate"));
01362   FmtAssert((TN_has_value(src) || TN_is_symbol(src)), 
01363       ("expected value or const in Expand_Immediate"));
01364   Exp_Immediate (dest, src, ops);
01365 }
01366 
01367 TN*
01368 Expand_Immediate_Into_Register (TN *src, BOOL is_64bit, OPS *ops)
01369 {
01370   /* load into reg and do reg case */
01371   TN *tmp = Build_TN_Of_Mtype (is_64bit ? MTYPE_I8 : MTYPE_I4);
01372   Expand_Immediate (tmp, src, TRUE, ops);
01373   return tmp;
01374 }
01375 
01376 
01377 void
01378 Expand_Add (TN *result, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
01379 {
01380   TOP new_opcode;
01381   INT64 val;
01382   const BOOL is_64bit = MTYPE_is_size_double(mtype);
01383   BOOL is_vector_type;
01384   is_vector_type = (mtype == MTYPE_V16I1 || 
01385         mtype == MTYPE_V16I2 ||
01386         mtype == MTYPE_V16I4 ||
01387         mtype == MTYPE_V16I8 ||
01388         mtype == MTYPE_M8I1 ||
01389         mtype == MTYPE_M8I2 ||
01390         mtype == MTYPE_M8I4);
01391   if (TN_is_constant(src1) && !is_vector_type) {
01392     if (TN_has_value(src1)) {
01393       val = TN_value(src1);
01394       if (val == 0) {
01395   Expand_Copy (result, src2, mtype, ops);
01396   return;
01397       }
01398     } else if ( TN_is_symbol(src1) ) {
01399       /* symbolic constant, gp-relative or sp-relative */
01400       ST *base;
01401       INT64 val;
01402       TN *tmp = Build_TN_Of_Mtype (mtype);
01403       Base_Symbol_And_Offset_For_Addressing (TN_var(src1), TN_offset(src1), 
01404                &base, &val);
01405       new_opcode = is_64bit ? TOP_addi64 : TOP_addi32;
01406       if( ISA_LC_Value_In_Class (val, LC_simm32) ){
01407   Build_OP (new_opcode, result, src2, src1, ops);
01408       } else if (val >= INT32_MIN && val <= INT32_MAX) {
01409   Build_OP (TOP_ldc32, tmp, Gen_Literal_TN((val >> 16)&0xffff, 4), ops);
01410   Build_OP (TOP_ori32, tmp, tmp, Gen_Literal_TN(val & 0xffff, 4), ops);
01411   Build_OP (is_64bit ? TOP_add64 : TOP_add32, result, tmp, src2, ops);
01412       } else {
01413   TN* const_tn = Gen_Const_Symbol_TN( val, 0.0, MTYPE_I8, TN_RELOC_GOT_DISP );
01414   Build_OP( Use_32_Bit_Pointers ? TOP_ld32 : TOP_ld64,
01415       tmp, GP_TN, const_tn, ops );
01416   Build_OP(TOP_ld32, tmp, tmp, Gen_Literal_TN(0, 4), ops);
01417   Build_OP (is_64bit ? TOP_add64 : TOP_add32, result, tmp, src2, ops);
01418       }       
01419       return;
01420     } 
01421     else FmtAssert(FALSE,("unexpected constant in Expand_Add"));
01422     
01423     if (ISA_LC_Value_In_Class ( val, LC_simm32)) {
01424       if( OP_NEED_PAIR(mtype) ){
01425   Expand_Split_BOP( OPR_ADD, mtype, result, src2, Gen_Literal_TN(val,8), ops );
01426 
01427       } else {
01428   new_opcode = is_64bit ? TOP_addi64 : TOP_addi32;
01429   Build_OP (new_opcode, result, src2, Gen_Literal_TN(val,4), ops);
01430       }
01431 
01432     } else {
01433       src1 = Expand_Immediate_Into_Register( src1, is_64bit, ops );
01434 
01435       if( OP_NEED_PAIR(mtype) ){
01436   Expand_Split_BOP( OPR_ADD, mtype, result, src2, src1, ops );  
01437 
01438       } else {
01439   new_opcode = is_64bit ? TOP_add64 : TOP_add32;
01440   Build_OP (new_opcode, result, src2, src1, ops);
01441       }
01442     }
01443   } else if (TN_is_constant(src2) && !is_vector_type) {
01444     // switch order of src so immediate is first
01445     Expand_Add (result, src2, src1, mtype, ops);
01446   } else {
01447     switch(mtype) {
01448     case MTYPE_V16I1:
01449       Build_OP (TOP_add128v8, result, src1, src2, ops);
01450       break;
01451     case MTYPE_V16I2:
01452       Build_OP (TOP_add128v16, result, src1, src2, ops);
01453       break;
01454     case MTYPE_V16I4:
01455       Build_OP (TOP_add128v32, result, src1, src2, ops);
01456       break;
01457     case MTYPE_V16I8:
01458       Build_OP (TOP_add128v64, result, src1, src2, ops);
01459       break;
01460     case MTYPE_V8I1:
01461       Build_OP (TOP_add128v8, result, src1, src2, ops);
01462       break;
01463     case MTYPE_V8I2:
01464       Build_OP (TOP_add128v16, result, src1, src2, ops);
01465       break;
01466     case MTYPE_V8I4:
01467       Build_OP (TOP_add128v32, result, src1, src2, ops);
01468       break;
01469     case MTYPE_M8I1:
01470       Build_OP (TOP_add64v8, result, src1, src2, ops);
01471       break;
01472     case MTYPE_M8I2:
01473       Build_OP (TOP_add64v16, result, src1, src2, ops);
01474       break;
01475     case MTYPE_M8I4:
01476       Build_OP (TOP_add64v32, result, src1, src2, ops);
01477       break;
01478     case MTYPE_V8F4:
01479       Build_OP (TOP_fadd128v32, result, src1, src2, ops);
01480       break;
01481     case MTYPE_M8F4:
01482       Fail_FmtAssertion ("Expand_Add: NYI");
01483     default:
01484       if( OP_NEED_PAIR(mtype) ){
01485   Expand_Split_BOP( OPR_ADD, mtype, result, src1, src2, ops );
01486 
01487       } else {
01488   Build_OP (is_64bit ? TOP_add64 : TOP_add32, result, src1, src2, ops);
01489       }
01490 
01491       break;
01492     }
01493   }
01494 }
01495 
01496 void
01497 Expand_Sub (TN *result, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
01498 {
01499   INT64 val;
01500   const BOOL is_64bit = MTYPE_is_size_double(mtype);
01501   TOP new_opcode;
01502   BOOL is_vector_type;
01503   is_vector_type = (mtype == MTYPE_V16I1 || 
01504         mtype == MTYPE_V16I2 ||
01505         mtype == MTYPE_V16I4 ||
01506         mtype == MTYPE_V16I8 ||
01507         mtype == MTYPE_M8I1 ||
01508         mtype == MTYPE_M8I2 ||
01509         mtype == MTYPE_M8I4);
01510 
01511   if (TN_is_constant(src2) && !is_vector_type) {
01512     if (TN_has_value(src2)) {
01513       val = - TN_value(src2);
01514       if (val == 0) {
01515   Expand_Copy (result, src1, mtype, ops);
01516   return;
01517       }
01518     } 
01519     else if ( TN_is_symbol(src2) ) {
01520       /* symbolic constant, gp-relative or sp-relative */
01521       ST *base;
01522       INT64 val;
01523       Base_Symbol_And_Offset_For_Addressing (TN_var(src2), TN_offset(src2), &base, &val);
01524       val = - val;
01525     } 
01526     else FmtAssert(FALSE,("unexpected constant in Expand_Sub"));
01527     
01528     if (ISA_LC_Value_In_Class ( val, LC_simm32)) {
01529       if( OP_NEED_PAIR(mtype) ){
01530   Expand_Split_BOP( OPR_ADD, mtype, result, src1, Gen_Literal_TN(val,8), ops );
01531 
01532       } else {
01533   new_opcode = is_64bit ? TOP_addi64 : TOP_addi32;
01534   Build_OP (new_opcode, result, src1, Gen_Literal_TN(val,4), ops);
01535       }
01536 
01537     } else {
01538       src2 = Expand_Immediate_Into_Register( src2, is_64bit, ops );
01539 
01540       if( OP_NEED_PAIR(mtype) ){
01541   Expand_Split_BOP( OPR_SUB, mtype, result, src1, src2, ops );
01542 
01543       } else {
01544   new_opcode = is_64bit ? TOP_sub64 : TOP_sub32;
01545   Build_OP (new_opcode, result, src1, src2, ops);
01546       }
01547     }
01548   }
01549   else if (TN_is_constant(src1) && !is_vector_type) {
01550     TN *tmp = Build_TN_Of_Mtype (mtype);
01551     // switch order of src so immediate is first
01552     Expand_Sub (tmp, src2, src1, mtype, ops);
01553     // generate a negate
01554     if( OP_NEED_PAIR(mtype) )
01555       Expand_Split_UOP( OPR_NEG, mtype, result, tmp, ops );
01556     else
01557       Build_OP(is_64bit ? TOP_neg64 : TOP_neg32, result, tmp, ops);
01558   } 
01559   else {
01560     switch(mtype) {
01561     case MTYPE_V16I1:
01562       Build_OP (TOP_sub128v8, result, src1, src2, ops);
01563       break;
01564     case MTYPE_V16I2:
01565       Build_OP (TOP_sub128v16, result, src1, src2, ops);
01566       break;
01567     case MTYPE_V16I4:
01568       Build_OP (TOP_sub128v32, result, src1, src2, ops);
01569       break;
01570     case MTYPE_V16I8:
01571       Build_OP (TOP_sub128v64, result, src1, src2, ops);
01572       break;
01573     case MTYPE_V8I1:
01574       Build_OP (TOP_sub128v8, result, src1, src2, ops);
01575       break;
01576     case MTYPE_V8I2:
01577       Build_OP (TOP_sub128v16, result, src1, src2, ops);
01578       break;
01579     case MTYPE_V8I4:
01580       Build_OP (TOP_sub128v32, result, src1, src2, ops);
01581       break;
01582     case MTYPE_M8I1:
01583       Build_OP (TOP_sub64v8, result, src1, src2, ops);
01584       break;
01585     case MTYPE_M8I2:
01586       Build_OP (TOP_sub64v16, result, src1, src2, ops);
01587       break;
01588     case MTYPE_M8I4:
01589       Build_OP (TOP_sub64v32, result, src1, src2, ops);
01590       break;
01591     case MTYPE_M8F4:
01592       Fail_FmtAssertion ("Expand_Sub: NYI");
01593     default:
01594       if( OP_NEED_PAIR(mtype) ){
01595   Expand_Split_BOP( OPR_SUB, mtype, result, src1, src2, ops );
01596 
01597       } else {
01598   Build_OP (is_64bit ? TOP_sub64 : TOP_sub32, result, src1, src2, ops);
01599       }
01600       break;
01601     }
01602   }
01603 }
01604 
01605 
01606 void
01607 Expand_Neg (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
01608 {
01609   const BOOL is_64bit = MTYPE_is_size_double(mtype);
01610   FmtAssert ((MTYPE_bit_size(mtype) == 32 || 
01611         MTYPE_bit_size(mtype) == 64 ||
01612         MTYPE_bit_size(mtype) == 96 ||
01613         MTYPE_bit_size(mtype) == 128 ),
01614                  ("Expand_Neg: illegal result size\n"));
01615 
01616   if (mtype == MTYPE_V16F4 || mtype == MTYPE_V16F8 ||
01617       mtype == MTYPE_V16I1 || mtype == MTYPE_V16I2 ||
01618       mtype == MTYPE_V16I4 || mtype == MTYPE_V16I8 ||
01619       mtype == MTYPE_V16C8) {
01620     switch (mtype) {
01621     case MTYPE_V16F4: {
01622       TCON then = Host_To_Targ (MTYPE_I4, 0x80000000);
01623       TCON now  = Create_Simd_Const (MTYPE_V16F4, then);
01624       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
01625       Allocate_Object(sym);
01626       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
01627       TN *tmp = Build_TN_Like(result);
01628       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
01629       Build_OP(is_64bit ? TOP_xorpd: TOP_xorps, result, src, tmp, ops);
01630       break;
01631     }
01632     case MTYPE_V16C8:
01633     case MTYPE_V16F8: {
01634       TCON then = Host_To_Targ (MTYPE_I8, 0x8000000000000000ULL);
01635       TCON now  = Create_Simd_Const (MTYPE_V16F8, then);
01636       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
01637       Allocate_Object(sym);
01638       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
01639       TN *tmp = Build_TN_Like(result);
01640       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
01641       Build_OP(is_64bit ? TOP_xorpd: TOP_xorps, result, src, tmp, ops);
01642       break;
01643     }
01644 #ifdef KEY
01645     //Bug 5701 13347: Vectorize Neg of Integers
01646     case MTYPE_V16I1:
01647     case MTYPE_V16I2:
01648     case MTYPE_V16I4:
01649     case MTYPE_V16I8:{
01650       TYPE_ID host_type;
01651       if(mtype==MTYPE_V16I1) host_type = MTYPE_I1;
01652       else if(mtype==MTYPE_V16I2) host_type = MTYPE_I2;
01653            else if(mtype==MTYPE_V16I4) host_type = MTYPE_I4;
01654                 else host_type = MTYPE_I8;
01655       TCON then = Host_To_Targ (host_type, 0x0);
01656       TCON now  = Create_Simd_Const (mtype, then);
01657       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
01658       Allocate_Object(sym);
01659       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
01660       TN *tmp = Build_TN_Like(result);
01661       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
01662       Expand_Sub (result, tmp, src, mtype, ops);
01663       break;
01664     }
01665 #endif
01666     default:
01667       FmtAssert(FALSE, ("Expand_Neg: unknown mtype"));
01668       break;
01669     }
01670 
01671   } else if (!MTYPE_is_float(mtype)) {
01672     if( OP_NEED_PAIR(mtype) ){
01673       Expand_Split_UOP( OPR_NEG, mtype, result, src, ops );
01674 
01675     } else {
01676       Build_OP( is_64bit ? TOP_neg64 : TOP_neg32, result, src, ops );
01677     }
01678 
01679   } else if( MTYPE_is_quad(mtype) ||
01680        !Is_Target_SSE2() ){
01681     Build_OP( TOP_fchs, result, src, ops );
01682 
01683   } else {
01684 #if 1
01685     // Perform neg operation by flipping the msb.
01686     TCON tcon = is_64bit ? Host_To_Targ (MTYPE_I8, 0x8000000000000000ULL) :
01687       Host_To_Targ (MTYPE_I4, 0x80000000);
01688     ST *sym = New_Const_Sym( Enter_tcon (tcon), Be_Type_Tbl(TCON_ty(tcon)) );
01689     Allocate_Object(sym);
01690     ST *base_sym; INT64 base_ofst;
01691 
01692     Base_Symbol_And_Offset_For_Addressing (sym, 0, &base_sym, &base_ofst);
01693 
01694     TN *tmp = Build_TN_Like(result);
01695     if( Is_Target_64bit() ){
01696       Build_OP(is_64bit ? TOP_ldsd : TOP_ldss, tmp, Rip_TN(),
01697          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01698     } else {
01699       Build_OP(is_64bit ? TOP_ldsd_n32 : TOP_ldss_n32, tmp,
01700          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01701     }
01702 
01703     Set_OP_no_alias( OPS_last(ops)  );
01704     Build_OP(is_64bit ? TOP_xorpd: TOP_xorps, result, src, tmp, ops);
01705 #else
01706     // Perform neg operation by a sub operation from 0.0.
01707     TCON tcon = Host_To_Targ_Float( is_64bit ? MTYPE_F8 : MTYPE_F4, 0.0 );
01708     ST *sym = New_Const_Sym (Enter_tcon (tcon), Be_Type_Tbl( TCON_ty(tcon) ) );
01709     Allocate_Object(sym);
01710     ST *base_sym; INT64 base_ofst;
01711 
01712     Base_Symbol_And_Offset_For_Addressing (sym, 0, &base_sym, &base_ofst);
01713 
01714     TN *tmp = Build_TN_Like(result);
01715     if( Is_Target_64bit() ){
01716       Build_OP(is_64bit ? TOP_ldsd : TOP_ldss, tmp, Rip_TN(),
01717          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01718     } else {
01719       Build_OP(is_64bit ? TOP_ldsd_n32 : TOP_ldss_n32, tmp,
01720          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01721     }
01722 
01723     Set_OP_no_alias( OPS_last(ops)  );
01724     Build_OP(is_64bit ? TOP_subsd: TOP_subss, result, tmp, src, ops);
01725 #endif
01726   }
01727 }
01728 
01729 
01730 void
01731 Expand_Abs (TN *dest, TN *src, TYPE_ID mtype, OPS *ops)
01732 {
01733   BOOL is_double = MTYPE_is_size_double(mtype);
01734   if (!MTYPE_is_float(mtype)) {
01735     if( dest == src ){
01736       TN* tmp_src = Build_TN_Like( src );
01737       Expand_Copy( tmp_src, src, mtype, ops );
01738       src = tmp_src;
01739     }
01740 
01741     if( OP_NEED_PAIR(mtype) ){
01742       Expand_Split_Abs( dest, src, mtype, ops );
01743 
01744     } else {
01745       Expand_Neg (dest, src, mtype, ops);
01746       Expand_Cmov (TOP_cmovs, dest, src, Rflags_TN(), ops);
01747     }
01748 
01749   } else if( MTYPE_is_quad( mtype ) ||
01750        !Is_Target_SSE2() ){
01751     Build_OP( TOP_fabs, dest, src, ops );
01752 
01753   } else if ( MTYPE_is_vector( mtype ) ) {
01754     FmtAssert( mtype == MTYPE_V16F4 || mtype == MTYPE_V16F8, ("NYI") );
01755     if ( mtype == MTYPE_V16F4 ) {
01756       TCON then = Host_To_Targ (MTYPE_I4, 0x7FFFFFFF);
01757       TCON now  = Create_Simd_Const (MTYPE_V16F4, then);
01758       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
01759       Allocate_Object(sym);
01760       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
01761       TN *tmp = Build_TN_Like(dest);
01762       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
01763       Build_OP(TOP_andps, dest, src, tmp, ops);      
01764     } else {
01765       TCON then = Host_To_Targ (MTYPE_I8, 0x7FFFFFFFFFFFFFFFULL);
01766       TCON now  = Create_Simd_Const (MTYPE_V16F8, then);
01767       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
01768       Allocate_Object(sym);
01769       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
01770       TN *tmp = Build_TN_Like(dest);
01771       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
01772       Build_OP(TOP_andpd, dest, src, tmp, ops);
01773     }
01774 
01775   } else {
01776     TCON tcon = is_double ? Host_To_Targ (MTYPE_I8, 0x7FFFFFFFFFFFFFFFULL) :
01777       Host_To_Targ (MTYPE_I4, 0x7FFFFFFF);
01778     ST *sym = New_Const_Sym (Enter_tcon (tcon), Be_Type_Tbl(TCON_ty(tcon)) );
01779     Allocate_Object(sym);
01780     ST *base_sym; INT64 base_ofst;
01781 
01782     Base_Symbol_And_Offset_For_Addressing (sym, 0, &base_sym, &base_ofst);
01783 
01784     TN *tmp = Build_TN_Like(dest);
01785 
01786     if( Is_Target_64bit() ){
01787       Build_OP(is_double ? TOP_ldsd : TOP_ldss, tmp, Rip_TN(),
01788          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01789     } else {
01790       Build_OP(is_double ? TOP_ldsd_n32 : TOP_ldss_n32, tmp,
01791          Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), ops);
01792     }
01793 
01794     Set_OP_no_alias( OPS_last(ops)  );
01795     Build_OP(is_double ? TOP_andpd: TOP_andps, dest, src, tmp, ops);
01796   }
01797   return;
01798 }
01799 
01800 void
01801 Expand_Shift (TN *result, TN *src1, TN *src2, TYPE_ID mtype, SHIFT_DIRECTION kind, OPS *ops)
01802 {
01803   WN *tree;
01804   TOP top;  
01805   const BOOL is_64bit = MTYPE_is_size_double(mtype);
01806 
01807   if (TN_is_constant(src1))
01808     src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
01809   if (TN_has_value(src2)) {
01810     // In mips, only the low log2(wordsize) bits of the shift count are used. 
01811     const UINT64 val = TN_value(src2);
01812     const UINT8  shift_amt = is_64bit ? 63 : 31;
01813     FmtAssert( val <= shift_amt, ("Shift amount > %d", shift_amt) );
01814 
01815     switch (kind) {
01816     case shift_left:
01817       if( val == 1 ){
01818   Expand_Add( result, src1, src1, mtype, ops );
01819   return;
01820       }
01821 
01822       top = is_64bit ? TOP_shli64 : TOP_shli32;
01823       break;
01824     case shift_aright:
01825       top = is_64bit ? TOP_sari64 : TOP_sari32;
01826       break;
01827     case shift_lright:
01828       top = is_64bit ? TOP_shri64 : TOP_shri32;
01829       break;
01830     }
01831 
01832     src2 = Gen_Literal_TN( val & shift_amt, 4 );
01833 
01834   } else {
01835     switch (kind) {
01836     case shift_left:
01837       top = is_64bit ? TOP_shl64 : TOP_shl32;
01838       break;
01839     case shift_aright:
01840       top = is_64bit ? TOP_sar64 : TOP_sar32;
01841       break;
01842     case shift_lright:
01843       top = is_64bit ? TOP_shr64 : TOP_shr32;
01844       break;
01845     }
01846   }
01847 
01848   if( OP_NEED_PAIR( mtype ) )
01849     Expand_Split_Shift( kind, result, src1, src2, ops );
01850   else
01851     Build_OP(top, result, src1, src2, ops);
01852 }
01853 
01854 void
01855 Expand_Rrotate (TN *result, TN *src1, TN *src2, TYPE_ID rtype, TYPE_ID desc, OPS *ops)
01856 {
01857   WN *tree;
01858   TOP top;  
01859   const BOOL value_size = MTYPE_bit_size(desc);
01860   const BOOL is_64bit = MTYPE_is_size_double(rtype);
01861 
01862   if (TN_is_constant(src1))
01863     src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
01864   if (TN_has_value(src2)) {
01865     const UINT64 bits_to_rotate = TN_value(src2);
01866     UINT bitmask; // only the lowest log2(value_size) bits of bits_to_rotate 
01867           // are used 
01868     switch (value_size) {
01869     case 8: top = TOP_rori8; bitmask = 0x7; break;
01870     case 16: top = TOP_rori16; bitmask = 0xf; break;
01871     case 32: top = TOP_rori32; bitmask = 0x1f; break;
01872     case 64: top = TOP_rori64; bitmask = 0x3f; break;
01873     }
01874 
01875     src2 = Gen_Literal_TN( bits_to_rotate & bitmask, 4 );
01876 
01877   } else {
01878     switch (value_size) {
01879     case 8: top = TOP_ror8; break;
01880     case 16: top = TOP_ror16; break;
01881     case 32: top = TOP_ror32; break;
01882     case 64: top = TOP_ror64; break;
01883     }
01884   }
01885 
01886   if( OP_NEED_PAIR( rtype ) )
01887     FmtAssert(FALSE,("RROTATE of simulated 64-bit integer NYI"));
01888   else
01889     Build_OP(top, result, src1, src2, ops);
01890 }
01891 
01892 void
01893 Expand_Left_Rotate (TN *result, TN *src1, TN *src2, TYPE_ID rtype, TYPE_ID desc, OPS *ops)
01894 {
01895   WN *tree;
01896   TOP top;  
01897   const BOOL value_size = MTYPE_bit_size(desc);
01898   const BOOL is_64bit = MTYPE_is_size_double(rtype);
01899 
01900   if (TN_is_constant(src1))
01901     src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
01902   if (TN_has_value(src2)) {
01903     const UINT64 bits_to_rotate = TN_value(src2);
01904     UINT bitmask; // only the lowest log2(value_size) bits of bits_to_rotate 
01905           // are used 
01906     switch (value_size) {
01907     case 8: top = TOP_roli8; bitmask = 0x7; break;
01908     case 16: top = TOP_roli16; bitmask = 0xf; break;
01909     case 32: top = TOP_roli32; bitmask = 0x1f; break;
01910     case 64: top = TOP_roli64; bitmask = 0x3f; break;
01911     }
01912 
01913     src2 = Gen_Literal_TN( bits_to_rotate & bitmask, 4 );
01914 
01915   } else {
01916     switch (value_size) {
01917     case 8: top = TOP_rol8; break;
01918     case 16: top = TOP_rol16; break;
01919     case 32: top = TOP_rol32; break;
01920     case 64: top = TOP_rol64; break;
01921     }
01922   }
01923 
01924   if( OP_NEED_PAIR( rtype ) )
01925     FmtAssert(FALSE,("Left-rotate of simulated 64-bit integer NYI"));
01926   else
01927     Build_OP(top, result, src1, src2, ops);
01928 }
01929 
01930 inline void
01931 Expand_G_To_F (TN *ftn, TN *gtn, OPS *ops)
01932 {
01933   FmtAssert(FALSE,("Unimplemented"));
01934 }
01935 
01936 inline void
01937 Expand_F_To_G (TN *gtn, TN *ftn, OPS *ops)
01938 {
01939   FmtAssert(FALSE,("Unimplemented"));
01940 }
01941 
01942 /*
01943  *  Try to expand a multiply into a sequence of less expensive operations.
01944  */
01945 static BOOL
01946 Expand_Constant_Multiply (TN *result, TN *var_tn, TARG_INT constant, TYPE_ID mtype, OPS *ops)
01947 {
01948   if( OP_NEED_PAIR(mtype) ){
01949     DevWarn( "Expand_Constant_Multiply: SUPPORT ME !!!" );
01950     return FALSE;
01951   }
01952 
01953   BOOL did_do_fast;
01954   INT16 limit;  /* maximum number of operations to replace the multiply */
01955   TN *x = var_tn;
01956   INT64 c = constant; // I don't want to depend on TARG_INT
01957   BOOL needs_sign_extension;
01958 
01959   // fast special cases
01960   if (c == 0) {
01961     Expand_Copy (result, Zero_TN, MTYPE_I8, ops);
01962     return TRUE;
01963   } else if (c == 1) {
01964     Expand_Copy (result, var_tn, MTYPE_I8, ops);
01965     return TRUE;
01966   } else if (c == -1) {
01967     Expand_Neg(result, var_tn, mtype, ops);
01968     return TRUE;
01969   }
01970     
01971   needs_sign_extension = MTYPE_size_reg(mtype) != 64;
01972 
01973   if (c < 0) {
01974     c = -c;
01975     x = DUP_TN(var_tn);
01976     Expand_Neg(x, var_tn, mtype, ops);
01977   }    
01978 
01979   TOP lea;
01980   TN *tmp_tn = Build_TN_Like(result);
01981   TN *tmp1_tn = Build_TN_Like(result);
01982   if (mtype == MTYPE_I4 || mtype == MTYPE_U4)
01983     lea = TOP_leax32;
01984   else
01985     lea = TOP_leax64;
01986   switch (c) {
01987   case 3:
01988     Build_OP (lea, result, x, x, Gen_Literal_TN (2, 4), 
01989         Gen_Literal_TN(0, 4), ops); 
01990     break;
01991   case 5:
01992     Build_OP (lea, result, x, x, Gen_Literal_TN (4, 4), 
01993         Gen_Literal_TN (0, 4), ops);
01994     break;
01995   case 6:
01996     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (2, 4), 
01997         Gen_Literal_TN (0, 4), ops); 
01998     Expand_Add (result, tmp_tn, tmp_tn, mtype, ops);
01999     break;
02000   case 7:
02001     Expand_Shift (tmp_tn, x, 
02002       Gen_Literal_TN (3, 4), mtype, shift_left, ops);
02003     Expand_Sub (result, tmp_tn, x, mtype, ops);
02004     break;
02005   case 9:
02006     Build_OP (lea, result, x, x, Gen_Literal_TN (8, 4), 
02007         Gen_Literal_TN (0, 4), ops);
02008     break;
02009   case 10:
02010     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (4, 4), 
02011         Gen_Literal_TN (0, 4), ops);
02012     Expand_Add (result, tmp_tn, tmp_tn, mtype, ops);    
02013     break;
02014   case 11:
02015     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (8, 4), 
02016         Gen_Literal_TN (0, 4), ops);
02017     Expand_Add (tmp1_tn, x, x, mtype, ops);    
02018     Expand_Add (result, tmp1_tn, tmp_tn, mtype, ops);    
02019     break;    
02020   case 12:
02021     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (2, 4), 
02022         Gen_Literal_TN (0, 4), ops);
02023     Expand_Shift (result, tmp_tn, 
02024       Gen_Literal_TN (2, 4), mtype, shift_left, ops);
02025     break;    
02026   case 13:
02027     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (2, 4), 
02028         Gen_Literal_TN (0, 4), ops);
02029     Expand_Shift (tmp1_tn, x, 
02030       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02031     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02032     break;    
02033   case 14:
02034     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (1, 4), 
02035         Gen_Literal_TN (0, 4), ops);
02036     Expand_Shift (tmp1_tn, x, 
02037       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02038     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02039     break;    
02040   case 15:
02041     Expand_Shift (tmp_tn, x, 
02042       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02043     Expand_Sub (result, tmp_tn, x, mtype, ops);    
02044     break;    
02045   case 17:
02046     Expand_Shift (tmp_tn, x, 
02047       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02048     Expand_Add (result, tmp_tn, x, mtype, ops);    
02049     break;    
02050   case 18:
02051     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (8, 4), 
02052         Gen_Literal_TN (0, 4), ops);
02053     Expand_Add (result, tmp_tn, tmp_tn, mtype, ops);    
02054     break;
02055   case 19:
02056     Build_OP (lea, tmp1_tn, x, x, Gen_Literal_TN (2, 4), 
02057         Gen_Literal_TN (0, 4), ops);
02058     Expand_Shift (tmp_tn, x, 
02059       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02060     Expand_Add (result, tmp_tn, tmp1_tn, mtype, ops);    
02061     break;
02062   case 20:
02063     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (4, 4), 
02064         Gen_Literal_TN (0, 4), ops);
02065     Expand_Shift (result, tmp_tn, 
02066       Gen_Literal_TN (2, 4), mtype, shift_left, ops);
02067     break;
02068   case 21:
02069     Build_OP (lea, tmp1_tn, x, x, Gen_Literal_TN (4, 4), 
02070         Gen_Literal_TN (0, 4), ops);
02071     Expand_Shift (tmp_tn, x, 
02072       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02073     Expand_Add (result, tmp_tn, tmp1_tn, mtype, ops);    
02074     break;
02075   case 23:
02076     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (8, 4), 
02077         Gen_Literal_TN (0, 4), ops);
02078     Expand_Shift (tmp1_tn, x, 
02079       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02080     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02081     break;    
02082   case 24:
02083     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (2, 4), 
02084         Gen_Literal_TN (0, 4), ops);
02085     Expand_Shift (result, tmp_tn, 
02086       Gen_Literal_TN (3, 4), mtype, shift_left, ops);
02087     break;
02088   case 25:
02089     Build_OP (lea, tmp1_tn, x, x, Gen_Literal_TN (8, 4), 
02090         Gen_Literal_TN (0, 4), ops);
02091     Expand_Shift (tmp_tn, x, 
02092       Gen_Literal_TN (4, 4), mtype, shift_left, ops);
02093     Expand_Add (result, tmp_tn, tmp1_tn, mtype, ops);    
02094     break;
02095   case 27:
02096     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (4, 4), 
02097         Gen_Literal_TN (0, 4), ops);
02098     Expand_Shift (tmp1_tn, x, 
02099       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02100     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02101     break;    
02102   case 28:
02103     Build_OP (mtype == MTYPE_I4 || 
02104         mtype == MTYPE_U4 ? TOP_leaxx32 : 
02105         TOP_leaxx64,tmp_tn, x, Gen_Literal_TN (4, 4), 
02106         Gen_Literal_TN (0, 4), ops);
02107     Expand_Shift (tmp1_tn, x, 
02108       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02109     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02110     break;
02111   case 29:
02112     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (2, 4), 
02113         Gen_Literal_TN (0, 4), ops);
02114     Expand_Shift (tmp1_tn, x, 
02115       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02116     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02117     break;    
02118   case 30:
02119     Build_OP (lea, tmp_tn, x, x, Gen_Literal_TN (1, 4), 
02120         Gen_Literal_TN (0, 4), ops);
02121     Expand_Shift (tmp1_tn, x, 
02122       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02123     Expand_Sub (result, tmp1_tn, tmp_tn, mtype, ops);    
02124     break;    
02125   case 31:
02126     Expand_Shift (tmp_tn, x, 
02127       Gen_Literal_TN (5, 4), mtype, shift_left, ops);
02128     Expand_Sub (result, tmp_tn, x, mtype, ops);    
02129     break;    
02130   default:
02131     return FALSE;
02132   }
02133   return TRUE;
02134 }
02135 
02136 void
02137 Expand_Multiply (TN *result, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02138 {
02139   const BOOL is_64bit = MTYPE_is_size_double(mtype);
02140 
02141   FmtAssert( !TN_has_value(src1), ("UNIMPLEMENTED") );
02142   FmtAssert( MTYPE_is_integral(mtype) && !MTYPE_is_mmx_vector(mtype),
02143              ("Should be handled in Expand_Flop") );
02144 
02145   if ( mtype == MTYPE_V16I2 ) {
02146     Expand_Flop(OPC_V16I2MPY, result, src1, src2, NULL, ops);
02147     return;
02148   }
02149 
02150   if( TN_has_value(src2) ){
02151     INT64 val = TN_value( src2 );
02152     if( val > 0 &&
02153   ( val & ( val - 1 ) ) == 0 ){
02154       int amt = 0;
02155       while( val != 1 ){
02156   amt++;
02157   val >>= 1;
02158       }
02159       TN* shift_tn = Gen_Literal_TN( amt, 4 );
02160       Expand_Shift( result, src1, shift_tn, mtype, shift_left, ops );
02161 
02162       return;
02163 
02164     } else if (CGEXP_cvrt_int_mult_to_add_shift &&
02165          Can_Do_Fast_Multiply (mtype, val)) {
02166       if (Expand_Constant_Multiply (result, src1, val, mtype, ops)) {
02167   /* able to convert multiply into shifts/adds/subs */
02168   return;
02169       }      
02170     } 
02171   }
02172 
02173   if (TN_has_value(src2)) {
02174     src2 = Expand_Immediate_Into_Register (src2, is_64bit, ops);
02175   }
02176   FmtAssert(!TN_is_constant(src1),
02177       ("Expand_Multiply: unexpected constant operand"));
02178   if (TN_is_constant(src2))
02179     src2 = Expand_Immediate_Into_Register(src2, is_64bit, ops);
02180 
02181   if( OP_NEED_PAIR( mtype ) ){
02182     Expand_Split_Multiply( result, src1, src2, ops );
02183 
02184   } else {
02185     Build_OP( is_64bit ? TOP_imul64 : TOP_imul32, result, src1, src2, ops );
02186   }
02187 }
02188 
02189 /* return high part of multiply result */
02190 void
02191 Expand_High_Multiply (TN *result, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02192 {
02193   FmtAssert(!TN_is_constant(src1),
02194       ("Expand_High_Multiply: unexpected constant operand"));
02195   if (!MTYPE_is_size_double(mtype)) {
02196     if (MTYPE_is_signed(mtype)) {
02197       if (TN_is_constant(src2))
02198   src2 = 
02199     Expand_Immediate_Into_Register(src2, 
02200            MTYPE_is_size_double(mtype), ops);
02201       TN *tmp_tn = Build_TN_Of_Mtype (MTYPE_I8);
02202       Build_OP(TOP_imul64, tmp_tn, src1, src2, ops); 
02203       Expand_Shift (result, tmp_tn, Gen_Literal_TN(32, 4), 
02204         MTYPE_I8, shift_lright, ops);
02205    } else {
02206       if (TN_is_constant(src2))
02207   src2 = 
02208     Expand_Immediate_Into_Register(src2, 
02209            MTYPE_is_size_double(mtype), ops);
02210       TN *tmp_tn = Build_TN_Like( result );
02211       Build_OP(TOP_mul32, tmp_tn, result, src1, src2, ops);
02212     }
02213   } else {    
02214     BOOL is_signed = MTYPE_is_signed(mtype);
02215     TN *result1 = Build_TN_Like(result);
02216     Build_OP(is_signed?TOP_imulx64:TOP_mulx64, 
02217        result1, result, src1, src2, ops);
02218   }
02219 }
02220 
02221 
02222 void Expand_Logical_Not (TN *dest, TN *src, VARIANT variant, OPS *ops)
02223 {
02224   /* dest = (src == 0) ? 1 : 0 */
02225   const BOOL is_64bit = (TN_size(src) == 8 );
02226   TN *rflags = Rflags_TN();
02227   // Perform "test" before clearing dest, since dest could be the same as src.
02228   // Use "move 0" instead of xor to clear dest in order to preserve rflag.
02229   Build_OP(is_64bit ? TOP_test64 : TOP_test32, rflags, src, src, ops);
02230   Build_OP(TOP_ldc32, dest, Gen_Literal_TN(0, 4), ops);
02231   Build_OP(TOP_sete, dest, rflags, ops);
02232   Set_OP_cond_def_kind(OPS_last(ops), OP_ALWAYS_COND_DEF);
02233 }
02234 
02235 void Expand_Logical_And (TN *dest, TN *src1, TN *src2, VARIANT variant, OPS *ops)
02236 {
02237   const BOOL is_64bit = (TN_size(src1) == 8 && TN_size(src2) == 8);
02238   Build_OP( is_64bit ? TOP_and64 : TOP_and32, dest, src1, src2, ops );
02239 }
02240 
02241 void Expand_Logical_Or (TN *dest, TN *src1, TN *src2, VARIANT variant, OPS *ops)
02242 {
02243   const BOOL is_64bit = (TN_size(src1) == 8 && TN_size(src2) == 8);
02244   Build_OP( is_64bit ? TOP_or64 : TOP_or32, dest, src1, src2, ops );
02245 }
02246 
02247 
02248 void Expand_Binary_Complement (TN *dest, TN *src, TYPE_ID mtype, OPS *ops)
02249 {
02250   if( OP_NEED_PAIR(mtype) )
02251     Expand_Split_UOP( OPR_BNOT, mtype, dest, src, ops );
02252   else
02253     Build_OP( MTYPE_is_size_double(mtype) ? TOP_not64 : TOP_not32, dest, src, ops );
02254 }
02255 
02256 void Expand_Binary_And (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02257 {
02258   const BOOL is_64bit = MTYPE_is_size_double(mtype);
02259   TOP new_opcode = is_64bit ? TOP_and64 : TOP_and32;
02260 
02261   if (TN_is_constant(src1)) {
02262     FmtAssert(TN_has_value(src1),("unexpected constant in Expand_Binary_And"));
02263     INT64 val = TN_value(src1);
02264     if (val == -1 ||
02265         !is_64bit && (val << 32 >> 32) == -1) {
02266       Expand_Copy (dest, src2, mtype, ops);
02267       return;
02268     }
02269 
02270     // Change "andl 0xff,src2" to "movzbl src1,src2".  This saves 3 bytes and
02271     // does not require the src and dest to be the same register.
02272     if (val == 0xff) {
02273       if( OP_NEED_PAIR(mtype) )
02274   Expand_Split_Cvtl( mtype, TOP_movzbq, dest, src2, ops );
02275       else
02276   Build_OP( is_64bit ? TOP_movzbq : TOP_movzbl, dest, src2, ops );
02277       return;
02278     }
02279 
02280     // Likewise for 32-bit src.
02281     if (val == 0xffff) {
02282       new_opcode = is_64bit ? TOP_movzwq : TOP_movzwl;
02283       if( OP_NEED_PAIR(mtype) )
02284   Expand_Split_Cvtl( mtype, new_opcode, dest, src2, ops );
02285       else
02286   Build_OP (new_opcode, dest, src2, ops);
02287       return;
02288     }
02289 
02290     if (!is_64bit || ISA_LC_Value_In_Class ( val, LC_simm32)) 
02291       new_opcode = is_64bit ? TOP_andi64 : TOP_andi32;
02292     else {
02293       src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
02294     }
02295 
02296     if( OP_NEED_PAIR(mtype) )
02297       Expand_Split_BOP( OPR_BAND, mtype, dest, src2, src1, ops );
02298     else
02299       Build_OP (new_opcode, dest, src2, src1, ops);
02300 
02301   } else if (TN_is_constant(src2)) {
02302     // switch order of src so immediate is first
02303     Expand_Binary_And (dest, src2, src1, mtype, ops);
02304 
02305   } else {
02306     switch(mtype) {
02307     case MTYPE_V16I1: 
02308       Build_OP(TOP_and128v8, dest, src1, src2, ops); break;
02309     case MTYPE_V16I2: 
02310       Build_OP(TOP_and128v16, dest, src1, src2, ops); break;
02311     case MTYPE_V16I4: 
02312       Build_OP(TOP_and128v32, dest, src1, src2, ops); break;
02313     case MTYPE_V16I8: 
02314       Build_OP(TOP_and128v64, dest, src1, src2, ops); break;
02315     default:
02316       if( OP_NEED_PAIR(mtype) ){
02317   Expand_Split_BOP( OPR_BAND, mtype, dest, src1, src2, ops );
02318 
02319       } else {
02320   Build_OP(new_opcode, dest, src1, src2, ops);
02321       }
02322       break;
02323     }
02324   }
02325 }
02326 
02327 void Expand_Binary_Or (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02328 {
02329   const BOOL is_64bit = MTYPE_is_size_double(mtype);
02330   TOP new_opcode = is_64bit ? TOP_or64 : TOP_or32;
02331 
02332   if (TN_is_constant(src1)) {
02333     FmtAssert(TN_has_value(src1),("unexpected constant in Expand_Binary_Or"));
02334     INT64 val = TN_value(src1);
02335     if (val == 0) {
02336       Expand_Copy (dest, src2, mtype, ops);
02337       return;
02338     }
02339 
02340     if (!is_64bit || ISA_LC_Value_In_Class ( val, LC_simm32)) 
02341       new_opcode = is_64bit ? TOP_ori64 : TOP_ori32;
02342     else {
02343       src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
02344     }
02345 
02346     if( OP_NEED_PAIR( mtype ) )
02347       Expand_Split_BOP( OPR_BIOR, mtype, dest, src2, src1, ops );
02348     else
02349       Build_OP (new_opcode, dest, src2, src1, ops);
02350 
02351   } else if (TN_is_constant(src2)) {
02352     // switch order of src so immediate is first
02353     Expand_Binary_Or (dest, src2, src1, mtype, ops);
02354 
02355   } else {
02356     switch(mtype) {
02357     case MTYPE_V16I1: 
02358       Build_OP(TOP_or128v8, dest, src1, src2, ops); break;
02359     case MTYPE_V16I2: 
02360       Build_OP(TOP_or128v16, dest, src1, src2, ops); break;
02361     case MTYPE_V16I4: 
02362       Build_OP(TOP_or128v32, dest, src1, src2, ops); break;
02363     case MTYPE_V16I8: 
02364       Build_OP(TOP_or128v64, dest, src1, src2, ops); break;
02365     default:
02366       if( OP_NEED_PAIR(mtype) ){
02367   Expand_Split_BOP( OPR_BIOR, mtype, dest, src1, src2, ops );
02368 
02369       } else {
02370   Build_OP(new_opcode, dest, src1, src2, ops);
02371       }
02372       break;
02373     }
02374   }
02375 }
02376 
02377 void Expand_Binary_Xor (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02378 {
02379   const BOOL is_64bit = MTYPE_is_size_double(mtype);
02380   TOP new_opcode = is_64bit ? TOP_xor64 : TOP_xor32;
02381 
02382   if (TN_is_constant(src1)) {
02383     FmtAssert(TN_has_value(src1),("unexpected constant in Expand_Binary_And"));
02384     INT64 val = TN_value(src1);
02385     if (val == 0 && src1 == dest ) {
02386       return;
02387     }
02388 
02389     if (!is_64bit || ISA_LC_Value_In_Class ( val, LC_simm32)) 
02390       new_opcode = is_64bit ? TOP_xori64 : TOP_xori32;
02391     else {
02392       src1 = Expand_Immediate_Into_Register(src1, is_64bit, ops);
02393     }
02394 
02395     if( OP_NEED_PAIR(mtype) ){
02396       Expand_Split_BOP( OPR_BXOR, mtype, dest, src2, src1, ops );
02397     } else {      
02398       Build_OP (new_opcode, dest, src2, src1, ops);
02399     }
02400 
02401   } else if (TN_is_constant(src2)) {
02402     // switch order of src so immediate is first
02403     Expand_Binary_Xor (dest, src2, src1, mtype, ops);
02404 
02405   } else {
02406     switch(mtype) {
02407     case MTYPE_V16I1: 
02408       Build_OP(TOP_xor128v8, dest, src1, src2, ops); break;
02409     case MTYPE_V16I2: 
02410       Build_OP(TOP_xor128v16, dest, src1, src2, ops); break;
02411     case MTYPE_V16I4: 
02412       Build_OP(TOP_xor128v32, dest, src1, src2, ops); break;
02413     case MTYPE_V16I8: 
02414       Build_OP(TOP_xor128v64, dest, src1, src2, ops); break;
02415     default:
02416       if( OP_NEED_PAIR(mtype) ){
02417   Expand_Split_BOP( OPR_BXOR, mtype, dest, src1, src2, ops );
02418       } else {      
02419   Build_OP(new_opcode, dest, src1, src2, ops); break;
02420       }
02421     }
02422   }
02423 }
02424 
02425 void Expand_Binary_Nor (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02426 {
02427   FmtAssert( FALSE, ("UNIMPLEMENTED") );
02428 }
02429 
02430 
02431 
02432 static void  Expand_Int_Cmp_With_Branch( TOP cmp_opcode, TN* src1, TN* src2,
02433            TOP set_opcode, TN* result, OPS* ops )
02434 {
02435   BB* bb_entry = Cur_BB;
02436   BB* bb_then  = Gen_And_Append_BB( bb_entry );  // for condition is satisfied
02437   BB* bb_exit  = Gen_And_Append_BB( bb_then );
02438   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
02439   TN* tmp_result = result;
02440 
02441   if( TN_is_dedicated(result) ){
02442     tmp_result = Build_TN_Like( result );
02443   }
02444 
02445   if( src1 == tmp_result ){
02446     TN* tmp = Build_TN_Like( src1 );
02447     Exp_COPY( tmp, src1, ops );
02448     src1 = tmp;
02449   }
02450 
02451   if( src2 == tmp_result ){
02452     TN* tmp = Build_TN_Like( src2 );
02453     Exp_COPY( tmp, src2, ops );
02454     src2 = tmp;
02455   }
02456 
02457   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
02458   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
02459   WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
02460 
02461   // Build bb_entry
02462   {
02463     TN* rflags = Rflags_TN();
02464     Build_OP( TOP_zero32, tmp_result, ops );
02465     Build_OP( cmp_opcode, rflags, src1, src2, ops );
02466 
02467     TOP jmp = TOP_UNDEFINED;
02468 
02469     switch( set_opcode ){
02470     case TOP_setg:  jmp = TOP_jle; break;
02471     case TOP_setge: jmp = TOP_jl;  break;
02472     case TOP_seta:  jmp = TOP_jbe; break;
02473     case TOP_setae: jmp = TOP_jb;  break;
02474     case TOP_setl:  jmp = TOP_jge; break;
02475     case TOP_setle: jmp = TOP_jg;  break;
02476     case TOP_setb:  jmp = TOP_jae; break;
02477     case TOP_setbe: jmp = TOP_ja;  break;
02478     case TOP_sete:  jmp = TOP_jne; break;
02479     case TOP_setne: jmp = TOP_je;  break;
02480     }
02481 
02482     Build_OP( jmp, rflags, Gen_Label_TN( bb_exit_label, 0 ), ops );
02483 
02484     if( &New_OPs != ops )
02485       OPS_Append_Ops( &New_OPs, ops );
02486     Process_New_OPs();
02487     BB_Append_Ops( bb_entry, &New_OPs );
02488     OPS_Init( &New_OPs );
02489     OPS_Init( ops );
02490   }
02491 
02492   // Build bb_then here.
02493   {
02494     OPS* bb_then_ops = &New_OPs;
02495     Exp_Immediate( tmp_result, Gen_Literal_TN(1,0), FALSE, bb_then_ops );
02496 
02497     total_bb_insts = 0;
02498     Last_Processed_OP = NULL;
02499     Process_New_OPs();
02500     BB_Append_Ops( bb_then, bb_then_ops );
02501     OPS_Init( bb_then_ops );
02502   }
02503 
02504   Cur_BB = bb_exit;
02505 
02506   if( result != tmp_result ){
02507     Exp_COPY( result, tmp_result, ops );
02508   }
02509 }
02510 
02511 
02512 static void Expand_Int_Cmp( TN* dest, TN* src1, TN* src2,
02513           TYPE_ID mtype, TOP set_opcode, OPS* ops )
02514 {
02515   if (TN_has_value( src1 )){
02516     FmtAssert( !TN_has_value( src2 ), ("src2 has value") );    
02517     TOP top = TOP_UNDEFINED;
02518     // Bug:084
02519     switch( set_opcode ){
02520     case TOP_sete:
02521     case TOP_setne:  top = set_opcode;  break;
02522     case TOP_setb:   top = TOP_seta;    break;
02523     case TOP_seta:   top = TOP_setb;    break;
02524     case TOP_setae:  top = TOP_setbe;   break;
02525     case TOP_setbe:  top = TOP_setae;   break;
02526     case TOP_setg:   top = TOP_setl;    break;
02527     case TOP_setl:   top = TOP_setg;    break;
02528     case TOP_setge:  top = TOP_setle;   break;
02529     case TOP_setle:  top = TOP_setge;   break;
02530     }
02531 
02532     Expand_Int_Cmp(dest, src2, src1, mtype, top, ops);
02533     return;
02534   }
02535 
02536   const BOOL is_64bit = MTYPE_is_size_double(mtype);
02537   TOP cmp_opcode = is_64bit ? TOP_cmp64 : TOP_cmp32;
02538   TN* rflags = Rflags_TN();
02539 
02540   if( TN_has_value( src2 ) ){
02541     UINT64 val = TN_value( src2 );
02542     if( TN_value( src2 ) == 0 ){
02543       cmp_opcode = is_64bit ? TOP_test64 : TOP_test32;
02544       src2 = src1;
02545     } else if( ISA_LC_Value_In_Class( val, LC_simm32 ) ){
02546       cmp_opcode = is_64bit ? TOP_cmpi64 : TOP_cmpi32;
02547     } else {
02548       src2 = Expand_Immediate_Into_Register (src2, is_64bit, ops);
02549     }
02550   }
02551 
02552   if( OP_NEED_PAIR( mtype ) ){
02553     Expand_Split_Int_Cmp( cmp_opcode, src1, src2, set_opcode, dest, mtype, ops );
02554 
02555   } else if( !CG_use_setcc ){
02556     Expand_Int_Cmp_With_Branch( cmp_opcode, src1, src2, set_opcode, dest, ops );
02557 
02558   } else {
02559     TN* tmp_tn = Build_TN_Of_Mtype(MTYPE_U1);  
02560     Build_OP( cmp_opcode, rflags, src1, src2, ops );
02561     Build_OP( set_opcode, tmp_tn, rflags, ops );
02562 
02563     if( Is_Target_32bit() &&
02564   TN_size(dest) == 8 ){
02565       Expand_Split_Cvtl( MTYPE_I8, TOP_movzbq, dest, tmp_tn, ops );
02566     } else {
02567       Build_OP( TN_size(dest) == 8 ? TOP_movzbq : TOP_movzbl,
02568     dest, tmp_tn, ops );
02569     }
02570   }
02571 }
02572 
02573 void Expand_Int_Less (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02574 {
02575   if( MTYPE_is_signed(mtype) &&
02576       TN_has_value( src2 )   &&
02577       TN_value( src2 ) == 0 ){
02578     // Replace "cmp" and "setl" with "shrl".
02579     TN* shift_amt = Gen_Literal_TN( MTYPE_is_size_double(mtype) ? 63 : 31, 4 );
02580     Expand_Shift( dest, src1, shift_amt, mtype, shift_lright, ops );
02581 
02582   } else {
02583     Expand_Int_Cmp( dest, src1, src2, mtype,
02584         MTYPE_is_signed(mtype) ? TOP_setl : TOP_setb,
02585         ops );
02586   }
02587 }
02588 
02589 void Expand_Int_Less_Equal (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02590 {
02591   Expand_Int_Cmp( dest, src1, src2, mtype,
02592       MTYPE_is_signed(mtype) ? TOP_setle : TOP_setbe,
02593       ops );
02594 }
02595 
02596 void Expand_Int_Equal (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02597 {
02598   Expand_Int_Cmp( dest, src1, src2, mtype, TOP_sete, ops );
02599 }
02600 
02601 void Expand_Int_Not_Equal (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02602 {
02603   Expand_Int_Cmp( dest, src1, src2, mtype, TOP_setne, ops );
02604 }
02605 
02606 void Expand_Int_Greater_Equal (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02607 {
02608   Expand_Int_Cmp( dest, src1, src2, mtype,
02609       MTYPE_is_signed(mtype) ? TOP_setge : TOP_setae,
02610       ops );
02611 }
02612 
02613 void Expand_Int_Greater (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
02614 {
02615   Expand_Int_Cmp( dest, src1, src2, mtype,
02616       MTYPE_is_signed(mtype) ? TOP_setg : TOP_seta,
02617       ops );
02618 }
02619 
02620 static void
02621 Expand_Bool_Comparison (BOOL equals, TN *dest, TN *src1, TN *src2, OPS *ops)
02622 {
02623   FmtAssert(FALSE,("Unimplemented"));
02624 }
02625 
02626 void
02627 Expand_Bool_Equal (TN *dest, TN *src1, TN *src2, OPS *ops)
02628 {
02629   FmtAssert(FALSE,("Unimplemented"));
02630 }
02631 
02632 void
02633 Expand_Bool_Not_Equal (TN *dest, TN *src1, TN *src2, OPS *ops)
02634 {
02635   FmtAssert(FALSE,("Unimplemented"));
02636 }
02637 
02638 void
02639 Expand_Bool_To_Int (TN *dest, TN *src, TYPE_ID rtype, OPS *ops)
02640 {
02641   FmtAssert(FALSE,("Unimplemented"));
02642 }
02643 
/*
 * Rounding-mode selector.  The enumerators keep their implicit values
 * 0..4, spelled out here.
 * NOTE(review): meanings are inferred from the names only -- presumably
 * "whatever the user has set", round-to-nearest, truncate, round toward
 * -infinity and round toward +infinity; confirm at the use sites.
 */
typedef enum {
  ROUND_USER     = 0,
  ROUND_NEAREST  = 1,
  ROUND_CHOP     = 2,
  ROUND_NEG_INF  = 3,
  ROUND_PLUS_INF = 4
} ROUND_MODE;
02651 
02652 
02653 static void Expand_SSE3_Long_Double_To_Int( TN* dest, TN* src,
02654               TYPE_ID imtype, OPS* ops )
02655 {
02656   TOP top = TOP_UNDEFINED;
02657   TY_IDX mem_ty = MTYPE_To_TY( imtype );
02658 
02659   switch( imtype ){
02660   case MTYPE_I2:  top = TOP_fisttps;   break;
02661   case MTYPE_I4:  top = TOP_fisttpl;   break;
02662   case MTYPE_U4:
02663     mem_ty = MTYPE_To_TY( MTYPE_U8 );
02664     /* fall thru */
02665   case MTYPE_U8:
02666   case MTYPE_I8:  top = TOP_fisttpll;  break;
02667   default:
02668     FmtAssert( false, ("Expand_SSE3_Long_Double_To_Int: unknown imtype") );
02669   }
02670 
02671   ST* mem_loc = Gen_Temp_Symbol( mem_ty, "x87_2_int" );
02672   Allocate_Temp_To_Memory( mem_loc );
02673   ST* mem_base_sym = NULL;
02674   INT64 mem_base_ofst = 0;
02675 
02676   Base_Symbol_And_Offset_For_Addressing(mem_loc, 0, &mem_base_sym,
02677           &mem_base_ofst);
02678   TN* mem_base_tn = mem_base_sym == SP_Sym ? SP_TN : FP_TN;
02679   TN* mem_ofst_tn = Gen_Literal_TN( mem_base_ofst, 4 );
02680 
02681   Build_OP( top, src, mem_base_tn, mem_ofst_tn, ops );
02682 
02683   CGTARG_Load_From_Memory( dest, mem_loc, ops );
02684 
02685   if( Trace_Exp ){
02686     Print_OPS( ops );
02687   }
02688 
02689   return;
02690 }
02691 
02692 
/* Convert the x87 value <src> to an integer of type <imtype> in <dest>.
 *
 * The value is stored to a stack temporary with fist*/fistt* and then
 * loaded into <dest>.  When the target is not SSE3, the x87 control word
 * is saved, modified and reloaded around each fist* and restored after.
 *
 * For MTYPE_U8 two extra basic blocks are created after Cur_BB: the ops
 * accumulated so far are flushed into the entry block together with a
 * conditional branch, a second conversion is emitted into the
 * fall-through block, and Cur_BB is left pointing at the exit block.
 *
 * Unsupported <imtype> values fail an assertion.
 */
02693 static void
02694 Expand_Long_Double_To_Int(TN* dest, TN* src, TYPE_ID imtype, OPS* ops)
02695 {
02696   TN* x87_cw_tn = NULL;
02697   TN* base_tn = NULL;
02698   TN* base_tn_new = NULL;
02699   TN* ofst_tn = NULL;
02700   TN* ofst_tn_new = NULL;
02701 
02702   // If not using SSE3 "fistt", we must emit code to set the rounding mode to
02703   // truncation before emitting "fist".
02704 
02705   if (!Is_Target_SSE3()) {
02706     x87_cw_tn = X87_cw_TN();
02707 
02708     // Allocate space to store the x87 control-word.
02709     const TY_IDX ty = MTYPE_To_TY( MTYPE_U2 );
02710     ST* st = Gen_Temp_Symbol( ty, "x87_cw" );
02711     Allocate_Temp_To_Memory( st );
02712 
02713     ST* base_sym = NULL;
02714     INT64 base_ofst = 0;
02715 
02716     Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
    // NOTE(review): this assertion fires when the base symbol is neither
    // SP_Sym nor FP_Sym, so the message text reads inverted ("is at stack").
02717     FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
02718          ("Expand_Long_Double_To_Int: base symbol is at stack") );
02719 
02720     base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
02721     ofst_tn = Gen_Literal_TN( base_ofst, 4 );
02722 
02723     // store the x87 control-word.
02724     Build_OP( TOP_fnstcw, x87_cw_tn, base_tn, ofst_tn, ops );
02725     Set_OP_volatile( OPS_last(ops) );
02726 
02727     // load the value into a 32-bit int register.
02728     TN* x87_cw = Gen_Register_TN( ISA_REGISTER_CLASS_integer, 4 );
02729     Exp_Load( MTYPE_U4, TY_mtype(ty), x87_cw, st, 0, ops, 0 );
02730 
02731     // OR in 3072 (0xC00) to select the truncating rounding mode.
02732     TN* new_x87_cw = Build_TN_Like( x87_cw );
02733     Expand_Binary_Or(new_x87_cw, x87_cw, Gen_Literal_TN(3072,4), MTYPE_U4, ops);
02734 
02735     // store new_x87_cw back to a new memory location.
02736     ST* st_new = Gen_Temp_Symbol( ty, "x87_cw_new" );
02737     Allocate_Temp_To_Memory( st_new );
02738     ST* base_sym_new = NULL;
02739     INT64 base_ofst_new = 0;
02740 
02741     Base_Symbol_And_Offset_For_Addressing(st_new, 0, &base_sym_new,
02742             &base_ofst_new);
02743 
02744     base_tn_new = base_sym_new == SP_Sym ? SP_TN : FP_TN;
02745     ofst_tn_new = Gen_Literal_TN( base_ofst_new, 4 );
02746 
02747     Exp_Store( TY_mtype(ty), new_x87_cw, st_new, 0, ops, 0 );
02748 
02749     // load the new x87_cw
02750     Build_OP( TOP_fldcw, x87_cw_tn, base_tn_new, ofst_tn_new, ops );
02751     Set_OP_volatile( OPS_last(ops) );
02752   }
02753 
02754   // do the real conversion work here.
02755   TOP top = TOP_UNDEFINED;
02756   TY_IDX mem_ty = MTYPE_To_TY( imtype );
02757 
02758   if (Is_Target_SSE3()) {
02759     switch( imtype ){
02760       case MTYPE_I2:  top = TOP_fisttps;   break;
02761       case MTYPE_I4:  top = TOP_fisttpl;   break;
02762       case MTYPE_U4:
02763   mem_ty = MTYPE_To_TY( MTYPE_U8 );
02764   /* fall thru */
02765       case MTYPE_U8:
02766       case MTYPE_I8:  top = TOP_fisttpll;  break;
02767       default:
02768   FmtAssert( false, ("Expand_Long_Double_To_Int: unknown imtype") );
02769     }
02770   } else {  // not SSE3
02771     switch (imtype) {
02772       case MTYPE_I2:    top = TOP_fistps;   break;
02773       case MTYPE_I4:    top = TOP_fistpl;   break;
02774       case MTYPE_U4:
02775   /* bug#658
02776      We need bigger space for TOP_fistpll.
02777    */
02778   mem_ty = MTYPE_To_TY( MTYPE_U8 );
02779   /* fall thru */
02780       case MTYPE_U8:
02781       case MTYPE_I8:    top = TOP_fistpll;  break;
02782       default:
02783   FmtAssert( false, ("Expand_Long_Double_To_Int: unknown imtype") );
02784     }
02785   }
02786 
  // Stack temporary that receives the converted integer.
02787   ST* mem_loc = Gen_Temp_Symbol( mem_ty, "x87_2_int" );
02788   Allocate_Temp_To_Memory( mem_loc );
02789   ST* mem_base_sym = NULL;
02790   INT64 mem_base_ofst = 0;
02791 
02792   Base_Symbol_And_Offset_For_Addressing(mem_loc, 0, &mem_base_sym,
02793           &mem_base_ofst);
02794   TN* mem_base_tn = mem_base_sym == SP_Sym ? SP_TN : FP_TN;
02795   TN* mem_ofst_tn = Gen_Literal_TN( mem_base_ofst, 4 );
02796 
02797   Build_OP( top, src, mem_base_tn, mem_ofst_tn, ops );
02798 
02799   // restore the original x87_cw
02800   if (!Is_Target_SSE3()) {
02801     Build_OP( TOP_fldcw, x87_cw_tn, base_tn, ofst_tn, ops );
02802     Set_OP_volatile( OPS_last(ops) );
02803   }
02804 
02805   // More work to do for a long double -> unsigned long conversion.
02806 
02807   if( imtype == MTYPE_U8 ){
02808     BB* bb_entry = Cur_BB;
02809     BB* bb_then = Gen_And_Append_BB( bb_entry );  // fall-through block: runs when the branch below is not taken
02810 
02811     BB* bb_exit  = Gen_And_Append_BB( bb_then );
02812     const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
02813 
02814     BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
02815     WN_kid0(BB_branch_wn(bb_entry)) = NULL;
02816     WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
02817 
    // NOTE(review): these words look like the 80-bit extended encoding of
    // 2^63 (exponent 0x403e, top mantissa bit set) -- confirm against the
    // TCON layout.
02818     TCON tcon = Host_To_Targ_Quad( 0 );
02819     TCON_u0( tcon ) = 0x0;
02820     TCON_u1( tcon ) = 0x80000000;
02821     TCON_u2( tcon ) = 0x403e;
02822     TCON_u3( tcon ) = 0x0;      
02823 
02824     ST* tcon_sym = New_Const_Sym(Enter_tcon(tcon), Be_Type_Tbl(TCON_ty(tcon)));
02825     ST* tcon_base_sym = NULL;
02826     INT64 tcon_base_ofst = 0;
02827 
02828     Allocate_Object(tcon_sym);
02829     Base_Symbol_And_Offset_For_Addressing( tcon_sym, 0,
02830              &tcon_base_sym, &tcon_base_ofst );
02831 
02832     TN* max_value_tn = Build_TN_Like( src );
02833     Expand_Const( max_value_tn,
02834       Gen_Symbol_TN( tcon_base_sym, tcon_base_ofst, TN_RELOC_NONE ),
02835       MTYPE_FQ, ops );
02836 
02837     // Build bb_entry
02838     {
      // Branch straight to bb_exit when src < max_value (V_BR_QLT); the
      // first conversion's result is then used as is.
02839       Exp_OP3v( OPC_TRUEBR,
02840     NULL,
02841     Gen_Label_TN( bb_exit_label, 0 ),
02842     src,
02843     max_value_tn,
02844     V_BR_QLT,
02845     ops );
02846 
      // Flush everything emitted so far into bb_entry and reset both lists.
02847       if( &New_OPs != ops )
02848   OPS_Append_Ops( &New_OPs, ops );
02849       Process_New_OPs();
02850       BB_Append_Ops( bb_entry, &New_OPs );
02851       OPS_Init( &New_OPs );
02852       OPS_Init( ops );
02853     }
02854 
02855     // Build bb_then here.
02856     {
02857       OPS* bb_then_ops = &New_OPs;
02858       TN* tmp = Build_TN_Like( src );
02859 
      // tmp = src - max_value
02860       Build_OP( TOP_fsub, tmp, src, max_value_tn, bb_then_ops );
02861 
02862       // load the new x87_cw
02863       if (!Is_Target_SSE3()) {
02864   Build_OP( TOP_fldcw, x87_cw_tn, base_tn_new, ofst_tn_new, bb_then_ops );
02865   Set_OP_volatile( OPS_last(bb_then_ops) );
02866       }
02867 
02868       // convert again.
      // NOTE(review): TOP_fistpll is used here even on SSE3 targets, where
      // the control word was not modified above -- confirm this is intended.
02869       Build_OP( TOP_fistpll, tmp, mem_base_tn, mem_ofst_tn, bb_then_ops );
02870 
02871       // restore the original x87_cw
02872       if (!Is_Target_SSE3()) {
02873   Build_OP( TOP_fldcw, x87_cw_tn, base_tn, ofst_tn, bb_then_ops );
02874   Set_OP_volatile( OPS_last(bb_then_ops) );
02875       }
02876 
      // Reload the converted value and flip its top bit, which puts back
      // the amount subtracted before the conversion.
02877       TN* tmp_dest = Build_TN_Like( dest );
02878       CGTARG_Load_From_Memory( tmp_dest, mem_loc, bb_then_ops );
02879       
02880       Expand_Binary_Xor( tmp_dest, tmp_dest,
02881        Gen_Literal_TN( 0x8000000000000000ULL, 8 ),
02882        MTYPE_U8, bb_then_ops );
02883 
02884       /* ebo should get rid of this extra store; otherwise, we need to insert
02885    a bb_else_bb.
02886       */
02887       CGTARG_Store_To_Memory( tmp_dest, mem_loc, bb_then_ops );
02888 
02889       total_bb_insts = 0;
02890       Last_Processed_OP = NULL;
02891       Process_New_OPs();
02892       BB_Append_Ops( bb_then, bb_then_ops );
02893       OPS_Init( bb_then_ops );
02894     }
02895 
    // Ops emitted from here on belong to the exit block.
02896     Cur_BB = bb_exit;
02897   }
02898 
02899   // OSP 495
02900   if ( imtype == MTYPE_U4 && TY_mtype(mem_ty) == MTYPE_U8 ) {
02901     // in this case, we only need to load lower 4 bytes
02902     Exp_Load(MTYPE_U4, MTYPE_U4, dest, mem_loc, 0, ops, 0);
02903   }
02904   else {
02905     CGTARG_Load_From_Memory( dest, mem_loc, ops );
02906   }
02907 
02908   if( Trace_Exp ){
02909     Print_OPS( ops );
02910   }
02911 }
02912 
02913 
02914 static void Expand_Float_To_Long_m32( TN* dest,
02915               TN* src,
02916               TYPE_ID imtype,
02917               TYPE_ID fmtype,
02918               OPS* ops )
02919 {
02920   FmtAssert( TN_register_class(src) == ISA_REGISTER_CLASS_float,
02921        ("Expand_Float_To_Long_m32: source is not a xmm register") );
02922 
02923   TN* x87_src = Build_TN_Of_Mtype( MTYPE_FQ );
02924   Expand_Float_To_Float( x87_src, src, MTYPE_FQ, fmtype, ops );
02925   Expand_Long_Double_To_Int( dest, x87_src, imtype, ops );  
02926 }
02927 
02928 
02929 static TN* Generate_Cmp_Ctrl_TN( OPERATOR compare )
02930 {
02931   int imm8 = 0;
02932 
02933   switch( compare ){
02934   case OPR_EQ: imm8 = 0; break;
02935   case OPR_LT: imm8 = 1; break;
02936   case OPR_LE: imm8 = 2; break;
02937   case OPR_NE: imm8 = 4; break;
02938   case OPR_GE: imm8 = 5; break;
02939   case OPR_GT: imm8 = 6; break;
02940   default:
02941     FmtAssert( FALSE, ("Unknown opcode") );
02942   }
02943 
02944   return Gen_Literal_TN( imm8, 4 );
02945 }
02946 
02947 
02948 static void Expand_Unsigned_Float_To_Int_m32( TN* result,
02949                 TN* src,
02950                 TYPE_ID fmtype,
02951                 OPS* ops )
02952 {
02953   TN* dest = result;
02954 
02955   if( TN_is_dedicated(result) ){
02956     dest = Build_TN_Like(result);
02957   }
02958 
02959   const BOOL is_double = MTYPE_is_size_double(fmtype);
02960   TN* fp_const = Build_TN_Like( src );
02961 
02962   Build_OP( TOP_ldsd_n32, fp_const,
02963       Gen_Const_Symbol_TN( 0, 0x80000000, fmtype ),
02964       ops );
02965 
02966   TN* fp_tmp_tn = Build_TN_Like( src );
02967   TN* ctrl = Generate_Cmp_Ctrl_TN( OPR_LE );
02968   Build_OP( is_double ? TOP_cmpsd : TOP_cmpss,
02969       fp_tmp_tn, fp_const, src, ctrl, ops );
02970   Build_OP( is_double ? TOP_andpd : TOP_andps, fp_tmp_tn,
02971       fp_tmp_tn, fp_const, ops );
02972   Build_OP( is_double ? TOP_subsd : TOP_subss, fp_tmp_tn,
02973       src, fp_tmp_tn, ops );
02974 
02975   TN* int_tmp_tn = Build_TN_Of_Mtype( MTYPE_U4 );
02976   Exp_Immediate( int_tmp_tn, Gen_Literal_TN(0,4), ops );
02977 
02978   TN* sign_mask = Build_TN_Of_Mtype( MTYPE_U4 );
02979   Exp_Immediate( sign_mask, Gen_Literal_TN(0x80000000,4), ops );
02980 
02981   TN* rflags = Rflags_TN();
02982   Build_OP( is_double ? TOP_comisd : TOP_comiss,
02983       rflags, fp_const, src, ops );
02984   Expand_Cmov( TOP_cmovbe, int_tmp_tn, sign_mask, rflags, ops );
02985 
02986   Build_OP( is_double ? TOP_cvttsd2si : TOP_cvttss2si, dest, fp_tmp_tn, ops );
02987   Build_OP( TOP_add32, dest, dest, int_tmp_tn, ops );
02988 
02989   if( result != dest ){
02990     Exp_COPY( result, dest, ops );
02991   }
02992 }
02993 
02994 
02995 static void
02996 Expand_Float_To_Int (ROUND_MODE rm, TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
02997 {
02998   TOP top = TOP_UNDEFINED;
02999 
03000   if( MTYPE_is_quad( fmtype ) ||
03001       !Is_Target_SSE2() ){
03002     Expand_Long_Double_To_Int( dest, src, imtype, ops );
03003     return;
03004 
03005   } else if( MTYPE_bit_size(imtype) == 64 &&
03006        Is_Target_32bit() ){
03007     Expand_Float_To_Long_m32( dest, src, imtype, fmtype, ops );
03008     return;
03009 
03010   } else if( fmtype == MTYPE_F4 ){ 
03011     if( MTYPE_bit_size(imtype) == 64 ){
03012       if( MTYPE_is_signed(imtype) )
03013   top = TOP_cvttss2siq;
03014       else {
03015   /* For "float" to "unsigned long long" conversion, operation
03016      cvttss2siq will lose some accuracy.  (bug#2867)
03017   */
03018   Expand_Float_To_Long_m32( dest, src, imtype, fmtype, ops );
03019   return;
03020       } 
03021 
03022     } else if( MTYPE_bit_size(imtype) == 32 ){
03023       if( Is_Target_32bit() &&
03024     MTYPE_is_unsigned(imtype) ){
03025   Expand_Unsigned_Float_To_Int_m32( dest, src, fmtype, ops );
03026   return;
03027       }
03028 
03029       top = MTYPE_is_signed(imtype) ? TOP_cvttss2si : TOP_cvttss2siq;
03030     }
03031 
03032   } else if (fmtype == MTYPE_F8) {
03033     if( MTYPE_bit_size(imtype) == 64 ){
03034       if( MTYPE_is_signed(imtype) )
03035   top = TOP_cvttsd2siq;
03036       else {
03037   /* For "double" to "unsigned long long" conversion, operation
03038      cvttsd2siq will lose some accuracy.  (bug#2867)
03039   */
03040   Expand_Float_To_Long_m32( dest, src, imtype, fmtype, ops );
03041   return;
03042       }
03043 
03044     } else if( MTYPE_bit_size(imtype) == 32 ){
03045       if( Is_Target_32bit() &&
03046     MTYPE_is_unsigned(imtype) ){
03047   Expand_Unsigned_Float_To_Int_m32( dest, src, fmtype, ops );
03048   return;
03049       }
03050 
03051       top = MTYPE_is_signed(imtype) ? TOP_cvttsd2si : TOP_cvttsd2siq;
03052     }
03053   }
03054 
03055   else if ( fmtype == MTYPE_V16F4 ) {
03056     if (imtype == MTYPE_V16I4)
03057       top = TOP_cvttps2dq;
03058     else if (imtype == MTYPE_V16I8)
03059       top = TOP_movdq;  // bug 12731
03060     else
03061       FmtAssert(FALSE, ("Expand_Float_To_Int: NYI"));
03062   }
03063 
03064   else if ( fmtype == MTYPE_V16F8 ) {
03065     // Workaround for bug 3082, not supposed to generate correct code
03066     top = TOP_cvttpd2dq;
03067   }
03068 
03069   FmtAssert( top != TOP_UNDEFINED,
03070        ("Expand_Float_To_Int: undefined opcode") );
03071 
03072   Build_OP( top, dest, src, ops );  
03073 }
03074 
03075 void
03076 Expand_Float_To_Int_Cvt (TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03077 {
03078   Expand_Float_To_Int (ROUND_USER, dest, src, imtype, fmtype, ops);
03079 }
03080 
03081 void
03082 Expand_Float_To_Int_Round (TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03083 {
03084   Expand_Float_To_Int (ROUND_NEAREST, dest, src, imtype, fmtype, ops);
03085 }
03086 
03087 void
03088 Expand_Float_To_Int_Trunc (TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03089 {
03090   Expand_Float_To_Int (ROUND_CHOP, dest, src, imtype, fmtype, ops);
03091 }
03092 
03093 void
03094 Expand_Float_To_Int_Tas (TN *dest, TN *src, TYPE_ID imtype, OPS *ops)
03095 {
03096   Is_True( Is_Target_32bit(), ("Expand_Float_To_Int_Tas should not be invoked under -m64") );
03097   // Allocate space to store the floating point value
03098   const TY_IDX ty = MTYPE_To_TY( imtype );
03099   ST* st = Gen_Temp_Symbol( ty, "float_2_int" );
03100   Allocate_Temp_To_Memory( st );
03101 
03102   ST* base_sym = NULL;
03103   INT64 base_ofst = 0;
03104 
03105   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03106   FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
03107        ("Expand_Float_To_Int_Tas: base symbol is on stack") );
03108 
03109   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03110   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03111 
03112   if (MTYPE_byte_size(imtype) == 4) {
03113     // store the float value to memory
03114     Build_OP(Is_Target_SSE2() ? TOP_stss : TOP_fstps, src, base_tn, ofst_tn, ops );
03115     // load the value into an int register.
03116     Build_OP(TOP_ld32, dest, base_tn, ofst_tn, ops );
03117   }
03118   else {
03119     // store the float value to memory
03120     Build_OP(Is_Target_SSE2() ? TOP_stsd : TOP_fstpl, src, base_tn, ofst_tn, ops );
03121     // load the value into an int register.
03122     Expand_Load(OPCODE_make_op(OPR_LDID, imtype, imtype), dest, base_tn, ofst_tn, ops);
03123   }
03124 } 
03125 
03126 void
03127 Expand_Int_To_Float_Tas (TN *dest, TN *src, TYPE_ID fmtype, OPS *ops)
03128 {
03129   Is_True( Is_Target_32bit(), ("Expand_Int_To_Float_Tas should not be invoked under -m64") );
03130   // Allocate space to store the integer point value
03131   const TY_IDX ty = MTYPE_To_TY( fmtype );
03132   ST* st = Gen_Temp_Symbol( ty, "int_2_float" );
03133   Allocate_Temp_To_Memory( st );
03134 
03135   ST* base_sym = NULL;
03136   INT64 base_ofst = 0;
03137 
03138   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03139   FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
03140        ("Expand_Float_To_Int_Tas: base symbol is on stack") );
03141 
03142   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03143   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03144 
03145   if (MTYPE_byte_size(fmtype) == 4) {
03146     // store the int value to memory
03147     Build_OP(TOP_store32, src, base_tn, ofst_tn, ops );
03148     // load the value into a float register.
03149     Build_OP(Is_Target_SSE2() ? TOP_ldss : TOP_flds, dest, base_tn, ofst_tn, ops );
03150   }
03151   else {
03152     // store the int value to memory
03153     Expand_Store(MTYPE_U8, src, base_tn, ofst_tn, ops );
03154     // load the value into a float register.
03155     Build_OP(Is_Target_SSE2() ? TOP_ldsd : TOP_fldl, dest, base_tn, ofst_tn, ops );
03156   }
03157 } 
03158 
03159 void
03160 Expand_Int_To_Vect_Tas (TN *dest, TN *src, TYPE_ID vectype, OPS *ops)
03161 {
03162   FmtAssert(MTYPE_byte_size(vectype) == 8,
03163         ("Expand_Int_To_Vect_Tas: 16-byte vector type not handled"));
03164   FmtAssert(TN_register_class(src) == ISA_REGISTER_CLASS_integer,
03165         ("Expand_Int_To_Vect_Tas: source operand not integer"));
03166 
03167   // Allocate space to store the integer value
03168   const TY_IDX ty = MTYPE_To_TY( vectype );
03169   ST* st = Gen_Temp_Symbol( ty, "int_2_vect" );
03170   Allocate_Temp_To_Memory( st );
03171 
03172   ST* base_sym = NULL;
03173   INT64 base_ofst = 0;
03174 
03175   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03176   FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
03177        ("Expand_Float_To_Int_Tas: base symbol is on stack") );
03178 
03179   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03180   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03181 
03182   // store the int value to memory
03183   Expand_Store(MTYPE_U8, src, base_tn, ofst_tn, ops);
03184 
03185   // load the value into the vector register
03186   Expand_Load(OPCODE_make_op(OPR_LDID,vectype,vectype), dest, base_tn, ofst_tn,
03187           ops);
03188 }
03189 
03190 static void Expand_non_SSE2_Float_Floor( TN* dest, TN* src, OPS* ops )
03191 {
03192   // Allocate space to store the x87 control-word.
03193   const TY_IDX ty = MTYPE_To_TY( MTYPE_U2 );
03194   TN* x87_cw_tn = X87_cw_TN();
03195   ST* st = Gen_Temp_Symbol( ty, "x87_cw" );
03196   Allocate_Temp_To_Memory( st );
03197 
03198   ST* base_sym = NULL;
03199   INT64 base_ofst = 0;
03200 
03201   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03202   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03203   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03204 
03205   // store the x87 control-word.
03206   Build_OP( TOP_fnstcw, x87_cw_tn, base_tn, ofst_tn, ops );
03207   Set_OP_volatile( OPS_last(ops) );
03208 
03209   // load the value into a 32-bit int register.
03210   TN* x87_cw = Gen_Register_TN( ISA_REGISTER_CLASS_integer, 4 );
03211   Exp_Load( MTYPE_U4, TY_mtype(ty), x87_cw, st, 0, ops, 0 );
03212 
03213   // perform an AND and an OR to mask out that bit.
03214   TN* new_x87_cw = Build_TN_Like( x87_cw );
03215   Expand_Binary_And( new_x87_cw, x87_cw, Gen_Literal_TN(-3073,4), MTYPE_U4, ops );
03216   Expand_Binary_Or( new_x87_cw, new_x87_cw, Gen_Literal_TN(1024,4), MTYPE_U4, ops );
03217 
03218   // store new_x87_cw back to a new memory location.
03219   ST* st_new = Gen_Temp_Symbol( ty, "x87_cw_new" );
03220   Allocate_Temp_To_Memory( st_new );
03221   ST* base_sym_new = NULL;
03222   INT64 base_ofst_new = 0;
03223 
03224   Base_Symbol_And_Offset_For_Addressing( st_new, 0, &base_sym_new, &base_ofst_new );
03225 
03226   TN* base_tn_new = base_sym_new == SP_Sym ? SP_TN : FP_TN;
03227   TN* ofst_tn_new = Gen_Literal_TN( base_ofst_new, 4 );
03228 
03229   Exp_Store( TY_mtype(ty), new_x87_cw, st_new, 0, ops, 0 );
03230 
03231   // load the new x87_cw
03232   Build_OP( TOP_fldcw, x87_cw_tn, base_tn_new, ofst_tn_new, ops );
03233   Set_OP_volatile( OPS_last(ops) );
03234 
03235   // do the real convertion work here.
03236   Build_OP( TOP_frndint, dest, src, ops );
03237 
03238   // load the original x87_cw
03239   Build_OP( TOP_fldcw, x87_cw_tn, base_tn, ofst_tn, ops );
03240   Set_OP_volatile( OPS_last(ops) );  
03241 }
03242 
03243 
03244 void Expand_Float_To_Float_Floorl( TN* dest, TN* src,
03245            TYPE_ID rtype, TYPE_ID desc, OPS* ops )
03246 {
03247   Expand_non_SSE2_Float_Floor( dest, src, ops );
03248   return;
03249 }
03250 
03251 
03252 void Expand_Float_To_Float_Floorf( TN* dest, TN* src,
03253            TYPE_ID rtype, TYPE_ID desc, OPS* ops )
03254 {
03255   FmtAssert( rtype == MTYPE_F4,
03256        ("Expand_Float_To_Float_Floorf: rtype is not float") );
03257   FmtAssert( rtype == desc,
03258        ("Expand_Float_To_Float_Floorf: rtype and desc are different") );
03259 
03260   if( dest == src ){
03261     TN* tmp = Build_TN_Like( src );
03262     Expand_Copy( tmp, src, desc, ops );
03263     src = tmp;
03264   }
03265 
03266   if( !Is_Target_SSE2() ){
03267     Expand_non_SSE2_Float_Floor( dest, src, ops );
03268     return;
03269   }
03270 
03271   /* First, generate all the necessary values. */
03272 
03273   TN* sign_mask = Build_TN_Like( dest );
03274   Expand_Const( sign_mask, Gen_Const_Symbol_TN( 0x80000000, 0.0, MTYPE_I4 ),
03275     rtype, ops );
03276 
03277   TN* mi6_val = Build_TN_Like( dest );
03278   Expand_Const( mi6_val, Gen_Const_Symbol_TN( 0x4b000000, 0.0, MTYPE_I4 ),
03279     rtype, ops );
03280 
03281   TN* one_point_zero = Build_TN_Like( dest );
03282   Expand_Const( one_point_zero, Gen_Const_Symbol_TN( 0x3f800000, 0.0, MTYPE_I4 ),
03283     rtype, ops );
03284 
03285   /* Execute the floor algorithm. */
03286 
03287   TN* sign_tn = Build_TN_Like( dest );
03288   TN* mi6_tn = Build_TN_Like( dest );
03289   TN* tmp1 = Build_TN_Like( dest );
03290   TN* result1 = Build_TN_Like( dest );
03291   TN* diff_tn = Build_TN_Like( dest );
03292   TN* ones_or_zeros = Build_TN_Like( dest );
03293   TN* fraction_tn = Build_TN_Like( dest );
03294 
03295   Build_OP( TOP_andps, sign_tn, sign_mask, src,     ops );
03296   Build_OP( TOP_orps,  mi6_tn,  mi6_val,   sign_tn, ops );
03297   Build_OP( TOP_addss, tmp1,    src,       mi6_tn,  ops );
03298   Build_OP( TOP_subss, result1, tmp1,      mi6_tn,  ops );
03299   Build_OP( TOP_subss, diff_tn, result1,   src,     ops );
03300   TN* ctrl = Generate_Cmp_Ctrl_TN( OPR_GT );
03301   Build_OP( TOP_cmpss, ones_or_zeros, diff_tn, sign_mask, ctrl, ops );
03302   Build_OP( TOP_andps, fraction_tn, ones_or_zeros, one_point_zero, ops );
03303   Build_OP( TOP_subss, dest,    result1,   fraction_tn, ops );
03304 }
03305 
03306 
03307 void Expand_Float_To_Float_Floor( TN* dest, TN* src,
03308           TYPE_ID rtype, TYPE_ID desc, OPS* ops )
03309 {
03310   FmtAssert( rtype == MTYPE_F8,
03311        ("Expand_Float_To_Float_Floor: rtype is not double") );       
03312   FmtAssert( rtype == desc,
03313        ("Expand_Float_To_Float_Floor: rtype and desc are different") );
03314 
03315   if( dest == src ){
03316     TN* tmp = Build_TN_Like( src );
03317     Expand_Copy( tmp, src, desc, ops );
03318     src = tmp;
03319   }
03320 
03321   if( !Is_Target_SSE2() ){
03322     Expand_non_SSE2_Float_Floor( dest, src, ops );
03323     return;
03324   }
03325 
03326   /* First, generate all the necessary values. */
03327 
03328   TN* sign_mask = Build_TN_Like( dest );
03329   Expand_Const( sign_mask,
03330     Gen_Const_Symbol_TN( 0x8000000000000000ULL, 0.0, MTYPE_I8 ),
03331     rtype, ops );
03332 
03333   TN* mi6_val = Build_TN_Like( dest );
03334   Expand_Const( mi6_val,
03335     Gen_Const_Symbol_TN( 0x4330000000000000ULL, 0.0, MTYPE_I8 ),
03336     rtype, ops );
03337 
03338   TN* one_point_zero = Build_TN_Like( dest );
03339   Expand_Const( one_point_zero,
03340     Gen_Const_Symbol_TN( 0x3ff0000000000000ULL, 0.0, MTYPE_I8 ),
03341     rtype, ops );
03342 
03343   /* Execute the floor algorithm. */
03344 
03345   TN* sign_tn = Build_TN_Like( dest );
03346   TN* mi6_tn = Build_TN_Like( dest );
03347   TN* xor_tn = Build_TN_Like( dest );
03348   TN* lt_tn = Build_TN_Like( dest );
03349   TN* and_tn = Build_TN_Like( dest );
03350   TN* tmp1 = Build_TN_Like( dest );
03351   TN* result1 = Build_TN_Like( dest );
03352   TN* diff_tn = Build_TN_Like( dest );
03353   TN* ones_or_zeros = Build_TN_Like( dest );
03354   TN* fraction_tn = Build_TN_Like( dest );
03355 
03356   Build_OP( TOP_andpd, sign_tn, sign_mask, src,     ops );
03357   Build_OP( TOP_xorpd, xor_tn,  src, sign_tn, ops );
03358   TN* ctrl_lt = Generate_Cmp_Ctrl_TN( OPR_LT );
03359   Build_OP( TOP_cmpsd, lt_tn, xor_tn, mi6_val, ctrl_lt, ops );
03360   Build_OP( TOP_andpd, and_tn, lt_tn, mi6_val, ops );
03361   Build_OP( TOP_orpd, mi6_tn, and_tn, sign_tn, ops );
03362   Build_OP( TOP_addsd, tmp1,    src,       mi6_tn,  ops );
03363   Build_OP( TOP_subsd, result1, tmp1,      mi6_tn,  ops );
03364   Build_OP( TOP_subsd, diff_tn, result1,   src,     ops );
03365   TN* ctrl = Generate_Cmp_Ctrl_TN( OPR_GT );
03366   Build_OP( TOP_cmpsd, ones_or_zeros, diff_tn, sign_mask, ctrl, ops );
03367   Build_OP( TOP_andpd, fraction_tn, ones_or_zeros, one_point_zero, ops );
03368   Build_OP( TOP_subsd, dest,    result1,   fraction_tn, ops );
03369 }
03370 
03371 
03372 void
03373 Expand_Float_To_Int_Floor (TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03374 {
03375   TN* dest1 = Build_TN_Like( dest );
03376   const BOOL is_double = MTYPE_is_size_double(fmtype);
03377   TN* rflags = Rflags_TN();
03378 
03379   Expand_Float_To_Int( ROUND_NEG_INF, dest, src, imtype, fmtype, ops );
03380   Expand_Sub( dest1, dest, Gen_Literal_TN( 1, 4 ), imtype, ops );
03381 
03382   TN* src1  = Build_TN_Like( src );
03383   if( Is_Target_SSE2() )
03384     Build_OP( is_double ? TOP_xorpd : TOP_xorps, src1, src, src, ops );
03385   else
03386     Build_OP( TOP_fldz, src1, ops );
03387 
03388   // Compare <src> with 0.0
03389   Build_OP( ( MTYPE_is_quad( imtype ) || !Is_Target_SSE2() )
03390       ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss ),
03391       rflags, src, src1, ops );
03392   
03393   Expand_Cmov( TOP_cmova, dest1, dest, rflags, ops );
03394 
03395   Expand_Int_To_Float( src1, dest, imtype, fmtype, ops );
03396 
03397   // Compare <src> with itself at the integer side.
03398   Build_OP( ( MTYPE_is_quad( imtype ) || !Is_Target_SSE2() )
03399       ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss ),
03400       rflags, src, src1, ops );
03401 
03402   Expand_Cmov( TOP_cmovne, dest, dest1, rflags, ops );
03403 }
03404 
03405 
03406 void
03407 Expand_Float_To_Int_Ceil (TN *result, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03408 {
03409   TN* dest = result;
03410 
03411   if( TN_is_dedicated(result) ){
03412     dest = Build_TN_Like( result );
03413   }
03414 
03415   TN* dest1 = Build_TN_Like( dest );
03416   const BOOL is_double = MTYPE_is_size_double(fmtype);
03417   TN* rflags = Rflags_TN();
03418 
03419   Expand_Float_To_Int( ROUND_NEG_INF, dest, src, imtype, fmtype, ops );
03420   Expand_Add( dest1, dest, Gen_Literal_TN( 1, 4 ), imtype, ops );
03421 
03422   TN* src1  = Build_TN_Like( src );
03423   if( Is_Target_SSE2() )
03424     Build_OP( is_double ? TOP_xorpd : TOP_xorps, src1, src, src, ops );
03425   else
03426     Build_OP( TOP_fldz, src1, ops );
03427 
03428   // Compare <src> with 0.0
03429   Build_OP( ( MTYPE_is_quad( imtype ) || !Is_Target_SSE2() )
03430       ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss ),
03431       rflags, src, src1, ops );
03432   
03433   Expand_Cmov( TOP_cmovbe, dest1, dest, rflags, ops );
03434 
03435   Expand_Int_To_Float( src1, dest, imtype, fmtype, ops );
03436 
03437   // Compare <src> with itself at the integer side.
03438   Build_OP( ( MTYPE_is_quad( imtype ) || !Is_Target_SSE2() )
03439       ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss ),
03440       rflags, src, src1, ops );
03441 
03442   Expand_Cmov( TOP_cmovne, dest, dest1, rflags, ops );
03443 
03444   if( dest != result )
03445     Exp_COPY( result, dest, ops );
03446 }
03447 
03448 
03449 void
03450 Expand_Float_To_Float (TN *dest, TN *src, TYPE_ID rtype, TYPE_ID desc, OPS *ops)
03451 {
03452   if( Is_Target_SSE2()        &&
03453       !MTYPE_is_quad( rtype ) &&
03454       !MTYPE_is_quad( desc ) ){
03455     if (!MTYPE_is_vector(rtype)){
03456 #ifdef KEY //bug 14346: fp-fp scalar conversion for barcelona is special
03457      if(Is_Target_Barcelona())
03458       Build_OP( (rtype == MTYPE_F8) ? TOP_cvtps2pd : TOP_cvtpd2ps,
03459     dest, src, ops );
03460      else
03461 #endif
03462       Build_OP( (rtype == MTYPE_F8) ? TOP_cvtss2sd : TOP_cvtsd2ss,
03463                 dest, src, ops );
03464     }
03465     else
03466       Build_OP( (rtype == MTYPE_V16F8) ? TOP_cvtps2pd : TOP_cvtpd2ps,
03467     dest, src, ops ); 
03468     return;
03469   }
03470 
03471   const TY_IDX ty = MTYPE_To_TY( !MTYPE_is_quad(rtype) ? rtype : desc );
03472   ST* st = Gen_Temp_Symbol( ty, "x87_cvt" );
03473   Allocate_Temp_To_Memory( st ); 
03474 
03475   ST* base_sym = NULL;
03476   INT64 base_ofst = 0;
03477 
03478   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03479   FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
03480        ("Expand_Float_To_Float: base symbol is not at stack") );
03481 
03482   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03483   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03484 
03485   if( MTYPE_is_quad( desc ) ){
03486     // long double -> float/double
03487     CGTARG_Store_To_Memory( src, st, ops );
03488     CGTARG_Load_From_Memory( dest, st, ops );
03489     
03490   } else if( MTYPE_is_quad( rtype ) ){
03491     // float/double -> long double
03492     CGTARG_Store_To_Memory( src, st, ops );
03493     CGTARG_Load_From_Memory( dest, st, ops );
03494 
03495   } else {
03496     // long double -> long double
03497     FmtAssert( TN_register_class(dest) == ISA_REGISTER_CLASS_x87,
03498          ("Expand_Float_To_Float: dest is not x87 register") );
03499     FmtAssert( TN_register_class(src)  == ISA_REGISTER_CLASS_x87,
03500          ("Expand_Float_To_Float: source is not x87 register") );
03501 
03502     Build_OP( rtype == MTYPE_F8 ? TOP_fstpl : TOP_fstps,
03503         src, base_tn, ofst_tn, ops );
03504 
03505     Build_OP( rtype == MTYPE_F8 ? TOP_fldl : TOP_flds,
03506         dest, base_tn, ofst_tn, ops );
03507   }
03508 }
03509 
03510 
/* Convert the unsigned 32-bit integer <src> to a float/double of type
 * <fmtype> in <dest> using the signed cvtsi2ss/cvtsi2sd conversion.
 *
 * Two basic blocks are created after Cur_BB.  The entry block converts
 * <src> as a signed value into a temporary and branches to the exit block
 * when <src> >= 0 as a signed 32-bit value.  The fall-through block adds
 * the constant built from 0x100000000 (2^32) to the temporary.  Cur_BB is
 * left pointing at the exit block, and the final move of the temporary
 * into <dest> is appended to <ops>.
 */
03511 static void Expand_Unsigned_Int_To_Float_m32( TN* dest,
03512                 TN* src,
03513                 TYPE_ID fmtype,
03514                 OPS* ops )
03515 {
03516   BB* bb_entry = Cur_BB;
03517   BB* bb_then  = Gen_And_Append_BB( bb_entry );  // for case MSB == 1
03518   TN* tmp_dest = Build_TN_Like( dest );
03519   const BOOL is_double = MTYPE_is_size_double(fmtype);
03520 
03521   BB* bb_exit  = Gen_And_Append_BB( bb_then );
03522   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
03523 
03524   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
03525   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
03526   WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
03527 
03528   // Build bb_entry
03529   {
    // Signed conversion first; it is already the answer when src >= 0.
03530     Build_OP( is_double ? TOP_cvtsi2sd : TOP_cvtsi2ss, tmp_dest, src, ops );
03531 
    // Skip bb_then when src >= 0 as a signed 32-bit value.
03532     Exp_OP3v( OPC_TRUEBR,
03533         NULL,
03534         Gen_Label_TN( bb_exit_label, 0 ),
03535         src,
03536         Gen_Literal_TN(0,4),
03537         V_BR_I4GE,
03538         ops );
03539 
    // Flush everything emitted so far into bb_entry and reset both lists.
03540     if( &New_OPs != ops )
03541       OPS_Append_Ops( &New_OPs, ops );
03542     Process_New_OPs();
03543     BB_Append_Ops( bb_entry, &New_OPs );
03544     OPS_Init( &New_OPs );
03545     OPS_Init( ops );
03546   }
03547 
03548   // Build bb_then here.
03549   {
03550     OPS* bb_then_ops = &New_OPs;
03551 
    // Constant 2^32 of type <fmtype>, placed in a constant symbol.
03552     TCON tcon = Host_To_Targ_Float( fmtype, 0x100000000LL );
03553     ST* sym = New_Const_Sym( Enter_tcon (tcon), Be_Type_Tbl( TCON_ty(tcon) ) );
03554     Allocate_Object( sym );
03555     ST* base_sym = NULL;
03556     INT64 base_ofst = 0;
03557     TN* tmp_c = Build_TN_Of_Mtype( fmtype );
03558 
03559     Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
03560     Build_OP( is_double ? TOP_ldsd_n32 : TOP_ldss_n32, tmp_c,
03561         Gen_Symbol_TN(base_sym, base_ofst, TN_RELOC_NONE), bb_then_ops );
    // tmp_dest += 2^32
03562     Expand_Flop( is_double ? OPC_F8ADD : OPC_F4ADD,
03563      tmp_dest, tmp_dest, tmp_c, NULL, bb_then_ops );
03564 
03565     total_bb_insts = 0;
03566     Last_Processed_OP = NULL;
03567     Process_New_OPs();
03568     BB_Append_Ops( bb_then, bb_then_ops );
03569     OPS_Init( bb_then_ops );
03570   }
03571 
  // Ops emitted from here on belong to the exit block.
03572   Cur_BB = bb_exit;
03573   Build_OP( is_double ? TOP_movsd : TOP_movss, dest, tmp_dest, ops );
03574 }
03575 
03576 
03577 static void Expand_Unsigned_Long_To_Float( TN* dest, TN* src, TYPE_ID mtype, OPS* ops )
03578 {
03579   const BOOL is_64bit = MTYPE_is_size_double(mtype);
03580   BB* bb_entry = Cur_BB;
03581   BB* bb_then    = Gen_And_Append_BB( bb_entry );  // for case MSB == 0
03582   BB* bb_else  = Gen_And_Append_BB( bb_then );     // for case MSB == 1
03583   const LABEL_IDX bb_else_label = Gen_Label_For_BB( bb_else );
03584 
03585   /* We need to insert a copy at the exit bb, since <dest> could be a
03586      dedicated reg. for return value, and it must be at a exit bb;
03587      otherwise, ebo will eliminate many of the useful ops.
03588   */
03589   TN* tmp_dest = dest;
03590 
03591   if( TN_is_dedicated(dest) ){
03592     tmp_dest = Build_TN_Like( dest );
03593   }
03594 
03595   //BB* bb_exit  = Start_New_Basic_Block();
03596   BB* bb_exit  = Gen_And_Append_BB( bb_else );
03597   const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
03598 
03599   BB_branch_wn(bb_then) = WN_Create(OPC_GOTO,0);
03600   WN_label_number(BB_branch_wn(bb_then)) = bb_exit_label;
03601 
03602   BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
03603   WN_kid0(BB_branch_wn(bb_entry)) = NULL;
03604   WN_label_number(BB_branch_wn(bb_entry)) = bb_else_label;
03605 
03606   // Build bb_entry
03607   {
03608     Exp_OP3v( OPC_TRUEBR,
03609         NULL,
03610         Gen_Label_TN( bb_else_label, 0 ),
03611         src,
03612         Gen_Literal_TN(0,4),
03613         V_BR_I8LT,
03614         ops );
03615 
03616     if( &New_OPs != ops )
03617       OPS_Append_Ops( &New_OPs, ops );
03618     Process_New_OPs();
03619     BB_Append_Ops( bb_entry, &New_OPs );
03620     OPS_Init( &New_OPs );
03621     OPS_Init( ops );
03622   }
03623 
03624   // Build bb_then here.
03625   {
03626     OPS* bb_then_ops = &New_OPs;
03627     Build_OP( is_64bit ? TOP_cvtsi2sdq : TOP_cvtsi2ssq, tmp_dest, src, bb_then_ops );
03628     Build_OP( TOP_jmp, Gen_Label_TN( bb_exit_label, 0 ), bb_then_ops );
03629 
03630     total_bb_insts = 0;
03631     Last_Processed_OP = NULL;
03632     Process_New_OPs();
03633     BB_Append_Ops( bb_then, bb_then_ops );
03634     OPS_Init( bb_then_ops );
03635   }
03636 
03637   // Build bb_else here.
03638   {
03639     OPS* bb_else_ops = &New_OPs;
03640     TN* tmp1 = Build_TN_Like(src);
03641     TN* tmp2 = Build_TN_Like(src);
03642     TN* tmp3 = Build_TN_Like(src);
03643     TN* dest1 = Build_TN_Like(tmp_dest);
03644 
03645     Build_OP( TOP_shri64, tmp1, src, Gen_Literal_TN( 1, 4 ), bb_else_ops );
03646     Build_OP( TOP_andi32, tmp2, src, Gen_Literal_TN( 1, 4 ), bb_else_ops );
03647     Build_OP( TOP_or64, tmp3, tmp1, tmp2, bb_else_ops );
03648     Build_OP( is_64bit ? TOP_cvtsi2sdq : TOP_cvtsi2ssq,
03649         dest1, tmp3, bb_else_ops );
03650     Build_OP( is_64bit ? TOP_addsd : TOP_addss,
03651         tmp_dest, dest1, dest1, bb_else_ops );
03652 
03653     total_bb_insts = 0;
03654     Last_Processed_OP = NULL;
03655     Process_New_OPs();
03656     BB_Append_Ops( bb_else, bb_else_ops );
03657     OPS_Init( bb_else_ops );
03658   }
03659 
03660   if( tmp_dest != dest ){
03661     Build_OP( mtype == MTYPE_F8 ? TOP_movsd : TOP_movss, dest, tmp_dest, ops );
03662   }
03663 
03664   Cur_BB = bb_exit;
03665 }
03666 
03667 
03668 static void Expand_Int_To_Long_Double( TN* result, TN* src,
03669                TYPE_ID imtype, TYPE_ID fmtype,
03670                OPS* ops )
03671 {
03672   TN* dest = result;
03673 
03674   if( TN_is_dedicated(result) ){
03675     dest = Build_TN_Like( result );
03676   }
03677 
03678   if( imtype == MTYPE_U4 ){
03679     imtype = MTYPE_I8;
03680     if (Is_Target_32bit()) {
03681       // Create the high 32 bits and rely on the fact that Expand_Split_Store
03682       // will automatically store the high part of the pair.  Bug 5688.
03683       TN *src_h = Create_TN_Pair(src, MTYPE_I8);
03684       Build_OP(TOP_ldc32, src_h, Gen_Literal_TN(0, 4), ops);
03685     } else {
03686       TN* tmp = Build_TN_Of_Mtype( imtype );
03687       Build_OP( TOP_mov32, tmp, src, ops );
03688       src = tmp;
03689     }
03690   }
03691 
03692   TY_IDX ty = MTYPE_To_TY( imtype );
03693   ST* st = Gen_Temp_Symbol( ty, "x87_cvt" );
03694   Allocate_Temp_To_Memory( st ); 
03695 
03696   ST* base_sym = NULL;
03697   INT64 base_ofst = 0;
03698 
03699   Base_Symbol_And_Offset_For_Addressing( st, 0, &base_sym, &base_ofst );
03700   FmtAssert( base_sym == SP_Sym || base_sym == FP_Sym,
03701        ("Expand_Int_To_Long_Double: base symbol is not at stack") );
03702 
03703   TN* base_tn = base_sym == SP_Sym ? SP_TN : FP_TN;
03704   TN* ofst_tn = Gen_Literal_TN( base_ofst, 4 );
03705 
03706   CGTARG_Store_To_Memory( src, st, ops );
03707 
03708   TOP top = TOP_UNDEFINED;
03709 
03710   switch( imtype ){
03711   case MTYPE_I2:   top = TOP_filds;   break;
03712   case MTYPE_I4:   top = TOP_fildl;   break;
03713   case MTYPE_U8:
03714   case MTYPE_I8:   top = TOP_fildll;  break;
03715   default:
03716     FmtAssert( false, ("Expand_Int_To_Long_Double: Unknown imtype") );
03717   }
03718 
03719   Build_OP( top, dest, base_tn, ofst_tn, ops );
03720 
03721   /* More work to do with unsigned long long -> long double.
03722    */
03723   if( imtype == MTYPE_U8 ){
03724     BB* bb_entry = Cur_BB;
03725     BB* bb_then = Gen_And_Append_BB( bb_entry );  // for src < 0
03726 
03727     BB* bb_exit  = Gen_And_Append_BB( bb_then );
03728     const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );
03729 
03730     BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
03731     WN_kid0(BB_branch_wn(bb_entry)) = NULL;
03732     WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;
03733 
03734     // Build bb_entry
03735     {
03736       if( &New_OPs != ops )      
03737   OPS_Append_Ops( &New_OPs, ops );
03738       Process_New_OPs();
03739       BB_Append_Ops( bb_entry, &New_OPs );
03740       OPS_Init( &New_OPs );
03741       OPS_Init( ops );
03742 
03743       ops = &New_OPs;
03744 
03745       TN* src_hi = OP_NEED_PAIR(imtype) ? Get_TN_Pair(src) : src;
03746       const VARIANT variant = TN_size(src_hi) > 4 ? V_BR_I8GE : V_BR_I4GE;
03747 
03748       Exp_OP3v( OPC_TRUEBR,
03749     NULL,
03750     Gen_Label_TN( bb_exit_label, 0 ),
03751     src_hi,
03752     Gen_Literal_TN(0,4),
03753     variant,
03754     ops );
03755 
03756       if( bb_entry != Cur_BB ){
03757   FmtAssert( OPS_length( ops ) == 0,
03758        ("Expand_Int_To_Long_Double: ops is not empty") );
03759   bb_entry = Cur_BB;
03760       }
03761 
03762       total_bb_insts = 0;
03763       Last_Processed_OP = NULL;
03764       Process_New_OPs();
03765       BB_Append_Ops( bb_entry, ops );
03766       OPS_Init( ops );
03767     }
03768 
03769     // Build bb_then here.
03770     {
03771       OPS* bb_then_ops = &New_OPs;
03772       TCON tcon = Host_To_Targ_Quad( 0 );
03773       TCON_u0( tcon ) = 0x0;
03774       TCON_u1( tcon ) = 0x80000000;
03775       TCON_u2( tcon ) = 0x403f;
03776       TCON_u3( tcon ) = 0x0;      
03777 
03778       ST* sym = New_Const_Sym( Enter_tcon(tcon),  Be_Type_Tbl( TCON_ty(tcon) ) );
03779 
03780       ST* base_sym = NULL;
03781       INT64 base_ofst = 0;
03782 
03783       Allocate_Object(sym);
03784       Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
03785 
03786       TN* max_value_tn = Build_TN_Like( dest );
03787       Expand_Const( max_value_tn, Gen_Symbol_TN( base_sym, base_ofst, TN_RELOC_NONE ),
03788         MTYPE_FQ, bb_then_ops );
03789 
03790       Build_OP( TOP_fadd, dest, dest, max_value_tn, bb_then_ops );
03791 
03792       total_bb_insts = 0;
03793       Last_Processed_OP = NULL;
03794       Process_New_OPs();
03795       BB_Append_Ops( bb_then, bb_then_ops );
03796       OPS_Init( bb_then_ops );
03797     }
03798 
03799     Cur_BB = bb_exit;
03800   }
03801 
03802   if( result != dest ){
03803     Exp_COPY( result, dest, ops );
03804   }
03805 }
03806 
03807 
03808 static void Expand_Long_To_Float_m32( TN* dest,
03809               TN* src,
03810               TYPE_ID imtype,
03811               TYPE_ID fmtype,
03812               OPS* ops )
03813 {
03814   FmtAssert( TN_register_class(dest) == ISA_REGISTER_CLASS_float,
03815        ("Expand_Long_To_Float_m32: dest is not float register") );
03816   /* TODO:
03817      Get rid of useless loads and stores. (A simple long long to float
03818      conversion can expose the awkwardness.)
03819   */
03820 
03821   TN* x87_dest = Build_TN_Of_Mtype( MTYPE_FQ );
03822   Expand_Int_To_Long_Double( x87_dest, src, imtype, MTYPE_FQ, ops );
03823   // Now, convert <x87_dest> to <dest>.
03824   Expand_Float_To_Float( dest, x87_dest, fmtype, MTYPE_FQ, ops );
03825 }
03826 
03827 
03828 void
03829 Expand_Int_To_Float (TN *dest, TN *src, TYPE_ID imtype, TYPE_ID fmtype, OPS *ops)
03830 {
03831   TOP top = TOP_UNDEFINED;
03832 
03833   if( MTYPE_is_quad( fmtype ) ){
03834     Expand_Int_To_Long_Double( dest, src, imtype, fmtype, ops );
03835     return;
03836   }
03837 
03838   /* Without the support of sse2 registers, the conversion from
03839      U8 to float need to be handled well to preserve the accuracy.
03840      (bug#2600)
03841   */
03842   if( !Is_Target_SSE2() ){
03843     TN* x87_dest =
03844       ( imtype == MTYPE_U8 ) ? Build_TN_Of_Mtype( MTYPE_FQ ) : dest;
03845     Expand_Int_To_Long_Double( x87_dest, src, imtype, MTYPE_FQ, ops );
03846 
03847     // Now, convert <x87_dest> to <dest>, if necessary.
03848     if( x87_dest != dest )
03849       Expand_Float_To_Float( dest, x87_dest, fmtype, MTYPE_FQ, ops );
03850 
03851     return;
03852   }
03853 
03854   if( fmtype == MTYPE_F4 ){ 
03855     if( MTYPE_bit_size(imtype) == 64 ){
03856 
03857       if( MTYPE_is_signed(imtype) ){
03858   if( Is_Target_32bit() ){
03859     Expand_Long_To_Float_m32( dest, src, imtype, fmtype, ops );
03860     return;
03861   }
03862 
03863   top = TOP_cvtsi2ssq;
03864 
03865       } else {
03866   if( Is_Target_32bit() ){
03867     Expand_Long_To_Float_m32( dest, src, imtype, fmtype, ops );
03868   } else {
03869     Expand_Unsigned_Long_To_Float( dest, src, fmtype, ops );
03870   }
03871 
03872   return;
03873       }
03874 
03875     } else if( MTYPE_bit_size(imtype) == 32 ){
03876       if( MTYPE_is_signed(imtype) )
03877 #ifdef KEY //cvt signed integer to single precision scalar
03878        if(Is_Target_Barcelona()){
03879          TN *tmp_dest = Build_TN_Like(dest);
03880          Build_OP(TOP_movg2x, tmp_dest, src, ops);
03881          src = tmp_dest;
03882    top = TOP_cvtdq2ps;
03883        }
03884        else
03885 #endif 
03886         top = TOP_cvtsi2ss;
03887 
03888       else {
03889   if( Is_Target_32bit() ){
03890     Expand_Unsigned_Int_To_Float_m32( dest, src, fmtype, ops );
03891     return;
03892   }
03893 
03894         TN *tmp = Build_TN_Of_Mtype(MTYPE_I8);      
03895   Build_OP(TOP_mov32, tmp, src, ops);
03896   src = tmp;
03897         top = TOP_cvtsi2ssq;       
03898       }
03899     }
03900 
03901   } else if (fmtype == MTYPE_F8) {
03902     if( MTYPE_bit_size(imtype) == 64 ){
03903       if( MTYPE_is_signed(imtype) ){
03904   if( Is_Target_32bit() ){
03905     Expand_Long_To_Float_m32( dest, src, imtype, fmtype, ops );
03906     return;
03907   }
03908 
03909   top = TOP_cvtsi2sdq;
03910 
03911       } else {
03912   if( Is_Target_32bit() ){
03913     Expand_Long_To_Float_m32( dest, src, imtype, fmtype, ops );
03914   } else {
03915     Expand_Unsigned_Long_To_Float( dest, src, fmtype, ops );
03916   }
03917 
03918   return;
03919       }
03920 
03921     } else {
03922       FmtAssert( MTYPE_bit_size(imtype) == 32,
03923      ("Expand_Int_To_Float: size of imtype is not 32-bit-long") );
03924 
03925       if( MTYPE_is_signed(imtype) ){
03926 #ifdef KEY
03927         if(Is_Target_Barcelona()){
03928          TN *tmp_dest = Build_TN_Like(dest);
03929          Build_OP(TOP_movg2x, tmp_dest, src, ops); 
03930          src = tmp_dest;
03931          top = TOP_cvtdq2pd;
03932         }else
03933 #endif
03934          top = TOP_cvtsi2sd;
03935 
03936       } else {
03937   if( Is_Target_32bit() ){
03938     Expand_Unsigned_Int_To_Float_m32( dest, src, fmtype, ops );
03939     return;
03940   }
03941 
03942   TN *tmp = Build_TN_Of_Mtype(MTYPE_I8);
03943   Build_OP(TOP_mov32, tmp, src, ops);
03944   src = tmp;
03945         top = TOP_cvtsi2sdq;
03946       }
03947     }
03948 
03949   } else if (fmtype == MTYPE_V16F8) {
03950     // imtype == V16I8: bug 3082 workaround
03951     if (imtype == MTYPE_V16I4 || imtype == MTYPE_V8I4 || imtype == MTYPE_V16I8)
03952       top = TOP_cvtdq2pd;
03953     else if (imtype == MTYPE_U8 || imtype == MTYPE_I8)
03954       top = TOP_cvtsi2sdq; // bug 3082 workaround, others should not reach here
03955   } else if (fmtype == MTYPE_V16F4) {
03956     if (imtype == MTYPE_V16I4)
03957       top = TOP_cvtdq2ps;    
03958     else if (imtype == MTYPE_U8 || imtype == MTYPE_I8)
03959       top = TOP_cvtsi2sdq; // bug 3082 workaround, others should not reach here
03960   }
03961 
03962   FmtAssert( top != TOP_UNDEFINED, ("Expand_Int_To_Float: Undefined opcode") );
03963 
03964   Build_OP( top, dest, src, ops );
03965 }
03966 
03967 
03968 static BOOL
03969 Optimize_Select (
03970   TOP cmp,
03971     TN *cond1, 
03972     TN *cond2, 
03973     TN *dest, 
03974     TN *dest2,
03975     TN *src1, 
03976     TN *src2, 
03977   BOOL is_float,
03978   OPS *ops)
03979 {
03980   ErrMsg( EC_Unimplemented, "Optimize_Select: NYI" );
03981   return FALSE;
03982 }
03983 
03984 
03985 static void Expand_Compare_And_Select ( TOP cmp,
03986           TN *cond1, 
03987           TN *cond2, 
03988           TN *dest, 
03989           TN *opposite_dest, 
03990           TN *true_tn, 
03991           TN *false_tn, 
03992           BOOL is_float,
03993           OPS *ops)
03994 {
03995   ErrMsg( EC_Unimplemented, "Expand_Compare_And_Select: NYI" );
03996 }
03997 
03998 void
03999 Expand_Select (
04000   TN *dest_tn, 
04001   TN *cond_tn, 
04002   TN *true_tn, 
04003   TN *false_tn, 
04004   TYPE_ID mtype, 
04005   BOOL float_cond,
04006   OPS *ops)
04007 {
04008   Is_True( TN_register_class(cond_tn) == ISA_REGISTER_CLASS_integer,
04009      ("Handle this case in Expand_Select") );
04010   const BOOL non_sse2_fp = MTYPE_is_quad(mtype) ||
04011     ( MTYPE_is_float(mtype) && !Is_Target_SSE2() );
04012 
04013   if( dest_tn == false_tn ){
04014     TN* tmp = Build_TN_Like( false_tn );
04015     Expand_Copy( tmp, false_tn, mtype, ops );
04016     false_tn = tmp;
04017   }
04018 
04019   if( dest_tn == true_tn ){
04020     TN* tmp = Build_TN_Like( true_tn );
04021     Expand_Copy( tmp, true_tn, mtype, ops );
04022     true_tn = tmp;
04023   }
04024 
04025   if( dest_tn == cond_tn ){ // bug 13180
04026     TN* tmp = Build_TN_Like( cond_tn );
04027     Expand_Copy( tmp, cond_tn, mtype, ops );
04028     cond_tn = tmp;
04029   }
04030 
04031   if (non_sse2_fp ||
04032       (TN_register_class(dest_tn) == ISA_REGISTER_CLASS_integer)) {
04033 
04034     // First, assign <true_tn> to <dest_tn>.
04035     Expand_Copy( dest_tn, true_tn, mtype, ops );
04036     
04037     // Next, check whether <cond_tn> is 0 to set rflags
04038     TN *p = Rflags_TN();
04039     Build_OP( (TN_size(cond_tn) == 8) ? TOP_test64:TOP_test32,
04040         p, cond_tn, cond_tn, ops );
04041     
04042     // Now, use the rflags to conditionally move if <cond_tn> is 0
04043     TN *dest_tn_hi = NULL;
04044     TN *false_tn_hi = NULL;
04045     if (OP_NEED_PAIR(mtype)) {
04046       TN *true_tn_hi = Get_TN_Pair(true_tn);
04047       dest_tn_hi = Get_TN_Pair(dest_tn);
04048       false_tn_hi = Get_TN_Pair(false_tn);
04049 #if 0 // bug 11709: not needed because last Expand_Copy includes the hi part
04050       Expand_Copy(dest_tn_hi, true_tn_hi, mtype, ops );
04051 #endif
04052     }
04053     Expand_Cmov(non_sse2_fp ? TOP_fcmove : TOP_cmove, dest_tn, false_tn, p,
04054     ops, dest_tn_hi, false_tn_hi);
04055   } else if (TN_register_class(dest_tn) == ISA_REGISTER_CLASS_float) {
04056     // SSE2 floats, intergral vectors
04057     TN *tmp3 = Build_TN_Like(dest_tn);
04058     TN *tmp4 = Build_TN_Like(dest_tn);
04059     TN *tmp5 = Build_TN_Like(dest_tn);
04060 
04061     // Need to generate a constant of size dest_tn
04062     BOOL is_double = (TN_size(dest_tn) == 8); 
04063     TYPE_ID imtype = is_double ? MTYPE_I8 :MTYPE_I4;
04064     TYPE_ID fmtype = is_double ? MTYPE_F8 :MTYPE_F4;
04065 
04066     TN* tmp1 = Build_TN_Of_Mtype( imtype );
04067     TN* tmp2 = Build_TN_Of_Mtype( imtype );
04068 
04069     Expand_Shift (tmp1, cond_tn, Gen_Literal_TN(is_double?63:31, 4), 
04070       is_double?MTYPE_I8:MTYPE_I4, shift_left, ops);
04071     Expand_Shift (tmp2, tmp1, Gen_Literal_TN(is_double?63:31, 4), 
04072       is_double?MTYPE_I8:MTYPE_I4, shift_aright, ops);
04073     /* Don't use Expand_Int_To_Float, which will convert the all 1's
04074        value to fp format. */
04075 #if 0
04076     Build_OP( TOP_movg2x, tmp3, tmp2, ops );
04077 #else
04078     //TY_IDX ty = Spill_Int_Type;
04079     TY_IDX ty = MTYPE_To_TY( imtype );
04080     ST* st = Gen_Temp_Symbol( ty, "movd" );
04081     Allocate_Temp_To_Memory( st );
04082 
04083     //TYPE_ID imtype = TY_mtype(ST_type(st));
04084     Exp_Store( imtype, tmp2, st, 0, ops, 0 );
04085     Exp_Load( fmtype, fmtype, tmp3, st, 0, ops, 0 );
04086 #endif
04087     Build_OP( is_double ? TOP_andpd : TOP_andps, tmp4, true_tn, tmp3, ops );
04088     Build_OP( is_double ? TOP_andnpd : TOP_andnps, tmp5, tmp3, false_tn, ops );
04089     Build_OP( is_double ? TOP_orpd : TOP_orps, dest_tn, tmp5, tmp4, ops );
04090 
04091   } else {
04092     FmtAssert(FALSE, ("Handle this case"));
04093   }
04094 }
04095   
04096 void
04097 Expand_Min (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
04098 {
04099   Is_True( !TN_has_value( src1 ), ("Expand_Min: src1 has value") );
04100 
04101   if( OP_NEED_PAIR(mtype) ){
04102     if( dest == src2 ){
04103       TN* tmp = Build_TN_Like( src2 );
04104       Expand_Copy( tmp, src2, mtype, ops );
04105       src2 = tmp;
04106     }
04107 
04108     Expand_Split_Select( dest,
04109        OPR_LE,
04110        TN_has_value(src2) ? TOP_cmpi64 : TOP_cmp64,
04111        src1, src2, mtype,  /* cmp kids       */
04112        src1, src2, mtype,  /* true and false */
04113        ops );
04114 
04115   } else if( MTYPE_is_float(mtype) &&
04116        Is_Target_SSE2()      &&
04117        !MTYPE_is_quad( mtype ) ){
04118     switch(mtype) {
04119     case MTYPE_V16F4:
04120       Build_OP( TOP_fmin128v32, dest, src1, src2, ops );      
04121       break;
04122     case MTYPE_V16F8:
04123       Build_OP( TOP_fmin128v64, dest, src1, src2, ops );      
04124       break;
04125     default:
04126       Build_OP( mtype == MTYPE_F8 ? TOP_minsd : TOP_minss, 
04127     dest, src1, src2, ops );
04128       break;
04129     }
04130 
04131   } else if ( MTYPE_is_vector(mtype) ) { // Integer MIN      
04132       TN *tmp1 = Build_TN_Like(src1); 
04133       TN *tmp2 = Build_TN_Like(src1); 
04134       TN *tmp3 = Build_TN_Like(src1); 
04135       TN *tmp4 = Build_TN_Like(src1); 
04136       TN *tmp5 = Build_TN_Like(src1); 
04137       TN *tmp6 = Build_TN_Like(src1); 
04138       Build_OP( TOP_movdq, tmp1, src1, ops );
04139       Build_OP( TOP_movdq, tmp2, tmp1, ops );
04140       Build_OP( TOP_movdq, tmp3, src2, ops );
04141       switch(mtype){
04142         case MTYPE_V16I1: //added for bug 5695, refer to 8676
04143             Build_OP( TOP_xor128v8, tmp4, tmp1, tmp3, ops );
04144             Build_OP( TOP_cmpgt128v8, tmp5, tmp3, tmp2, ops );
04145             Build_OP( TOP_and128v8, tmp6, tmp5, tmp4, ops );
04146             Build_OP( TOP_xor128v8, dest, tmp6, tmp3, ops );
04147             break;
04148         case MTYPE_V16I2: //added for bug 5695, refer to 8676
04149             Build_OP( TOP_xor128v16, tmp4, tmp1, tmp3, ops );
04150             Build_OP( TOP_cmpgt128v16, tmp5, tmp3, tmp2, ops );
04151             Build_OP( TOP_and128v16, tmp6, tmp5, tmp4, ops );
04152             Build_OP( TOP_xor128v16, dest, tmp6, tmp3, ops );
04153             break;
04154         case MTYPE_V16I4: 
04155             Build_OP( TOP_xor128v32, tmp4, tmp1, tmp3, ops );
04156             Build_OP( TOP_cmpgt128v32, tmp5, tmp3, tmp2, ops );
04157             Build_OP( TOP_and128v32, tmp6, tmp5, tmp4, ops );
04158             Build_OP( TOP_xor128v32, dest, tmp6, tmp3, ops );
04159             break;
04160          default:
04161             FmtAssert(FALSE, ("NYI"));
04162             break;
04163       }//end switch
04164   } else {
04165     const BOOL is_64bit = MTYPE_is_size_double(mtype);
04166 
04167     TOP cmp_opcode =
04168       MTYPE_is_float(mtype) ? TOP_fucomi : ( is_64bit ? TOP_cmp64 : TOP_cmp32 );
04169     TOP mov_opcode =
04170       MTYPE_is_float(mtype) ? TOP_fmov : ( is_64bit ? TOP_mov64 : TOP_mov32 );
04171     const TOP cmov_opcode =
04172       MTYPE_is_float(mtype) ? TOP_fcmovbe : ( MTYPE_is_signed(mtype) ? TOP_cmovle : TOP_cmovbe );
04173     TN* rflags = Rflags_TN();
04174 
04175     if( TN_has_value( src2 ) ){
04176       cmp_opcode = is_64bit ? TOP_cmpi64 : TOP_cmpi32;
04177       mov_opcode = is_64bit ? TOP_ldc64 : TOP_ldc32;
04178     }
04179 
04180     if( dest == src1 ){
04181       TN* tmp = src1;
04182       src1 = src2;
04183       src2 = tmp;
04184     }
04185 
04186     // OSP, laijx
04187     // If is return reg and target does not support cmov,
04188     // The MIN will be expand to multiple BBs
04189     // Store the return of MIN(which is also the return of the func)
04190     //      to a temp reg, then copy the temp reg to return reg
04191     TN* orig_dest = NULL;
04192     if( ! Target_Support_Cmov() &&
04193          TN_is_dedicated(dest) &&
04194          REGISTER_SET_MemberP(
04195            REGISTER_CLASS_function_value(TN_register_class(dest)),
04196              TN_register(dest) ) ) {
04197       orig_dest = dest;
04198       dest = Dup_TN_Even_If_Dedicated(orig_dest);
04199     }
04200 
04201     if( dest != src2 )
04202       Build_OP( mov_opcode, dest, src2, ops );
04203 
04204     Build_OP( cmp_opcode, rflags, src1, src2, ops );
04205     Expand_Cmov( cmov_opcode, dest, src1, rflags, ops );
04206 
04207     // OSP, laijx
04208     if( orig_dest != NULL ) {
04209       Expand_Copy( orig_dest, dest, mtype, ops );
04210     }
04211   
04212   }
04213 }
04214 
04215 void
04216 Expand_Max (TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
04217 {     
04218   Is_True( !TN_has_value( src1 ), ("Expand_Max: src1 has value") );
04219 
04220   if( OP_NEED_PAIR(mtype) ){
04221     if( dest == src2 ){
04222       TN* tmp = Build_TN_Like( src2 );
04223       Expand_Copy( tmp, src2, mtype, ops );
04224       src2 = tmp;
04225     }
04226 
04227     Expand_Split_Select( dest,
04228        OPR_GE,
04229        TN_has_value(src2) ? TOP_cmpi64 : TOP_cmp64,
04230        src1, src2, mtype,  /* cmp kids       */
04231        src1, src2, mtype,  /* true and false */
04232        ops );
04233 
04234   } else if( MTYPE_is_float(mtype) &&
04235        Is_Target_SSE2()      &&
04236        !MTYPE_is_quad(mtype) ){
04237     switch(mtype) {
04238     case MTYPE_V16F4:
04239       Build_OP( TOP_fmax128v32, dest, src1, src2, ops );      
04240       break;
04241     case MTYPE_V16F8:
04242       Build_OP( TOP_fmax128v64, dest, src1, src2, ops );      
04243       break;
04244     default:
04245       Build_OP( mtype == MTYPE_F8 ? TOP_maxsd : TOP_maxss, 
04246     dest, src1, src2, ops );
04247       break;
04248     }
04249 
04250   } else if ( MTYPE_is_vector(mtype) ) { // Integer MAX
04251       TN *tmp1 = Build_TN_Like(src1); 
04252       TN *tmp2 = Build_TN_Like(src1); 
04253       TN *tmp3 = Build_TN_Like(src1); 
04254       TN *tmp4 = Build_TN_Like(src1); 
04255       TN *tmp5 = Build_TN_Like(src1); 
04256       TN *tmp6 = Build_TN_Like(src1); 
04257       Build_OP( TOP_movdq, tmp1, src1, ops );
04258       Build_OP( TOP_movdq, tmp2, tmp1, ops );
04259       Build_OP( TOP_movdq, tmp3, src2, ops );
04260       switch(mtype){
04261         case MTYPE_V16I1: 
04262             Build_OP( TOP_xor128v8, tmp4, tmp1, tmp3, ops );
04263             Build_OP( TOP_cmpgt128v8, tmp5, tmp2, tmp3, ops );
04264             Build_OP( TOP_and128v8, tmp6, tmp5, tmp4, ops );
04265             Build_OP( TOP_xor128v8, dest, tmp6, tmp3, ops );
04266             break;
04267         case MTYPE_V16I2: 
04268       Build_OP( TOP_xor128v16, tmp4, tmp1, tmp3, ops );
04269             Build_OP( TOP_cmpgt128v16, tmp5, tmp2, tmp3, ops );
04270             Build_OP( TOP_and128v16, tmp6, tmp5, tmp4, ops );
04271             Build_OP( TOP_xor128v16, dest, tmp6, tmp3, ops );
04272             break;
04273         case MTYPE_V16I4: 
04274       Build_OP( TOP_xor128v32, tmp4, tmp1, tmp3, ops );
04275       Build_OP( TOP_cmpgt128v32, tmp5, tmp2, tmp3, ops );
04276       Build_OP( TOP_and128v32, tmp6, tmp5, tmp4, ops );
04277       Build_OP( TOP_xor128v32, dest, tmp6, tmp3, ops );
04278             break;
04279          default:
04280             FmtAssert(FALSE, ("NYI"));
04281             break;
04282       }//end switch
04283   } else {
04284     const BOOL is_64bit = MTYPE_is_size_double(mtype);
04285     TN* rflags = Rflags_TN();
04286 
04287     TOP cmp_opcode = MTYPE_is_float(mtype)
04288       ? TOP_fucomi : ( is_64bit ? TOP_cmp64 : TOP_cmp32 );
04289     const TOP cmov_opcode = MTYPE_is_float(mtype)
04290       ? TOP_fcmovnb : ( MTYPE_is_signed(mtype) ? TOP_cmovge : TOP_cmovae );
04291 
04292     if( TN_has_value( src2 ) ){
04293       cmp_opcode = is_64bit ? TOP_cmpi64 : TOP_cmpi32;
04294     }
04295 
04296     if( dest == src1 ){
04297       TN* tmp = src1;
04298       src1 = src2;
04299       src2 = tmp;
04300     }
04301 
04302     // OSP, laijx
04303     // If is return reg and target does not support cmov,
04304     // The MAX will be expand to multiple BBs
04305     // Store the return of MAX(which is also the return of the func)
04306     //      to a temp reg, then copy the temp reg to return reg
04307     TN* orig_dest = NULL;
04308     if( ! Target_Support_Cmov() &&
04309          TN_is_dedicated(dest) &&
04310          REGISTER_SET_MemberP(
04311            REGISTER_CLASS_function_value(TN_register_class(dest)),
04312              TN_register(dest) ) ) {
04313       orig_dest = dest;
04314       dest = Dup_TN_Even_If_Dedicated(orig_dest);
04315     }
04316 
04317     if( dest != src2 ){
04318       Expand_Copy( dest, src2, mtype, ops );
04319     }
04320 
04321     Build_OP( cmp_opcode, rflags, src1, src2, ops );
04322     Expand_Cmov( cmov_opcode, dest, src1, rflags, ops );
04323 
04324     // OSP, laijx
04325     if( orig_dest != NULL ) {
04326       Expand_Copy( orig_dest, dest, mtype, ops );
04327     }
04328   
04329   }
04330 }
04331 
04332 void
04333 Expand_MinMax (TN *dest_min, TN *dest_max,
04334          TN *src1, TN *src2,
04335          TYPE_ID mtype, OPS *ops)
04336 { 
04337   Is_True( !TN_has_value( src1 ), ("Expand_MinMax: src1 has value") );
04338 
04339   if( dest_min == src1 || dest_max == src1 ){
04340     TN* tmp = Build_TN_Like( src1 );
04341     Expand_Copy( tmp, src1, mtype, ops );
04342     src1 = tmp;
04343   }
04344 
04345   if( dest_min == src2 || dest_max == src2 ){
04346     TN* tmp = Build_TN_Like( src2 );
04347     Expand_Copy( tmp, src2, mtype, ops );
04348     src2 = tmp;
04349   }
04350 
04351   if( OP_NEED_PAIR(mtype) ){
04352     Expand_Min( dest_min, src1, src2, mtype, ops );
04353     Expand_Max( dest_max, src1, src2, mtype, ops );
04354 
04355   } else if( MTYPE_is_float(mtype) &&
04356        Is_Target_SSE2()      &&
04357        !MTYPE_is_quad(mtype) ){
04358     switch(mtype) {
04359     case MTYPE_V16F4:
04360       Build_OP( TOP_fmin128v32, dest_min, src1, src2, ops );      
04361       Build_OP( TOP_fmax128v32, dest_max, src1, src2, ops );      
04362       break;
04363     case MTYPE_V16F8:
04364       Build_OP( TOP_fmin128v64, dest_min, src1, src2, ops );      
04365       Build_OP( TOP_fmax128v64, dest_max, src1, src2, ops );      
04366       break;
04367     default:
04368       Build_OP( mtype == MTYPE_F8 ? TOP_minsd : TOP_minss, 
04369     dest_min, src1, src2, ops );
04370       Build_OP( mtype == MTYPE_F8 ? TOP_maxsd : TOP_maxss, 
04371     dest_max, src1, src2, ops );
04372       break;
04373     }
04374 
04375   } else if ( MTYPE_is_vector(mtype) ) { // Integer MINMAX
04376     if (mtype == MTYPE_V16I4) {
04377       TN *tmp1 = Build_TN_Like(src1); 
04378       TN *tmp2 = Build_TN_Like(src1); 
04379       TN *tmp3 = Build_TN_Like(src1); 
04380       TN *tmp4 = Build_TN_Like(src1); 
04381       TN *tmp5 = Build_TN_Like(src1); 
04382       TN *tmp6 = Build_TN_Like(src1); 
04383       TN *tmp7 = Build_TN_Like(src1); 
04384       
04385       Build_OP( TOP_movdq, tmp1, src1, ops );
04386       Build_OP( TOP_movdq, tmp2, tmp1, ops );
04387       Build_OP( TOP_movdq, tmp3, tmp1, ops );      
04388       Build_OP( TOP_movdq, tmp4, src2, ops );
04389       Build_OP( TOP_cmpgt128v32, tmp5, tmp2, tmp4, ops );
04390       Build_OP( TOP_xor128v32, tmp6, tmp3, tmp4, ops );
04391       Build_OP( TOP_and128v32, tmp7, tmp5, tmp6, ops );
04392       Build_OP( TOP_xor128v32, dest_max, tmp4, tmp7, ops );
04393       Build_OP( TOP_xor128v32, dest_min, tmp1, tmp7, ops );
04394     } else
04395       FmtAssert(FALSE, ("NYI"));
04396 
04397   } else {
04398     const BOOL is_64bit = MTYPE_is_size_double(mtype);
04399 
04400     TOP cmp_opcode =
04401       MTYPE_is_float(mtype) ? TOP_fucomi : ( is_64bit ? TOP_cmp64 : TOP_cmp32 );
04402     TOP mov_opcode =
04403       MTYPE_is_float(mtype) ? TOP_fmov : ( is_64bit ? TOP_mov64 : TOP_mov32 );
04404     TN* rflags = Rflags_TN();
04405 
04406     if( TN_has_value( src2 ) ){
04407       cmp_opcode = is_64bit ? TOP_cmpi64 : TOP_cmpi32;
04408       mov_opcode = is_64bit ? TOP_ldc64 : TOP_ldc32;
04409     }
04410 
04411     Build_OP( mov_opcode, dest_min, src2, ops );
04412     Build_OP( mov_opcode, dest_max, src1, ops );
04413     Build_OP( cmp_opcode, rflags, src1, src2, ops );
04414 
04415     const TOP cmov_opcode = MTYPE_is_float(mtype)
04416       ? TOP_fcmovb : ( MTYPE_is_signed(mtype) ? TOP_cmovl : TOP_cmovb );
04417     Expand_Cmov( cmov_opcode, dest_min, src1, rflags, ops, dest_max, src2 );
04418   }
04419 }
04420 
04421 /* check whether to eval condition before select */
04422 extern BOOL
04423 Check_Select_Expansion (OPCODE compare)
04424 {
04425   // in order to get optimal code,
04426   // don't evaluate the condition first,
04427   // but pass the condition and kids to exp_select,
04428   // which will do the compare and use the predicate results.
04429   return FALSE;
04430 }
04431 
04432 static void 
04433 Expand_Ordered_Select_Compare ( OPS* ops, TOP cond_move )
04434 {
04435   FmtAssert(OPS_length(ops) == 3 || OPS_length(ops) == 5,
04436       ("Expand_Ordered_Select_Compare: wrong ops length"));
04437 
04438   if ( cond_move == TOP_cmova || cond_move == TOP_cmovae )
04439     // We are as good before. 
04440     return;
04441 
04442   if( TOP_is_x87( cond_move ) ){
04443     DevWarn( "Expand_Ordered_Select_Compare: %s is not supported yet.\n",
04444             TOP_Name(cond_move) );
04445     return;
04446   }
04447 
04448   // Collect the compare and select operands from ops
04449   TN *cmp_kid1, *cmp_kid2, *true_tn, *false_tn, *result, *tmp;
04450   TN *result_hi = NULL;
04451   TN *true_tn_hi = NULL;
04452   TN *false_tn_hi = NULL;
04453   OP* init_op = OPS_first ( ops );
04454   OP* cmp_op  = init_op->next;
04455   OP* cmov_op = OPS_last ( ops );
04456   TOP cmp_opcode = OP_code ( cmp_op );
04457   BOOL need_pair = FALSE;
04458 
04459   if (OPS_length(ops) == 5) {
04460     need_pair = TRUE;
04461     cmp_op  = init_op->next->next;
04462     cmov_op = cmp_op->next;
04463     cmp_opcode = OP_code ( cmp_op );
04464   }
04465 
04466   TOP init_opcode = OP_code ( init_op );
04467   TN* rflags = Rflags_TN();
04468   TOP new_cond_move = TOP_UNDEFINED;
04469 
04470   false_tn = OP_opnd(init_op, 0);
04471   true_tn =  OP_opnd(cmov_op, 0);
04472   cmp_kid1 = OP_opnd(cmp_op, 0);
04473   cmp_kid2 = OP_opnd(cmp_op, 1);
04474   result   = OP_result(init_op, 0);
04475   
04476   OPS_Remove_All(ops);
04477 
04478   switch (cond_move) {
04479   case TOP_cmove: 
04480     {
04481       new_cond_move = TOP_cmovne; 
04482       // Interchange
04483       tmp = true_tn;
04484       true_tn = false_tn;
04485       false_tn = tmp;
04486       break;
04487     }
04488   case TOP_cmovne: new_cond_move = TOP_cmovne; break;
04489   case TOP_cmovb:  new_cond_move = TOP_cmova; break;
04490   case TOP_cmovbe: new_cond_move = TOP_cmovae; break;
04491   default:
04492     FmtAssert( FALSE,
04493          ("Expand_Ordered_Select_Compare: unsupported opcode (%s)",
04494     TOP_Name(cond_move)) );
04495   }
04496 
04497   if (need_pair) {
04498     result_hi = Get_TN_Pair( result );
04499     true_tn_hi = Get_TN_Pair( true_tn );
04500     false_tn_hi = Get_TN_Pair( false_tn );
04501   }
04502 
04503   Build_OP ( init_opcode, result, false_tn, ops );
04504   if (need_pair)
04505     Build_OP ( init_opcode, result_hi, false_tn_hi, ops );
04506   Build_OP ( cmp_opcode, rflags, cmp_kid2, cmp_kid1, ops );
04507   Expand_Cmov(new_cond_move, result, true_tn, rflags, ops, result_hi,
04508         true_tn_hi);
04509   if ( new_cond_move == TOP_cmovne ) {
04510     Expand_Cmov(TOP_cmovp, result, true_tn, rflags, ops, result_hi, true_tn_hi);
04511   }    
04512 
04513   return;
04514 }
04515 
04516 
/*  <result> = <cmp_kid1> <compare> <cmp_kid2> ? <true_tn> : <false_tn>
 *
 *  Implements the select with a branch instead of a conditional move:
 *    bb_entry:  result = true_tn
 *               TRUEBR (cmp_kid1 <variant> cmp_kid2) -> bb_exit
 *    bb_then:   result = false_tn
 *    bb_exit:   dest = result   (only when a temporary was needed)
 *
 *  dest         destination TN; when it is a dedicated TN a temporary is
 *               used inside the diamond and copied to dest at the end
 *  variant      compare variant handed to the OPC_TRUEBR expansion
 *  select_type  mtype used for every copy
 *  ops          OP list being built; its contents are flushed into
 *               bb_entry and it is re-initialised
 *
 *  Side effects: creates two new BBs after Cur_BB, appends OPs to them,
 *  resets New_OPs, total_bb_insts and Last_Processed_OP, and leaves
 *  Cur_BB pointing at bb_exit.
 */
static void Expand_non_SSE2_Float_Select( TN* dest, VARIANT variant,
            TN* cmp_kid1, TN* cmp_kid2,
            TN* true_tn, TN* false_tn, TYPE_ID select_type,
            OPS* ops )
{
  TN* result = dest;

  // Work on a fresh TN when the destination is a dedicated TN.
  if( TN_is_dedicated(dest) ){
    result = Build_TN_Like( dest );
  }

  // Start out assuming the condition holds.
  Expand_Copy( result, true_tn, select_type, ops );

  BB* bb_entry  = Cur_BB;
  BB* bb_then = Gen_And_Append_BB( bb_entry );
  BB* bb_exit = Gen_And_Append_BB( bb_then );

  const LABEL_IDX bb_exit_label = Gen_Label_For_BB( bb_exit );

  // Record a TRUEBR whirl node (without a condition kid) targeting
  // bb_exit as the branch of bb_entry.
  BB_branch_wn(bb_entry) = WN_Create(OPC_TRUEBR,1);
  WN_kid0(BB_branch_wn(bb_entry)) = NULL;
  WN_label_number(BB_branch_wn(bb_entry)) = bb_exit_label;

  // build bb_entry
  {
    // Conditional branch to bb_exit on <cmp_kid1> <variant> <cmp_kid2>.
    Exp_OP3v( OPC_TRUEBR,
        NULL,
        Gen_Label_TN( bb_exit_label, 0 ),
        cmp_kid1,
        cmp_kid2,
        variant,
        ops );

    // Everything generated so far belongs to bb_entry; move it there and
    // start again with empty lists.
    if( &New_OPs != ops )
      OPS_Append_Ops( &New_OPs, ops );
    Process_New_OPs();
    BB_Append_Ops( bb_entry, &New_OPs );
    OPS_Init( &New_OPs );
    OPS_Init( ops );    
  }

  // Build bb_then here if <cmp_kid1> <compare> <cmp_kid2> is FALSE.
  {
    OPS* bb_then_ops = &New_OPs;
    Expand_Copy( result, false_tn, select_type, bb_then_ops );

    // Reset the per-BB processing state before handling the new block's OPs.
    total_bb_insts = 0;
    Last_Processed_OP = NULL;
    Process_New_OPs();
    BB_Append_Ops( bb_then, bb_then_ops );
    OPS_Init( bb_then_ops );    
  }

  // Subsequent OPs are emitted into the join block.
  Cur_BB = bb_exit;

  if( result != dest ){
    Expand_Copy( dest, result, select_type, ops );
  }
}
04578 
04579 //implemented to handle storing vectorized floating-point comparison
04580 //to a preg for bug 11088
04581 extern void 
04582 Exp_Stid_And_VComp(
04583         OPCODE stid, TN *result, TN *cmp_kid1, TN *cmp_kid2,
04584         OPCODE compare, OPS *ops)
04585 {
04586   OPS new_ops = OPS_EMPTY;
04587   const TYPE_ID desc = OPCODE_desc(compare);
04588   const TYPE_ID rtype =OPCODE_rtype(compare);
04589   const TYPE_ID stid_desc = OPCODE_desc(stid);
04590   
04591   FmtAssert(MTYPE_is_integral(rtype) && MTYPE_is_float(desc)
04592             && MTYPE_is_vector(rtype) && MTYPE_is_vector(desc),
04593             ("Exp_Stid_And_VComp: comparison type not handled")); 
04594 
04595   FmtAssert( stid_desc==MTYPE_V16I4 || stid_desc==MTYPE_V16I8,
04596                  ("Exp_Stid_And_VComp: store type not handled"));
04597 
04598   const OPERATOR compare_opr = OPCODE_operator(compare);
04599   TN* ctrl = Generate_Cmp_Ctrl_TN( compare_opr );  
04600 
04601   Build_OP( (stid_desc == MTYPE_V16I8 ) ? TOP_cmppd : TOP_cmpps,
04602                result, cmp_kid1, cmp_kid2, ctrl, ops);
04603 
04604 }
04605 
04606 //implemented to handle select with vectorized ldid as condition
04607 //for bug 11088
04608 extern void
04609 Exp_Select_And_VLdid(
04610         OPCODE select, TN *result, TN *true_tn, TN *false_tn,
04611         OPCODE compare, TN *vldid,  OPS *ops)
04612 {
04613   OPS new_ops = OPS_EMPTY;
04614   const TYPE_ID select_type = OPCODE_rtype(select);
04615   FmtAssert( select_type == MTYPE_V16F4 || select_type == MTYPE_V16F8,
04616                  ("Exp_Select_And_VLdid: select type not handled"));
04617 
04618   BOOL is_rsize_double = (select_type == MTYPE_V16F8) ? TRUE : FALSE;
04619 
04620    if( result == true_tn ){
04621     TN* tmp = Build_TN_Like( true_tn );
04622     Expand_Copy( tmp, true_tn, select_type, ops );
04623     true_tn = tmp;
04624   }
04625 
04626   if( result == false_tn ){
04627     TN* tmp = Build_TN_Like( false_tn );
04628     Expand_Copy( tmp, false_tn, select_type, ops );
04629     false_tn = tmp;
04630   }
04631 
04632   if( result == vldid ){
04633     TN* tmp = Build_TN_Like(vldid);
04634     Expand_Copy( tmp, vldid, select_type, ops );
04635     vldid = tmp;
04636   }
04637  
04638   TN* tmp1 = Build_TN_Like(result);
04639   TN* tmp2 = Build_TN_Like(result);
04640 
04641   Build_OP( is_rsize_double ? TOP_andpd : TOP_andps, tmp1, true_tn, vldid, &new_ops );
04642   Build_OP( is_rsize_double ? TOP_andnpd : TOP_andnps, tmp2, vldid, false_tn, &new_ops );
04643   Build_OP( is_rsize_double ? TOP_orpd : TOP_orps, result, tmp2, tmp1, &new_ops );
04644 
04645   if( Trace_Exp ){
04646      Print_OPS( &new_ops );
04647   }
04648 
04649   OPS_Append_Ops(ops, &new_ops);
04650 }
04651 
04652 
04653 extern void 
04654 Exp_Select_And_Condition (
04655         OPCODE select, TN *result, TN *true_tn, TN *false_tn,
04656         OPCODE compare, TN *cmp_kid1, TN *cmp_kid2, VARIANT variant, OPS *ops)
04657 {
04658   if (Trace_Exp) {
04659     fprintf(TFile, "expand %s: ", OPCODE_name(select));
04660     if (result) Print_TN(result,FALSE);
04661     fprintf(TFile, " :- (");
04662     if (cmp_kid1) Print_TN(cmp_kid1,FALSE);
04663     fprintf(TFile, " ");
04664     fprintf(TFile, OPCODE_name(compare));
04665     fprintf(TFile, " ");
04666     if (cmp_kid2) Print_TN(cmp_kid2,FALSE);
04667     fprintf(TFile, ") ? ");
04668     if (true_tn) Print_TN(true_tn,FALSE);
04669     fprintf(TFile, " : ");
04670     if (false_tn) Print_TN(false_tn,FALSE);
04671     fprintf(TFile, " ");
04672     if (variant) fprintf(TFile, "(0x%llx)", (INT64)variant);
04673     fprintf(TFile, "\n");
04674   }
04675 
04676   OPS new_ops = OPS_EMPTY;
04677   const TYPE_ID desc = OPCODE_desc(compare);
04678   const TYPE_ID select_type = OPCODE_rtype(select);
04679   BOOL is_rsize_double = MTYPE_is_size_double(select_type);
04680   BOOL is_ssize_double = MTYPE_is_size_double(desc);
04681   const OPERATOR compare_opr = OPCODE_operator(compare);
04682 
04683   /* Fix bug#1325
04684      Before expanding, make sure <result> is none of its operands.
04685    */
04686 
04687   if( result == true_tn ){
04688     TN* tmp = Build_TN_Like( true_tn );
04689     Expand_Copy( tmp, true_tn, select_type, ops );
04690     true_tn = tmp;
04691   }
04692 
04693   if( result == false_tn ){
04694     TN* tmp = Build_TN_Like( false_tn );
04695     Expand_Copy( tmp, false_tn, select_type, ops );
04696     false_tn = tmp;
04697   }
04698 
04699   if (result == cmp_kid1) {
04700     TN* tmp = Build_TN_Like( cmp_kid1 );
04701     Expand_Copy( tmp, cmp_kid1, desc, ops );
04702     cmp_kid1 = tmp;
04703   }
04704 
04705   if (result == cmp_kid2) {
04706     TN* tmp = Build_TN_Like( cmp_kid2 );
04707     Expand_Copy( tmp, cmp_kid2, desc, ops );
04708     cmp_kid2 = tmp;
04709   }
04710 
04711   if( MTYPE_is_float(select_type) &&
04712       !Is_Target_SSE2()           &&
04713       !MTYPE_is_float(desc)       &&
04714       MTYPE_is_signed(desc) ){
04715     Expand_non_SSE2_Float_Select( result, variant,
04716           cmp_kid1, cmp_kid2,
04717           true_tn, false_tn, select_type,
04718           &new_ops );
04719 
04720   } else if( MTYPE_is_float(select_type) &&
04721       Is_Target_SSE2()            &&
04722       !MTYPE_is_quad(select_type) ){
04723     /* For case where <result>, <true_tn> and <false_tn> are fp type.
04724 
04725        Paraphrase Section 6.7 of AMD Opteron Optimization Guide:
04726        In: <result> = <cmp_kid1> <compare> <cmp_kid2> ? <true_tn> : <false_tn>
04727        Out: <result>
04728 
04729        tmp1 := cmpss  <cmp_kid1>, <cmp_kid2>, ctrl
04730        tmp2 := andps  <true_tn>, <tmp1>
04731        tmp3 := andnps <tmp1>, <false_tn>
04732        <result> := orps <tmp3>, <tmp2>
04733     */
04734 
04735     /* Notice that for a comparison made between integers, the result for int cmp
04736        is set to rflags, and there is no conditional mov for fp.
04737     */
04738     if( MTYPE_is_integral(desc) ){
04739       TN* cmp1 = Build_TN_Of_Mtype(select_type);
04740       TN* cmp2 = Build_TN_Of_Mtype(select_type);
04741 
04742       FmtAssert( !TN_has_value( cmp_kid1 ),
04743      ("Exp_Select_And_Condition: cmp kid1 does not have value") );
04744       Expand_Int_To_Float( cmp1, cmp_kid1, desc, select_type, &new_ops );
04745       cmp_kid1 = cmp1;
04746 
04747       if( TN_has_value( cmp_kid2 ) ){
04748   INT64 val = TN_value( cmp_kid2 );
04749   if( TN_size( cmp_kid2 ) == 4 )
04750     val = (INT32)val;
04751 
04752   // Bug 5084 - The following should expand val as an integer and not 
04753   // a float because we are going to convert this int back to float.
04754   //TCON tcon = Host_To_Targ_Float( is_rsize_double ? MTYPE_F8 : MTYPE_F4, val );
04755   TCON tcon = Host_To_Targ( is_rsize_double ? MTYPE_I8 : MTYPE_I4, val );
04756   ST* sym = New_Const_Sym( Enter_tcon(tcon),  Be_Type_Tbl( TCON_ty(tcon) ) );
04757   TN* tmp = Build_TN_Of_Mtype(desc);
04758 
04759   ST* base_sym = NULL;
04760   INT64 base_ofst = 0;
04761 
04762   Allocate_Object(sym);
04763   Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
04764       
04765   Expand_Const( tmp, Gen_Symbol_TN( base_sym, base_ofst, TN_RELOC_NONE ),
04766           desc, &new_ops );
04767   cmp_kid2 = tmp;
04768       }
04769 
04770       Expand_Int_To_Float( cmp2, cmp_kid2, desc, select_type, &new_ops );
04771       cmp_kid2 = cmp2;
04772 
04773       // The comparison between integers is changed to a comparison between
04774       // float/doubles, depending on the select's result type.  Update
04775       // is_ssize_double to reflect the new comparison's operand type.
04776       is_ssize_double = MTYPE_is_size_double(select_type);
04777     }
04778 
04779     TN* tmp1 = Build_TN_Like( cmp_kid1 );
04780     TN* tmp2 = Build_TN_Like( result );
04781     TN* tmp3 = Build_TN_Like( result );
04782     TN* ctrl = Generate_Cmp_Ctrl_TN( compare_opr );
04783     BOOL zero_tn = FALSE;
04784 
04785     // Bug 2297 - optimize the case where true_tn is 0.0
04786     // TODO_1.2 : false_tn could also be 0.0 in which case we have to reverse 
04787     // the compare and set a flag and fall thru. This is not not relevant to
04788     // this bug.
04789     if ( TN_is_rematerializable( true_tn ) ) {
04790       WN* home = TN_home(true_tn);
04791       if (WN_operator(home) == OPR_CONST) {
04792   ST* st = WN_st(home);
04793   TCON tcon = STC_val(st);
04794   TYPE_ID ty = TCON_ty(tcon);
04795   if (((ty == MTYPE_F4 || ty == MTYPE_V16F4) &&
04796        TCON_R4(tcon) == 0.0) ||
04797       ((ty == MTYPE_F8 || ty == MTYPE_V16F8) &&
04798        TCON_R8(tcon) == 0.0))
04799     zero_tn = TRUE;
04800       }
04801     }
04802 
04803     if (zero_tn) {
04804       OPERATOR rev_cmp_opr = compare_opr;
04805 
04806       switch( compare_opr ){
04807       case OPR_EQ: rev_cmp_opr = OPR_NE; break;
04808       case OPR_LT: rev_cmp_opr = OPR_GE; break;
04809       case OPR_LE: rev_cmp_opr = OPR_GT; break;
04810       case OPR_NE: rev_cmp_opr = OPR_EQ; break;
04811       case OPR_GE: rev_cmp_opr = OPR_LT; break;
04812       case OPR_GT: rev_cmp_opr = OPR_LE; break;
04813       }
04814 
04815       ctrl = Generate_Cmp_Ctrl_TN( rev_cmp_opr );
04816 
04817       if ( MTYPE_is_vector ( select_type ) ) {
04818   Build_OP( ( select_type == MTYPE_V16F8 ) ? TOP_cmppd : TOP_cmpps,
04819       tmp1, cmp_kid1, cmp_kid2, ctrl, &new_ops );
04820   is_rsize_double = (select_type == MTYPE_V16F8) ? TRUE : FALSE;
04821       } else {
04822   Build_OP(is_ssize_double ? TOP_cmpsd : TOP_cmpss,
04823      tmp1, cmp_kid1, cmp_kid2, ctrl, &new_ops );
04824         if (!is_ssize_double && is_rsize_double) {
04825     // cmpss sets the low-order 32 bits.  Extend these 32 bits to 64
04826     // bits.  Do this by replicating them across the entire 128-bit xmm
04827     // register.  Bug 9497.
04828     TN* cmp_64bit_result = Build_TN_Like(result);
04829     Build_OP(TOP_pshufd, cmp_64bit_result, tmp1, Gen_Literal_TN(0, 4),
04830        &new_ops);
04831     tmp1 = cmp_64bit_result;
04832   }
04833       }
04834       Build_OP( is_rsize_double ? TOP_andpd : TOP_andps, result, 
04835           false_tn, tmp1, &new_ops );
04836     
04837       if( Trace_Exp ){
04838   Print_OPS( &new_ops );
04839       }     
04840       OPS_Append_Ops(ops, &new_ops);      
04841       return;
04842     }
04843 
04844     if ( MTYPE_is_vector ( select_type ) ) {
04845       Build_OP( ( select_type == MTYPE_V16F8 ) ? TOP_cmppd : TOP_cmpps,
04846     tmp1, cmp_kid1, cmp_kid2, ctrl, &new_ops );
04847       is_rsize_double = (select_type == MTYPE_V16F8) ? TRUE : FALSE;
04848     } else {
04849       Build_OP(is_ssize_double ? TOP_cmpsd : TOP_cmpss,
04850          tmp1, cmp_kid1, cmp_kid2, ctrl, &new_ops );
04851       if (!is_ssize_double && is_rsize_double) {
04852   // cmpss sets the low-order 32 bits.  Extend these 32 bits to 64 bits.
04853   // Do this by replicating them across the entire 128-bit xmm register.
04854   // Bug 9497.
04855   TN* cmp_64bit_result = Build_TN_Like(result);
04856   Build_OP(TOP_pshufd, cmp_64bit_result, tmp1, Gen_Literal_TN(0, 4),
04857      &new_ops);
04858   tmp1 = cmp_64bit_result;
04859       }
04860     }
04861     Build_OP( is_rsize_double ? TOP_andpd : TOP_andps, tmp2, true_tn, tmp1, &new_ops );
04862 
04863     Build_OP( is_rsize_double ? TOP_andnpd : TOP_andnps, tmp3, tmp1, false_tn, &new_ops );
04864 
04865     Build_OP( is_rsize_double ? TOP_orpd : TOP_orps, result, tmp3, tmp2, &new_ops );
04866 
04867   } else {
04868     /* For case where <result>, <true_tn> and <false_tn> are
04869        integer type. */
04870 
04871     TOP cmov_top = TOP_UNDEFINED;
04872 
04873     switch( compare_opr ){
04874     case OPR_LT:
04875       cmov_top = MTYPE_is_float(select_type)
04876   ? TOP_fcmovb : ( MTYPE_is_signed(desc) ? TOP_cmovl : TOP_cmovb );
04877       break;
04878     case OPR_LE:
04879       cmov_top = MTYPE_is_float(select_type)
04880   ? TOP_fcmovbe : ( MTYPE_is_signed(desc) ? TOP_cmovle : TOP_cmovbe );
04881       break;
04882     case OPR_EQ:
04883       cmov_top = MTYPE_is_float(select_type) ? TOP_fcmove : TOP_cmove;
04884       break;
04885     case OPR_NE:
04886       cmov_top = MTYPE_is_float(select_type) ? TOP_fcmovne : TOP_cmovne;
04887       break;
04888     case OPR_GE:
04889       cmov_top = MTYPE_is_float(select_type)
04890   ? TOP_fcmovnb : ( MTYPE_is_signed(desc) ? TOP_cmovge : TOP_cmovae );
04891       break;
04892     case OPR_GT:
04893       cmov_top = MTYPE_is_float(select_type)
04894   ? TOP_fcmovnbe : ( MTYPE_is_signed(desc) ? TOP_cmovg : TOP_cmova );
04895       break;
04896     default:
04897       FmtAssert(FALSE, ("Unknown opcode"));
04898     }
04899 
04900     TN* rflags = Rflags_TN();
04901 
04902     TOP cmp_opcode = MTYPE_is_float(select_type)
04903       ? TOP_fucomi : ( is_ssize_double ? TOP_cmp64 : TOP_cmp32 );
04904 
04905     if( MTYPE_is_float(desc) ){
04906       cmp_opcode = ( MTYPE_is_quad(desc) || !Is_Target_SSE2() )
04907   ? TOP_fucomi : ( is_ssize_double ? TOP_comisd : TOP_comiss );
04908 
04909     } else if( TN_has_value( cmp_kid2 ) ){
04910       if( TN_value( cmp_kid2 ) == 0 ){
04911   cmp_opcode = is_ssize_double ? TOP_test64 : TOP_test32;
04912   cmp_kid2 = cmp_kid1;
04913 
04914       } else {
04915   cmp_opcode = is_ssize_double ? TOP_cmpi64 : TOP_cmpi32;
04916       }
04917     }
04918 
04919     if( result != false_tn || 
04920   ( Force_IEEE_Comparisons && 
04921     TOP_is_flop( cmp_opcode ))) {
04922       // Fix bug091.
04923       FmtAssert( result != true_tn,
04924      ("Exp_Select_And_Condition: result and true_tn are identical") );
04925       Expand_Copy( result, false_tn, select_type, &new_ops );
04926     }
04927 
04928     if (TN_has_value(cmp_kid1) && 
04929   !ISA_LC_Value_In_Class (TN_value(cmp_kid1), LC_simm32)) {
04930       TN* tmp = Build_TN_Of_Mtype(MTYPE_U8);
04931       Exp_Immediate(tmp, cmp_kid1, &new_ops);
04932       cmp_kid1 = tmp;
04933       cmp_opcode = is_ssize_double ? TOP_cmp64 : TOP_cmp32;
04934     }
04935     if (TN_has_value(cmp_kid2) && 
04936   !ISA_LC_Value_In_Class (TN_value(cmp_kid2), LC_simm32)) {
04937       TN* tmp = Build_TN_Of_Mtype(MTYPE_U8);
04938       Exp_Immediate(tmp, cmp_kid2, &new_ops);
04939       cmp_kid2 = tmp;
04940       cmp_opcode = is_ssize_double ? TOP_cmp64 : TOP_cmp32;
04941     }
04942 
04943     if( OP_NEED_PAIR( desc ) ){
04944       Expand_Split_Select( result, compare_opr, cmp_opcode,
04945          cmp_kid1, cmp_kid2, desc,
04946          true_tn, false_tn, select_type,
04947          &new_ops );
04948 
04949     } else {
04950       Build_OP( cmp_opcode, rflags, cmp_kid1, cmp_kid2, &new_ops );
04951 
04952       TN* result_hi = NULL;
04953       TN* true_tn_hi = NULL;
04954       if (OP_NEED_PAIR(select_type)) {
04955   result_hi = Get_TN_Pair(result);
04956   true_tn_hi = Get_TN_Pair(true_tn);
04957       }
04958 
04959       // Ugly hack to preserve the interaface to Expand_Ordered_Select_Compare,
04960       // which parses a straight line code involving cmov.  Expand_Cmov, which
04961       // was added later, breaks this interface by introducing basic blocks.
04962       // Bug 8087.
04963       if (Force_IEEE_Comparisons && TOP_is_flop(cmp_opcode)) {
04964   Build_OP(cmov_top, result, true_tn, rflags, &new_ops);
04965   Set_OP_cond_def_kind(OPS_last(&new_ops), OP_ALWAYS_COND_DEF);
04966   if (result_hi != NULL) {
04967     Build_OP(cmov_top, result_hi, true_tn_hi, rflags, &new_ops);
04968     Set_OP_cond_def_kind(OPS_last(&new_ops), OP_ALWAYS_COND_DEF);
04969   }
04970   // To avoid cluttering we will do a post-process on new_ops incase we
04971   // need to cover unordered FP comparisons.
04972   Expand_Ordered_Select_Compare ( &new_ops, cmov_top );
04973       } else {
04974   Expand_Cmov(cmov_top, result, true_tn, rflags, &new_ops,
04975         result_hi, true_tn_hi);
04976       }
04977     }
04978   }
04979 
04980   if( Trace_Exp ){
04981     Print_OPS( &new_ops );
04982   }
04983 
04984   OPS_Append_Ops(ops, &new_ops);
04985 }
04986 
04987 
// Set the cond-def kind of the last OP in <ops> to OP_ALWAYS_UNC_DEF.
#define RESET_COND_DEF_LAST(ops) Set_OP_cond_def_kind(OPS_last(ops),OP_ALWAYS_UNC_DEF)
04989 
/* Placeholder: the body below contains no statements, so calling this
 * function emits no OPs and leaves <result> untouched.  Only the
 * reference instruction sequence is kept as a comment.
 * NOTE(review): the sequence is written with frsqrta/fma-style mnemonics
 * and predicate registers, presumably inherited from another target --
 * confirm before relying on it for x86-64.
 */
static void
Expand_SGI_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
{
  /*  (p0) frsqrta.s0 f6,p2=src # y2 = ~1/sqrt(x)
   *
   *  (p2) ldfd f4=half   # f4 = 0.5 (0x3fe0000000000000)
   *  (p2) ldfd f7=ah   # f7 = 0x3fe0000000000001
   *
   *  (p2) fmpy.d.s1  f3=src,f6 # g = x*y2
   *  (p2) fmpy.d.s1  f2=f4,f6  # y = 0.5*y2
   *
   *  (p2) fnma.d.s1  f5=f3,f3,src  # d = x - g*g
   *
   *  (p2) fma.d.s1 f3=f2,f5,f3 # g = g + y*d # 16 bit approximation
   *
   *  (p2) fnma.d.s1  f8=f2,f3,f7 # e = ah - y*g
   *  (p2) fnma.d.s1  f5=f3,f3,src    # d = x - g*g
   *  (p2) fma.d.s1 f2=f8,f6,f2 # y = y + e*y2
   *
   *  (p2) fma.d.s1   f3=f2,f5,f3     # g = g + y*d # 32 bit approximation
   *  (p2) fadd.d.s1  f6=f3,f3        # y2 = y + y
   *
   *  (p2) fnma.d.s1  f8=f2,f3,f7 # e = ah - y*g
   *  (p2) fnma.d.s1  f5=f3,f3,src    # d = x - g*g
   *  (p2) fma.d.s1 f2=f8,f6,f2 # y = y + e*y2
   *
   *  (p2) fma.d.s1   f3=f2,f5,f3     # g = g + y*d # 64 bit approximation before rounding
   *  (p2) fadd.d.s1  f6=f3,f3        # y2 = y + y
   *
   *  (p2) fnma.d.s1  f8=f2,f3,f7 # e = ah - y*g
   *  (p2) fnma.d.s1  f5=f3,f3,src    # d = x - g*g
   *  (p2) fma.d.s1 f2=f8,f6,f2 # y = y + e*y2
   *
   *  (p2) fma.d.s0   f6=f2,f5,f3 # result = g + y*d
   */
  // 3-mar-00/ken: this doesn't work for MTYPE_F10!!!!
}
05027 
05028 static void
05029 Expand_Intel_F10_Sqrt(TN *result, TN *src, OPS *ops)
05030 { FmtAssert(FALSE,("Unimplemented")); }
05031 
05032 
05033 static void
05034 Expand_Intel_Max_Thr_F8_Sqrt(TN *result, TN *src, OPS *ops)
05035 { FmtAssert(FALSE,("Unimplemented")); }
05036 
05037 
05038 static void
05039 Expand_Intel_Max_Thr_F4_Sqrt(TN *result, TN *src, OPS *ops)
05040 { FmtAssert(FALSE,("Unimplemented")); }
05041 
05042 
05043 static void
05044 Expand_Intel_Min_Lat_F8_Sqrt(TN *result, TN *src, OPS *ops)
05045 { FmtAssert(FALSE,("Unimplemented")); }
05046 
05047 
05048 static void
05049 Expand_Intel_Min_Lat_F4_Sqrt(TN *result, TN *src, OPS *ops)
05050 { FmtAssert(FALSE,("Unimplemented")); }
05051 
05052 
05053 static void
05054 Expand_Intel_Max_Thr_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
05055 {
05056   switch (mtype) {
05057   case MTYPE_F4:
05058     Expand_Intel_Max_Thr_F4_Sqrt(result, src, ops);
05059     break;
05060   case MTYPE_F8:
05061     Expand_Intel_Max_Thr_F8_Sqrt(result, src, ops);
05062     break;
05063   case MTYPE_F10:
05064     Expand_Intel_F10_Sqrt(result, src, ops);
05065     break;
05066   default:
05067     FmtAssert(FALSE, ("Bad type in Expand_Intel_Max_Thr_Sqrt"));
05068     /*NOTREACHED*/
05069   }
05070 }
05071 
05072 
05073 static void
05074 Expand_Intel_Min_Lat_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
05075 {
05076   switch (mtype) {
05077   case MTYPE_F4:
05078     Expand_Intel_Min_Lat_F4_Sqrt(result, src, ops);
05079     break;
05080   case MTYPE_F8:
05081     Expand_Intel_Min_Lat_F8_Sqrt(result, src, ops);
05082     break;
05083   case MTYPE_F10:
05084     Expand_Intel_F10_Sqrt(result, src, ops);
05085     break;
05086   default:
05087     FmtAssert(FALSE, ("Bad type in Expand_Intel_Min_Lat_Sqrt"));
05088     /*NOTREACHED*/
05089   }
05090 }
05091 
05092 
05093 static void 
05094 Expand_Fast_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
05095 {
05096   FmtAssert( mtype == MTYPE_F4 || mtype == MTYPE_V16F4 , ("NYI"));
05097   
05098   TN* tmp0 = Build_TN_Like(result);
05099   TN* tmp1 = Build_TN_Like(result);
05100   TN* tmp2 = Build_TN_Like(result);
05101   TN* tmp3 = Build_TN_Like(result);
05102   TN* tmp4 = Build_TN_Like(result);
05103   TN* tmp5 = Build_TN_Like(result);
05104   TN* tmp6 = Build_TN_Like(result);
05105   TN* tmp7 = Build_TN_Like(result);
05106   TN* const0 = Build_TN_Like(result);
05107   TN* const1 = Build_TN_Like(result);
05108   
05109   if ( mtype == MTYPE_F4 ) {
05110     Build_OP( TOP_xzero32, tmp0, ops);
05111     Build_OP( TOP_cmpneqss, tmp1, tmp0, src, ops );
05112     Build_OP( TOP_rsqrtss, tmp2, src, ops );
05113     Build_OP( TOP_fand128v32, tmp3, tmp2, tmp1, ops );
05114     Build_OP( TOP_mulss, tmp4, tmp3, src, ops );
05115     Build_OP( TOP_mulss, tmp5, tmp4, tmp3, ops );
05116     Expand_Const( const0, Gen_Const_Symbol_TN( 0x40400000, 0.0, MTYPE_I4 ),
05117       mtype, ops );
05118     Build_OP( TOP_subss, tmp6, tmp5, const0, ops );
05119     Build_OP( TOP_mulss, tmp7, tmp6, tmp4, ops );
05120     Expand_Const( const1, Gen_Const_Symbol_TN( 0xbf000000, 0.0, MTYPE_I4 ),
05121       mtype, ops );
05122     Build_OP( TOP_mulss, result, tmp7, const1, ops );    
05123 
05124   } else { // mtype == MTYPE_V16F4
05125     Build_OP( TOP_xzero128v32, tmp0, ops);
05126     Build_OP( TOP_cmpneqps, tmp1, tmp0, src, ops );
05127     Build_OP( TOP_frsqrt128v32, tmp2, src, ops );
05128     Build_OP( TOP_fand128v32, tmp3, tmp2, tmp1, ops );
05129     Build_OP( TOP_fmul128v32, tmp4, tmp3, src, ops );
05130     Build_OP( TOP_fmul128v32, tmp5, tmp4, tmp3, ops );
05131     TCON then0 = Host_To_Targ (MTYPE_I4, 0x40400000);
05132     TCON now0  = Create_Simd_Const (MTYPE_V16F4, then0);
05133     ST *sym0 = New_Const_Sym (Enter_tcon (now0), Be_Type_Tbl(TCON_ty(now0)));
05134     Allocate_Object(sym0);
05135     TN *sym_tn0 = Gen_Symbol_TN(sym0, 0, 0);
05136     Exp_Load(mtype, mtype, const0, TN_var(sym_tn0), TN_offset(sym_tn0), ops, 0);
05137     Build_OP( TOP_fsub128v32, tmp6, tmp5, const0, ops );
05138     Build_OP( TOP_fmul128v32, tmp7, tmp6, tmp4, ops );
05139     TCON then1 = Host_To_Targ (MTYPE_I4, 0xbf000000);
05140     TCON now1  = Create_Simd_Const (MTYPE_V16F4, then1);
05141     ST *sym1 = New_Const_Sym (Enter_tcon (now1), Be_Type_Tbl(TCON_ty(now1)));
05142     Allocate_Object(sym1);
05143     TN *sym_tn1 = Gen_Symbol_TN(sym1, 0, 0);
05144     Exp_Load(mtype, mtype, const1, TN_var(sym_tn1), TN_offset(sym_tn1), ops, 0);
05145     Build_OP( TOP_fmul128v32, result, tmp7, const1, ops );        
05146   }
05147 
05148   return;
05149 }
05150 
05151 void
05152 Expand_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
05153 {
05154   FmtAssert( MTYPE_is_float(mtype),
05155       ("Unimplemented sqrt for integer src/dest") );
05156 
05157   if ( Fast_Sqrt_Allowed && (mtype == MTYPE_F4 || mtype == MTYPE_V16F4) &&
05158        Is_Target_SSE2() ) {
05159     Expand_Fast_Sqrt(result, src, mtype, ops );
05160     return;
05161   }
05162     
05163 
05164   switch(mtype) {
05165   case MTYPE_V16F4:
05166     Build_OP(TOP_fsqrt128v32, result, src, ops);
05167     break;
05168   case MTYPE_V16F8:
05169     Build_OP(TOP_fsqrt128v64, result, src, ops);
05170     break;
05171   default:    
05172     if( MTYPE_is_quad(mtype) ||
05173   !Is_Target_SSE2() )
05174       Build_OP( TOP_fsqrt, result, src, ops);
05175     else
05176       Build_OP( mtype == MTYPE_F8 ? TOP_sqrtsd : TOP_sqrtss, result, src, ops);
05177     break;
05178   }
05179 }
05180 
05181 
05182 static void
05183 Expand_Float_Compares(TOP set_opcode, 
05184           TN *dest, TN *src1, TN *src2, TYPE_ID mtype, OPS *ops)
05185 {
05186   const BOOL is_double = MTYPE_is_size_double(mtype);
05187   const TOP top = ( MTYPE_is_quad(mtype) || !Is_Target_SSE2() )
05188     ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss );
05189 
05190   TN* rflags = Rflags_TN();
05191   if ( Force_IEEE_Comparisons && 
05192        ( set_opcode == TOP_setb || set_opcode == TOP_setbe ) )
05193     Build_OP( top, rflags, src2, src1, ops );
05194   else
05195     Build_OP( top, rflags, src1, src2, ops );
05196 
05197   TN *dest_tmp = Build_TN_Of_Mtype(MTYPE_U1);
05198 
05199   if ( Force_IEEE_Comparisons ) {
05200     if ( set_opcode == TOP_seta || set_opcode == TOP_setae )
05201       // We are as good before.
05202       Build_OP( set_opcode, dest_tmp, rflags, ops);
05203     else if ( set_opcode == TOP_setb || set_opcode == TOP_setbe )
05204       Build_OP( set_opcode == TOP_setb ? TOP_seta : TOP_setae, 
05205     dest_tmp, rflags, ops);
05206     else if ( set_opcode == TOP_sete ) {
05207       TN *dest_tmp1 = Build_TN_Of_Mtype(MTYPE_U1);
05208       TN *dest_tmp2 = Build_TN_Of_Mtype(MTYPE_U1);
05209       Build_OP( set_opcode, dest_tmp, rflags, ops );      
05210       Build_OP( TOP_setnp, dest_tmp1, rflags, ops );
05211       Build_OP (TOP_and8, dest_tmp2, dest_tmp, dest_tmp1, ops );
05212       dest_tmp = dest_tmp2;
05213     }
05214     else if (set_opcode == TOP_setne ) {
05215       TN *dest_tmp1 = Build_TN_Of_Mtype(MTYPE_U1);
05216       TN *dest_tmp2 = Build_TN_Of_Mtype(MTYPE_U1);
05217       Build_OP( set_opcode, dest_tmp, rflags, ops );      
05218       Build_OP( TOP_setp, dest_tmp1, rflags, ops );
05219       Build_OP (TOP_or8, dest_tmp2, dest_tmp, dest_tmp1, ops );
05220       dest_tmp = dest_tmp2;
05221     } else
05222       FmtAssert ( FALSE,
05223       ("Expand_Float_Compares: Unsupported opcode (%s)",
05224        TOP_Name(set_opcode)) );
05225   } else 
05226     Build_OP( set_opcode, dest_tmp, rflags, ops);
05227 
05228   if( Is_Target_32bit() &&
05229       TN_size(dest) == 8 ){
05230     Expand_Split_Cvtl( MTYPE_I8, TOP_movzbq, dest, dest_tmp, ops );
05231 
05232   } else {
05233     Build_OP( TN_size(dest) == 8 ? TOP_movzbq : TOP_movzbl,
05234         dest, dest_tmp, ops );
05235   }
05236 }
05237 
05238 void
05239 Expand_Float_Less (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05240 {
05241   Expand_Float_Compares(TOP_setb, dest, src1, src2, mtype, ops);
05242 }
05243 
05244 void
05245 Expand_Float_Greater (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05246 {
05247   Expand_Float_Compares(TOP_seta, dest, src1, src2, mtype, ops);
05248 }
05249 
05250 void
05251 Expand_Float_Less_Equal (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05252 {
05253   Expand_Float_Compares(TOP_setbe, dest, src1, src2, mtype, ops);
05254 }
05255 
05256 void
05257 Expand_Float_Greater_Equal (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05258 {
05259   Expand_Float_Compares(TOP_setae, dest, src1, src2, mtype, ops);
05260 }
05261 
05262 void
05263 Expand_Float_Equal (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05264 {
05265   Expand_Float_Compares(TOP_sete, dest, src1, src2, mtype, ops);
05266 }
05267 
05268 void
05269 Expand_Float_Not_Equal (TN *dest, TN *src1, TN *src2, VARIANT variant, TYPE_ID mtype, OPS *ops)
05270 {
05271   Expand_Float_Compares(TOP_setne, dest, src1, src2, mtype, ops);
05272 }
05273 
05274 void
05275 Expand_Recip_Sqrt (TN *result, TN *src, TYPE_ID mtype, OPS *ops)
05276 {
05277   /*  (p0) frsqrta.s0 f2,p2=src # y = ~1/sqrt(x)
05278    *
05279    *  (p2) ldfd f4=half   # f4 = 0.5
05280    *  (p2) fmpy.d.s1  f5=f4,src # hx = 0.5*x
05281    *
05282    *  (p2) fmpy.d.s1  f3=f2,f2  # y2 = y*y
05283    *  (p2) fnma.d.s1  f6=f5,f3,f4 # z = 0.5 - 0.5*x*y*y
05284    *  (p2) fma.d.s1   f2=f2,f6,f2 # y = y + y*z
05285    *
05286    *  (p2) fmpy.d.s1  f3=f2,f2  # y2 = y*y
05287    *  (p2) fnma.d.s1  f6=f5,f3,f4 # z = 0.5 - 0.5*x*y*y
05288    *  (p2) fma.d.s1   f2=f2,f6,f2 # y = y + y*z
05289    *
05290    *  (p2) fmpy.d.s1  f3=f2,f2  # y2 = y*y
05291    *  (p2) fnma.d.s1  f6=f5,f3,f4 # z = 0.5 - 0.5*x*y*y
05292    *  (p2) fma.d.s0   f2=f2,f6,f2 # result = y + y*z
05293    */
05294   ErrMsg( EC_Unimplemented, "Expand_Recip_Sqrt: NYI" );
05295 }
05296 
05297 
05298 /* Don't use TOP_rcpss, which gives non-accurate result. */
05299 static void Expand_Recip( TN* result, TN* src2, TYPE_ID mtype, OPS* ops )
05300 {
05301   const BOOL is_double = MTYPE_is_size_double(mtype);
05302 
05303   if( Recip_Allowed && Is_Target_SSE2() && mtype == MTYPE_V16F4 ) {
05304     TN *tmp1 = Build_TN_Like(result);
05305     TN *tmp2 = Build_TN_Like(result);
05306     TN *tmp3 = Build_TN_Like(result);
05307     TN *tmp4 = Build_TN_Like(result);
05308     Build_OP( TOP_frcp128v32, tmp1, src2, ops );
05309     // Bug 7218 - add a Newton-Raphson iteration to recip computation.
05310     Build_OP( TOP_fmul128v32, tmp2, src2, tmp1, ops );
05311     Build_OP( TOP_fmul128v32, tmp3, tmp2, tmp1, ops );
05312     Build_OP( TOP_fadd128v32, tmp4, tmp1, tmp1, ops );
05313 #if 1
05314     Build_OP( TOP_fsub128v32, result, tmp4, tmp3, ops );      
05315 #else
05316     // multiply result by 1.0
05317     TN *tmp5 = Build_TN_Like(result);
05318     TN *tmp6 = Build_TN_Like(result);
05319     Build_OP( TOP_fsub128v32, tmp5, tmp4, tmp3, ops );
05320     
05321     // Create vector {1.0, 1.0, 1.0, 1.0}
05322     TCON tcon;
05323     ST *sym;
05324     tcon = Create_Simd_Const ( MTYPE_V16F4,  
05325              Host_To_Targ_Float_4 ( MTYPE_F4, 1.0 ) );
05326     sym = New_Const_Sym( Enter_tcon(tcon),  Be_Type_Tbl( TCON_ty(tcon) ) );
05327     ST* base_sym = NULL;
05328     INT64 base_ofst = 0;      
05329     Allocate_Object(sym);
05330     Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
05331     
05332     Expand_Const( tmp6, Gen_Symbol_TN( base_sym, base_ofst, TN_RELOC_NONE ), 
05333       mtype, ops );
05334     
05335     Build_OP( TOP_fmul128v32, result, tmp5, tmp6, ops );      
05336 #endif
05337     return;
05338   } 
05339     
05340   TN* src1 = Build_TN_Like( src2 );
05341 
05342   TCON tcon;
05343   ST* sym;
05344 
05345   if (mtype == MTYPE_V16F4)
05346     tcon = Create_Simd_Const ( MTYPE_V16F4,  
05347              Host_To_Targ_Float_4 ( MTYPE_F4, 1.0 ) );
05348   else if (mtype == MTYPE_V16F8)
05349     tcon = Create_Simd_Const ( MTYPE_V16F8,
05350                                Host_To_Targ_Float_4 ( MTYPE_F8, 1.0 ) );
05351   else
05352     tcon = MTYPE_is_quad(mtype)
05353       ? Host_To_Targ_Quad( 1.0 ) : Host_To_Targ_Float( mtype, 1.0 );
05354 
05355   sym = New_Const_Sym( Enter_tcon(tcon),  Be_Type_Tbl( TCON_ty(tcon) ) );
05356 
05357   ST* base_sym = NULL;
05358   INT64 base_ofst = 0;
05359 
05360   Allocate_Object(sym);
05361   Base_Symbol_And_Offset_For_Addressing( sym, 0, &base_sym, &base_ofst );
05362 
05363   Expand_Const( src1, Gen_Symbol_TN( base_sym, base_ofst, TN_RELOC_NONE ), 
05364     mtype, ops );
05365 
05366   const BOOL non_sse2_fp = MTYPE_is_quad(mtype) || !Is_Target_SSE2();
05367   if (mtype == MTYPE_V16F4)    
05368     Build_OP( TOP_fdiv128v32, result, src1, src2, ops );
05369   else if (mtype == MTYPE_V16F8)
05370     Build_OP( TOP_fdiv128v64, result, src1, src2, ops );
05371   else
05372     Build_OP( non_sse2_fp ? TOP_fdiv : ( is_double ? TOP_divsd : TOP_divss ),
05373         result, src1, src2, ops );
05374 }
05375 
05376 static void Expand_Complex_Multiply( OPCODE opcode, TN *result, 
05377              TN *src1, TN *src2, OPS *ops )
05378 {
05379   FmtAssert(opcode == OPC_V16C4MPY || opcode == OPC_V16C8MPY, ("NYI"));
05380 
05381   if (opcode == OPC_V16C4MPY) {
05382     TN* tmp1 = Build_TN_Like(src1);
05383     TN* tmp2 = Build_TN_Like(src1);
05384     TN* tmp3 = Build_TN_Like(src1);
05385     TN* tmp4 = Build_TN_Like(src1);
05386     TN* tmp5 = Build_TN_Like(src1);
05387     
05388     Build_OP(TOP_fmovsldup, tmp1, src2, ops);
05389     Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops);
05390     Build_OP(TOP_fmovshdup,tmp3, src2, ops);
05391     Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
05392     Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
05393     Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops);
05394 
05395   } else { // OPC_V16C8MPY
05396     // The WN simplifier always orders a multiply between an iload and a ldid 
05397     // as 'iload * ldid' and so we need to commute the operation to make sure 
05398     // address folding opportunity is exposed to EBO.
05399     TN* src1_t = src2;
05400     TN* src2_t = src1;
05401     if (!Enable_Cfold_Aggressive) {
05402       src1_t = src1;
05403       src2_t = src2;
05404     }
05405     TN* tmp1 = Build_TN_Like(src1);
05406     TN* tmp2 = Build_TN_Like(src1);
05407     TN* tmp3 = Build_TN_Like(src1);
05408     TN* tmp4 = Build_TN_Like(src1);
05409     TN* tmp5 = Build_TN_Like(src1);
05410     TN* tmp6 = Build_TN_Like(src1);
05411     
05412     Build_OP(TOP_fmovddup, tmp1, src2_t, ops);
05413     Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops);
05414     Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
05415     Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
05416     Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
05417     Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
05418     Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops);
05419   }
05420   return;
05421 }
05422 
05423 static void Expand_Complex_Divide( OPCODE opcode, TN *result, 
05424              TN *src1, TN *src2, OPS *ops )
05425 {
05426   FmtAssert(opcode == OPC_V16C4DIV, ("NYI"));
05427   
05428   if (opcode == OPC_V16C4DIV) {
05429     TN* tmp1 = Build_TN_Like(src1);
05430     TN* tmp2 = Build_TN_Like(src1);
05431     TN* tmp3 = Build_TN_Like(src1);
05432     TN* tmp4 = Build_TN_Like(src1);
05433     TN* tmp5 = Build_TN_Like(src1);
05434     TN* tmp6 = Build_TN_Like(src1);
05435     TN* tmp7 = Build_TN_Like(src1);
05436     TN* tmp8 = Build_TN_Like(src1);
05437     TN* tmp9 = Build_TN_Like(src1);
05438     TN* tmp10 = Build_TN_Like(src1);
05439     TN* tmp11 = Build_TN_Like(src1);
05440     TN* tmp12 = Build_TN_Like(src1);
05441     TN* tmp13 = Build_TN_Like(src1);
05442     TN* tmp14 = Build_TN_Like(src1);
05443     TN* tmp15 = result;
05444     TN* tmp16 = Build_TN_Like(src1);
05445     TN* tmp17 = Build_TN_Like(src1);
05446     TN* tmp18 = Build_TN_Like(src1);
05447     TN* tmp19 = Build_TN_Like(src1);
05448     TN* tmp20 = Build_TN_Like(src1);
05449     TN* tmp21 = Build_TN_Like(src1);
05450     TN* tmp22 = Build_TN_Like(src1);
05451     TN* tmp23 = Build_TN_Like(src1);
05452     TN* tmp24 = Build_TN_Like(src1);
05453     TN* tmp25 = Build_TN_Like(src1);
05454     TN* tmp26 = Build_TN_Like(src1);
05455     TN* tmp27 = Build_TN_Like(src1);
05456     TN* tmp28 = Build_TN_Like(src1);
05457     
05458     Build_OP(TOP_cvtps2pd, tmp1, src1, ops);
05459     Build_OP(TOP_cvtps2pd, tmp2, src2, ops);
05460     Build_OP(TOP_fmul128v64, tmp3, tmp2, tmp2, ops);
05461     Build_OP(TOP_fmovddup, tmp4, tmp2, ops);
05462     Build_OP(TOP_unpckhpd, tmp5, tmp2, tmp2, ops);
05463     Build_OP(TOP_fmul128v64, tmp6, tmp5, tmp1, ops);
05464     Build_OP(TOP_shufpd, tmp7, tmp1, tmp1, Gen_Literal_TN(1, 1), ops);
05465     Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops);
05466     Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
05467     Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
05468     Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
05469     Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops);
05470     Build_OP(TOP_shufpd, tmp13, tmp12, tmp12, Gen_Literal_TN(1, 1), ops);
05471     Build_OP(TOP_fdiv128v64, tmp14, tmp13, tmp9, ops);
05472     Build_OP(TOP_cvtpd2ps, tmp15, tmp14, ops);
05473     Build_OP(TOP_shufps, tmp16, src2, src2, Gen_Literal_TN(238, 1), ops);
05474     Build_OP(TOP_cvtps2pd, tmp17, tmp16, ops);
05475     Build_OP(TOP_fmul128v64, tmp18, tmp17, tmp17, ops);
05476     Build_OP(TOP_fmovddup, tmp19, tmp17, ops);
05477     Build_OP(TOP_unpckhpd, tmp20, tmp17, tmp17, ops);
05478     Build_OP(TOP_fmul128v64, tmp21, tmp20, tmp11, ops);
05479     Build_OP(TOP_shufpd, tmp22, tmp11, tmp11, Gen_Literal_TN(1, 1), ops);
05480     Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
05481     Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
05482     Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops);
05483     Build_OP(TOP_shufpd, tmp26, tmp25, tmp25, Gen_Literal_TN(1, 1), ops);
05484     Build_OP(TOP_fdiv128v64, tmp27, tmp26, tmp24, ops);
05485     Build_OP(TOP_cvtpd2ps, tmp28, tmp27, ops);
05486     Build_OP(TOP_movlhps, result, tmp28, ops);
05487     Set_OP_cond_def_kind( OPS_last(ops), OP_ALWAYS_COND_DEF );
05488   }
05489 
05490   return;
05491 }
05492 
05493 void Expand_Flop( OPCODE opcode, TN *result, TN *src1, TN *src2, TN *src3, OPS *ops )
05494 {
05495   TOP opc;
05496 
05497   switch (opcode) {
05498   case OPC_F4ADD:
05499     if( Is_Target_SSE2() ){
05500       opc = TOP_addss;
05501       break;
05502     }  /* fall thru */
05503   case OPC_F8ADD:
05504     if( Is_Target_SSE2() ){
05505       opc = TOP_addsd;
05506       break;
05507     }  /* fall thru */
05508   case OPC_FQADD:
05509     opc = TOP_fadd;
05510     break;
05511   case OPC_V16F4ADD:
05512   case OPC_V16C4ADD:
05513     opc = TOP_fadd128v32;
05514     break;
05515   case OPC_V16F8ADD:
05516   case OPC_V16C8ADD:
05517     opc = TOP_fadd128v64;
05518     break;
05519   case OPC_V8F4ADD:
05520     opc = TOP_fadd128v32;
05521     break;
05522   case OPC_F4SUB:
05523     if( Is_Target_SSE2() ){
05524       opc = TOP_subss;
05525       break;
05526     }  /* fall thru */
05527   case OPC_F8SUB:
05528     if( Is_Target_SSE2() ){
05529       opc = TOP_subsd;
05530       break;
05531     }  /* fall thru */
05532   case OPC_FQSUB:
05533     opc = TOP_fsub;
05534     break;
05535   case OPC_V16F4SUB:
05536   case OPC_V16C4SUB:
05537     opc = TOP_fsub128v32;
05538     break;
05539   case OPC_V16F8SUB:
05540   case OPC_V16C8SUB:
05541     opc = TOP_fsub128v64;
05542     break;
05543   case OPC_F4MPY:
05544     if( Is_Target_SSE2() ){
05545       opc = TOP_mulss;
05546       break;
05547     }  /* fall thru */
05548   case OPC_F8MPY:
05549     if( Is_Target_SSE2() ){
05550       opc = TOP_mulsd;
05551       break;
05552     }  /* fall thru */
05553   case OPC_FQMPY:
05554     opc = TOP_fmul;
05555     break;
05556   case OPC_V16F4MPY:
05557     opc = TOP_fmul128v32;
05558     break;
05559   case OPC_V16F8MPY:
05560     opc = TOP_fmul128v64;
05561     break;
05562   case OPC_V16I2MPY:
05563     opc = TOP_mul128v16;
05564     break;
05565   case OPC_M8I2MPY:
05566     opc = TOP_pmullw;
05567     break;
05568   case OPC_F4MADD:  // (src2 * src3) + src1
05569   case OPC_F4NMADD: // -((src2 * src3) + src1)
05570   case OPC_F4MSUB:  // (src2 * src3) - src1
05571   case OPC_F4NMSUB: // -((src2 * src3) - src1)
05572   case OPC_F8MADD:  // (src2 * src3) + src1
05573   case OPC_F8NMADD: // -((src2 * src3) + src1)
05574   case OPC_F8MSUB:  // (src2 * src3) - src1
05575   case OPC_F8NMSUB: // -((src2 * src3) - src1)
05576     FmtAssert( false,
05577          ("Expand_Flop: Unsupported opcode (%s)", OPCODE_name(opcode)) );
05578     break;
05579   case OPC_F4DIV:
05580     if( Is_Target_SSE2() ){
05581       opc = TOP_divss;
05582       break;
05583     }  /* fall thru */
05584   case OPC_F8DIV:
05585     if( Is_Target_SSE2() ){
05586       opc = TOP_divsd;
05587       break;
05588     }  /* fall thru */
05589   case OPC_FQDIV:
05590     opc = TOP_fdiv;
05591     break;
05592   case OPC_V16F4DIV:
05593     opc = TOP_fdiv128v32;
05594     break;
05595   case OPC_V16F8DIV:
05596     opc = TOP_fdiv128v64;
05597     break;
05598   case OPC_F4RECIP:
05599   case OPC_F8RECIP:
05600   case OPC_FQRECIP:
05601   case OPC_V16F4RECIP:
05602   case OPC_V16F8RECIP:
05603     Expand_Recip( result, src1, OPCODE_rtype(opcode), ops );
05604     return;
05605   case OPC_F4RSQRT:
05606   case OPC_F4ATOMIC_RSQRT:  // bug 6123
05607     opc = TOP_rsqrtss;
05608     break;
05609   case OPC_V16F4RSQRT:
05610   case OPC_V16F4ATOMIC_RSQRT: // bug 6123
05611     opc = TOP_frsqrt128v32;
05612     break;
05613 
05614   case OPC_V16C4MPY:
05615   case OPC_V16C8MPY:
05616     Expand_Complex_Multiply(opcode, result, src1, src2, ops);
05617     return;
05618 
05619   case OPC_V16C4DIV:
05620     Expand_Complex_Divide(opcode, result, src1, src2, ops);
05621     return;
05622 
05623   default:
05624     #pragma mips_frequency_hint NEVER
05625     FmtAssert(FALSE, ("Unimplemented flop: %s", OPCODE_name(opcode)));
05626   }
05627 
05628   Build_OP( opc, result, src1, src2, ops );
05629 }
05630 
05631 void
05632 Expand_Replicate (OPCODE op, TN *result, TN *op1, OPS *ops)
05633 {
05634   TN* tmp = Build_TN_Like(result);
05635 
05636   switch (op) {
05637   case OPC_V16C4F8REPLICA:
05638     Expand_Copy(result, op1, MTYPE_C4, ops);
05639     Build_OP(TOP_shufps, result, result, op1, Gen_Literal_TN(68, 4), ops);
05640     break;
05641   case OPC_V16I8I8REPLICA:
05642   {
05643     TY_IDX ty = MTYPE_To_TY( MTYPE_I8 );
05644     ST* st = Gen_Temp_Symbol( ty, "movd" );
05645     Allocate_Temp_To_Memory( st );
05646     Exp_Store( MTYPE_I8, op1, st, 0, ops, 0);
05647     Exp_Load( MTYPE_F8, MTYPE_F8, tmp, st, 0, ops, 0);
05648     Expand_Copy(result, tmp, MTYPE_F8, ops);
05649     Build_OP(TOP_unpcklpd, result, result, tmp, ops);
05650     break;
05651   }
05652   case OPC_V16F8F8REPLICA:
05653     Expand_Copy(result, op1, MTYPE_F8, ops);
05654     Build_OP(TOP_unpcklpd, result, result, op1, ops);
05655     break;
05656   case OPC_V16I4I4REPLICA:
05657   {
05658     TY_IDX ty = MTYPE_To_TY( MTYPE_I4 );
05659     ST* st = Gen_Temp_Symbol( ty, "movd" );
05660     Allocate_Temp_To_Memory( st );
05661     Exp_Store( MTYPE_I4, op1, st, 0, ops, 0);
05662     Exp_Load( MTYPE_F4, MTYPE_F4, tmp, st, 0, ops, 0);
05663     Expand_Copy(result, tmp, MTYPE_F4, ops);
05664     Build_OP(TOP_unpcklps, result, result, tmp, ops);
05665     Build_OP(TOP_unpcklps, result, result, result, ops);
05666     break;
05667   }
05668   case OPC_V16F4F4REPLICA:
05669     Expand_Copy(result, op1, MTYPE_F4, ops);
05670     Build_OP(TOP_unpcklps, result, result, op1, ops);
05671     Build_OP(TOP_unpcklps, result, result, result, ops);
05672     break;
05673   case OPC_V16I2I2REPLICA:     
05674   {
05675     TN* tmp_a = Build_TN_Like(result);
05676     Build_OP(TOP_movg2x, tmp, op1, ops);
05677     Build_OP(TOP_punpcklwd, tmp_a, tmp, tmp, ops);
05678     Build_OP(TOP_pshufd, result, tmp_a, Gen_Literal_TN(0, 1), ops);
05679     break;
05680   }
05681   case OPC_V16I1I1REPLICA:
05682   {
05683     TN* tmp_a = Build_TN_Like(result);
05684     TN* tmp_b = Build_TN_Like(result);
05685     Build_OP(TOP_movg2x, tmp, op1, ops);
05686     Build_OP(TOP_punpcklbw, tmp_a, tmp, tmp, ops);
05687     Build_OP(TOP_punpcklbw, tmp_b, tmp_a, tmp_a, ops);
05688     Build_OP(TOP_pshufd, result, tmp_b, Gen_Literal_TN(0, 1), ops);
05689     break;
05690   }
05691   default:
05692     FmtAssert(FALSE, ("Handle this case"));
05693     break;
05694   }
05695   return;
05696 }
05697 
05698 void
05699 Expand_Reduce_Add (OPCODE op, TN *result, TN *op1, OPS *ops)
05700 {
05701   switch (op) {
05702   case OPC_F8V16F8REDUCE_ADD: 
05703   {
05704     TN* tmp = Build_TN_Like(op1);
05705     Build_OP(TOP_movapd, tmp, op1, ops);
05706     if ( Is_Target_SSE3() ) {
05707       Build_OP(TOP_fhadd128v64, result, tmp, tmp, ops);
05708     } else {
05709       TN* tmp_a = Build_TN_Like(op1);
05710       Build_OP(TOP_unpckhpd, tmp_a, tmp, op1, ops);
05711       Build_OP(TOP_addsd, result, tmp, tmp_a, ops);
05712     }
05713     break;
05714   }
05715   case OPC_F4V16F4REDUCE_ADD: 
05716   {
05717     TN* tmp = Build_TN_Like(op1);
05718     Build_OP(TOP_movaps, tmp, op1, ops);
05719     if ( Is_Target_SSE3() ) {
05720       Build_OP(TOP_fhadd128v32, tmp, op1, op1, ops);
05721       Build_OP(TOP_fhadd128v32, result, tmp, tmp, ops);
05722     } else {
05723       TN* tmp_a = Build_TN_Like(op1);
05724       TN* tmp_b = Build_TN_Like(op1);
05725       TN* tmp_c = Build_TN_Like(op1);
05726       TN* tmp_d = Build_TN_Like(op1);
05727       Build_OP(TOP_movhlps, tmp_a, tmp, ops);
05728       Build_OP(TOP_fadd128v32, tmp_b, tmp, tmp_a, ops);
05729       Build_OP(TOP_movaps, tmp_c, tmp_b, ops);
05730       Build_OP(TOP_shufps, tmp_d, tmp_c, tmp_c, Gen_Literal_TN(1, 1), ops);
05731       Build_OP(TOP_addss, result, tmp_b, tmp_d, ops);
05732     }
05733     break;
05734   }
05735   case OPC_I4V16I1REDUCE_ADD:
05736   {
05737     TN* tmp = Build_TN_Like(op1);
05738     TN* tmp_a = Build_TN_Like(op1);
05739     TN* tmp_b = Build_TN_Like(op1);
05740     TN* tmp_c = Build_TN_Like(op1);
05741     TN* tmp_d = Build_TN_Like(op1);
05742     TN* tmp_e = Build_TN_Like(op1);
05743     TN* tmp_f = Build_TN_Like(op1);
05744     TN* tmp_g = Build_TN_Like(op1);
05745     TN* tmp_h = Build_TN_Like(op1);
05746     TN* tmp_i = Build_TN_Like(op1);
05747     TN* tmp_j = Build_TN_Like(op1);
05748     TN* tmp_k = Build_TN_Like(op1);
05749     Build_OP(TOP_movdq, tmp, op1, ops);
05750     Build_OP(TOP_psrldq, tmp_a, tmp, Gen_Literal_TN(8, 1), ops);
05751     Build_OP(TOP_add128v8, tmp_b, tmp, tmp_a, ops);
05752     Build_OP(TOP_movdq, tmp_c, tmp_b, ops);
05753     Build_OP(TOP_psrldq, tmp_d, tmp_c, Gen_Literal_TN(4, 1), ops);
05754     Build_OP(TOP_add128v8, tmp_e, tmp_c, tmp_d, ops);
05755     Build_OP(TOP_movdq, tmp_f, tmp_e, ops);
05756     Build_OP(TOP_psrldq, tmp_g, tmp_f, Gen_Literal_TN(2, 1), ops);
05757     Build_OP(TOP_add128v8, tmp_h, tmp_f, tmp_g, ops);
05758     Build_OP(TOP_movdq, tmp_i, tmp_h, ops);
05759     Build_OP(TOP_psrldq, tmp_j, tmp_i, Gen_Literal_TN(1, 1), ops);
05760     Build_OP(TOP_add128v8, tmp_k, tmp_i, tmp_j, ops);
05761     Build_OP(TOP_movx2g, result, tmp_k, ops);
05762     break;
05763   }
05764   case OPC_I4V16I2REDUCE_ADD:
05765   {
05766     TN* tmp = Build_TN_Like(op1);
05767     TN* tmp_a = Build_TN_Like(op1);
05768     TN* tmp_b = Build_TN_Like(op1);
05769     TN* tmp_c = Build_TN_Like(op1);
05770     TN* tmp_d = Build_TN_Like(op1);
05771     TN* tmp_e = Build_TN_Like(op1);
05772     TN* tmp_f = Build_TN_Like(op1);
05773     TN* tmp_g = Build_TN_Like(op1);
05774     TN* tmp_h = Build_TN_Like(op1);
05775     TN* tmp_i = Build_TN_Like(op1);
05776     TN* tmp_j = Build_TN_Like(op1);
05777     TN* tmp_k = Build_TN_Like(op1);
05778     Build_OP(TOP_movdq, tmp, op1, ops);
05779     Build_OP(TOP_psrldq, tmp_a, tmp, Gen_Literal_TN(8, 1), ops);
05780     Build_OP(TOP_add128v16, tmp_b, tmp, tmp_a, ops);
05781     Build_OP(TOP_movdq, tmp_c, tmp_b, ops);
05782     Build_OP(TOP_psrldq, tmp_d, tmp_c, Gen_Literal_TN(4, 1), ops);
05783     Build_OP(TOP_add128v16, tmp_e, tmp_c, tmp_d, ops);
05784     Build_OP(TOP_movdq, tmp_f, tmp_e, ops);
05785     Build_OP(TOP_psrldq, tmp_g, tmp_f, Gen_Literal_TN(2, 1), ops);
05786     Build_OP(TOP_add128v16, tmp_h, tmp_f, tmp_g, ops);
05787     Build_OP(TOP_movx2g, result, tmp_h, ops);
05788     break;
05789   }
05790   case OPC_I4V16I4REDUCE_ADD:
05791   {
05792     TN* tmp = Build_TN_Like(op1);
05793     TN* tmp_a = Build_TN_Like(op1);
05794     TN* tmp_b = Build_TN_Like(op1);
05795     TN* tmp_c = Build_TN_Like(op1);
05796     TN* tmp_d = Build_TN_Like(op1);
05797     TN* tmp_e = Build_TN_Like(op1);
05798     Build_OP(TOP_movdq, tmp, op1, ops);
05799     Build_OP(TOP_psrldq, tmp_a, tmp, Gen_Literal_TN(8, 1), ops);
05800     Build_OP(TOP_add128v32, tmp_b, tmp, tmp_a, ops);
05801     Build_OP(TOP_movdq, tmp_c, tmp_b, ops);
05802     Build_OP(TOP_psrldq, tmp_d, tmp_c, Gen_Literal_TN(4, 1), ops);
05803     Build_OP(TOP_add128v32, tmp_e, tmp_c, tmp_d, ops);
05804     Build_OP(TOP_movx2g, result, tmp_e, ops);
05805     break;
05806   }
05807   case OPC_I8V16I8REDUCE_ADD:
05808   {
05809     TN* tmp = Build_TN_Like(op1);
05810     TN* tmp_a = Build_TN_Like(op1);
05811     TN* tmp_b = Build_TN_Like(op1);
05812     Build_OP(TOP_movdq, tmp, op1, ops);
05813     Build_OP(TOP_psrldq, tmp_a, tmp, Gen_Literal_TN(8, 1), ops);
05814     Build_OP(TOP_add128v64, tmp_b, tmp, tmp_a, ops);
05815     if (Is_Target_64bit())
05816       Build_OP(TOP_movx2g64, result, tmp_b, ops);
05817     else {
05818       TN* result_hi = Create_TN_Pair(result, MTYPE_I8);      
05819       TN *tmp_c = Build_TN_Like(op1);
05820       Build_OP(TOP_movx2g, result, tmp_b, ops);      
05821       Build_OP(TOP_psrlq128v64, tmp_c, tmp_b, Gen_Literal_TN(32, 4), ops);
05822       Build_OP(TOP_movx2g, result_hi, tmp_c, ops);      
05823     }
05824     break;
05825   }
05826   default:
05827     FmtAssert(FALSE, ("Expand_Reduce_Add: Unsupported opcode (%s)", OPCODE_name(op)));
05828   }
05829   return;
05830 }
05831 
05832 void
05833 Expand_Reduce_Mpy (OPCODE op, TN *result, TN *op1, OPS *ops)
05834 {
05835   switch (op) {
05836   case OPC_F8V16F8REDUCE_MPY: 
05837   {
05838     TN* tmp = Build_TN_Like(op1);
05839     TN* tmp_a = Build_TN_Like(op1);
05840     Build_OP(TOP_movapd, tmp, op1, ops);
05841     Build_OP(TOP_unpckhpd, tmp_a, tmp, tmp, ops);
05842     Build_OP(TOP_mulsd, result, tmp_a, tmp, ops);
05843     break;
05844   }
05845   case OPC_F4V16F4REDUCE_MPY:
05846   {
05847     TN* tmp = Build_TN_Like(op1);
05848     TN* tmp_a = Build_TN_Like(op1);
05849     TN* tmp_b = Build_TN_Like(op1);
05850     TN* tmp_c = Build_TN_Like(op1);
05851     TN* tmp_d = Build_TN_Like(op1);
05852     Build_OP(TOP_movaps, tmp, op1, ops);
05853     Build_OP(TOP_movhlps, tmp_a, tmp, ops);
05854     Build_OP(TOP_fmul128v32, tmp_b, tmp, tmp_a, ops);
05855     Build_OP(TOP_movaps, tmp_c, tmp_b, ops);
05856     Build_OP(TOP_shufps, tmp_d, tmp_c, tmp_c, Gen_Literal_TN(1, 1), ops);
05857     Build_OP(TOP_mulss, result, tmp_b, tmp_d, ops);
05858     break;
05859   }
05860   case OPC_I4V16I2REDUCE_MPY:
05861   {
05862     TN* tmp = Build_TN_Like(op1);
05863     TN* tmp_a = Build_TN_Like(op1);
05864     TN* tmp_b = Build_TN_Like(op1);
05865     TN* tmp_c = Build_TN_Like(op1);
05866     TN* tmp_d = Build_TN_Like(op1);
05867     TN* tmp_e = Build_TN_Like(op1);
05868     TN* tmp_f = Build_TN_Like(op1);
05869     TN* tmp_g = Build_TN_Like(op1);
05870     TN* tmp_h = Build_TN_Like(op1);
05871     Build_OP(TOP_movdq, tmp, op1, ops);
05872     Build_OP(TOP_psrldq, tmp_a, tmp, Gen_Literal_TN(8, 1), ops); 
05873     Build_OP(TOP_mul128v16, tmp_b, tmp, tmp_a, ops);
05874     Build_OP(TOP_movdq, tmp_c, tmp_b, ops);
05875     Build_OP(TOP_psrldq, tmp_d, tmp_c, Gen_Literal_TN(4, 1), ops);
05876     Build_OP(TOP_mul128v16, tmp_e, tmp_c, tmp_d, ops);
05877     Build_OP(TOP_movdq, tmp_f, tmp_e, ops);
05878     Build_OP(TOP_psrldq, tmp_g, tmp_f, Gen_Literal_TN(2, 1), ops);
05879     Build_OP(TOP_mul128v16, tmp_h, tmp_f, tmp_g, ops); 
05880     Build_OP(TOP_movx2g, result, tmp_h, ops);
05881     break;
05882   }
05883   default:
05884     FmtAssert( FALSE,
05885         ("Expand_Reduce_Mpy: Unsupported opcode (%s)", OPCODE_name(op) ) );
05886   }
05887   return;
05888 }
05889 
05890 void
05891 Expand_Reduce_Max (OPCODE op, TN *result, TN *op1, OPS *ops)
05892 {
05893   switch(op) {
05894   case OPC_F8V16F8REDUCE_MAX:
05895   {
05896     TN* tmp = Build_TN_Like(op1);
05897     TN* tmp_a = Build_TN_Like(op1);
05898     Build_OP(TOP_movapd, tmp, op1, ops);
05899     Build_OP(TOP_unpckhpd, tmp_a, tmp, tmp, ops);
05900     Build_OP(TOP_maxsd, result, tmp_a, tmp, ops);
05901     break;
05902   }
05903   case OPC_F4V16F4REDUCE_MAX:
05904   {
05905     TN* tmp = Build_TN_Like(op1);
05906     TN* tmp_a = Build_TN_Like(op1);
05907     TN* tmp_b = Build_TN_Like(op1);
05908     TN* tmp_c = Build_TN_Like(op1);
05909     TN* tmp_d = Build_TN_Like(op1);
05910     Build_OP(TOP_movaps, tmp, op1, ops);
05911     Build_OP(TOP_movhlps, tmp_a, tmp, ops);
05912     Build_OP(TOP_fmax128v32, tmp_b, tmp, tmp_a, ops);
05913     Build_OP(TOP_movaps, tmp_c, tmp_b, ops);
05914     Build_OP(TOP_shufps, tmp_d, tmp_c, tmp_c, Gen_Literal_TN(1, 1), ops);
05915     Build_OP(TOP_maxss, result, tmp_c, tmp_d, ops);
05916     break;
05917   }
05918   case OPC_I4V16I4REDUCE_MAX:
05919   {
05920     TN* tmp1 = Build_TN_Like(op1);
05921     TN* tmp2 = Build_TN_Like(op1);
05922     TN* tmp3 = Build_TN_Like(op1);
05923     TN* tmp4 = Build_TN_Like(op1);
05924     TN* tmp5 = Build_TN_Like(op1);
05925     TN* tmp6 = Build_TN_Like(op1);
05926     TN* tmp7 = Build_TN_Like(op1);
05927     TN* tmp8 = Build_TN_Like(op1);
05928     TN* tmp9 = Build_TN_Like(op1);
05929     TN* tmp10 = Build_TN_Like(op1);
05930     TN* tmp11 = Build_TN_Like(op1);
05931     TN* tmp12 = Build_TN_Like(op1);
05932     TN* tmp13 = Build_TN_Like(op1);
05933     TN* tmp14 = Build_TN_Like(op1);
05934     Build_OP(TOP_movdq, tmp1, op1, ops);
05935     Build_OP(TOP_movdq, tmp2, op1, ops);
05936     Build_OP(TOP_psrldq, tmp3, tmp1, Gen_Literal_TN(8, 1), ops);
05937     Build_OP(TOP_xor128v32, tmp4, op1, tmp3, ops);
05938     Build_OP(TOP_cmpgt128v32, tmp5, tmp2, tmp3, ops);
05939     Build_OP(TOP_and128v32, tmp6, tmp5, tmp4, ops);
05940     Build_OP(TOP_xor128v32, tmp7, tmp6, tmp3, ops);
05941     Build_OP(TOP_movdq, tmp8, tmp7, ops);
05942     Build_OP(TOP_movdq, tmp9, tmp7, ops);
05943     Build_OP(TOP_psrldq, tmp10, tmp8, Gen_Literal_TN(4, 1), ops);
05944     Build_OP(TOP_xor128v32, tmp11, tmp7, tmp10, ops);
05945     Build_OP(TOP_cmpgt128v32, tmp12, tmp9, tmp10, ops);
05946     Build_OP(TOP_and128v32, tmp13, tmp12, tmp11, ops);
05947     Build_OP(TOP_xor128v32, tmp14, tmp13, tmp10, ops);
05948     Build_OP(TOP_movx2g, result, tmp14, ops);
05949     break;
05950   }
05951   default: 
05952     FmtAssert( FALSE,
05953          ("Expand_Reduce_Max: Unsupported opcode (%s)", OPCODE_name(op) ) );
05954   }
05955   return;
05956 }
05957 
05958 void
05959 Expand_Reduce_Min (OPCODE op, TN *result, TN *op1, OPS *ops)
05960 {
05961   switch(op) {
05962   case OPC_F8V16F8REDUCE_MIN:
05963   {
05964     TN* tmp = Build_TN_Like(op1);
05965     TN* tmp_a = Build_TN_Like(op1);
05966     Build_OP(TOP_movapd, tmp, op1, ops);
05967     Build_OP(TOP_unpckhpd, tmp_a, tmp, tmp, ops);
05968     Build_OP(TOP_minsd, result, tmp_a, tmp, ops);
05969     break;
05970   }
05971   case OPC_F4V16F4REDUCE_MIN:
05972   {
05973     TN* tmp = Build_TN_Like(op1);
05974     TN* tmp_a = Build_TN_Like(op1);
05975     TN* tmp_b = Build_TN_Like(op1);
05976     TN* tmp_c = Build_TN_Like(op1);
05977     TN* tmp_d = Build_TN_Like(op1);
05978     Build_OP(TOP_movaps, tmp, op1, ops);
05979     Build_OP(TOP_movhlps, tmp_a, tmp, ops);
05980     Build_OP(TOP_fmin128v32, tmp_b, tmp, tmp_a, ops);
05981     Build_OP(TOP_movaps, tmp_c, tmp_b, ops);
05982     Build_OP(TOP_shufps, tmp_d, tmp_c, tmp_c, Gen_Literal_TN(1, 1), ops);
05983     Build_OP(TOP_minss, result, tmp_c, tmp_d, ops);
05984     break;
05985   }
05986   case OPC_I4V16I4REDUCE_MIN:
05987   {    
05988     TN* tmp1 = Build_TN_Like(op1);
05989     TN* tmp2 = Build_TN_Like(op1);
05990     TN* tmp3 = Build_TN_Like(op1);
05991     TN* tmp4 = Build_TN_Like(op1);
05992     TN* tmp5 = Build_TN_Like(op1);
05993     TN* tmp6 = Build_TN_Like(op1);
05994     TN* tmp7 = Build_TN_Like(op1);
05995     TN* tmp8 = Build_TN_Like(op1);
05996     TN* tmp9 = Build_TN_Like(op1);
05997     TN* tmp10 = Build_TN_Like(op1);
05998     TN* tmp11 = Build_TN_Like(op1);
05999     TN* tmp12 = Build_TN_Like(op1);
06000     TN* tmp13 = Build_TN_Like(op1);
06001     TN* tmp14 = Build_TN_Like(op1);
06002     Build_OP(TOP_movdq, tmp1, op1, ops);
06003     Build_OP(TOP_movdq, tmp2, op1, ops);
06004     Build_OP(TOP_psrldq, tmp3, tmp1, Gen_Literal_TN(8, 1), ops);
06005     Build_OP(TOP_cmpgt128v32, tmp4, tmp2, tmp3, ops);   
06006     Build_OP(TOP_xor128v32, tmp5, tmp3, op1, ops);
06007     Build_OP(TOP_and128v32, tmp6, tmp5, tmp4, ops);
06008     Build_OP(TOP_xor128v32, tmp7, tmp6, op1, ops);
06009     Build_OP(TOP_movdq, tmp8, tmp7, ops);
06010     Build_OP(TOP_movdq, tmp9, tmp7, ops);
06011     Build_OP(TOP_psrldq, tmp10, tmp8, Gen_Literal_TN(4, 1), ops);
06012     Build_OP(TOP_cmpgt128v32, tmp11, tmp9, tmp10, ops);    
06013     Build_OP(TOP_xor128v32, tmp12, tmp7, tmp10, ops);
06014     Build_OP(TOP_and128v32, tmp13, tmp12, tmp11, ops);
06015     Build_OP(TOP_xor128v32, tmp14, tmp13, tmp7, ops);
06016     Build_OP(TOP_movx2g, result, tmp14, ops);
06017     break;
06018   }
06019   default: 
06020     FmtAssert( FALSE,
06021          ("Expand_Reduce_Min: Unsupported opcode (%s)", OPCODE_name(op) ) );
06022   }
06023   return;
06024 }
06025 
06026 void
06027 Expand_Shuffle (OPCODE opc, TN* result, TN* op1, VARIANT variant, OPS *ops)
06028 {
06029   FmtAssert(variant == V_SHUFFLE_REVERSE, ("NYI"));
06030   switch(opc) {
06031   case OPC_V16C8V16C8SHUFFLE:
06032     Build_OP(TOP_shufpd, result, op1, op1, Gen_Literal_TN(0x1, 1), ops);
06033     break;    
06034   case OPC_V16F4V16F4SHUFFLE:
06035   case OPC_V16I4V16I4SHUFFLE:
06036     Build_OP(TOP_pshufd, result, op1, Gen_Literal_TN(0x1B, 1), ops);
06037     break;
06038   case OPC_V16I8V16I8SHUFFLE:
06039   case OPC_V16F8V16F8SHUFFLE:
06040     Build_OP(TOP_movhlps, result, op1, ops);
06041     Build_OP(TOP_movlhps, result, op1, ops);
06042     Set_OP_cond_def_kind( OPS_last(ops), OP_ALWAYS_COND_DEF );
06043     break;    
06044   case OPC_V16I2V16I2SHUFFLE:
06045     {
06046       TN* tmp1 = Build_TN_Like(result);
06047       TN* tmp2 = Build_TN_Like(result);
06048       Build_OP(TOP_movhlps, tmp1, op1, ops);
06049       Build_OP(TOP_movlhps, tmp1, op1, ops);
06050       Set_OP_cond_def_kind( OPS_last(ops), OP_ALWAYS_COND_DEF );
06051       Build_OP(TOP_pshuflw, tmp2, tmp1, Gen_Literal_TN(0x1B, 1), ops);
06052       Build_OP(TOP_pshufhw, result, tmp2, Gen_Literal_TN(0x1B, 1), ops);
06053       break;
06054     }
06055   default:
06056     FmtAssert(FALSE, ("NYI"));
06057   }
06058   return;
06059 }
06060 
06061 extern void
06062 Init_CG_Expand (void)
06063 {
06064   static BOOL Initialized = FALSE;
06065 
06066   // per PU:
06067   Trace_Exp = Get_Trace (TP_CGEXP, 1);
06068   /* whirl2ops uses -ttexp:2 */
06069   Trace_Exp2 = Get_Trace (TP_CGEXP, 4);
06070   
06071   if (Initialized) return;
06072   Initialized = TRUE;
06073   // once per file:
06074   Initialize_Branch_Variants();
06075 }
06076 
06077 
06078 /* ======================================================================
06079  * Exp_COPY_Ext
06080  * 
06081  * Generate a register transfer copy from 'src_tn' to 'tgt_tn' with
06082  * appropriate sign/zero extension.
06083  * ======================================================================*/
06084 void 
06085 Exp_COPY_Ext (TOP opcode, TN *tgt_tn, TN *src_tn, OPS *ops)
06086 {
06087   TOP new_op;
06088   switch (opcode) {
06089   case TOP_ldx8_32:
06090   case TOP_ldxx8_32:
06091   case TOP_ld8_32_n32:
06092   case TOP_ld8_32:
06093   case TOP_movsbl:
06094     new_op = TOP_movsbl;
06095     break;
06096   case TOP_ldu8_32_n32:
06097   case TOP_ldu8_32:
06098   case TOP_ldxu8_32:
06099   case TOP_ldxxu8_32:
06100   case TOP_movzbl:
06101     new_op = TOP_movzbl;
06102     break;
06103   case TOP_ld16_32_n32:
06104   case TOP_ld16_32:
06105   case TOP_ldx16_32:
06106   case TOP_ldxx16_32:
06107   case TOP_movswl:
06108     new_op = TOP_movswl;
06109     break;
06110   case TOP_ldu16_32_n32:
06111   case TOP_ldu16_32:
06112   case TOP_ldxu16_32:
06113   case TOP_ldxxu16_32:
06114   case TOP_movzwl:
06115     new_op = TOP_movzwl;
06116     break;
06117   case TOP_ld8_64:
06118   case TOP_ldx8_64:
06119   case TOP_ldxx8_64:
06120   case TOP_ld8_64_off:
06121   case TOP_movsbq:
06122     new_op = TOP_movsbq;
06123     break;
06124   case TOP_ldu8_64:
06125   case TOP_ldxu8_64:
06126   case TOP_ldxxu8_64:
06127   case TOP_ldu8_64_off:
06128   case TOP_movzbq:
06129     new_op = TOP_movzbq;
06130     break;
06131   case TOP_ld16_64:
06132   case TOP_ldx16_64:
06133   case TOP_ldxx16_64:
06134   case TOP_ld16_64_off:
06135   case TOP_movswq:
06136     new_op = TOP_movswq;
06137     break;
06138   case TOP_ldu16_64:
06139   case TOP_ldxu16_64:
06140   case TOP_ldxxu16_64:
06141   case TOP_ldu16_64_off:
06142   case TOP_movzwq:
06143     new_op = TOP_movzwq;
06144     break;
06145   case TOP_ld32_64:
06146   case TOP_ldx32_64:
06147   case TOP_ldxx32_64:
06148   case TOP_ld32_64_off:
06149   case TOP_movslq:
06150     new_op = TOP_movslq;
06151     break;
06152   case TOP_ld32_n32:
06153   case TOP_mov32:
06154     new_op = TOP_mov32;
06155     break;
06156   case TOP_fmovsldup:
06157   case TOP_fmovsldupx:
06158   case TOP_fmovsldupxx:
06159   case TOP_fmovsldupxxx:
06160     new_op = TOP_fmovsldup;
06161     break;
06162   case TOP_fmovshdup:
06163   case TOP_fmovshdupx:
06164   case TOP_fmovshdupxx:
06165   case TOP_fmovshdupxxx:
06166     new_op = TOP_fmovshdup;
06167     break;
06168   case TOP_fmovddupx:
06169   case TOP_fmovddupxx:
06170   case TOP_fmovddupxxx:
06171     new_op = TOP_fmovddup;
06172     break;
06173 
06174   default:
06175     FmtAssert( FALSE, ("Exp_COPY_Ext: Unsupported opcode (%s)", TOP_Name(opcode)) );
06176   }
06177   Build_OP( new_op, tgt_tn, src_tn, ops );
06178   // TODO: Are the extensions copies?
06179   // Set_OP_copy (OPS_last(ops));
06180 }
06181 
06182 /* ======================================================================
06183  * Exp_COPY
06184  * 
06185  * Generate a register transfer copy from 'src_tn' to 'tgt_tn'. 
06186  * ======================================================================*/
06187 void 
06188 Exp_COPY (TN *tgt_tn, TN *src_tn, OPS *ops, BOOL copy_pair)
06189 {
06190   // Warning: Don't return NOP even if src_tn == tgt_tn.  EBO expects a real
06191   // move OP in order to track the usage info of src_tn.
06192 
06193   // In m64, src_tn and tgt_tn can have different sizes.  If the sizes differ,
06194   // use 64-bit copy.  (See example in bug 14429.)
06195   // 
06196   // In m32, EBO can copy from 8-byte TN to 4-byte TN.  This means a 4-byte
06197   // copy where the src is a 4-byte hi/lo part of a 8-byte value (bug 14418).
06198   const BOOL is_64bit = Is_Target_64bit() ?
06199         (TN_size(src_tn) == 8) :
06200         (TN_size(src_tn) == 8 && TN_size(tgt_tn) == 8);
06201   const BOOL is_128bit = (TN_size(src_tn) == 16);
06202 
06203   if( TN_is_constant(src_tn) ){
06204     FmtAssert (TN_has_value(src_tn), ("Exp_COPY: illegal source tn"));
06205     /* expansion for INTCONST doesn't depend on size */
06206     Exp_OP1 (OPC_I4INTCONST, tgt_tn, src_tn, ops);
06207 
06208   } else {
06209     ISA_REGISTER_CLASS tgt_rc = TN_register_class(tgt_tn);
06210     ISA_REGISTER_CLASS src_rc = TN_register_class(src_tn);
06211 
06212     if (tgt_rc == src_rc && tgt_rc == ISA_REGISTER_CLASS_integer) {
06213       Build_OP( is_64bit ? TOP_mov64 : TOP_mov32, tgt_tn, src_tn, ops );
06214       Set_OP_copy (OPS_last(ops));
06215 
06216       // Copy the hi part of a TN pair.  Bug 8755.
06217       if (copy_pair) {
06218   TN *hi_src_tn = NULL;
06219   TN *hi_tgt_tn = NULL;
06220   if (!TN_is_dedicated(src_tn) &&
06221       (hi_src_tn = Get_TN_Pair(src_tn)) != NULL) {
06222     hi_tgt_tn = Get_TN_Pair(tgt_tn);
06223   } else if (!TN_is_dedicated(tgt_tn) &&
06224        (hi_tgt_tn = Get_TN_Pair(tgt_tn)) != NULL) {
06225     hi_src_tn = Get_TN_Pair(src_tn);
06226   }
06227   if (hi_src_tn != NULL ||
06228       hi_tgt_tn != NULL) {
06229     Is_True((hi_tgt_tn != NULL) && (hi_src_tn != NULL),
06230       ("Exp_COPY: src or target TN pair missing"));
06231     Build_OP(is_64bit ? TOP_mov64 : TOP_mov32, hi_tgt_tn, hi_src_tn, ops);
06232     Set_OP_copy (OPS_last(ops));
06233   }
06234       }
06235     } else if (tgt_rc == src_rc && tgt_rc == ISA_REGISTER_CLASS_float) {
06236       /* dedicated TNs always have size 8, so need to check both TNs */
06237       Build_OP(is_128bit ? TOP_movdq: (is_64bit ? TOP_movsd : TOP_movss), 
06238          tgt_tn, src_tn, ops);
06239       Set_OP_copy (OPS_last(ops));
06240 
06241     } else if( tgt_rc == src_rc && tgt_rc == ISA_REGISTER_CLASS_x87 ){
06242       Build_OP( TOP_fmov, tgt_tn, src_tn, ops );
06243       Set_OP_copy (OPS_last(ops));
06244 
06245     } else if( tgt_rc == src_rc && tgt_rc == ISA_REGISTER_CLASS_mmx ){
06246       Build_OP( TOP_mov64_m, tgt_tn, src_tn, ops );
06247       Set_OP_copy (OPS_last(ops));
06248 
06249     } else if( tgt_rc == ISA_REGISTER_CLASS_x87 &&
06250          src_rc == ISA_REGISTER_CLASS_float ){
06251       Expand_Float_To_Float( tgt_tn, src_tn, MTYPE_FQ,
06252            TN_size(src_tn) == 8 ? MTYPE_F8 : MTYPE_F4,
06253            ops );
06254 
06255     } else if( tgt_rc == ISA_REGISTER_CLASS_float &&
06256          src_rc == ISA_REGISTER_CLASS_x87 ){
06257       Expand_Float_To_Float( tgt_tn, src_tn,
06258            TN_size(tgt_tn) == 8 ? MTYPE_F8 : MTYPE_F4,
06259            MTYPE_FQ,
06260            ops );
06261 
06262     } else if( tgt_rc == ISA_REGISTER_CLASS_integer &&
06263          src_rc == ISA_REGISTER_CLASS_float ){
06264       // Exposed by Bug 955
06265       Expand_Float_To_Int_Trunc( tgt_tn, src_tn, 
06266          TN_size(tgt_tn) == 8 ? MTYPE_I8 : MTYPE_I4,
06267          TN_size(src_tn) == 8 ? MTYPE_F8 : MTYPE_F4,
06268          ops );
06269 
06270     } else if( src_rc == ISA_REGISTER_CLASS_integer &&
06271          tgt_rc == ISA_REGISTER_CLASS_mmx) {
06272       // mov int64 to mmx
06273       if (Is_Target_64bit()) {
06274         Build_OP (TOP_movi64_2m, tgt_tn, src_tn, ops);
06275       } else {
06276         // Move the 64-bit value via memory because there is no 64-bit int
06277         // register.
06278         TN *base_tn, *ofst_tn;
06279         Store_To_Temp_Stack(MTYPE_I8, src_tn, "int64_2_mmx", &base_tn, &ofst_tn,
06280                             ops);
06281         Build_OP(TOP_ld64_2m, tgt_tn, base_tn, ofst_tn, ops);
06282       }
06283     } else if( src_rc == ISA_REGISTER_CLASS_float &&
06284          tgt_rc == ISA_REGISTER_CLASS_mmx) {
06285       // mov sse to mmx
06286       Build_OP (TOP_movdq2q, tgt_tn, src_tn, ops);
06287     } else if( src_rc == ISA_REGISTER_CLASS_mmx &&
06288          tgt_rc == ISA_REGISTER_CLASS_float) {
06289       // mov mmx to sse
06290       Build_OP (TOP_movq2dq, tgt_tn, src_tn, ops);
06291     } else {
06292       /* dedicated TNs always have size 8, so need to check both TNs */
06293       FmtAssert( FALSE, ("UNIMPLEMENTED") );
06294 #if 0
06295       if (src_rc == ISA_REGISTER_CLASS_integer) { // tgt_tc is float class
06296   Build_OP(is_double ? TOP_dmtc1 : TOP_mtc1, tgt_tn, src_tn, ops);
06297       } else if (src_rc == ISA_REGISTER_CLASS_float) { // tgt_tc is integer class
06298   Build_OP(is_double ? TOP_dmfc1 : TOP_mfc1, tgt_tn, src_tn, ops);
06299       } else {
06300   FmtAssert(FALSE, ("Unimplemented Copy.\n"));
06301       }
06302 #endif
06303     }
06304   }
06305 }
06306 
06307 static ST *tmp_apply_arg = NULL;
06308 void
06309 Generate_Temp_Apply_Arg ( )
06310 {
06311   TY_IDX tyi;
06312   TY& ty = New_TY(tyi);
06313   TY_Init(ty, 144, KIND_STRUCT, MTYPE_M,
06314           Save_Str("__apply_arg"));
06315   Set_TY_align(tyi, 8);
06316   tmp_apply_arg = New_ST(CURRENT_SYMTAB);
06317   ST_Init(tmp_apply_arg, TY_name_idx(ty),
06318           CLASS_VAR, SCLASS_AUTO, EXPORT_LOCAL, tyi);
06319   Set_ST_is_temp_var(tmp_apply_arg);
06320   Allocate_Object(tmp_apply_arg);
06321 }
06322 
06323 
06324 static void Expand_INTRN_ANINT( TN* result, TN* src, TYPE_ID mtype, OPS* ops )
06325 {
06326   FmtAssert( mtype == MTYPE_F8, ("Expand_INTRN_ANINT: not double type") );
06327 
06328   if( Fast_ANINT_Allowed ){
06329     const double rnd_const = 6755399441055744.0;  /* 0.75d0 * 2d0**53  */
06330     TN* con_tn = Build_TN_Of_Mtype( mtype );
06331     Expand_Const( con_tn,
06332       Gen_Const_Symbol_TN( 0, rnd_const, mtype ),
06333       mtype, ops );
06334 
06335     Build_OP( TOP_addsd, result, src, con_tn, ops );
06336     Build_OP( TOP_subsd, result, result, con_tn, ops );
06337 
06338   } else {
06339 
06340     /* Prepapre all the required data. */
06341 
06342     TN* tmp0 = Build_TN_Like( result );
06343     TN* tmp4 = Build_TN_Like( result );
06344     TN* tmp7 = Build_TN_Like( result );
06345     TN* tmp2 = Build_TN_Like( result );
06346     TN* tmp6 = Build_TN_Like( result );
06347     TN* tmp3 = Build_TN_Like( result );
06348 
06349     TN* sign_mask = Build_TN_Of_Mtype( mtype );
06350     Expand_Const( sign_mask,
06351       Gen_Const_Symbol_TN( 0x8000000000000000ULL, 0.0, MTYPE_I8 ),
06352       mtype, ops );
06353     TN* two_exp_52 = Build_TN_Of_Mtype( mtype );
06354     Expand_Const( two_exp_52,
06355       Gen_Const_Symbol_TN( 0, 4503599627370496.0, mtype ),
06356       mtype, ops );
06357 
06358     TN* point_5 = Build_TN_Of_Mtype( mtype );
06359     Expand_Const( point_5,
06360       Gen_Const_Symbol_TN( 0, 0.5, mtype ),
06361       mtype, ops );
06362 
06363     TN* neg_point_5 = Build_TN_Of_Mtype( mtype );
06364     Expand_Const( neg_point_5,
06365       Gen_Const_Symbol_TN( 0, -0.5, mtype ),
06366       mtype, ops );
06367 
06368     /* Emit the algorithm. */
06369 
06370     Build_OP( TOP_andpd, tmp0, sign_mask, src, ops );
06371     Build_OP( TOP_xorpd, tmp4, tmp0, src, ops );
06372     Build_OP( TOP_addsd, tmp7, tmp4, two_exp_52, ops );
06373     Build_OP( TOP_subsd, tmp7, tmp7, two_exp_52, ops );
06374     Build_OP( TOP_addsd, tmp2, point_5, point_5, ops );
06375     Build_OP( TOP_subsd, tmp6, tmp7, tmp4, ops );
06376     Build_OP( TOP_cmpsd, tmp3,
06377         tmp6, neg_point_5, Generate_Cmp_Ctrl_TN(OPR_LE), ops );
06378     Build_OP( TOP_andpd, tmp3, tmp3, tmp2, ops );
06379     Build_OP( TOP_cmpsd, tmp6,
06380         tmp6, point_5, Generate_Cmp_Ctrl_TN(OPR_GT), ops );
06381     Build_OP( TOP_andpd, tmp6, tmp6, tmp2, ops );
06382     Build_OP( TOP_subsd, result, tmp7, tmp6, ops );
06383     Build_OP( TOP_addsd, result, result, tmp3, ops );
06384     Build_OP( TOP_orpd,  result, result, tmp0, ops );
06385   }
06386 }
06387 
06388 static void
06389 Expand_Count_Trailing_Zeros  (TN *result, TN *op, TYPE_ID mtype, OPS *ops)
06390 {
06391   TOP top = TOP_UNDEFINED;
06392   switch (mtype)
06393   {
06394     case MTYPE_I4:
06395     case MTYPE_U4:
06396       top = TOP_bsf32;
06397       break;
06398 
06399     case MTYPE_I8:
06400     case MTYPE_U8:
06401       top = TOP_bsf64;
06402       break;
06403     default:
06404       Fail_FmtAssertion ("Expand_Count_Trailing_Zeros: unexpected mtype");
06405   }
06406   Build_OP (top, result, op, ops);
06407   return;
06408 }
06409 
06410 // If safezero is TRUE, then be careful to map 0 --> MTYPE_bit_size(mtype).
06411 static void
06412 Expand_Count_Leading_Zeros (TN *result, TN *op, TYPE_ID mtype,
06413           BOOL safezero, OPS *ops)
06414 {
06415   // TN1 :- ldc32 (0x3f)
06416   // TN2 :- bsr32 TN_src;
06417   // TN2 :- cmove TN1 (%rflags)
06418   // TN3 :- xori32 TN2 (0x1f)
06419   if ( mtype != MTYPE_I1 && mtype != MTYPE_U1 &&
06420        mtype != MTYPE_I2 && mtype != MTYPE_U2 &&
06421        mtype != MTYPE_I4 && mtype != MTYPE_U4 &&
06422        mtype != MTYPE_I8 && mtype != MTYPE_U8 )
06423     Fail_FmtAssertion("Expand_Count_Leading_Zeros: unexpected mtype");
06424 
06425   // Bug 14167: Don't use bsr64 on 32-bit target
06426   if ( (mtype == MTYPE_I8 || mtype == MTYPE_U8) && Is_Target_32bit() ) {
06427     Expand_Split_Leading_Zeros( result, op, mtype, safezero, ops );
06428     return;
06429   }
06430 
06431   INT bitsize = MTYPE_bit_size(mtype);
06432   TN *tmp1 = op;
06433   if (bitsize < 32) {
06434     tmp1 = Build_TN_Of_Mtype( MTYPE_I4 );
06435     Expand_Binary_And( tmp1, op, Gen_Literal_TN( (1 << bitsize) - 1, 4 ),
06436            MTYPE_I4, ops );
06437   }
06438   TN *tmp3, *rflags = Rflags_TN();
06439   if (safezero) {
06440     tmp3 = Build_TN_Of_Mtype( MTYPE_I4 );
06441     Exp_Immediate( tmp3, Gen_Literal_TN(2 * bitsize - 1, 4), ops );
06442   }
06443   TN *tmp2 = Build_TN_Of_Mtype( MTYPE_I4 );
06444   Build_OP( bitsize == 64 ? TOP_bsr64 : TOP_bsr32, tmp2, tmp1, ops );
06445   if (safezero) {
06446     Expand_Cmov( TOP_cmove, tmp2, tmp3, rflags, ops );
06447   }
06448   Expand_Binary_Xor( result, tmp2, Gen_Literal_TN(bitsize - 1, 4),
06449          MTYPE_I4, ops );
06450 }
06451 
06452 void
06453 Exp_Intrinsic_Op (INTRINSIC id, TN *result, TN *op0, TN *op1, TN *op2, TYPE_ID mtype, OPS *ops)
06454 {
06455   TN* rflags = Rflags_TN();
06456   TN *result_tmp = Build_TN_Of_Mtype( MTYPE_U1 );
06457   const BOOL is_double = MTYPE_is_size_double(mtype);
06458   const TOP cmp_opcode = ( MTYPE_is_quad(mtype) || !Is_Target_SSE2() )
06459     ? TOP_fucomi : ( is_double ? TOP_comisd : TOP_comiss );
06460 
06461   if (INTRN_return_kind(id) == IRETURN_M8I1 ||
06462       INTRN_return_kind(id) == IRETURN_M8I2 ||
06463       INTRN_return_kind(id) == IRETURN_M8I4) { // convert operands to MMX TNs
06464     if (TN_register_class(op0) != ISA_REGISTER_CLASS_mmx) {
06465       TN *tmp0 = Build_TN_Like(result);
06466       Exp_COPY( tmp0, op0, ops );
06467       op0 = tmp0;
06468     }
06469     if (id != INTRN_PSHUFW && id != INTRN_PSHUFD &&
06470   TN_register_class(op1) != ISA_REGISTER_CLASS_mmx) {
06471       TN *tmp1 = Build_TN_Like(result);
06472       Exp_COPY( tmp1, op1, ops );
06473       op1 = tmp1;
06474     }
06475   }
06476   switch ( id ) {
06477   default: FmtAssert( FALSE,
06478           ("Exp_Intrinsic_Op: unsupported intrinsic (%s)",
06479            INTRN_rt_name(id)) );
06480     // Note: Frontend generates INTRN_CLZ/INTRN_CTZ64 for library calls,
06481     // and INTRN_CLZ32/INTRN_CTZ for clz/dclz/ctz/dctz instruction.
06482     // (Reasons related to x86.)  mtype is the rtype from op0.
06483   case INTRN_CTZ:
06484     Expand_Count_Trailing_Zeros (result, op0, mtype, ops);
06485     break;
06486   case INTRN_I1LEADZ:
06487   case INTRN_I2LEADZ:
06488   case INTRN_I4LEADZ:
06489   case INTRN_I8LEADZ:
06490   case INTRN_CLZ32:
06491     if ( id == INTRN_I1LEADZ ) mtype = MTYPE_I1;
06492     if ( id == INTRN_I2LEADZ ) mtype = MTYPE_I2;
06493     Expand_Count_Leading_Zeros (result, op0, mtype, id != INTRN_CLZ32, ops);
06494     break;
06495   case INTRN_F8ANINT:
06496     Expand_INTRN_ANINT( result, op0, mtype, ops );
06497     break;
06498   case INTRN_SUBSU2:
06499     {
06500       TN* tmp1 = Build_TN_Like(result);
06501       TN* tmp2 = Build_TN_Like(result);
06502       TN* rflags = Rflags_TN();
06503       Build_OP( TOP_movzwl, tmp1, op0, ops );
06504       Build_OP( TOP_sub32, tmp2, tmp1, op1, ops );
06505       Exp_Immediate( result, Gen_Literal_TN (0, 4), FALSE, ops );
06506       Build_OP( TOP_cmpi32, rflags, tmp2, Gen_Literal_TN(0, 4), ops );
06507       Expand_Cmov( TOP_cmovg, result, tmp2, rflags, ops );
06508       break;
06509     }
06510   case INTRN_SUBSV16I2:
06511     Build_OP( TOP_subus128v16, result, op0, op1, ops );
06512     break;
06513 //****************************************************************************
06514 // Bug 9140: generate code for vector F8SIGN and F4SIGN
06515 // algorithm: sign(x,y) = abs(x) | sign(y)
06516 //            (1) remove sign bit of x
06517 //            (2) extract sign bit of y
06518 //            (3) bitwise OR of the above result
06519 //****************************************************************************
06520   case INTRN_SIGNV16F8:
06521      {  
06522       // remove sign bit of x  
06523       TCON thenx = Host_To_Targ (MTYPE_I8, 0x7FFFFFFFFFFFFFFFULL);
06524       TCON nowx  = Create_Simd_Const (MTYPE_V16F8, thenx);
06525       ST *symx = New_Const_Sym (Enter_tcon (nowx), Be_Type_Tbl(TCON_ty(nowx)));
06526       Allocate_Object(symx);
06527       TN *sym_tnx = Gen_Symbol_TN(symx, 0, 0);
06528       TN *tmpx = Build_TN_Like(op0);
06529       Exp_Load(mtype, mtype, tmpx, TN_var(sym_tnx), TN_offset(sym_tnx), ops, 0);
06530       Build_OP(TOP_andpd, tmpx, tmpx, op0, ops);
06531 
06532       // extract sign bit of y
06533       TCON then = Host_To_Targ (MTYPE_I8, 0x8000000000000000ULL);
06534       TCON now  = Create_Simd_Const (MTYPE_V16F8, then);
06535       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
06536       Allocate_Object(sym);
06537       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
06538       TN *tmpy = Build_TN_Like(op1);
06539       Exp_Load(mtype, mtype, tmpy, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
06540       Build_OP(TOP_andpd, tmpy, tmpy, op1, ops);
06541 
06542       // bitwise OR to get result
06543       Build_OP(TOP_orpd, result, tmpx, tmpy, ops);
06544      } 
06545     break;
06546   case INTRN_SIGNV16F4:
06547     {
06548       // remove sign bit of x
06549       TCON thenx = Host_To_Targ (MTYPE_I4, 0x7FFFFFFF);
06550       TCON nowx  = Create_Simd_Const (MTYPE_V16F4, thenx);
06551       ST *symx = New_Const_Sym (Enter_tcon (nowx), Be_Type_Tbl(TCON_ty(nowx)));
06552       Allocate_Object(symx);
06553       TN *sym_tnx = Gen_Symbol_TN(symx, 0, 0);
06554       TN *tmpx = Build_TN_Like(op0);
06555       Exp_Load(mtype, mtype, tmpx, TN_var(sym_tnx), TN_offset(sym_tnx), ops, 0);
06556       Build_OP(TOP_andps, tmpx, tmpx, op0, ops);
06557 
06558       // extract sign bit of y
06559       TCON then = Host_To_Targ (MTYPE_I4, 0x80000000);
06560       TCON now  = Create_Simd_Const (MTYPE_V16F4, then);
06561       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
06562       Allocate_Object(sym);
06563       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
06564       TN *tmpy = Build_TN_Like(op1);
06565       Exp_Load(mtype, mtype, tmpy, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
06566       Build_OP(TOP_andps, tmpy, tmpy, op1, ops);
06567       
06568       // bitwise OR to get result
06569       Build_OP(TOP_orps, result, tmpx, tmpy, ops);
06570     }
06571    break;
06572   case INTRN_ISGREATER:
06573     Build_OP( cmp_opcode, rflags, op1, op0, ops );
06574     Build_OP( TOP_setb, result_tmp, rflags, ops );
06575     break;
06576   case INTRN_ISGREATEREQUAL:
06577     Build_OP( cmp_opcode, rflags, op1, op0, ops );
06578     Build_OP( TOP_setbe, result_tmp, rflags, ops );
06579     break;
06580   case INTRN_ISLESS:
06581     Build_OP( cmp_opcode, rflags, op0, op1, ops );
06582     Build_OP( TOP_setb, result_tmp, rflags, ops );
06583     break;
06584   case INTRN_ISLESSEQUAL:
06585     Build_OP( cmp_opcode, rflags, op0, op1, ops );
06586     Build_OP( TOP_setbe, result_tmp, rflags, ops );
06587     break;
06588   case INTRN_ISLESSGREATER:
06589     Build_OP( cmp_opcode, rflags, op1, op0, ops );
06590     Build_OP( TOP_setne, result_tmp, rflags, ops );
06591     break;
06592   case INTRN_ISUNORDERED:
06593     Build_OP( cmp_opcode, rflags, op1, op0, ops );
06594     Build_OP( TOP_setp, result_tmp, rflags, ops );
06595     break;
06596   case INTRN_ISORDERED:
06597     Build_OP( cmp_opcode, rflags, op1, op0, ops );
06598     Build_OP( TOP_setnp, result_tmp, rflags, ops );
06599     break;
06600   case INTRN_V16C8MPY_ADDSUB:
06601     {      
06602       TN* tmp1 = Build_TN_Like(result);
06603       TN* tmp2 = Build_TN_Like(result);
06604       TN* tmp3 = Build_TN_Like(result);
06605       TN* tmp4 = Build_TN_Like(result);
06606       TN* tmp5 = Build_TN_Like(result);
06607       Build_OP(TOP_fmovddup, tmp1, op2, ops);
06608       Build_OP(TOP_shufpd, tmp2, op2, op2, Gen_Literal_TN(1, 1), ops);
06609       Build_OP(TOP_fmovddup, tmp3, tmp2, ops);
06610       Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops);
06611       Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
06612       Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops);
06613       break;
06614     }
06615   case INTRN_V16C8CONJG:
06616     {
06617       TCON real = Host_To_Targ (MTYPE_I8, 0x0ULL);
06618       TCON imag = Host_To_Targ (MTYPE_I8, 0x8000000000000000ULL);
06619       TCON now = Make_Complex (MTYPE_V16C8, real, imag);
06620       ST *sym = New_Const_Sym (Enter_tcon (now), Be_Type_Tbl(TCON_ty(now)));
06621       Allocate_Object(sym);
06622       TN *sym_tn = Gen_Symbol_TN(sym, 0, 0);
06623       TN *tmp = Build_TN_Like(result);
06624       Exp_Load(mtype, mtype, tmp, TN_var(sym_tn), TN_offset(sym_tn), ops, 0);
06625       Build_OP(TOP_fxor128v64, result, op0, tmp, ops);
06626       break;
06627     }
06628   case INTRN_PADDSB:
06629     Build_OP( TOP_paddsb, result, op0, op1, ops );
06630     break;
06631   case INTRN_PADDSW:
06632     Build_OP( TOP_paddsw, result, op0, op1, ops );
06633     break;
06634   case INTRN_PSUBSB:
06635     Build_OP( TOP_psubsb, result, op0, op1, ops );
06636     break;
06637   case INTRN_PSUBSW:
06638     Build_OP( TOP_psubsw, result, op0, op1, ops );
06639     break;
06640   case INTRN_PADDUSB:
06641     Build_OP( TOP_paddusb, result, op0, op1, ops );
06642     break;
06643   case INTRN_PADDD128:
06644     Build_OP( TOP_add128v32, result, op0, op1, ops );
06645     break;
06646   case INTRN_PADDW128:
06647     Build_OP( TOP_add128v16, result, op0, op1, ops );
06648     break;
06649   case INTRN_PADDUSW:
06650     Build_OP( TOP_paddusw, result, op0, op1, ops );
06651     break;
06652   case INTRN_PSUBUSB:
06653     Build_OP( TOP_psubusb, result, op0, op1, ops );
06654     break;
06655   case INTRN_PSUBUSW:
06656     Build_OP( TOP_psubusw, result, op0, op1, ops );
06657     break;
06658   case INTRN_PMULLW:
06659     Build_OP( TOP_pmullw, result, op0, op1, ops );
06660     break;
06661   case INTRN_PMULHW:
06662     Build_OP( TOP_pmulhw, result, op0, op1, ops );
06663     break;
06664   case INTRN_PCMPEQB:
06665     Build_OP( TOP_pcmpeqb, result, op0, op1, ops );
06666     break;
06667   case INTRN_PCMPEQW:
06668     Build_OP( TOP_pcmpeqw, result, op0, op1, ops );
06669     break;
06670   case INTRN_PCMPEQD:
06671     Build_OP( TOP_pcmpeqd, result, op0, op1, ops );
06672     break;
06673   case INTRN_PCMPGTB:
06674     Build_OP( TOP_pcmpgtb, result, op0, op1, ops );
06675     break;
06676   case INTRN_PCMPGTW:
06677     Build_OP( TOP_pcmpgtw, result, op0, op1, ops );
06678     break;
06679   case INTRN_PCMPGTD:
06680     Build_OP( TOP_pcmpgtd, result, op0, op1, ops );
06681     break;
06682   case INTRN_PUNPCKHBW:
06683     Build_OP( TOP_punpckhbw, result, op0, op1, ops );
06684     break;
06685   case INTRN_PUNPCKHWD:
06686     Build_OP( TOP_punpckhwd, result, op0, op1, ops );
06687     break;
06688   case INTRN_PUNPCKHDQ:
06689     Build_OP( TOP_punpckhdq, result, op0, op1, ops );
06690     break;
06691   case INTRN_PUNPCKLBW:
06692     Build_OP( TOP_punpckl64v8, result, op0, op1, ops );
06693     break;
06694   case INTRN_PUNPCKLWD:
06695     Build_OP( TOP_punpckl64v16, result, op0, op1, ops );
06696     break;
06697   case INTRN_PUNPCKLDQ:
06698     Build_OP( TOP_punpckl64v32, result, op0, op1, ops );
06699     break;
06700   case INTRN_PACKSSWB:
06701     Build_OP( TOP_packsswb, result, op0, op1, ops );
06702     break;
06703   case INTRN_PACKSSDW:
06704     Build_OP( TOP_packssdw, result, op0, op1, ops );
06705     break;
06706   case INTRN_PACKUSWB:
06707     Build_OP( TOP_packuswb, result, op0, op1, ops );
06708     break;
06709   case INTRN_PMULHUW:
06710     Build_OP( TOP_pmulhuw, result, op0, op1, ops );
06711     break;
06712   case INTRN_PAVGB:
06713     Build_OP( TOP_pavgb, result, op0, op1, ops );
06714     break;
06715   case INTRN_PAVGW:
06716     Build_OP( TOP_pavgw, result, op0, op1, ops );
06717     break;
06718   case INTRN_PSADBW:
06719     Build_OP( TOP_psadbw, result, op0, op1, ops );
06720     break;
06721   case INTRN_PMAXUB:
06722     Build_OP( TOP_max64v8, result, op0, op1, ops );
06723     break;
06724   case INTRN_PMAXSW:
06725     Build_OP( TOP_max64v16, result, op0, op1, ops );
06726     break;
06727   case INTRN_PMINUB:
06728     Build_OP( TOP_min64v8, result, op0, op1, ops );
06729     break;
06730   case INTRN_PMINSW:
06731     Build_OP( TOP_min64v16, result, op0, op1, ops );
06732     break;
06733   case INTRN_PEXTRW0:
06734     Is_True (op1 == NULL, ("Imm operand should be null"));
06735     op1 = Gen_Literal_TN (0, 4);
06736     Build_OP( TOP_pextrw, result, op0, op1, ops );
06737     break;
06738   case INTRN_PEXTRW1:
06739     Is_True (op1 == NULL, ("Imm operand should be null"));
06740     op1 = Gen_Literal_TN (1, 4);
06741     Build_OP( TOP_pextrw, result, op0, op1, ops );
06742     break;
06743   case INTRN_PEXTRW2:
06744     Is_True (op1 == NULL, ("Imm operand should be null"));
06745     op1 = Gen_Literal_TN (2, 4);
06746     Build_OP( TOP_pextrw, result, op0, op1, ops );
06747     break;
06748   case INTRN_PEXTRW3:
06749     Is_True (op1 == NULL, ("Imm operand should be null"));
06750     op1 = Gen_Literal_TN (3, 4);
06751     Build_OP( TOP_pextrw, result, op0, op1, ops );
06752     break;
06753   case INTRN_PINSRW0:
06754     Is_True (op2 == NULL, ("Imm operand should be null"));
06755     op2 = Gen_Literal_TN (0, 4);
06756     Build_OP( TOP_pinsrw, result, op1, op2, ops );
06757     break;
06758   case INTRN_PINSRW1:
06759     Is_True (op2 == NULL, ("Imm operand should be null"));
06760     op2 = Gen_Literal_TN (1, 4);
06761     Build_OP( TOP_pinsrw, result, op1, op2, ops );
06762     break;
06763   case INTRN_PINSRW2:
06764     Is_True (op2 == NULL, ("Imm operand should be null"));
06765     op2 = Gen_Literal_TN (2, 4);
06766     Build_OP( TOP_pinsrw, result, op1, op2, ops );
06767     break;
06768   case INTRN_PINSRW3:
06769     Is_True (op2 == NULL, ("Imm operand should be null"));
06770     op2 = Gen_Literal_TN (3, 4);
06771     Build_OP( TOP_pinsrw, result, op1, op2, ops );
06772     break;
06773   case INTRN_PMOVMSKB:
06774     Build_OP( TOP_pmovmskb, result, op0, ops );
06775     break;
06776   case INTRN_PMOVMSKB128:
06777     Build_OP( TOP_pmovmskb128, result, op0, ops );
06778     break;
06779   case INTRN_COMIEQSS:
06780     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06781     Build_OP( TOP_sete, result, rflags, ops);
06782     break;
06783   case INTRN_COMILTSS:
06784     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06785     Build_OP( TOP_setb, result, rflags, ops);
06786     break;
06787   case INTRN_COMILESS:
06788     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06789     Build_OP( TOP_setbe, result, rflags, ops);
06790     break;
06791   case INTRN_COMIGTSS:
06792     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06793     Build_OP( TOP_seta, result, rflags, ops);
06794     break;
06795   case INTRN_COMIGESS:
06796     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06797     Build_OP( TOP_setae, result, rflags, ops);
06798     break;
06799   case INTRN_COMINEQSS:
06800     Build_OP( TOP_comiss, rflags, op0, op1, ops );
06801     Build_OP( TOP_setne, result, rflags, ops);
06802     break;
06803   case INTRN_COMIEQSD:
06804     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06805     Build_OP( TOP_sete, result, rflags, ops);
06806     break;
06807   case INTRN_COMILTSD:
06808     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06809     Build_OP( TOP_setb, result, rflags, ops);
06810     break;
06811   case INTRN_COMILESD:
06812     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06813     Build_OP( TOP_setbe, result, rflags, ops);
06814     break;
06815   case INTRN_COMIGTSD:
06816     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06817     Build_OP( TOP_seta, result, rflags, ops);
06818     break;
06819   case INTRN_COMIGESD:
06820     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06821     Build_OP( TOP_setae, result, rflags, ops);
06822     break;
06823   case INTRN_COMINEQSD:
06824     Build_OP( TOP_comisd, rflags, op0, op1, ops );
06825     Build_OP( TOP_setne, result, rflags, ops);
06826     break;
06827   case INTRN_ADDPS:
06828     Build_OP( TOP_fadd128v32, result, op0, op1, ops );
06829     break;
06830   case INTRN_SUBPS:
06831     Build_OP( TOP_fsub128v32, result, op0, op1, ops );
06832     break;
06833   case INTRN_MULPS:
06834     Build_OP( TOP_fmul128v32, result, op0, op1, ops );
06835     break;
06836   case INTRN_DIVPS:
06837     Build_OP( TOP_fdiv128v32, result, op0, op1, ops );
06838     break;
06839   case INTRN_ADDSS:
06840     Build_OP( TOP_addss, result, op0, op1, ops );
06841     break;
06842   case INTRN_SUBSS:
06843     Build_OP( TOP_subss, result, op0, op1, ops );
06844     break;
06845   case INTRN_MULSS:
06846     Build_OP( TOP_mulss, result, op0, op1, ops );
06847     break;
06848   case INTRN_DIVSS:
06849     Build_OP( TOP_divss, result, op0, op1, ops );
06850     break;
06851   case INTRN_CMPEQPS:
06852     Build_OP( TOP_cmpeqps, result, op0, op1, ops );
06853     break;
06854   case INTRN_CMPLTPS:
06855     Build_OP( TOP_cmpltps, result, op0, op1, ops );
06856     break;
06857   case INTRN_CMPLEPS:
06858     Build_OP( TOP_cmpleps, result, op0, op1, ops );
06859     break;
06860   case INTRN_CMPGTPS:
06861     Build_OP( TOP_cmpltps, result, op1, op0, ops );
06862     break;
06863   case INTRN_CMPGEPS:
06864     Build_OP( TOP_cmpleps, result, op1, op0, ops );
06865     break;
06866   case INTRN_CMPUNORDPS:
06867     Build_OP( TOP_cmpunordps, result, op0, op1, ops );
06868     break;
06869   case INTRN_CMPNEQPS:
06870     Build_OP( TOP_cmpneqps, result, op0, op1, ops );
06871     break;
06872   case INTRN_CMPNLTPS:
06873     Build_OP( TOP_cmpnltps, result, op0, op1, ops );
06874     break;
06875   case INTRN_CMPNLEPS:
06876     Build_OP( TOP_cmpnleps, result, op0, op1, ops );
06877     break;
06878   case INTRN_CMPNGTPS:
06879     Build_OP( TOP_cmpnltps, result, op1, op0, ops );
06880     break;
06881   case INTRN_CMPNGEPS:
06882     Build_OP( TOP_cmpnleps, result, op1, op0, ops );
06883     break;
06884   case INTRN_CMPORDPS:
06885     Build_OP( TOP_cmpordps, result, op0, op1, ops );
06886     break;
06887   case INTRN_CMPEQSS:
06888     Build_OP( TOP_cmpeqss, result, op0, op1, ops );
06889     break;
06890   case INTRN_CMPLTSS:
06891     Build_OP( TOP_cmpltss, result, op0, op1, ops );
06892     break;
06893   case INTRN_CMPLESS:
06894     Build_OP( TOP_cmpless, result, op0, op1, ops );
06895     break;
06896   case INTRN_CMPUNORDSS:
06897     Build_OP( TOP_cmpunordss, result, op0, op1, ops );
06898     break;
06899   case INTRN_CMPNEQSS:
06900     Build_OP( TOP_cmpneqss, result, op0, op1, ops );
06901     break;
06902   case INTRN_CMPNLTSS:
06903     Build_OP( TOP_cmpnltss, result, op0, op1, ops );
06904     break;
06905   case INTRN_CMPNLESS:
06906     Build_OP( TOP_cmpnless, result, op0, op1, ops );
06907     break;
06908   case INTRN_CMPORDSS:
06909     Build_OP( TOP_cmpordss, result, op0, op1, ops );
06910     break;
06911   case INTRN_MAXPS:
06912     Build_OP( TOP_fmax128v32, result, op0, op1, ops );
06913     break;
06914   case INTRN_MAXSS:
06915     Build_OP( TOP_maxss, result, op0, op1, ops );
06916     break;
06917   case INTRN_MINPS:
06918     Build_OP( TOP_fmin128v32, result, op0, op1, ops );
06919     break;
06920   case INTRN_MINSS:
06921     Build_OP( TOP_minss, result, op0, op1, ops );
06922     break;
06923   case INTRN_ANDPS:
06924     Build_OP( TOP_fand128v32, result, op0, op1, ops );
06925     break;
06926   case INTRN_ANDNPS:
06927     Build_OP( TOP_andnps, result, op0, op1, ops );
06928     break;
06929   case INTRN_ORPS:
06930     Build_OP( TOP_for128v32, result, op0, op1, ops );
06931     break;
06932   case INTRN_XORPS:
06933     Build_OP( TOP_fxor128v32, result, op0, op1, ops );
06934     break;
06935   case INTRN_MOVSS:
06936     Build_OP( TOP_movss, result, op0, op1, ops );
06937     break;
06938   case INTRN_MOVSD:
06939     Build_OP( TOP_movsd, result, op0, op1, ops );
06940     break;
06941   case INTRN_MOVHLPS:
06942     Build_OP( TOP_movhlps, result, op0, op1, ops );
06943     break;
06944   case INTRN_MOVLHPS:
06945     Build_OP( TOP_movlhps, result, op0, op1, ops );
06946     break;
06947   case INTRN_UNPCKHPS:
06948     Build_OP( TOP_unpckhps, result, op0, op1, ops );
06949     break;
06950   case INTRN_UNPCKLPS:
06951     Build_OP( TOP_unpcklps, result, op0, op1, ops );
06952     break;
06953   case INTRN_RCPPS:
06954     Build_OP( TOP_frcp128v32, result, op0, ops );
06955     break;
06956   case INTRN_RSQRTPS:
06957     Build_OP( TOP_frsqrt128v32, result, op0, ops );
06958     break;
06959   case INTRN_SQRTPS:
06960     Build_OP( TOP_fsqrt128v32, result, op0, ops );
06961     break;
06962   case INTRN_RCPSS:
06963     Build_OP( TOP_rcpss, result, op0, ops );
06964     break;
06965   case INTRN_RSQRTSS:
06966     Build_OP( TOP_rsqrtss, result, op0, ops );
06967     break;
06968   case INTRN_SQRTSS:
06969     Build_OP( TOP_sqrtss, result, op0, ops );
06970     break;
06971   case INTRN_SHUFPS:
06972     Build_OP( TOP_shufps, result, op0, op1, op2, ops );
06973     break;
06974   case INTRN_LOADAPS:
06975     Build_OP( TOP_ldaps, result, op0, Gen_Literal_TN(0,4), ops );
06976     break;
06977   case INTRN_PSLLDQ:
06978     Build_OP( TOP_pslldq, result, op0, op1, ops );
06979     break;
06980   case INTRN_PSRLDQ:
06981     Build_OP( TOP_psrldq, result, op0, op1, ops );
06982     break;
06983   case INTRN_PSLLW:
06984     Build_OP( TOP_psllw, result, op0, op1, ops );
06985     break;
06986   case INTRN_PSLLD:
06987     Build_OP( TOP_pslld, result, op0, op1, ops );
06988     break;
06989   case INTRN_PSLLQ:
06990     Build_OP( TOP_psllq, result, op0, op1, ops );
06991     break;
06992   case INTRN_PSRLW:
06993     Build_OP( TOP_psrlw, result, op0, op1, ops );
06994     break;
06995   case INTRN_PSRLD:
06996     Build_OP( TOP_psrld, result, op0, op1, ops );
06997     break;
06998   case INTRN_PSRLQ:
06999     Build_OP( TOP_psrlq, result, op0, op1, ops );
07000     break;
07001   case INTRN_PSRAW:
07002     Build_OP( TOP_psraw, result, op0, op1, ops );
07003     break;
07004   case INTRN_PSRAD:
07005     Build_OP( TOP_psrad, result, op0, op1, ops );
07006     break;
07007   case INTRN_LOADD:
07008     Build_OP( TOP_movg2x64, result, op0, ops );
07009     break;
07010   case INTRN_PSHUFD:
07011     Build_OP( TOP_pshufd, result, op0, op1, ops );
07012     break;
07013   case INTRN_LOADSS:
07014     Build_OP( TOP_ldss, result, op0, Gen_Literal_TN (0,4), ops );
07015     break;
07016   case INTRN_SHUFPD:
07017     Build_OP( TOP_shufpd, result, op0, op1, op2, ops );
07018     break;
07019   case INTRN_XORPD:
07020     Build_OP( TOP_fxor128v64, result, op0, op1, ops );
07021     break;
07022   case INTRN_ANDPD:
07023     Build_OP( TOP_fand128v64, result, op0, op1, ops );
07024     break;
07025   case INTRN_ORPD:
07026     Build_OP( TOP_for128v64, result, op0, op1, ops );
07027     break;
07028   case INTRN_LOADLPD:
07029     Build_OP( TOP_ldsd, result, op1, Gen_Literal_TN (0,4), ops );
07030     break;
07031   case INTRN_LOADHPD:
07032     Build_OP( TOP_ldhpd, result, op1, Gen_Literal_TN (0,4), ops );
07033     break;
07034   case INTRN_UNPCKLPD:
07035     Build_OP( TOP_unpcklpd, result, op0, op1, ops );
07036     break;
07037   case INTRN_UNPCKHPD:
07038     Build_OP( TOP_unpckhpd, result, op0, op1, ops );
07039     break;
07040   case INTRN_PSHUFW:
07041     Build_OP( TOP_pshufw64v16, result, op0, op1, ops );
07042     break;
07043   case INTRN_LOADDQA:
07044     Build_OP( TOP_lddqa, result, op0, Gen_Literal_TN (0,4), ops );
07045     break;
07046   case INTRN_LOADDQU:
07047     Build_OP( TOP_lddqu, result, op0, Gen_Literal_TN (0,4), ops );
07048     break;
07049 
07050   case INTRN_COSL:
07051     Build_OP( TOP_fcos, result, op0, ops );
07052     break;
07053   case INTRN_SINL:
07054     Build_OP( TOP_fsin, result, op0, ops );
07055     break;
07056   case INTRN_VEC_INIT_V2SI:
07057     {
07058       TN* tmp0 = Build_TN_Like(result);
07059       TN* tmp1 = Build_TN_Like(result);
07060       if ( TN_register_class(result) == ISA_REGISTER_CLASS_mmx ) {
07061         Build_OP( TOP_movi32_2m, tmp0, op0, ops );
07062         Build_OP( TOP_movi32_2m, tmp1, op1, ops );
07063         Build_OP( TOP_punpckl64v32, result, tmp0, tmp1, ops );
07064       } else {
07065         Build_OP( TOP_movg2x, tmp0, op0, ops );
07066         Build_OP( TOP_movg2x, tmp1, op1, ops );
07067         Build_OP( TOP_punpckldq, result, tmp0, tmp1, ops );
07068       }
07069       break;
07070     }
07071   case INTRN_VEC_EXT_V2SI:
07072     if ( TN_register_class(op0) == ISA_REGISTER_CLASS_mmx ) {
07073       if ( TN_has_value(op1) && TN_value(op1) == 1 ) {
07074         Build_OP( TOP_punpckhdq, op0, op0, op0, ops );
07075         Build_OP( TOP_movm_2i32, result, op0, ops );
07076       } else if ( TN_is_zero(op1)) {
07077         Build_OP( TOP_movm_2i32, result, op0, ops );
07078       } else {
07079         FmtAssert(0, ("op1 must be an integer constant in the range 0..1"));
07080       }
07081     } else {
07082       if ( TN_has_value(op1) && TN_value(op1) == 1 ) {
07083         TN* tmp=Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07084         Build_OP( TOP_movdq2q, tmp,  op0, ops);
07085         Build_OP( TOP_punpckhdq, tmp, tmp, tmp, ops);
07086         Build_OP( TOP_movm_2i32, result, tmp, ops );
07087       } else if ( TN_is_zero(op1)) {
07088         Build_OP( TOP_movx2g, result, op0, ops );
07089       } else {
07090         FmtAssert(0, ("op1 must be an integer constant in the range 0..1"));
07091       }
07092     }
07093     break;
07094   case INTRN_PMADDWD:
07095     Build_OP( TOP_pmaddwd, result, op0, op1, ops );
07096     break;
07097   case INTRN_PSLLW_MMX:
07098     Build_OP( TOP_psllw_mmx, result, op0, op1, ops );
07099     break;
07100   case INTRN_PSLLD_MMX:
07101     Build_OP( TOP_pslld_mmx, result, op0, op1, ops );
07102     break;
07103   case INTRN_PSRLW_MMX:
07104     Build_OP( TOP_psrlw_mmx, result, op0, op1, ops );
07105     break;
07106   case INTRN_PSRLD_MMX:
07107     Build_OP( TOP_psrld_mmx, result, op0, op1, ops );
07108     break;
07109   case INTRN_PSRAW_MMX:
07110     Build_OP( TOP_psraw_mmx, result, op0, op1, ops );
07111     break;
07112   case INTRN_PSRAD_MMX:
07113     Build_OP( TOP_psrad_mmx, result, op0, op1, ops );
07114     break;
07115   case INTRN_PAND_MMX:
07116     Build_OP( TOP_pand_mmx, result, op0, op1, ops );
07117     break;
07118   case INTRN_PANDN_MMX:
07119     Build_OP( TOP_pandn_mmx, result, op0, op1, ops );
07120     break;
07121   case INTRN_POR_MMX:
07122     Build_OP( TOP_por_mmx, result, op0, op1, ops );
07123     break;
07124   case INTRN_PXOR_MMX:
07125     Build_OP( TOP_pxor_mmx, result, op0, op1, ops );
07126     break;
07127   case INTRN_CVTPI2PS:
07128     if (TN_register_class(op0) != ISA_REGISTER_CLASS_mmx) {
07129       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07130       Exp_COPY( tmp0, op0, ops );
07131       op0 = tmp0;
07132     }
07133     Build_OP( TOP_cvtpi2ps, result, op0, ops );
07134     break;
07135   case INTRN_CVTPS2PI: {
07136       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07137       Build_OP( TOP_cvtps2pi, tmp0, op0, ops );
07138       // mov mmx to sse
07139       Build_OP (TOP_movq2dq, result, tmp0, ops);
07140       break;
07141     }
07142   case INTRN_CVTTPS2PI: {
07143       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07144       Build_OP( TOP_cvttps2pi, tmp0, op0, ops );
07145       // mov mmx to sse
07146       Build_OP (TOP_movq2dq, result, tmp0, ops);
07147       break;
07148     }
07149   case INTRN_CVTPI2PD:
07150     if (TN_register_class(op0) != ISA_REGISTER_CLASS_mmx) {
07151       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07152       Exp_COPY( tmp0, op0, ops );
07153       op0 = tmp0;
07154     }
07155     Build_OP( TOP_cvtpi2pd, result, op0, ops );
07156     break;
07157   case INTRN_CVTPD2PI: {
07158       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07159       Build_OP( TOP_cvtpd2pi, tmp0, op0, ops );
07160       // mov mmx to sse
07161       Build_OP (TOP_movq2dq, result, tmp0, ops);
07162       break;
07163     }
07164   case INTRN_CVTTPD2PI: {
07165       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_mmx);
07166       Build_OP( TOP_cvttpd2pi, tmp0, op0, ops );
07167       // mov mmx to sse
07168       Build_OP (TOP_movq2dq, result, tmp0, ops);
07169       break;
07170     }
07171   case INTRN_CVTSI2SS:
07172     if (TN_register_class(op0) != ISA_REGISTER_CLASS_integer) {
07173       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_integer);
07174       Build_OP( TOP_movx2g, tmp0, op0, ops );
07175       op0 = tmp0;
07176     }
07177     Build_OP( TOP_cvtsi2ss, result, op0, ops );
07178     break;
07179   case INTRN_CVTSI642SS:
07180     if (TN_register_class(op0) != ISA_REGISTER_CLASS_integer) {
07181       TN *tmp0 = Build_RCLASS_TN(ISA_REGISTER_CLASS_integer);
07182       Build_OP( TOP_movx2g64, tmp0, op0, ops );
07183       op0 = tmp0;
07184     }
07185     Build_OP( TOP_cvtsi2ssq, result, op0, ops );
07186     break;
07187   case INTRN_CVTSS2SI:
07188     Build_OP( TOP_cvtss2si, result, op0, ops );
07189     break;
07190   case INTRN_CVTSS2SI64:
07191     Build_OP( TOP_cvtss2siq, result, op0, ops );
07192     break;
07193   case INTRN_CVTTSS2SI:
07194     Build_OP( TOP_cvttss2si, result, op0, ops );
07195     break;
07196   case INTRN_CVTTSS2SI64:
07197     Build_OP( TOP_cvttss2siq, result, op0, ops );
07198     break;
07199   case INTRN_CVTSI2SD:
07200     if (TN_register_class(op0) != ISA_REGISTER_CLASS_integer) {
07201       TN *tmp0 =