00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
/* MXCSR control bits set by set_fast_math.  */
#define MXCSR_DAZ 0x00000040	/* Bit 6: denormals-are-zero.  */
#define MXCSR_FTZ 0x00008000	/* Bit 15: flush-to-zero.  */

/* Feature bits tested in the EDX value returned by CPUID leaf 1.  */
#define FXSAVE 0x01000000	/* Bit 24: FXSAVE/FXRSTOR available.  */
#define SSE 0x02000000	/* Bit 25: SSE available.  */
00039
00040 static void __attribute__((constructor))
00041 #ifndef __x86_64__
00042
00043
00044
00045 __attribute__ ((force_align_arg_pointer))
00046 #endif
00047 set_fast_math (void)
00048 {
00049 #ifndef __x86_64__
00050
00051
00052 unsigned int eax, ebx, ecx, edx;
00053
00054
00055 asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
00056 "pushl %0; popfl; pushfl; popl %0; popfl"
00057 : "=&r" (eax), "=&r" (ebx)
00058 : "i" (0x00200000));
00059
00060 if (((eax ^ ebx) & 0x00200000) == 0)
00061 return;
00062
00063
00064 asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
00065 : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
00066 : "0" (0));
00067
00068 if (eax == 0)
00069 return;
00070
00071 asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
00072 : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
00073 : "0" (1));
00074
00075 if (edx & SSE)
00076 {
00077 unsigned int mxcsr = __builtin_ia32_stmxcsr ();
00078
00079 mxcsr |= MXCSR_FTZ;
00080
00081 if (edx & FXSAVE)
00082 {
00083
00084 struct
00085 {
00086 unsigned short int cwd;
00087 unsigned short int swd;
00088 unsigned short int twd;
00089 unsigned short int fop;
00090 long int fip;
00091 long int fcs;
00092 long int foo;
00093 long int fos;
00094 long int mxcsr;
00095 long int mxcsr_mask;
00096 long int st_space[32];
00097 long int xmm_space[32];
00098 long int padding[56];
00099 } __attribute__ ((aligned (16))) fxsave;
00100
00101 __builtin_memset (&fxsave, 0, sizeof (fxsave));
00102
00103 asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));
00104
00105 if (fxsave.mxcsr_mask & MXCSR_DAZ)
00106 mxcsr |= MXCSR_DAZ;
00107 }
00108
00109 __builtin_ia32_ldmxcsr (mxcsr);
00110 }
00111 #else
00112 unsigned int mxcsr = __builtin_ia32_stmxcsr ();
00113 mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
00114 __builtin_ia32_ldmxcsr (mxcsr);
00115 #endif
00116 }