00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include <stdio.h>
00032 #include <math.h>
00033
00034 #define BITS 5
00035 #define N_ENTRIES (1 << BITS)
00036 #define CUTOFF_BITS 20
00037
00038 #define BIAS (-330)
00039
00040 double max_defect = 0.;
00041 double max_defect_x;
00042
00043 double min_defect = 1e9;
00044 double min_defect_x;
00045
00046 double max_defect2 = 0.;
00047 double max_defect2_x;
00048
00049 double min_defect2 = 0.;
00050 double min_defect2_x;
00051
00052 double min_defect3 = 01e9;
00053 double min_defect3_x;
00054 int min_defect3_val;
00055
00056 double max_defect3 = 0.;
00057 double max_defect3_x;
00058 int max_defect3_val;
00059
00060 static double note_defect3 (int val, double d2, double y2d, double x)
00061 {
00062 int cutoff_val = val >> CUTOFF_BITS;
00063 double cutoff;
00064 double defect;
00065
00066 if (val < 0)
00067 cutoff_val++;
00068 cutoff = (cutoff_val * (1<<CUTOFF_BITS) - val) * y2d;
00069 defect = cutoff + val * d2;
00070 if (val < 0)
00071 defect = - defect;
00072 if (defect > max_defect3)
00073 {
00074 max_defect3 = defect;
00075 max_defect3_x = x;
00076 max_defect3_val = val;
00077 }
00078 if (defect < min_defect3)
00079 {
00080 min_defect3 = defect;
00081 min_defect3_x = x;
00082 min_defect3_val = val;
00083 }
00084 }
00085
00086
00087 static double
00088 calc_defect (double x, int constant, int factor)
00089 {
00090 double y0 = (constant - (int) floor ((x * factor * 64.))) / 16384.;
00091 double y1 = 2 * y0 -y0 * y0 * (x + BIAS / (1.*(1LL<<30)));
00092 double y2d0, y2d;
00093 int y2d1;
00094 double d, d2;
00095
00096 y1 = floor (y1 * (1024 * 1024 * 1024)) / (1024 * 1024 * 1024);
00097 d = y1 - 1 / x;
00098 if (d > max_defect)
00099 {
00100 max_defect = d;
00101 max_defect_x = x;
00102 }
00103 if (d < min_defect)
00104 {
00105 min_defect = d;
00106 min_defect_x = x;
00107 }
00108 y2d0 = floor (y1 * x * (1LL << 60-16));
00109 y2d1 = (int) (long long) y2d0;
00110 y2d = - floor ((y1 - y0 / (1<<30-14)) * y2d1) / (1LL<<44);
00111 d2 = y1 + y2d - 1/x;
00112 if (d2 > max_defect2)
00113 {
00114 max_defect2 = d2;
00115 max_defect2_x = x;
00116 }
00117 if (d2 < min_defect2)
00118 {
00119 min_defect2 = d2;
00120 min_defect2_x = x;
00121 }
00122
00123 note_defect3 ((1 << CUTOFF_BITS) - 1, d2, y2d, x);
00124 note_defect3 (1 << CUTOFF_BITS, d2, y2d, x);
00125 note_defect3 ((1U << 31) - (1 << CUTOFF_BITS), d2, y2d, x);
00126 note_defect3 ((1U << 31) - 1, d2, y2d, x);
00127 note_defect3 (-1, d2, y2d, x);
00128 note_defect3 (-(1 << CUTOFF_BITS), d2, y2d, x);
00129 note_defect3 ((1U << 31) - (1 << CUTOFF_BITS) + 1, d2, y2d, x);
00130 note_defect3 (-(1U << 31), d2, y2d, x);
00131 return d;
00132 }
00133
00134 int
00135 main ()
00136 {
00137 int i;
00138 unsigned char factors[N_ENTRIES];
00139 short constants[N_ENTRIES];
00140 int steps = N_ENTRIES / 2;
00141 double step = 1. / steps;
00142 double eps30 = 1. / (1024 * 1024 * 1024);
00143
00144 for (i = 0; i < N_ENTRIES; i++)
00145 {
00146 double x_low = (i < steps ? 1. : -3.) + i * step;
00147 double x_high = x_low + step - eps30;
00148 double x_med;
00149 int factor, constant;
00150 double low_defect, med_defect, high_defect, max_defect;
00151
00152 factor = (1./x_low- 1./x_high) / step * 256. + 0.5;
00153 if (factor == 256)
00154 factor = 255;
00155 factors[i] = factor;
00156
00157 x_med = sqrt (256./factor);
00158 if (x_low < 0)
00159 x_med = - x_med;
00160 low_defect = 1. / x_low + x_low * factor / 256.;
00161 high_defect = 1. / x_high + x_high * factor / 256.;
00162 med_defect = 1. / x_med + x_med * factor / 256.;
00163 max_defect
00164 = ((low_defect > high_defect) ^ (x_med < 0)) ? low_defect : high_defect;
00165 constant = (med_defect + max_defect) * 0.5 * 16384. + 0.5;
00166 if (constant < -32768 || constant > 32767)
00167 abort ();
00168 constants[i] = constant;
00169 calc_defect (x_low, constant, factor);
00170 calc_defect (x_med, constant, factor);
00171 calc_defect (x_high, constant, factor);
00172 }
00173 printf ("/* This table has been generated by divtab.c .\n");
00174 printf ("Defects for bias %d:\n", BIAS);
00175 printf (" Max defect: %e at %e\n", max_defect, max_defect_x);
00176 printf (" Min defect: %e at %e\n", min_defect, min_defect_x);
00177 printf (" Max 2nd step defect: %e at %e\n", max_defect2, max_defect2_x);
00178 printf (" Min 2nd step defect: %e at %e\n", min_defect2, min_defect2_x);
00179 printf (" Max div defect: %e at %d:%e\n", max_defect3, max_defect3_val, max_defect3_x);
00180 printf (" Min div defect: %e at %d:%e\n", min_defect3, min_defect3_val, min_defect3_x);
00181 printf (" Defect at 1: %e\n",
00182 calc_defect (1., constants[0], factors[0]));
00183 printf (" Defect at -2: %e */\n",
00184 calc_defect (-2., constants[steps], factors[steps]));
00185 printf ("\t.section\t.rodata\n");
00186 printf ("\t.balign 2\n");
00187 printf ("/* negative division constants */\n");
00188 for (i = steps; i < 2 * steps; i++)
00189 printf ("\t.word\t%d\n", constants[i]);
00190 printf ("/* negative division factors */\n");
00191 for (i = steps; i < 2*steps; i++)
00192 printf ("\t.byte\t%d\n", factors[i]);
00193 printf ("\t.skip %d\n", steps);
00194 printf ("\t.global GLOBAL(div_table):\n");
00195 printf ("GLOBAL(div_table):\n");
00196 printf ("\t.skip %d\n", steps);
00197 printf ("/* positive division factors */\n");
00198 for (i = 0; i < steps; i++)
00199 printf ("\t.byte\t%d\n", factors[i]);
00200 printf ("/* positive division constants */\n");
00201 for (i = 0; i < steps; i++)
00202 printf ("\t.word\t%d\n", constants[i]);
00203 exit (0);
00204 }