00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "f90_intrinsic.h"
00039
00040 void
00041 _RESHAPE(
00042 DopeVectorType *result,
00043 DopeVectorType *array,
00044 DopeVectorType *shape,
00045 DopeVectorType *pad,
00046 DopeVectorType *order)
00047 {
00048 char * result_p, * result_b ;
00049 char * array_p, * array_b ;
00050 i4 * shape_p, * shape_b ;
00051 char * pad_p, * pad_b ;
00052 i4 * order_p, * order_b ;
00053
00054 size_t src_extent [MAX_NARY_DIMS] ;
00055 size_t src_stride [MAX_NARY_DIMS] ;
00056 size_t src_offset [MAX_NARY_DIMS] ;
00057 size_t counter[MAX_NARY_DIMS] ;
00058
00059 size_t res_stride [MAX_NARY_DIMS] ;
00060 size_t res_extent [MAX_NARY_DIMS] ;
00061 size_t res_offset [MAX_NARY_DIMS] ;
00062 size_t res_counter[MAX_NARY_DIMS] ;
00063
00064 size_t pad_stride [MAX_NARY_DIMS] ;
00065 size_t pad_extent [MAX_NARY_DIMS] ;
00066 size_t pad_offset [MAX_NARY_DIMS] ;
00067
00068 int32_t l_order[MAX_NARY_DIMS] ;
00069 int32_t l_order_chk[MAX_NARY_DIMS] ;
00070 int32_t l_shape[MAX_NARY_DIMS] ;
00071
00072 int32_t j,ii;
00073 char *rp, *ap ;
00074 int32_t *gp1 ;
00075 int32_t pad_rank ;
00076 int32_t shp_rank ;
00077 int32_t res_rank ;
00078 int32_t src_rank = GET_RANK_FROM_DESC(array) - 1;
00079
00080 size_t typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00081
00082 size_t a_size,a_stride,r_stride, i,k ;
00083 size_t l_size,l_stride;
00084 size_t tot_shape, tot_source;
00085 char * l_p ;
00086 int8_t zero_szd_shape = FALSE;
00087 int8_t zero_szd_order = FALSE;
00088 int8_t zero_szd_source = FALSE;
00089 int8_t zero_szd_pad = FALSE;
00090 int8_t byte_aligned = FALSE;
00091
00092 int32_t ddim ;
00093
00094 size_t num_trues ;
00095 int32_t local_alloc ;
00096 size_t tot_ext ;
00097 size_t str_sz ;
00098
00099 size_t src_size ;
00100
00101 size_t res_sz;
00102 size_t xfer_sz;
00103 size_t tot_sz;
00104
00105 tot_source = 1 ;
00106 for( j = 0 ; j <= src_rank ; j ++ ) {
00107 src_extent[j] = GET_EXTENT_FROM_DESC(array,j) ;
00108 src_stride[j] = GET_STRIDE_FROM_DESC(array,j) ;
00109 counter[j] = 0 ;
00110 zero_szd_source = zero_szd_source || (src_extent[j] == 0) ;
00111 tot_source *= src_extent[j];
00112 }
00113
00114 for ( j = 1 ; j <= src_rank ; j ++ )
00115 src_offset[j-1] = src_stride[j] - (src_stride [j-1] * (src_extent[j-1])) ;
00116
00117 byte_aligned = GET_BYTEALIGNED_FROM_DESC(array) ;
00118
00119 res_rank = GET_EXTENT_FROM_DESC(shape,0) ;
00120 if (res_rank == 0)
00121 ERROR(_LELVL_ABORT, FESHPSZZ);
00122
00123 l_stride = GET_STRIDE_FROM_DESC(shape,0);
00124 l_size = GET_ELEMENT_SZ_FROM_DESC(shape);
00125 l_p = GET_ADDRESS_FROM_DESC(shape);
00126
00127 switch (l_size) {
00128 case 1:
00129 for (j = 0 ; j < res_rank ; j++ ) {
00130 l_shape[j] = * (int8_t *) l_p ;
00131 l_p += l_stride;
00132 }
00133 break;
00134
00135 case 2:
00136 for (j = 0 ; j < res_rank ; j++ ) {
00137 l_shape[j] = * (int16_t *) l_p ;
00138 l_p += l_stride;
00139 }
00140 break;
00141
00142 case 4:
00143 for (j = 0 ; j < res_rank ; j++ ) {
00144 l_shape[j] = * (int32_t *) l_p ;
00145 l_p += l_stride;
00146 }
00147 break;
00148
00149 case 8:
00150 for (j = 0 ; j < res_rank ; j++ ) {
00151 l_shape[j] = * (int64_t *) l_p ;
00152 l_p += l_stride;
00153 }
00154 break;
00155 }
00156
00157 tot_shape = 1;
00158 for (i = 0; i < res_rank; i++) {
00159 if (l_shape[i] < 0)
00160 ERROR (_LELVL_ABORT, FERSHNEG);
00161 zero_szd_shape = zero_szd_shape || (l_shape[i] == 0);
00162 tot_shape *= l_shape[i];
00163
00164 }
00165
00166 if (order == NULL) {
00167 for (j = 0 ; j < res_rank ; j++ )
00168 l_order[j] = j + 1;
00169
00170 } else {
00171
00172 l_stride = GET_STRIDE_FROM_DESC(order,0);
00173 l_size = GET_ELEMENT_SZ_FROM_DESC(order);
00174 l_p = GET_ADDRESS_FROM_DESC(order);
00175
00176 switch (l_size) {
00177 case 1:
00178 for (j = 0 ; j < res_rank ; j++ ) {
00179 l_order_chk[j] = FALSE;
00180 l_order[j] = * (int8_t *) l_p ;
00181 l_p += l_stride;
00182 }
00183 break;
00184
00185 case 2:
00186 for (j = 0 ; j < res_rank ; j++ ) {
00187 l_order_chk[j] = FALSE;
00188 l_order[j] = * (int16_t *) l_p ;
00189 l_p += l_stride;
00190 }
00191 break;
00192
00193 case 4:
00194 for (j = 0 ; j < res_rank ; j++ ) {
00195 l_order_chk[j] = FALSE;
00196 l_order[j] = * (int32_t *) l_p ;
00197 l_p += l_stride;
00198 }
00199 break;
00200
00201 case 8:
00202 for (j = 0 ; j < res_rank ; j++ ) {
00203 l_order_chk[j] = FALSE;
00204 l_order[j] = * (int64_t *) l_p ;
00205 l_p += l_stride;
00206 }
00207 break;
00208 }
00209
00210 for (i = 0; i < res_rank; i++) {
00211 if (l_order[i] <= 0 || l_order[i] > res_rank)
00212 ERROR(_LELVL_ABORT, FEBDORDR);
00213 l_order_chk[l_order[i]-1] = TRUE;
00214 zero_szd_order = zero_szd_order || (l_order[i] == 0) ;
00215 }
00216 for (i = 0; i < res_rank; i++) {
00217 if (!l_order_chk[i])
00218 ERROR(_LELVL_ABORT, FEBDORDR);
00219 }
00220 }
00221
00222 if (pad != NULL ) {
00223 pad_p = GET_ADDRESS_FROM_DESC(pad);
00224 pad_rank = GET_RANK_FROM_DESC(pad) - 1;
00225 for ( j = 0 ; j <= pad_rank ; j ++ ) {
00226 pad_extent[j] = GET_EXTENT_FROM_DESC(pad,j) ;
00227 pad_stride[j] = GET_STRIDE_FROM_DESC(pad,j) ;
00228 zero_szd_pad = zero_szd_pad || (pad_extent[j] == 0) ;
00229
00230 }
00231 for ( j = 1 ; j <= pad_rank ; j ++ )
00232 pad_offset[j-1] = pad_stride[j] - (pad_stride [j-1] * (pad_extent[j-1])) ;
00233
00234 } else if (tot_shape > tot_source) {
00235 ERROR(_LELVL_ABORT, FERSHNPD);
00236 }
00237
00238 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00239
00240 size_t nbytes ;
00241 char *p ;
00242
00243 SET_ADDRESS_IN_DESC(result,NULL);
00244 SET_ORIG_BS_IN_DESC(result,NULL) ;
00245 SET_ORIG_SZ_IN_DESC(result,0) ;
00246 SET_RANK_IN_DESC(result,res_rank) ;
00247
00248 p = NULL ;
00249 tot_ext = 1 ;
00250 nbytes = typ_sz ;
00251 str_sz = MK_STRIDE(byte_aligned,typ_sz);
00252
00253 for ( i = 0 ; i < res_rank ; i ++) {
00254 SET_LBOUND_IN_DESC(result,i,1);
00255 SET_EXTENT_IN_DESC(result,i,l_shape[i]);
00256 SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
00257 tot_ext *= l_shape[i];
00258 nbytes *= l_shape[i];
00259 }
00260
00261 if (nbytes > 0 && !zero_szd_order) {
00262 p = (void *) malloc (nbytes);
00263 if (p == NULL)
00264 ERROR(_LELVL_ABORT, FENOMEMY);
00265
00266 SET_ADDRESS_IN_DESC(result,p);
00267 }
00268
00269 SET_ASSOCIATED_IN_DESC(result);
00270 SET_CONTIG_IN_DESC(result);
00271 SET_ALEN_IN_DESC(result,GET_ALEN_FROM_DESC(array));
00272 if (GET_DV_ASCII_FROM_DESC(array)) {
00273 SET_CHARPTR_IN_DESC(result,p,typ_sz);
00274 }
00275 SET_ORIG_BS_IN_DESC(result,p) ;
00276 SET_ORIG_SZ_IN_DESC(result,nbytes*8) ;
00277 }
00278
00279 if (zero_szd_shape || zero_szd_order)
00280 return ;
00281
00282 if (zero_szd_source && (pad == NULL || zero_szd_pad))
00283 ERROR(_LELVL_ABORT, FERSHNPD);
00284
00285 for ( j = 0 , gp1 = l_order ; j < res_rank ; j ++ ) {
00286 if (gp1 == NULL)
00287 ii = j ;
00288 else
00289 ii = (*gp1++)-1 ;
00290
00291 res_stride[j] = GET_STRIDE_FROM_DESC(result,ii) ;
00292 res_extent[j] = GET_EXTENT_FROM_DESC(result,ii) ;
00293 res_counter[j] = 0 ;
00294 }
00295
00296 for ( j = 1 ; j < res_rank ; j ++ )
00297 res_offset[j-1] = res_stride[j] - (res_stride [j-1] * (res_extent[j-1])) ;
00298 res_rank -- ;
00299 if (zero_szd_source)
00300 if (pad != NULL)
00301 for (i = 0 ; i <= src_rank ; i ++) src_extent[i] = 0;
00302 else
00303 return ;
00304
00305 a_size = src_extent[0] ;
00306 a_stride = src_stride[0] ;
00307 r_stride = res_stride[0] ;
00308 array_p = GET_ADDRESS_FROM_DESC(array);
00309 result_p = GET_ADDRESS_FROM_DESC(result);
00310
00311 if (typ_sz == sizeof(i1) && ALIGNED_i1(array_p) && ALIGNED_i1(result_p) && ((pad_p == NULL) || ALIGNED_i1(pad_p))) {
00312
00313 for (;;) {
00314 while (counter[src_rank] < src_extent[src_rank] ) {
00315 for ( i = 0 ; i < a_size ; i ++ ) {
00316 *(i1 *)result_p = *(i1 *)array_p ;
00317 array_p += a_stride ;
00318 result_p += r_stride ;
00319
00320 j = 0 ;
00321 res_counter[0] ++ ;
00322
00323 while (res_counter[j] == res_extent[j]) {
00324 if (j == res_rank ) return ;
00325 result_p += res_offset[j] ;
00326 res_counter[j+1]++ ;
00327 res_counter[j] = 0 ;
00328 j ++ ;
00329 }
00330 }
00331 counter[0] = a_size ;
00332 j = 0 ;
00333 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00334 array_p += src_offset[j] ;
00335 counter[j+1]++ ;
00336 counter[j] = 0 ;
00337 j ++ ;
00338 }
00339
00340 }
00341 if (pad != NULL) {
00342
00343 src_rank = pad_rank ;
00344
00345 for ( j = 0 ; j <= src_rank ; j ++ ) {
00346 src_extent [j] = pad_extent[j] ;
00347 src_stride [j] = pad_stride[j] ;
00348 counter[j] = 0 ;
00349 src_offset [j] = pad_offset[j] ;
00350 }
00351 array_p = pad_p ;
00352 a_size = src_extent [0] ;
00353 a_stride = src_stride [0] ;
00354 }
00355 }
00356 } else if (typ_sz == sizeof(i2) && ALIGNED_i2(array_p) && ALIGNED_i2(result_p) && ((pad_p == NULL) || ALIGNED_i2(pad_p))) {
00357
00358 for (;;) {
00359 while (counter[src_rank] < src_extent[src_rank] ) {
00360 for ( i = 0 ; i < a_size ; i ++ ) {
00361 *(i2 *)result_p = *(i2 *)array_p ;
00362 array_p += a_stride ;
00363 result_p += r_stride ;
00364
00365 j = 0 ;
00366 res_counter[0] ++ ;
00367
00368 while (res_counter[j] == res_extent[j]) {
00369 if (j == res_rank ) return ;
00370 result_p += res_offset[j] ;
00371 res_counter[j+1]++ ;
00372 res_counter[j] = 0 ;
00373 j ++ ;
00374 }
00375 }
00376 counter[0] = a_size ;
00377 j = 0 ;
00378 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00379 array_p += src_offset[j] ;
00380 counter[j+1]++ ;
00381 counter[j] = 0 ;
00382 j ++ ;
00383 }
00384
00385 }
00386 if (pad != NULL) {
00387
00388 src_rank = pad_rank ;
00389
00390 for ( j = 0 ; j <= src_rank ; j ++ ) {
00391 src_extent [j] = pad_extent[j] ;
00392 src_stride [j] = pad_stride[j] ;
00393 counter[j] = 0 ;
00394 src_offset [j] = pad_offset[j] ;
00395 }
00396 array_p = pad_p ;
00397 a_size = src_extent [0] ;
00398 a_stride = src_stride [0] ;
00399 }
00400 }
00401 } else if (typ_sz == sizeof(r4) && ALIGNED_r4(array_p) && ALIGNED_r4(result_p) && ((pad_p == NULL) || ALIGNED_r4(pad_p))) {
00402
00403 for (;;) {
00404 while (counter[src_rank] < src_extent[src_rank] ) {
00405 for ( i = 0 ; i < a_size ; i ++ ) {
00406 *(r4 *)result_p = *(r4 *)array_p ;
00407 array_p += a_stride ;
00408 result_p += r_stride ;
00409
00410 j = 0 ;
00411 res_counter[0] ++ ;
00412
00413 while (res_counter[j] == res_extent[j]) {
00414 if (j == res_rank ) return ;
00415 result_p += res_offset[j] ;
00416 res_counter[j+1]++ ;
00417 res_counter[j] = 0 ;
00418 j ++ ;
00419 }
00420 }
00421 counter[0] = a_size ;
00422 j = 0 ;
00423 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00424 array_p += src_offset[j] ;
00425 counter[j+1]++ ;
00426 counter[j] = 0 ;
00427 j ++ ;
00428 }
00429
00430 }
00431 if (pad != NULL) {
00432
00433 src_rank = pad_rank ;
00434
00435 for ( j = 0 ; j <= src_rank ; j ++ ) {
00436 src_extent [j] = pad_extent[j] ;
00437 src_stride [j] = pad_stride[j] ;
00438 counter[j] = 0 ;
00439 src_offset [j] = pad_offset[j] ;
00440 }
00441 array_p = pad_p ;
00442 a_size = src_extent [0] ;
00443 a_stride = src_stride [0] ;
00444 }
00445 }
00446 } else if (typ_sz == sizeof(r8) && ALIGNED_r8(array_p) && ALIGNED_r8(result_p) && ((pad_p == NULL) || ALIGNED_r8(pad_p))) {
00447
00448 for (;;) {
00449 while (counter[src_rank] < src_extent[src_rank] ) {
00450 for ( i = 0 ; i < a_size ; i ++ ) {
00451 *(r8 *)result_p = *(r8 *)array_p ;
00452 array_p += a_stride ;
00453 result_p += r_stride ;
00454
00455 j = 0 ;
00456 res_counter[0] ++ ;
00457
00458 while (res_counter[j] == res_extent[j]) {
00459 if (j == res_rank ) return ;
00460 result_p += res_offset[j] ;
00461 res_counter[j+1]++ ;
00462 res_counter[j] = 0 ;
00463 j ++ ;
00464 }
00465 }
00466 counter[0] = a_size ;
00467 j = 0 ;
00468 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00469 array_p += src_offset[j] ;
00470 counter[j+1]++ ;
00471 counter[j] = 0 ;
00472 j ++ ;
00473 }
00474
00475 }
00476 if (pad != NULL) {
00477
00478 src_rank = pad_rank ;
00479
00480 for ( j = 0 ; j <= src_rank ; j ++ ) {
00481 src_extent [j] = pad_extent[j] ;
00482 src_stride [j] = pad_stride[j] ;
00483 counter[j] = 0 ;
00484 src_offset [j] = pad_offset[j] ;
00485 }
00486 array_p = pad_p ;
00487 a_size = src_extent [0] ;
00488 a_stride = src_stride [0] ;
00489 }
00490 }
00491 } else if (typ_sz == sizeof(r16) && ALIGNED_r16(array_p) && ALIGNED_r16(result_p) && ((pad_p == NULL) || ALIGNED_r16(pad_p))) {
00492
00493 for (;;) {
00494 while (counter[src_rank] < src_extent[src_rank] ) {
00495 for ( i = 0 ; i < a_size ; i ++ ) {
00496 *(r16 *)result_p = *(r16 *)array_p ;
00497 array_p += a_stride ;
00498 result_p += r_stride ;
00499
00500 j = 0 ;
00501 res_counter[0] ++ ;
00502
00503 while (res_counter[j] == res_extent[j]) {
00504 if (j == res_rank ) return ;
00505 result_p += res_offset[j] ;
00506 res_counter[j+1]++ ;
00507 res_counter[j] = 0 ;
00508 j ++ ;
00509 }
00510 }
00511 counter[0] = a_size ;
00512 j = 0 ;
00513 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00514 array_p += src_offset[j] ;
00515 counter[j+1]++ ;
00516 counter[j] = 0 ;
00517 j ++ ;
00518 }
00519
00520 }
00521 if (pad != NULL) {
00522
00523 src_rank = pad_rank ;
00524
00525 for ( j = 0 ; j <= src_rank ; j ++ ) {
00526 src_extent [j] = pad_extent[j] ;
00527 src_stride [j] = pad_stride[j] ;
00528 counter[j] = 0 ;
00529 src_offset [j] = pad_offset[j] ;
00530 }
00531 array_p = pad_p ;
00532 a_size = src_extent [0] ;
00533 a_stride = src_stride [0] ;
00534 }
00535 }
00536 } else {
00537 for (;;) {
00538 while (counter[src_rank] < src_extent[src_rank] ) {
00539 for ( i = 0 ; i < a_size ; i ++ ) {
00540 ap = array_p ;
00541 rp = result_p ;
00542 if (typ_sz > BIGDEFAULTSZ)
00543 (void) memcpy (rp, ap, typ_sz);
00544 else
00545 for (j = 0 ; j < typ_sz ; j ++) *rp++ = *ap ++ ;
00546 array_p += a_stride ;
00547 result_p += r_stride ;
00548
00549 j = 0 ;
00550 res_counter[0] ++ ;
00551
00552 while (res_counter[j] == res_extent[j]) {
00553 if (j == res_rank ) return ;
00554 result_p += res_offset[j] ;
00555 res_counter[j+1]++ ;
00556 res_counter[j] = 0 ;
00557 j ++ ;
00558 }
00559 }
00560 counter[0] = a_size ;
00561 j = 0 ;
00562 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00563 array_p += src_offset[j] ;
00564 counter[j+1]++ ;
00565 counter[j] = 0 ;
00566 j ++ ;
00567 }
00568
00569 }
00570 if (pad != NULL) {
00571
00572 src_rank = pad_rank ;
00573
00574 for ( j = 0 ; j <= src_rank ; j ++ ) {
00575 src_extent [j] = pad_extent[j] ;
00576 src_stride [j] = pad_stride[j] ;
00577 counter[j] = 0 ;
00578 src_offset [j] = pad_offset[j] ;
00579 }
00580 array_p = pad_p ;
00581 a_size = src_extent [0] ;
00582 a_stride = src_stride [0] ;
00583 }
00584 }
00585 }
00586 }