00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "f90_intrinsic.h"
00039
00040 static size_t read_source_desc(DopeVectorType * array,
00041 size_t src_extent[MAX_NARY_DIMS],
00042 size_t src_stride[MAX_NARY_DIMS],
00043 size_t src_offset[MAX_NARY_DIMS],
00044 int32_t ddim) ;
00045
00046 static void
00047 get_offset_and_stride(DopeVectorType * array,
00048 size_t src_extent[MAX_NARY_DIMS],
00049 size_t src_stride[MAX_NARY_DIMS],
00050 size_t src_offset[MAX_NARY_DIMS],
00051 int32_t ddim) ;
00052
00053 static int32_t read_dim(DopeVectorType * dim) ;
00054
00055 static void alloc_res(DopeVectorType * result,
00056 size_t src_extent[MAX_NARY_DIMS]);
00057
00058 void
00059 _SUM__I1(
00060 DopeVectorType *result,
00061 DopeVectorType *array,
00062 DopeVectorType *dim,
00063 DopeVectorType *mask)
00064 {
00065 char * result_p, * result_b ;
00066 char * array_p, * array_b ;
00067 char * dim_p, * dim_b ;
00068 char * mask_p, * mask_b ;
00069
00070 size_t src_extent [MAX_NARY_DIMS] ;
00071 size_t counter [MAX_NARY_DIMS] ;
00072 size_t src_offset [MAX_NARY_DIMS] ;
00073 size_t src_stride [MAX_NARY_DIMS] ;
00074 size_t src_size ;
00075
00076 size_t res_stride [MAX_NARY_DIMS] ;
00077 size_t res_offset [MAX_NARY_DIMS] ;
00078
00079 size_t msk_stride [MAX_NARY_DIMS] ;
00080 size_t msk_offset [MAX_NARY_DIMS] ;
00081
00082 int32_t ddim ;
00083 uint32_t src_rank ;
00084 uint32_t res_rank ;
00085
00086 size_t j,k,i ;
00087 size_t msk_typ_sz;
00088
00089 i1 accum ;
00090 i1 const initv = 0 ;
00091 size_t a_size,a_stride;
00092 size_t m_stride ;
00093
00094 i1 temp,new ;
00095
00096 if (mask == NULL) {
00097 if (dim != NULL) {
00098 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00099 mask = (DopeVectorType *) dim ;
00100 dim = NULL;
00101 }
00102 }
00103 }
00104
00105 if (dim != NULL) {
00106 ddim = read_dim(dim);
00107 } else
00108 ddim = 0 ;
00109
00110 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00111 src_rank = GET_RANK_FROM_DESC(array) - 1;
00112
00113 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00114
00115 for (i = 0 ; i <= src_rank ; i ++)
00116 counter[i] = 0 ;
00117
00118 if ((ddim > src_rank ) || (ddim < 0))
00119 ERROR(_LELVL_ABORT,FESCIDIM);
00120
00121 res_rank = GET_RANK_FROM_DESC(result);
00122
00123 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00124 alloc_res(result,src_extent);
00125 }
00126
00127 res_stride[0] = 0;
00128 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00129 for (j = 0 ; j < res_rank ; j ++ ) {
00130 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00131 }
00132
00133 res_offset[0] = res_stride[0] ;
00134 for ( j = 1 ; j < res_rank ; j ++ )
00135 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00136
00137 result_b = GET_ADDRESS_FROM_DESC(result);
00138
00139 if (mask != NULL) {
00140
00141 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00142 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00143
00144 if (GET_RANK_FROM_DESC(mask) == 0) {
00145 if (*mask_b) {
00146 mask = NULL;
00147 } else {
00148 src_size = 0;
00149 for (j = 0 ; j <= src_rank ; j ++) {
00150 msk_stride[j] = 0 ;
00151 msk_offset[j] = 0 ;
00152 }
00153 }
00154
00155 } else {
00156
00157 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00158 }
00159 }
00160
00161 accum = initv ;
00162
00163 if (src_size == 0 ) {
00164 for (i = 1 ; i <= src_rank ; i ++ )
00165 if (src_extent[i] == 0)
00166 return ;
00167 }
00168 array_p = array_b ;
00169 result_p = result_b ;
00170 if (mask == NULL) {
00171
00172 a_size = src_extent[0] ;
00173 a_stride = src_stride[0] ;
00174
00175 while (counter[src_rank] < src_extent[src_rank] ) {
00176
00177 if(res_rank != 0) accum = initv ;
00178
00179 for ( i = 0 ; i < a_size ; i ++ ) {
00180 accum += *(i1 *)array_p ;
00181
00182 array_p += a_stride ;
00183 }
00184 *(i1 *) result_p = accum ;
00185 counter[0] = a_size ;
00186 j = 0 ;
00187 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00188 array_p += src_offset[j] ;
00189 result_p += res_offset[j] ;
00190 counter[j+1]++ ;
00191 counter[j] = 0 ;
00192 j ++ ;
00193 }
00194 }
00195 } else {
00196
00197 a_size = src_extent[0] ;
00198 a_stride = src_stride[0] ;
00199 m_stride = msk_stride[0] ;
00200 mask_p = mask_b ;
00201
00202 while (counter[src_rank] < src_extent[src_rank] ) {
00203
00204 if(res_rank != 0) accum = initv ;
00205
00206 for ( i = 0 ; i < a_size ; i ++ ) {
00207 if (*mask_p) {
00208 accum += *(i1 *)array_p ;
00209
00210 }
00211 array_p += a_stride ;
00212 mask_p += m_stride ;
00213 }
00214 *(i1 *) result_p = accum ;
00215 counter[0] = a_size ;
00216 j = 0 ;
00217 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00218 array_p += src_offset[j] ;
00219 mask_p += msk_offset[j] ;
00220 result_p += res_offset[j] ;
00221 counter[j+1]++ ;
00222 counter[j] = 0 ;
00223 j ++ ;
00224 }
00225 }
00226 }
00227 }
00228 void
00229 _SUM__I2(
00230 DopeVectorType *result,
00231 DopeVectorType *array,
00232 DopeVectorType *dim,
00233 DopeVectorType *mask)
00234 {
00235 char * result_p, * result_b ;
00236 char * array_p, * array_b ;
00237 char * dim_p, * dim_b ;
00238 char * mask_p, * mask_b ;
00239
00240 size_t src_extent [MAX_NARY_DIMS] ;
00241 size_t counter [MAX_NARY_DIMS] ;
00242 size_t src_offset [MAX_NARY_DIMS] ;
00243 size_t src_stride [MAX_NARY_DIMS] ;
00244 size_t src_size ;
00245
00246 size_t res_stride [MAX_NARY_DIMS] ;
00247 size_t res_offset [MAX_NARY_DIMS] ;
00248
00249 size_t msk_stride [MAX_NARY_DIMS] ;
00250 size_t msk_offset [MAX_NARY_DIMS] ;
00251
00252 int32_t ddim ;
00253 uint32_t src_rank ;
00254 uint32_t res_rank ;
00255
00256 size_t j,k,i ;
00257 size_t msk_typ_sz;
00258
00259 i2 accum ;
00260 i2 const initv = 0 ;
00261 size_t a_size,a_stride;
00262 size_t m_stride ;
00263
00264 i2 temp,new ;
00265
00266 if (mask == NULL) {
00267 if (dim != NULL) {
00268 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00269 mask = (DopeVectorType *) dim ;
00270 dim = NULL;
00271 }
00272 }
00273 }
00274
00275 if (dim != NULL) {
00276 ddim = read_dim(dim);
00277 } else
00278 ddim = 0 ;
00279
00280 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00281 src_rank = GET_RANK_FROM_DESC(array) - 1;
00282
00283 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00284
00285 for (i = 0 ; i <= src_rank ; i ++)
00286 counter[i] = 0 ;
00287
00288 if ((ddim > src_rank ) || (ddim < 0))
00289 ERROR(_LELVL_ABORT,FESCIDIM);
00290
00291 res_rank = GET_RANK_FROM_DESC(result);
00292
00293 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00294 alloc_res(result,src_extent);
00295 }
00296
00297 res_stride[0] = 0;
00298 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00299 for (j = 0 ; j < res_rank ; j ++ ) {
00300 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00301 }
00302
00303 res_offset[0] = res_stride[0] ;
00304 for ( j = 1 ; j < res_rank ; j ++ )
00305 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00306
00307 result_b = GET_ADDRESS_FROM_DESC(result);
00308
00309 if (mask != NULL) {
00310
00311 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00312 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00313
00314 if (GET_RANK_FROM_DESC(mask) == 0) {
00315 if (*mask_b) {
00316 mask = NULL;
00317 } else {
00318 src_size = 0;
00319 for (j = 0 ; j <= src_rank ; j ++) {
00320 msk_stride[j] = 0 ;
00321 msk_offset[j] = 0 ;
00322 }
00323 }
00324
00325 } else {
00326
00327 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00328 }
00329 }
00330
00331 accum = initv ;
00332
00333 if (src_size == 0 ) {
00334 for (i = 1 ; i <= src_rank ; i ++ )
00335 if (src_extent[i] == 0)
00336 return ;
00337 }
00338 array_p = array_b ;
00339 result_p = result_b ;
00340 if (mask == NULL) {
00341
00342 a_size = src_extent[0] ;
00343 a_stride = src_stride[0] ;
00344
00345 while (counter[src_rank] < src_extent[src_rank] ) {
00346
00347 if(res_rank != 0) accum = initv ;
00348
00349 for ( i = 0 ; i < a_size ; i ++ ) {
00350 accum += *(i2 *)array_p ;
00351
00352 array_p += a_stride ;
00353 }
00354 *(i2 *) result_p = accum ;
00355 counter[0] = a_size ;
00356 j = 0 ;
00357 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00358 array_p += src_offset[j] ;
00359 result_p += res_offset[j] ;
00360 counter[j+1]++ ;
00361 counter[j] = 0 ;
00362 j ++ ;
00363 }
00364 }
00365 } else {
00366
00367 a_size = src_extent[0] ;
00368 a_stride = src_stride[0] ;
00369 m_stride = msk_stride[0] ;
00370 mask_p = mask_b ;
00371
00372 while (counter[src_rank] < src_extent[src_rank] ) {
00373
00374 if(res_rank != 0) accum = initv ;
00375
00376 for ( i = 0 ; i < a_size ; i ++ ) {
00377 if (*mask_p) {
00378 accum += *(i2 *)array_p ;
00379
00380 }
00381 array_p += a_stride ;
00382 mask_p += m_stride ;
00383 }
00384 *(i2 *) result_p = accum ;
00385 counter[0] = a_size ;
00386 j = 0 ;
00387 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00388 array_p += src_offset[j] ;
00389 mask_p += msk_offset[j] ;
00390 result_p += res_offset[j] ;
00391 counter[j+1]++ ;
00392 counter[j] = 0 ;
00393 j ++ ;
00394 }
00395 }
00396 }
00397 }
00398 void
00399 _SUM__I4(
00400 DopeVectorType *result,
00401 DopeVectorType *array,
00402 DopeVectorType *dim,
00403 DopeVectorType *mask)
00404 {
00405 char * result_p, * result_b ;
00406 char * array_p, * array_b ;
00407 char * dim_p, * dim_b ;
00408 char * mask_p, * mask_b ;
00409
00410 size_t src_extent [MAX_NARY_DIMS] ;
00411 size_t counter [MAX_NARY_DIMS] ;
00412 size_t src_offset [MAX_NARY_DIMS] ;
00413 size_t src_stride [MAX_NARY_DIMS] ;
00414 size_t src_size ;
00415
00416 size_t res_stride [MAX_NARY_DIMS] ;
00417 size_t res_offset [MAX_NARY_DIMS] ;
00418
00419 size_t msk_stride [MAX_NARY_DIMS] ;
00420 size_t msk_offset [MAX_NARY_DIMS] ;
00421
00422 int32_t ddim ;
00423 uint32_t src_rank ;
00424 uint32_t res_rank ;
00425
00426 size_t j,k,i ;
00427 size_t msk_typ_sz;
00428
00429 i4 accum ;
00430 i4 const initv = 0 ;
00431 size_t a_size,a_stride;
00432 size_t m_stride ;
00433
00434 i4 temp,new ;
00435
00436 if (mask == NULL) {
00437 if (dim != NULL) {
00438 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00439 mask = (DopeVectorType *) dim ;
00440 dim = NULL;
00441 }
00442 }
00443 }
00444
00445 if (dim != NULL) {
00446 ddim = read_dim(dim);
00447 } else
00448 ddim = 0 ;
00449
00450 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00451 src_rank = GET_RANK_FROM_DESC(array) - 1;
00452
00453 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00454
00455 for (i = 0 ; i <= src_rank ; i ++)
00456 counter[i] = 0 ;
00457
00458 if ((ddim > src_rank ) || (ddim < 0))
00459 ERROR(_LELVL_ABORT,FESCIDIM);
00460
00461 res_rank = GET_RANK_FROM_DESC(result);
00462
00463 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00464 alloc_res(result,src_extent);
00465 }
00466
00467 res_stride[0] = 0;
00468 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00469 for (j = 0 ; j < res_rank ; j ++ ) {
00470 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00471 }
00472
00473 res_offset[0] = res_stride[0] ;
00474 for ( j = 1 ; j < res_rank ; j ++ )
00475 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00476
00477 result_b = GET_ADDRESS_FROM_DESC(result);
00478
00479 if (mask != NULL) {
00480
00481 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00482 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00483
00484 if (GET_RANK_FROM_DESC(mask) == 0) {
00485 if (*mask_b) {
00486 mask = NULL;
00487 } else {
00488 src_size = 0;
00489 for (j = 0 ; j <= src_rank ; j ++) {
00490 msk_stride[j] = 0 ;
00491 msk_offset[j] = 0 ;
00492 }
00493 }
00494
00495 } else {
00496
00497 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00498 }
00499 }
00500
00501 accum = initv ;
00502
00503 if (src_size == 0 ) {
00504 for (i = 1 ; i <= src_rank ; i ++ )
00505 if (src_extent[i] == 0)
00506 return ;
00507 }
00508 array_p = array_b ;
00509 result_p = result_b ;
00510 if (mask == NULL) {
00511
00512 a_size = src_extent[0] ;
00513 a_stride = src_stride[0] ;
00514
00515 while (counter[src_rank] < src_extent[src_rank] ) {
00516
00517 if(res_rank != 0) accum = initv ;
00518
00519 for ( i = 0 ; i < a_size ; i ++ ) {
00520 accum += *(i4 *)array_p ;
00521
00522 array_p += a_stride ;
00523 }
00524 *(i4 *) result_p = accum ;
00525 counter[0] = a_size ;
00526 j = 0 ;
00527 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00528 array_p += src_offset[j] ;
00529 result_p += res_offset[j] ;
00530 counter[j+1]++ ;
00531 counter[j] = 0 ;
00532 j ++ ;
00533 }
00534 }
00535 } else {
00536
00537 a_size = src_extent[0] ;
00538 a_stride = src_stride[0] ;
00539 m_stride = msk_stride[0] ;
00540 mask_p = mask_b ;
00541
00542 while (counter[src_rank] < src_extent[src_rank] ) {
00543
00544 if(res_rank != 0) accum = initv ;
00545
00546 for ( i = 0 ; i < a_size ; i ++ ) {
00547 if (*mask_p) {
00548 accum += *(i4 *)array_p ;
00549
00550 }
00551 array_p += a_stride ;
00552 mask_p += m_stride ;
00553 }
00554 *(i4 *) result_p = accum ;
00555 counter[0] = a_size ;
00556 j = 0 ;
00557 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00558 array_p += src_offset[j] ;
00559 mask_p += msk_offset[j] ;
00560 result_p += res_offset[j] ;
00561 counter[j+1]++ ;
00562 counter[j] = 0 ;
00563 j ++ ;
00564 }
00565 }
00566 }
00567 }
00568 void
00569 _SUM__J(
00570 DopeVectorType *result,
00571 DopeVectorType *array,
00572 DopeVectorType *dim,
00573 DopeVectorType *mask)
00574 {
00575 char * result_p, * result_b ;
00576 char * array_p, * array_b ;
00577 char * dim_p, * dim_b ;
00578 char * mask_p, * mask_b ;
00579
00580 size_t src_extent [MAX_NARY_DIMS] ;
00581 size_t counter [MAX_NARY_DIMS] ;
00582 size_t src_offset [MAX_NARY_DIMS] ;
00583 size_t src_stride [MAX_NARY_DIMS] ;
00584 size_t src_size ;
00585
00586 size_t res_stride [MAX_NARY_DIMS] ;
00587 size_t res_offset [MAX_NARY_DIMS] ;
00588
00589 size_t msk_stride [MAX_NARY_DIMS] ;
00590 size_t msk_offset [MAX_NARY_DIMS] ;
00591
00592 int32_t ddim ;
00593 uint32_t src_rank ;
00594 uint32_t res_rank ;
00595
00596 size_t j,k,i ;
00597 size_t msk_typ_sz;
00598
00599 i8 accum ;
00600 i8 const initv = 0 ;
00601 size_t a_size,a_stride;
00602 size_t m_stride ;
00603
00604 i8 temp,new ;
00605
00606 if (mask == NULL) {
00607 if (dim != NULL) {
00608 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00609 mask = (DopeVectorType *) dim ;
00610 dim = NULL;
00611 }
00612 }
00613 }
00614
00615 if (dim != NULL) {
00616 ddim = read_dim(dim);
00617 } else
00618 ddim = 0 ;
00619
00620 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00621 src_rank = GET_RANK_FROM_DESC(array) - 1;
00622
00623 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00624
00625 for (i = 0 ; i <= src_rank ; i ++)
00626 counter[i] = 0 ;
00627
00628 if ((ddim > src_rank ) || (ddim < 0))
00629 ERROR(_LELVL_ABORT,FESCIDIM);
00630
00631 res_rank = GET_RANK_FROM_DESC(result);
00632
00633 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00634 alloc_res(result,src_extent);
00635 }
00636
00637 res_stride[0] = 0;
00638 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00639 for (j = 0 ; j < res_rank ; j ++ ) {
00640 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00641 }
00642
00643 res_offset[0] = res_stride[0] ;
00644 for ( j = 1 ; j < res_rank ; j ++ )
00645 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00646
00647 result_b = GET_ADDRESS_FROM_DESC(result);
00648
00649 if (mask != NULL) {
00650
00651 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00652 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00653
00654 if (GET_RANK_FROM_DESC(mask) == 0) {
00655 if (*mask_b) {
00656 mask = NULL;
00657 } else {
00658 src_size = 0;
00659 for (j = 0 ; j <= src_rank ; j ++) {
00660 msk_stride[j] = 0 ;
00661 msk_offset[j] = 0 ;
00662 }
00663 }
00664
00665 } else {
00666
00667 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00668 }
00669 }
00670
00671 accum = initv ;
00672
00673 if (src_size == 0 ) {
00674 for (i = 1 ; i <= src_rank ; i ++ )
00675 if (src_extent[i] == 0)
00676 return ;
00677 }
00678 array_p = array_b ;
00679 result_p = result_b ;
00680 if (mask == NULL) {
00681
00682 a_size = src_extent[0] ;
00683 a_stride = src_stride[0] ;
00684
00685 while (counter[src_rank] < src_extent[src_rank] ) {
00686
00687 if(res_rank != 0) accum = initv ;
00688
00689 for ( i = 0 ; i < a_size ; i ++ ) {
00690 accum += *(i8 *)array_p ;
00691
00692 array_p += a_stride ;
00693 }
00694 *(i8 *) result_p = accum ;
00695 counter[0] = a_size ;
00696 j = 0 ;
00697 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00698 array_p += src_offset[j] ;
00699 result_p += res_offset[j] ;
00700 counter[j+1]++ ;
00701 counter[j] = 0 ;
00702 j ++ ;
00703 }
00704 }
00705 } else {
00706
00707 a_size = src_extent[0] ;
00708 a_stride = src_stride[0] ;
00709 m_stride = msk_stride[0] ;
00710 mask_p = mask_b ;
00711
00712 while (counter[src_rank] < src_extent[src_rank] ) {
00713
00714 if(res_rank != 0) accum = initv ;
00715
00716 for ( i = 0 ; i < a_size ; i ++ ) {
00717 if (*mask_p) {
00718 accum += *(i8 *)array_p ;
00719
00720 }
00721 array_p += a_stride ;
00722 mask_p += m_stride ;
00723 }
00724 *(i8 *) result_p = accum ;
00725 counter[0] = a_size ;
00726 j = 0 ;
00727 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00728 array_p += src_offset[j] ;
00729 mask_p += msk_offset[j] ;
00730 result_p += res_offset[j] ;
00731 counter[j+1]++ ;
00732 counter[j] = 0 ;
00733 j ++ ;
00734 }
00735 }
00736 }
00737 }
00738 void
00739 _SUM__S4(
00740 DopeVectorType *result,
00741 DopeVectorType *array,
00742 DopeVectorType *dim,
00743 DopeVectorType *mask)
00744 {
00745 char * result_p, * result_b ;
00746 char * array_p, * array_b ;
00747 char * dim_p, * dim_b ;
00748 char * mask_p, * mask_b ;
00749
00750 size_t src_extent [MAX_NARY_DIMS] ;
00751 size_t counter [MAX_NARY_DIMS] ;
00752 size_t src_offset [MAX_NARY_DIMS] ;
00753 size_t src_stride [MAX_NARY_DIMS] ;
00754 size_t src_size ;
00755
00756 size_t res_stride [MAX_NARY_DIMS] ;
00757 size_t res_offset [MAX_NARY_DIMS] ;
00758
00759 size_t msk_stride [MAX_NARY_DIMS] ;
00760 size_t msk_offset [MAX_NARY_DIMS] ;
00761
00762 int32_t ddim ;
00763 uint32_t src_rank ;
00764 uint32_t res_rank ;
00765
00766 size_t j,k,i ;
00767 size_t msk_typ_sz;
00768
00769 r4 accum ;
00770 r4 const initv = 0.0 ;
00771 size_t a_size,a_stride;
00772 size_t m_stride ;
00773
00774 r4 temp,new ;
00775
00776 if (mask == NULL) {
00777 if (dim != NULL) {
00778 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00779 mask = (DopeVectorType *) dim ;
00780 dim = NULL;
00781 }
00782 }
00783 }
00784
00785 if (dim != NULL) {
00786 ddim = read_dim(dim);
00787 } else
00788 ddim = 0 ;
00789
00790 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00791 src_rank = GET_RANK_FROM_DESC(array) - 1;
00792
00793 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00794
00795 for (i = 0 ; i <= src_rank ; i ++)
00796 counter[i] = 0 ;
00797
00798 if ((ddim > src_rank ) || (ddim < 0))
00799 ERROR(_LELVL_ABORT,FESCIDIM);
00800
00801 res_rank = GET_RANK_FROM_DESC(result);
00802
00803 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00804 alloc_res(result,src_extent);
00805 }
00806
00807 res_stride[0] = 0;
00808 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00809 for (j = 0 ; j < res_rank ; j ++ ) {
00810 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00811 }
00812
00813 res_offset[0] = res_stride[0] ;
00814 for ( j = 1 ; j < res_rank ; j ++ )
00815 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00816
00817 result_b = GET_ADDRESS_FROM_DESC(result);
00818
00819 if (mask != NULL) {
00820
00821 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00822 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00823
00824 if (GET_RANK_FROM_DESC(mask) == 0) {
00825 if (*mask_b) {
00826 mask = NULL;
00827 } else {
00828 src_size = 0;
00829 for (j = 0 ; j <= src_rank ; j ++) {
00830 msk_stride[j] = 0 ;
00831 msk_offset[j] = 0 ;
00832 }
00833 }
00834
00835 } else {
00836
00837 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00838 }
00839 }
00840
00841 accum = initv ;
00842
00843 if (src_size == 0 ) {
00844 for (i = 1 ; i <= src_rank ; i ++ )
00845 if (src_extent[i] == 0)
00846 return ;
00847 }
00848 array_p = array_b ;
00849 result_p = result_b ;
00850 if (mask == NULL) {
00851
00852 a_size = src_extent[0] ;
00853 a_stride = src_stride[0] ;
00854
00855 while (counter[src_rank] < src_extent[src_rank] ) {
00856
00857 if(res_rank != 0) accum = initv ;
00858
00859 for ( i = 0 ; i < a_size ; i ++ ) {
00860 accum += *(r4 *)array_p ;
00861
00862 array_p += a_stride ;
00863 }
00864 *(r4 *) result_p = accum ;
00865 counter[0] = a_size ;
00866 j = 0 ;
00867 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00868 array_p += src_offset[j] ;
00869 result_p += res_offset[j] ;
00870 counter[j+1]++ ;
00871 counter[j] = 0 ;
00872 j ++ ;
00873 }
00874 }
00875 } else {
00876
00877 a_size = src_extent[0] ;
00878 a_stride = src_stride[0] ;
00879 m_stride = msk_stride[0] ;
00880 mask_p = mask_b ;
00881
00882 while (counter[src_rank] < src_extent[src_rank] ) {
00883
00884 if(res_rank != 0) accum = initv ;
00885
00886 for ( i = 0 ; i < a_size ; i ++ ) {
00887 if (*mask_p) {
00888 accum += *(r4 *)array_p ;
00889
00890 }
00891 array_p += a_stride ;
00892 mask_p += m_stride ;
00893 }
00894 *(r4 *) result_p = accum ;
00895 counter[0] = a_size ;
00896 j = 0 ;
00897 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00898 array_p += src_offset[j] ;
00899 mask_p += msk_offset[j] ;
00900 result_p += res_offset[j] ;
00901 counter[j+1]++ ;
00902 counter[j] = 0 ;
00903 j ++ ;
00904 }
00905 }
00906 }
00907 }
00908 void
00909 _SUM__S(
00910 DopeVectorType *result,
00911 DopeVectorType *array,
00912 DopeVectorType *dim,
00913 DopeVectorType *mask)
00914 {
00915 char * result_p, * result_b ;
00916 char * array_p, * array_b ;
00917 char * dim_p, * dim_b ;
00918 char * mask_p, * mask_b ;
00919
00920 size_t src_extent [MAX_NARY_DIMS] ;
00921 size_t counter [MAX_NARY_DIMS] ;
00922 size_t src_offset [MAX_NARY_DIMS] ;
00923 size_t src_stride [MAX_NARY_DIMS] ;
00924 size_t src_size ;
00925
00926 size_t res_stride [MAX_NARY_DIMS] ;
00927 size_t res_offset [MAX_NARY_DIMS] ;
00928
00929 size_t msk_stride [MAX_NARY_DIMS] ;
00930 size_t msk_offset [MAX_NARY_DIMS] ;
00931
00932 int32_t ddim ;
00933 uint32_t src_rank ;
00934 uint32_t res_rank ;
00935
00936 size_t j,k,i ;
00937 size_t msk_typ_sz;
00938
00939 r8 accum ;
00940 r8 const initv = 0.0 ;
00941 size_t a_size,a_stride;
00942 size_t m_stride ;
00943
00944 r8 temp,new ;
00945
00946 if (mask == NULL) {
00947 if (dim != NULL) {
00948 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00949 mask = (DopeVectorType *) dim ;
00950 dim = NULL;
00951 }
00952 }
00953 }
00954
00955 if (dim != NULL) {
00956 ddim = read_dim(dim);
00957 } else
00958 ddim = 0 ;
00959
00960 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00961 src_rank = GET_RANK_FROM_DESC(array) - 1;
00962
00963 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00964
00965 for (i = 0 ; i <= src_rank ; i ++)
00966 counter[i] = 0 ;
00967
00968 if ((ddim > src_rank ) || (ddim < 0))
00969 ERROR(_LELVL_ABORT,FESCIDIM);
00970
00971 res_rank = GET_RANK_FROM_DESC(result);
00972
00973 if (!GET_ASSOCIATED_FROM_DESC(result)) {
00974 alloc_res(result,src_extent);
00975 }
00976
00977 res_stride[0] = 0;
00978 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
00979 for (j = 0 ; j < res_rank ; j ++ ) {
00980 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
00981 }
00982
00983 res_offset[0] = res_stride[0] ;
00984 for ( j = 1 ; j < res_rank ; j ++ )
00985 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
00986
00987 result_b = GET_ADDRESS_FROM_DESC(result);
00988
00989 if (mask != NULL) {
00990
00991 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00992 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00993
00994 if (GET_RANK_FROM_DESC(mask) == 0) {
00995 if (*mask_b) {
00996 mask = NULL;
00997 } else {
00998 src_size = 0;
00999 for (j = 0 ; j <= src_rank ; j ++) {
01000 msk_stride[j] = 0 ;
01001 msk_offset[j] = 0 ;
01002 }
01003 }
01004
01005 } else {
01006
01007 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01008 }
01009 }
01010
01011 accum = initv ;
01012
01013 if (src_size == 0 ) {
01014 for (i = 1 ; i <= src_rank ; i ++ )
01015 if (src_extent[i] == 0)
01016 return ;
01017 }
01018 array_p = array_b ;
01019 result_p = result_b ;
01020 if (mask == NULL) {
01021
01022 a_size = src_extent[0] ;
01023 a_stride = src_stride[0] ;
01024
01025 while (counter[src_rank] < src_extent[src_rank] ) {
01026
01027 if(res_rank != 0) accum = initv ;
01028
01029 for ( i = 0 ; i < a_size ; i ++ ) {
01030 accum += *(r8 *)array_p ;
01031
01032 array_p += a_stride ;
01033 }
01034 *(r8 *) result_p = accum ;
01035 counter[0] = a_size ;
01036 j = 0 ;
01037 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01038 array_p += src_offset[j] ;
01039 result_p += res_offset[j] ;
01040 counter[j+1]++ ;
01041 counter[j] = 0 ;
01042 j ++ ;
01043 }
01044 }
01045 } else {
01046
01047 a_size = src_extent[0] ;
01048 a_stride = src_stride[0] ;
01049 m_stride = msk_stride[0] ;
01050 mask_p = mask_b ;
01051
01052 while (counter[src_rank] < src_extent[src_rank] ) {
01053
01054 if(res_rank != 0) accum = initv ;
01055
01056 for ( i = 0 ; i < a_size ; i ++ ) {
01057 if (*mask_p) {
01058 accum += *(r8 *)array_p ;
01059
01060 }
01061 array_p += a_stride ;
01062 mask_p += m_stride ;
01063 }
01064 *(r8 *) result_p = accum ;
01065 counter[0] = a_size ;
01066 j = 0 ;
01067 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01068 array_p += src_offset[j] ;
01069 mask_p += msk_offset[j] ;
01070 result_p += res_offset[j] ;
01071 counter[j+1]++ ;
01072 counter[j] = 0 ;
01073 j ++ ;
01074 }
01075 }
01076 }
01077 }
01078 void
01079 _SUM__D(
01080 DopeVectorType *result,
01081 DopeVectorType *array,
01082 DopeVectorType *dim,
01083 DopeVectorType *mask)
01084 {
01085 char * result_p, * result_b ;
01086 char * array_p, * array_b ;
01087 char * dim_p, * dim_b ;
01088 char * mask_p, * mask_b ;
01089
01090 size_t src_extent [MAX_NARY_DIMS] ;
01091 size_t counter [MAX_NARY_DIMS] ;
01092 size_t src_offset [MAX_NARY_DIMS] ;
01093 size_t src_stride [MAX_NARY_DIMS] ;
01094 size_t src_size ;
01095
01096 size_t res_stride [MAX_NARY_DIMS] ;
01097 size_t res_offset [MAX_NARY_DIMS] ;
01098
01099 size_t msk_stride [MAX_NARY_DIMS] ;
01100 size_t msk_offset [MAX_NARY_DIMS] ;
01101
01102 int32_t ddim ;
01103 uint32_t src_rank ;
01104 uint32_t res_rank ;
01105
01106 size_t j,k,i ;
01107 size_t msk_typ_sz;
01108
01109 r16 accum ;
01110 r16 const initv = 0.0 ;
01111 size_t a_size,a_stride;
01112 size_t m_stride ;
01113
01114 r16 temp,new ;
01115
01116 if (mask == NULL) {
01117 if (dim != NULL) {
01118 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01119 mask = (DopeVectorType *) dim ;
01120 dim = NULL;
01121 }
01122 }
01123 }
01124
01125 if (dim != NULL) {
01126 ddim = read_dim(dim);
01127 } else
01128 ddim = 0 ;
01129
01130 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01131 src_rank = GET_RANK_FROM_DESC(array) - 1;
01132
01133 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01134
01135 for (i = 0 ; i <= src_rank ; i ++)
01136 counter[i] = 0 ;
01137
01138 if ((ddim > src_rank ) || (ddim < 0))
01139 ERROR(_LELVL_ABORT,FESCIDIM);
01140
01141 res_rank = GET_RANK_FROM_DESC(result);
01142
01143 if (!GET_ASSOCIATED_FROM_DESC(result)) {
01144 alloc_res(result,src_extent);
01145 }
01146
01147 res_stride[0] = 0;
01148 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
01149 for (j = 0 ; j < res_rank ; j ++ ) {
01150 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
01151 }
01152
01153 res_offset[0] = res_stride[0] ;
01154 for ( j = 1 ; j < res_rank ; j ++ )
01155 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
01156
01157 result_b = GET_ADDRESS_FROM_DESC(result);
01158
01159 if (mask != NULL) {
01160
01161 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01162 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01163
01164 if (GET_RANK_FROM_DESC(mask) == 0) {
01165 if (*mask_b) {
01166 mask = NULL;
01167 } else {
01168 src_size = 0;
01169 for (j = 0 ; j <= src_rank ; j ++) {
01170 msk_stride[j] = 0 ;
01171 msk_offset[j] = 0 ;
01172 }
01173 }
01174
01175 } else {
01176
01177 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01178 }
01179 }
01180
01181 accum = initv ;
01182
01183 if (src_size == 0 ) {
01184 for (i = 1 ; i <= src_rank ; i ++ )
01185 if (src_extent[i] == 0)
01186 return ;
01187 }
01188 array_p = array_b ;
01189 result_p = result_b ;
01190 if (mask == NULL) {
01191
01192 a_size = src_extent[0] ;
01193 a_stride = src_stride[0] ;
01194
01195 while (counter[src_rank] < src_extent[src_rank] ) {
01196
01197 if(res_rank != 0) accum = initv ;
01198
01199 for ( i = 0 ; i < a_size ; i ++ ) {
01200 accum += *(r16 *)array_p ;
01201
01202 array_p += a_stride ;
01203 }
01204 *(r16 *) result_p = accum ;
01205 counter[0] = a_size ;
01206 j = 0 ;
01207 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01208 array_p += src_offset[j] ;
01209 result_p += res_offset[j] ;
01210 counter[j+1]++ ;
01211 counter[j] = 0 ;
01212 j ++ ;
01213 }
01214 }
01215 } else {
01216
01217 a_size = src_extent[0] ;
01218 a_stride = src_stride[0] ;
01219 m_stride = msk_stride[0] ;
01220 mask_p = mask_b ;
01221
01222 while (counter[src_rank] < src_extent[src_rank] ) {
01223
01224 if(res_rank != 0) accum = initv ;
01225
01226 for ( i = 0 ; i < a_size ; i ++ ) {
01227 if (*mask_p) {
01228 accum += *(r16 *)array_p ;
01229
01230 }
01231 array_p += a_stride ;
01232 mask_p += m_stride ;
01233 }
01234 *(r16 *) result_p = accum ;
01235 counter[0] = a_size ;
01236 j = 0 ;
01237 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01238 array_p += src_offset[j] ;
01239 mask_p += msk_offset[j] ;
01240 result_p += res_offset[j] ;
01241 counter[j+1]++ ;
01242 counter[j] = 0 ;
01243 j ++ ;
01244 }
01245 }
01246 }
01247 }
01248 void
01249 _SUM__C4(
01250 DopeVectorType *result,
01251 DopeVectorType *array,
01252 DopeVectorType *dim,
01253 DopeVectorType *mask)
01254 {
01255 char * result_p, * result_b ;
01256 char * array_p, * array_b ;
01257 char * dim_p, * dim_b ;
01258 char * mask_p, * mask_b ;
01259
01260 size_t src_extent [MAX_NARY_DIMS] ;
01261 size_t counter [MAX_NARY_DIMS] ;
01262 size_t src_offset [MAX_NARY_DIMS] ;
01263 size_t src_stride [MAX_NARY_DIMS] ;
01264 size_t src_size ;
01265
01266 size_t res_stride [MAX_NARY_DIMS] ;
01267 size_t res_offset [MAX_NARY_DIMS] ;
01268
01269 size_t msk_stride [MAX_NARY_DIMS] ;
01270 size_t msk_offset [MAX_NARY_DIMS] ;
01271
01272 int32_t ddim ;
01273 uint32_t src_rank ;
01274 uint32_t res_rank ;
01275
01276 size_t j,k,i ;
01277 size_t msk_typ_sz;
01278
01279 c8 accum ;
01280 c8 const initv = {
01281 0.0,0.0 };
01282 size_t a_size,a_stride;
01283 size_t m_stride ;
01284
01285 c8 temp,new ;
01286
01287 if (mask == NULL) {
01288 if (dim != NULL) {
01289 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01290 mask = (DopeVectorType *) dim ;
01291 dim = NULL;
01292 }
01293 }
01294 }
01295
01296 if (dim != NULL) {
01297 ddim = read_dim(dim);
01298 } else
01299 ddim = 0 ;
01300
01301 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01302 src_rank = GET_RANK_FROM_DESC(array) - 1;
01303
01304 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01305
01306 for (i = 0 ; i <= src_rank ; i ++)
01307 counter[i] = 0 ;
01308
01309 if ((ddim > src_rank ) || (ddim < 0))
01310 ERROR(_LELVL_ABORT,FESCIDIM);
01311
01312 res_rank = GET_RANK_FROM_DESC(result);
01313
01314 if (!GET_ASSOCIATED_FROM_DESC(result)) {
01315 alloc_res(result,src_extent);
01316 }
01317
01318 res_stride[0] = 0;
01319 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
01320 for (j = 0 ; j < res_rank ; j ++ ) {
01321 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
01322 }
01323
01324 res_offset[0] = res_stride[0] ;
01325 for ( j = 1 ; j < res_rank ; j ++ )
01326 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
01327
01328 result_b = GET_ADDRESS_FROM_DESC(result);
01329
01330 if (mask != NULL) {
01331
01332 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01333 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01334
01335 if (GET_RANK_FROM_DESC(mask) == 0) {
01336 if (*mask_b) {
01337 mask = NULL;
01338 } else {
01339 src_size = 0;
01340 for (j = 0 ; j <= src_rank ; j ++) {
01341 msk_stride[j] = 0 ;
01342 msk_offset[j] = 0 ;
01343 }
01344 }
01345
01346 } else {
01347
01348 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01349 }
01350 }
01351
01352 accum = initv ;
01353
01354 if (src_size == 0 ) {
01355 for (i = 1 ; i <= src_rank ; i ++ )
01356 if (src_extent[i] == 0)
01357 return ;
01358 }
01359 array_p = array_b ;
01360 result_p = result_b ;
01361 if (mask == NULL) {
01362
01363 a_size = src_extent[0] ;
01364 a_stride = src_stride[0] ;
01365
01366 while (counter[src_rank] < src_extent[src_rank] ) {
01367
01368 if(res_rank != 0) accum = initv ;
01369
01370 for ( i = 0 ; i < a_size ; i ++ ) {
01371 accum.r += (*(c8 *)array_p).r ;
01372 accum.i +=(*(c8 *)array_p).i ;
01373
01374 array_p += a_stride ;
01375 }
01376 (*(c8 *) result_p).r = accum.r ;
01377 (*(c8 *) result_p).i = accum.i ;
01378 counter[0] = a_size ;
01379 j = 0 ;
01380 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01381 array_p += src_offset[j] ;
01382 result_p += res_offset[j] ;
01383 counter[j+1]++ ;
01384 counter[j] = 0 ;
01385 j ++ ;
01386 }
01387 }
01388 } else {
01389
01390 a_size = src_extent[0] ;
01391 a_stride = src_stride[0] ;
01392 m_stride = msk_stride[0] ;
01393 mask_p = mask_b ;
01394
01395 while (counter[src_rank] < src_extent[src_rank] ) {
01396
01397 if(res_rank != 0) accum = initv ;
01398
01399 for ( i = 0 ; i < a_size ; i ++ ) {
01400 if (*mask_p) {
01401 accum.r += (*(c8 *)array_p).r ;
01402 accum.i +=(*(c8 *)array_p).i ;
01403
01404 }
01405 array_p += a_stride ;
01406 mask_p += m_stride ;
01407 }
01408 (*(c8 *) result_p).r = accum.r ;
01409 (*(c8 *) result_p).i = accum.i ;
01410 counter[0] = a_size ;
01411 j = 0 ;
01412 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01413 array_p += src_offset[j] ;
01414 mask_p += msk_offset[j] ;
01415 result_p += res_offset[j] ;
01416 counter[j+1]++ ;
01417 counter[j] = 0 ;
01418 j ++ ;
01419 }
01420 }
01421 }
01422 }
01423 void
01424 _SUM__C(
01425 DopeVectorType *result,
01426 DopeVectorType *array,
01427 DopeVectorType *dim,
01428 DopeVectorType *mask)
01429 {
01430 char * result_p, * result_b ;
01431 char * array_p, * array_b ;
01432 char * dim_p, * dim_b ;
01433 char * mask_p, * mask_b ;
01434
01435 size_t src_extent [MAX_NARY_DIMS] ;
01436 size_t counter [MAX_NARY_DIMS] ;
01437 size_t src_offset [MAX_NARY_DIMS] ;
01438 size_t src_stride [MAX_NARY_DIMS] ;
01439 size_t src_size ;
01440
01441 size_t res_stride [MAX_NARY_DIMS] ;
01442 size_t res_offset [MAX_NARY_DIMS] ;
01443
01444 size_t msk_stride [MAX_NARY_DIMS] ;
01445 size_t msk_offset [MAX_NARY_DIMS] ;
01446
01447 int32_t ddim ;
01448 uint32_t src_rank ;
01449 uint32_t res_rank ;
01450
01451 size_t j,k,i ;
01452 size_t msk_typ_sz;
01453
01454 c16 accum ;
01455 c16 const initv = {
01456 0.0,0.0 };
01457 size_t a_size,a_stride;
01458 size_t m_stride ;
01459
01460 c16 temp,new ;
01461
01462 if (mask == NULL) {
01463 if (dim != NULL) {
01464 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01465 mask = (DopeVectorType *) dim ;
01466 dim = NULL;
01467 }
01468 }
01469 }
01470
01471 if (dim != NULL) {
01472 ddim = read_dim(dim);
01473 } else
01474 ddim = 0 ;
01475
01476 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01477 src_rank = GET_RANK_FROM_DESC(array) - 1;
01478
01479 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01480
01481 for (i = 0 ; i <= src_rank ; i ++)
01482 counter[i] = 0 ;
01483
01484 if ((ddim > src_rank ) || (ddim < 0))
01485 ERROR(_LELVL_ABORT,FESCIDIM);
01486
01487 res_rank = GET_RANK_FROM_DESC(result);
01488
01489 if (!GET_ASSOCIATED_FROM_DESC(result)) {
01490 alloc_res(result,src_extent);
01491 }
01492
01493 res_stride[0] = 0;
01494 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
01495 for (j = 0 ; j < res_rank ; j ++ ) {
01496 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
01497 }
01498
01499 res_offset[0] = res_stride[0] ;
01500 for ( j = 1 ; j < res_rank ; j ++ )
01501 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
01502
01503 result_b = GET_ADDRESS_FROM_DESC(result);
01504
01505 if (mask != NULL) {
01506
01507 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01508 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01509
01510 if (GET_RANK_FROM_DESC(mask) == 0) {
01511 if (*mask_b) {
01512 mask = NULL;
01513 } else {
01514 src_size = 0;
01515 for (j = 0 ; j <= src_rank ; j ++) {
01516 msk_stride[j] = 0 ;
01517 msk_offset[j] = 0 ;
01518 }
01519 }
01520
01521 } else {
01522
01523 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01524 }
01525 }
01526
01527 accum = initv ;
01528
01529 if (src_size == 0 ) {
01530 for (i = 1 ; i <= src_rank ; i ++ )
01531 if (src_extent[i] == 0)
01532 return ;
01533 }
01534 array_p = array_b ;
01535 result_p = result_b ;
01536 if (mask == NULL) {
01537
01538 a_size = src_extent[0] ;
01539 a_stride = src_stride[0] ;
01540
01541 while (counter[src_rank] < src_extent[src_rank] ) {
01542
01543 if(res_rank != 0) accum = initv ;
01544
01545 for ( i = 0 ; i < a_size ; i ++ ) {
01546 accum.r += (*(c16 *)array_p).r ;
01547 accum.i +=(*(c16 *)array_p).i ;
01548
01549 array_p += a_stride ;
01550 }
01551 (*(c16 *) result_p).r = accum.r ;
01552 (*(c16 *) result_p).i = accum.i ;
01553 counter[0] = a_size ;
01554 j = 0 ;
01555 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01556 array_p += src_offset[j] ;
01557 result_p += res_offset[j] ;
01558 counter[j+1]++ ;
01559 counter[j] = 0 ;
01560 j ++ ;
01561 }
01562 }
01563 } else {
01564
01565 a_size = src_extent[0] ;
01566 a_stride = src_stride[0] ;
01567 m_stride = msk_stride[0] ;
01568 mask_p = mask_b ;
01569
01570 while (counter[src_rank] < src_extent[src_rank] ) {
01571
01572 if(res_rank != 0) accum = initv ;
01573
01574 for ( i = 0 ; i < a_size ; i ++ ) {
01575 if (*mask_p) {
01576 accum.r += (*(c16 *)array_p).r ;
01577 accum.i +=(*(c16 *)array_p).i ;
01578
01579 }
01580 array_p += a_stride ;
01581 mask_p += m_stride ;
01582 }
01583 (*(c16 *) result_p).r = accum.r ;
01584 (*(c16 *) result_p).i = accum.i ;
01585 counter[0] = a_size ;
01586 j = 0 ;
01587 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01588 array_p += src_offset[j] ;
01589 mask_p += msk_offset[j] ;
01590 result_p += res_offset[j] ;
01591 counter[j+1]++ ;
01592 counter[j] = 0 ;
01593 j ++ ;
01594 }
01595 }
01596 }
01597 }
01598 void
01599 _SUM__Z(
01600 DopeVectorType *result,
01601 DopeVectorType *array,
01602 DopeVectorType *dim,
01603 DopeVectorType *mask)
01604 {
01605 char * result_p, * result_b ;
01606 char * array_p, * array_b ;
01607 char * dim_p, * dim_b ;
01608 char * mask_p, * mask_b ;
01609
01610 size_t src_extent [MAX_NARY_DIMS] ;
01611 size_t counter [MAX_NARY_DIMS] ;
01612 size_t src_offset [MAX_NARY_DIMS] ;
01613 size_t src_stride [MAX_NARY_DIMS] ;
01614 size_t src_size ;
01615
01616 size_t res_stride [MAX_NARY_DIMS] ;
01617 size_t res_offset [MAX_NARY_DIMS] ;
01618
01619 size_t msk_stride [MAX_NARY_DIMS] ;
01620 size_t msk_offset [MAX_NARY_DIMS] ;
01621
01622 int32_t ddim ;
01623 uint32_t src_rank ;
01624 uint32_t res_rank ;
01625
01626 size_t j,k,i ;
01627 size_t msk_typ_sz;
01628
01629 c32 accum ;
01630 c32 const initv = {
01631 0.0,0.0 };
01632 size_t a_size,a_stride;
01633 size_t m_stride ;
01634
01635 c32 temp,new ;
01636
01637 if (mask == NULL) {
01638 if (dim != NULL) {
01639 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01640 mask = (DopeVectorType *) dim ;
01641 dim = NULL;
01642 }
01643 }
01644 }
01645
01646 if (dim != NULL) {
01647 ddim = read_dim(dim);
01648 } else
01649 ddim = 0 ;
01650
01651 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01652 src_rank = GET_RANK_FROM_DESC(array) - 1;
01653
01654 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01655
01656 for (i = 0 ; i <= src_rank ; i ++)
01657 counter[i] = 0 ;
01658
01659 if ((ddim > src_rank ) || (ddim < 0))
01660 ERROR(_LELVL_ABORT,FESCIDIM);
01661
01662 res_rank = GET_RANK_FROM_DESC(result);
01663
01664 if (!GET_ASSOCIATED_FROM_DESC(result)) {
01665 alloc_res(result,src_extent);
01666 }
01667
01668 res_stride[0] = 0;
01669 for (j = 0 ; j <= src_rank; j ++ ) res_offset[j] = 0 ;
01670 for (j = 0 ; j < res_rank ; j ++ ) {
01671 res_stride[j] = GET_STRIDE_FROM_DESC(result,j) ;
01672 }
01673
01674 res_offset[0] = res_stride[0] ;
01675 for ( j = 1 ; j < res_rank ; j ++ )
01676 res_offset[j] = res_stride[j] - (res_stride[j-1]*(src_extent[j])) ;
01677
01678 result_b = GET_ADDRESS_FROM_DESC(result);
01679
01680 if (mask != NULL) {
01681
01682 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01683 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01684
01685 if (GET_RANK_FROM_DESC(mask) == 0) {
01686 if (*mask_b) {
01687 mask = NULL;
01688 } else {
01689 src_size = 0;
01690 for (j = 0 ; j <= src_rank ; j ++) {
01691 msk_stride[j] = 0 ;
01692 msk_offset[j] = 0 ;
01693 }
01694 }
01695
01696 } else {
01697
01698 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01699 }
01700 }
01701
01702 accum = initv ;
01703
01704 if (src_size == 0 ) {
01705 for (i = 1 ; i <= src_rank ; i ++ )
01706 if (src_extent[i] == 0)
01707 return ;
01708 }
01709 array_p = array_b ;
01710 result_p = result_b ;
01711 if (mask == NULL) {
01712
01713 a_size = src_extent[0] ;
01714 a_stride = src_stride[0] ;
01715
01716 while (counter[src_rank] < src_extent[src_rank] ) {
01717
01718 if(res_rank != 0) accum = initv ;
01719
01720 for ( i = 0 ; i < a_size ; i ++ ) {
01721 accum.r += (*(c32 *)array_p).r ;
01722 accum.i +=(*(c32 *)array_p).i ;
01723
01724 array_p += a_stride ;
01725 }
01726 (*(c32 *) result_p).r = accum.r ;
01727 (*(c32 *) result_p).i = accum.i ;
01728 counter[0] = a_size ;
01729 j = 0 ;
01730 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01731 array_p += src_offset[j] ;
01732 result_p += res_offset[j] ;
01733 counter[j+1]++ ;
01734 counter[j] = 0 ;
01735 j ++ ;
01736 }
01737 }
01738 } else {
01739
01740 a_size = src_extent[0] ;
01741 a_stride = src_stride[0] ;
01742 m_stride = msk_stride[0] ;
01743 mask_p = mask_b ;
01744
01745 while (counter[src_rank] < src_extent[src_rank] ) {
01746
01747 if(res_rank != 0) accum = initv ;
01748
01749 for ( i = 0 ; i < a_size ; i ++ ) {
01750 if (*mask_p) {
01751 accum.r += (*(c32 *)array_p).r ;
01752 accum.i +=(*(c32 *)array_p).i ;
01753
01754 }
01755 array_p += a_stride ;
01756 mask_p += m_stride ;
01757 }
01758 (*(c32 *) result_p).r = accum.r ;
01759 (*(c32 *) result_p).i = accum.i ;
01760 counter[0] = a_size ;
01761 j = 0 ;
01762 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01763 array_p += src_offset[j] ;
01764 mask_p += msk_offset[j] ;
01765 result_p += res_offset[j] ;
01766 counter[j+1]++ ;
01767 counter[j] = 0 ;
01768 j ++ ;
01769 }
01770 }
01771 }
01772 }
01773 static void
01774 alloc_res(DopeVectorType * result,
01775 size_t src_extent[MAX_NARY_DIMS])
01776 {
01777 size_t tot_ext ;
01778 size_t str_sz ;
01779 size_t nbytes ;
01780 size_t esz ;
01781 int32_t res_rank ;
01782 char *p = NULL ;
01783 int32_t i ;
01784
01785 SET_ADDRESS_IN_DESC(result,NULL);
01786 SET_ORIG_BS_IN_DESC(result,NULL) ;
01787 SET_ORIG_SZ_IN_DESC(result,0) ;
01788
01789 res_rank = GET_RANK_FROM_DESC(result);
01790 tot_ext = 1 ;
01791 esz = GET_ALEN_FROM_DESC(result) >> 3 ;
01792 nbytes = esz ;
01793 str_sz = MK_STRIDE(FALSE,esz);
01794
01795 for ( i = 0 ; i < res_rank ; i ++) {
01796 SET_LBOUND_IN_DESC(result,i,1);
01797 SET_EXTENT_IN_DESC(result,i,src_extent[i+1]);
01798 SET_STRMULT_IN_DESC(result,i,tot_ext * str_sz );
01799 tot_ext *= src_extent[i+1] ;
01800 }
01801 nbytes *= tot_ext;
01802 if (nbytes > 0 ) {
01803 p = (void *) malloc (nbytes);
01804 if (p == NULL)
01805 ERROR(_LELVL_ABORT, FENOMEMY);
01806
01807 SET_ADDRESS_IN_DESC(result,p);
01808 }
01809 SET_ASSOCIATED_IN_DESC(result);
01810 SET_CONTIG_IN_DESC(result);
01811 SET_ORIG_BS_IN_DESC(result,p) ;
01812 SET_ORIG_SZ_IN_DESC(result,nbytes * 8) ;
01813 }
01814
01815 static int32_t
01816 read_dim(DopeVectorType * dim)
01817 {
01818 int32_t ddim ;
01819 char * dim_p ;
01820
01821 dim_p = (char *) GET_ADDRESS_FROM_DESC(dim) ;
01822
01823 switch (GET_ELEMENT_SZ_FROM_DESC(dim)) {
01824 case sizeof(int8_t):
01825 ddim = * (int8_t *) dim_p ;
01826 break;
01827
01828 case sizeof(int16_t):
01829 ddim = * (int16_t *) dim_p ;
01830 break;
01831
01832 case sizeof(int32_t):
01833 ddim = * (int32_t *) dim_p ;
01834 break;
01835
01836 case sizeof(int64_t):
01837 ddim = * (int64_t *) dim_p ;
01838 break;
01839 }
01840
01841 return (ddim - 1) ;
01842 }
01843
01844 static size_t
01845 read_source_desc(DopeVectorType * array,
01846 size_t src_extent[MAX_NARY_DIMS],
01847 size_t src_stride[MAX_NARY_DIMS],
01848 size_t src_offset[MAX_NARY_DIMS],
01849 int32_t ddim)
01850 {
01851 int32_t src_rank ,k,j ;
01852 size_t src_size ;
01853
01854 src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ;
01855 src_rank = GET_RANK_FROM_DESC(array);
01856
01857 src_size = src_extent[0];
01858
01859 for ( k = 1, j = 0 ; j < src_rank ; j ++ ) {
01860 if (j != ddim ) {
01861 src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ;
01862 src_size *= src_extent[k];
01863 k++ ;
01864 }
01865 }
01866 get_offset_and_stride(array, src_extent, src_stride, src_offset, ddim);
01867
01868 return src_size;
01869 }
01870
01871 static void
01872 get_offset_and_stride(DopeVectorType * array,
01873 size_t src_extent[MAX_NARY_DIMS],
01874 size_t src_stride[MAX_NARY_DIMS],
01875 size_t src_offset[MAX_NARY_DIMS],
01876 int32_t ddim)
01877 {
01878
01879 int32_t src_rank ,k,j ;
01880
01881 src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ;
01882 src_offset[0] = 0;
01883 src_rank = GET_RANK_FROM_DESC(array);
01884
01885 for ( k = 1, j = 0 ; j < src_rank ; j ++ ) {
01886 if (j != ddim ) {
01887 src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ;
01888 src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1])) ;
01889 k++ ;
01890 }
01891 }
01892 }