00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "f90_intrinsic.h"
00039
00040 static size_t read_source_desc(DopeVectorType * array,
00041 size_t src_extent[MAX_NARY_DIMS],
00042 size_t src_stride[MAX_NARY_DIMS],
00043 size_t src_offset[MAX_NARY_DIMS],
00044 int32_t ddim) ;
00045
00046 static void
00047 get_offset_and_stride(DopeVectorType * array,
00048 size_t src_extent[MAX_NARY_DIMS],
00049 size_t src_stride[MAX_NARY_DIMS],
00050 size_t src_offset[MAX_NARY_DIMS],
00051 int32_t ddim) ;
00052
00053 static uint32_t
00054 find_contig_axes(size_t src_extent[MAX_NARY_DIMS],
00055 size_t src_stride[MAX_NARY_DIMS],
00056 size_t src_offset[MAX_NARY_DIMS],
00057 uint32_t src_rank,
00058 size_t typ_sz,
00059 size_t msk_stride[MAX_NARY_DIMS],
00060 size_t msk_offset[MAX_NARY_DIMS],
00061 size_t msk_typ_sz) ;
00062
00063 static int32_t read_dim(DopeVectorType * dim) ;
00064
00065 i1
00066 _SUM0__I1(
00067 DopeVectorType *array,
00068 DopeVectorType *dim,
00069 DopeVectorType *mask)
00070 {
00071 char * array_p, * array_b ;
00072 char * dim_p, * dim_b ;
00073 char * mask_p, * mask_b ;
00074
00075 size_t src_extent [MAX_NARY_DIMS] ;
00076 size_t counter [MAX_NARY_DIMS] ;
00077 size_t src_offset [MAX_NARY_DIMS] ;
00078 size_t src_stride [MAX_NARY_DIMS] ;
00079 size_t src_size ;
00080
00081 size_t msk_stride [MAX_NARY_DIMS] ;
00082 size_t msk_offset [MAX_NARY_DIMS] ;
00083
00084 int32_t ddim ;
00085 uint32_t src_rank ;
00086 uint32_t res_rank ;
00087 uint32_t jrank ;
00088
00089 size_t j,k,i ;
00090 size_t typ_sz;
00091 size_t msk_typ_sz;
00092
00093 i1 accum ;
00094 i1 const initv = 0 ;
00095 size_t a_size,a_stride;
00096 size_t m_stride ;
00097
00098 i1 temp,new ;
00099
00100 if (mask == NULL) {
00101 if (dim != NULL) {
00102 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00103 mask = (DopeVectorType *) dim ;
00104 dim = NULL;
00105 }
00106 }
00107 }
00108
00109 if (dim != NULL) {
00110 ddim = read_dim(dim);
00111 } else
00112 ddim = 0 ;
00113
00114 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00115 src_rank = GET_RANK_FROM_DESC(array) - 1;
00116 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00117
00118 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00119
00120 for (i = 0 ; i <= src_rank ; i ++)
00121 counter[i] = 0 ;
00122
00123 if (mask != NULL) {
00124
00125 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00126 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00127
00128 if (GET_RANK_FROM_DESC(mask) == 0) {
00129 if (*mask_b) {
00130 mask = NULL;
00131 } else {
00132 src_size = 0;
00133 for (j = 0 ; j <= src_rank ; j ++) {
00134 msk_stride[j] = 0 ;
00135 msk_offset[j] = 0 ;
00136 }
00137 }
00138
00139 } else {
00140
00141 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00142 }
00143 }
00144
00145 accum = initv ;
00146
00147 if (src_size == 0 ) {
00148 return accum;
00149 }
00150 array_p = array_b ;
00151 if (mask == NULL) {
00152
00153 {
00154 size_t *p1 = NULL;
00155 size_t *p2 = NULL;
00156 size_t p3 = 0;
00157
00158 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00159 }
00160
00161 a_size = src_extent[0] ;
00162 a_stride = src_stride[0] ;
00163
00164 while (counter[src_rank] < src_extent[src_rank] ) {
00165
00166 for ( i = 0 ; i < a_size ; i ++ ) {
00167 accum += *(i1 *)array_p ;
00168
00169 array_p += a_stride ;
00170 }
00171 counter[0] = a_size ;
00172 j = 0 ;
00173 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00174 array_p += src_offset[j] ;
00175 counter[j+1]++ ;
00176 counter[j] = 0 ;
00177 j ++ ;
00178 }
00179 }
00180 } else {
00181
00182 {
00183 size_t *p1 = NULL;
00184 size_t *p2 = NULL;
00185 size_t p3 = 0;
00186
00187 p1 = msk_stride ;
00188 p2 = msk_offset ;
00189 p3 = msk_typ_sz ;
00190
00191 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00192 }
00193
00194 a_size = src_extent[0] ;
00195 a_stride = src_stride[0] ;
00196 m_stride = msk_stride[0] ;
00197 mask_p = mask_b ;
00198
00199 while (counter[src_rank] < src_extent[src_rank] ) {
00200
00201 for ( i = 0 ; i < a_size ; i ++ ) {
00202 if (*mask_p) {
00203 accum += *(i1 *)array_p ;
00204
00205 }
00206 array_p += a_stride ;
00207 mask_p += m_stride ;
00208 }
00209 counter[0] = a_size ;
00210 j = 0 ;
00211 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00212 array_p += src_offset[j] ;
00213 mask_p += msk_offset[j] ;
00214 counter[j+1]++ ;
00215 counter[j] = 0 ;
00216 j ++ ;
00217 }
00218 }
00219 }
00220 return accum ;
00221 }
00222 i2
00223 _SUM0__I2(
00224 DopeVectorType *array,
00225 DopeVectorType *dim,
00226 DopeVectorType *mask)
00227 {
00228 char * array_p, * array_b ;
00229 char * dim_p, * dim_b ;
00230 char * mask_p, * mask_b ;
00231
00232 size_t src_extent [MAX_NARY_DIMS] ;
00233 size_t counter [MAX_NARY_DIMS] ;
00234 size_t src_offset [MAX_NARY_DIMS] ;
00235 size_t src_stride [MAX_NARY_DIMS] ;
00236 size_t src_size ;
00237
00238 size_t msk_stride [MAX_NARY_DIMS] ;
00239 size_t msk_offset [MAX_NARY_DIMS] ;
00240
00241 int32_t ddim ;
00242 uint32_t src_rank ;
00243 uint32_t res_rank ;
00244 uint32_t jrank ;
00245
00246 size_t j,k,i ;
00247 size_t typ_sz;
00248 size_t msk_typ_sz;
00249
00250 i2 accum ;
00251 i2 const initv = 0 ;
00252 size_t a_size,a_stride;
00253 size_t m_stride ;
00254
00255 i2 temp,new ;
00256
00257 if (mask == NULL) {
00258 if (dim != NULL) {
00259 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00260 mask = (DopeVectorType *) dim ;
00261 dim = NULL;
00262 }
00263 }
00264 }
00265
00266 if (dim != NULL) {
00267 ddim = read_dim(dim);
00268 } else
00269 ddim = 0 ;
00270
00271 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00272 src_rank = GET_RANK_FROM_DESC(array) - 1;
00273 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00274
00275 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00276
00277 for (i = 0 ; i <= src_rank ; i ++)
00278 counter[i] = 0 ;
00279
00280 if (mask != NULL) {
00281
00282 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00283 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00284
00285 if (GET_RANK_FROM_DESC(mask) == 0) {
00286 if (*mask_b) {
00287 mask = NULL;
00288 } else {
00289 src_size = 0;
00290 for (j = 0 ; j <= src_rank ; j ++) {
00291 msk_stride[j] = 0 ;
00292 msk_offset[j] = 0 ;
00293 }
00294 }
00295
00296 } else {
00297
00298 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00299 }
00300 }
00301
00302 accum = initv ;
00303
00304 if (src_size == 0 ) {
00305 return accum;
00306 }
00307 array_p = array_b ;
00308 if (mask == NULL) {
00309
00310 {
00311 size_t *p1 = NULL;
00312 size_t *p2 = NULL;
00313 size_t p3 = 0;
00314
00315 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00316 }
00317
00318 a_size = src_extent[0] ;
00319 a_stride = src_stride[0] ;
00320
00321 while (counter[src_rank] < src_extent[src_rank] ) {
00322
00323 for ( i = 0 ; i < a_size ; i ++ ) {
00324 accum += *(i2 *)array_p ;
00325
00326 array_p += a_stride ;
00327 }
00328 counter[0] = a_size ;
00329 j = 0 ;
00330 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00331 array_p += src_offset[j] ;
00332 counter[j+1]++ ;
00333 counter[j] = 0 ;
00334 j ++ ;
00335 }
00336 }
00337 } else {
00338
00339 {
00340 size_t *p1 = NULL;
00341 size_t *p2 = NULL;
00342 size_t p3 = 0;
00343
00344 p1 = msk_stride ;
00345 p2 = msk_offset ;
00346 p3 = msk_typ_sz ;
00347
00348 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00349 }
00350
00351 a_size = src_extent[0] ;
00352 a_stride = src_stride[0] ;
00353 m_stride = msk_stride[0] ;
00354 mask_p = mask_b ;
00355
00356 while (counter[src_rank] < src_extent[src_rank] ) {
00357
00358 for ( i = 0 ; i < a_size ; i ++ ) {
00359 if (*mask_p) {
00360 accum += *(i2 *)array_p ;
00361
00362 }
00363 array_p += a_stride ;
00364 mask_p += m_stride ;
00365 }
00366 counter[0] = a_size ;
00367 j = 0 ;
00368 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00369 array_p += src_offset[j] ;
00370 mask_p += msk_offset[j] ;
00371 counter[j+1]++ ;
00372 counter[j] = 0 ;
00373 j ++ ;
00374 }
00375 }
00376 }
00377 return accum ;
00378 }
00379 i4
00380 _SUM0__I4(
00381 DopeVectorType *array,
00382 DopeVectorType *dim,
00383 DopeVectorType *mask)
00384 {
00385 char * array_p, * array_b ;
00386 char * dim_p, * dim_b ;
00387 char * mask_p, * mask_b ;
00388
00389 size_t src_extent [MAX_NARY_DIMS] ;
00390 size_t counter [MAX_NARY_DIMS] ;
00391 size_t src_offset [MAX_NARY_DIMS] ;
00392 size_t src_stride [MAX_NARY_DIMS] ;
00393 size_t src_size ;
00394
00395 size_t msk_stride [MAX_NARY_DIMS] ;
00396 size_t msk_offset [MAX_NARY_DIMS] ;
00397
00398 int32_t ddim ;
00399 uint32_t src_rank ;
00400 uint32_t res_rank ;
00401 uint32_t jrank ;
00402
00403 size_t j,k,i ;
00404 size_t typ_sz;
00405 size_t msk_typ_sz;
00406
00407 i4 accum ;
00408 i4 const initv = 0 ;
00409 size_t a_size,a_stride;
00410 size_t m_stride ;
00411
00412 i4 temp,new ;
00413
00414 if (mask == NULL) {
00415 if (dim != NULL) {
00416 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00417 mask = (DopeVectorType *) dim ;
00418 dim = NULL;
00419 }
00420 }
00421 }
00422
00423 if (dim != NULL) {
00424 ddim = read_dim(dim);
00425 } else
00426 ddim = 0 ;
00427
00428 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00429 src_rank = GET_RANK_FROM_DESC(array) - 1;
00430 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00431
00432 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00433
00434 for (i = 0 ; i <= src_rank ; i ++)
00435 counter[i] = 0 ;
00436
00437 if (mask != NULL) {
00438
00439 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00440 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00441
00442 if (GET_RANK_FROM_DESC(mask) == 0) {
00443 if (*mask_b) {
00444 mask = NULL;
00445 } else {
00446 src_size = 0;
00447 for (j = 0 ; j <= src_rank ; j ++) {
00448 msk_stride[j] = 0 ;
00449 msk_offset[j] = 0 ;
00450 }
00451 }
00452
00453 } else {
00454
00455 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00456 }
00457 }
00458
00459 accum = initv ;
00460
00461 if (src_size == 0 ) {
00462 return accum;
00463 }
00464 array_p = array_b ;
00465 if (mask == NULL) {
00466
00467 {
00468 size_t *p1 = NULL;
00469 size_t *p2 = NULL;
00470 size_t p3 = 0;
00471
00472 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00473 }
00474
00475 a_size = src_extent[0] ;
00476 a_stride = src_stride[0] ;
00477
00478 while (counter[src_rank] < src_extent[src_rank] ) {
00479
00480 for ( i = 0 ; i < a_size ; i ++ ) {
00481 accum += *(i4 *)array_p ;
00482
00483 array_p += a_stride ;
00484 }
00485 counter[0] = a_size ;
00486 j = 0 ;
00487 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00488 array_p += src_offset[j] ;
00489 counter[j+1]++ ;
00490 counter[j] = 0 ;
00491 j ++ ;
00492 }
00493 }
00494 } else {
00495
00496 {
00497 size_t *p1 = NULL;
00498 size_t *p2 = NULL;
00499 size_t p3 = 0;
00500
00501 p1 = msk_stride ;
00502 p2 = msk_offset ;
00503 p3 = msk_typ_sz ;
00504
00505 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00506 }
00507
00508 a_size = src_extent[0] ;
00509 a_stride = src_stride[0] ;
00510 m_stride = msk_stride[0] ;
00511 mask_p = mask_b ;
00512
00513 while (counter[src_rank] < src_extent[src_rank] ) {
00514
00515 for ( i = 0 ; i < a_size ; i ++ ) {
00516 if (*mask_p) {
00517 accum += *(i4 *)array_p ;
00518
00519 }
00520 array_p += a_stride ;
00521 mask_p += m_stride ;
00522 }
00523 counter[0] = a_size ;
00524 j = 0 ;
00525 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00526 array_p += src_offset[j] ;
00527 mask_p += msk_offset[j] ;
00528 counter[j+1]++ ;
00529 counter[j] = 0 ;
00530 j ++ ;
00531 }
00532 }
00533 }
00534 return accum ;
00535 }
00536 i8
00537 _SUM0__J(
00538 DopeVectorType *array,
00539 DopeVectorType *dim,
00540 DopeVectorType *mask)
00541 {
00542 char * array_p, * array_b ;
00543 char * dim_p, * dim_b ;
00544 char * mask_p, * mask_b ;
00545
00546 size_t src_extent [MAX_NARY_DIMS] ;
00547 size_t counter [MAX_NARY_DIMS] ;
00548 size_t src_offset [MAX_NARY_DIMS] ;
00549 size_t src_stride [MAX_NARY_DIMS] ;
00550 size_t src_size ;
00551
00552 size_t msk_stride [MAX_NARY_DIMS] ;
00553 size_t msk_offset [MAX_NARY_DIMS] ;
00554
00555 int32_t ddim ;
00556 uint32_t src_rank ;
00557 uint32_t res_rank ;
00558 uint32_t jrank ;
00559
00560 size_t j,k,i ;
00561 size_t typ_sz;
00562 size_t msk_typ_sz;
00563
00564 i8 accum ;
00565 i8 const initv = 0 ;
00566 size_t a_size,a_stride;
00567 size_t m_stride ;
00568
00569 i8 temp,new ;
00570
00571 if (mask == NULL) {
00572 if (dim != NULL) {
00573 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00574 mask = (DopeVectorType *) dim ;
00575 dim = NULL;
00576 }
00577 }
00578 }
00579
00580 if (dim != NULL) {
00581 ddim = read_dim(dim);
00582 } else
00583 ddim = 0 ;
00584
00585 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00586 src_rank = GET_RANK_FROM_DESC(array) - 1;
00587 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00588
00589 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00590
00591 for (i = 0 ; i <= src_rank ; i ++)
00592 counter[i] = 0 ;
00593
00594 if (mask != NULL) {
00595
00596 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00597 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00598
00599 if (GET_RANK_FROM_DESC(mask) == 0) {
00600 if (*mask_b) {
00601 mask = NULL;
00602 } else {
00603 src_size = 0;
00604 for (j = 0 ; j <= src_rank ; j ++) {
00605 msk_stride[j] = 0 ;
00606 msk_offset[j] = 0 ;
00607 }
00608 }
00609
00610 } else {
00611
00612 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00613 }
00614 }
00615
00616 accum = initv ;
00617
00618 if (src_size == 0 ) {
00619 return accum;
00620 }
00621 array_p = array_b ;
00622 if (mask == NULL) {
00623
00624 {
00625 size_t *p1 = NULL;
00626 size_t *p2 = NULL;
00627 size_t p3 = 0;
00628
00629 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00630 }
00631
00632 a_size = src_extent[0] ;
00633 a_stride = src_stride[0] ;
00634
00635 while (counter[src_rank] < src_extent[src_rank] ) {
00636
00637 for ( i = 0 ; i < a_size ; i ++ ) {
00638 accum += *(i8 *)array_p ;
00639
00640 array_p += a_stride ;
00641 }
00642 counter[0] = a_size ;
00643 j = 0 ;
00644 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00645 array_p += src_offset[j] ;
00646 counter[j+1]++ ;
00647 counter[j] = 0 ;
00648 j ++ ;
00649 }
00650 }
00651 } else {
00652
00653 {
00654 size_t *p1 = NULL;
00655 size_t *p2 = NULL;
00656 size_t p3 = 0;
00657
00658 p1 = msk_stride ;
00659 p2 = msk_offset ;
00660 p3 = msk_typ_sz ;
00661
00662 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00663 }
00664
00665 a_size = src_extent[0] ;
00666 a_stride = src_stride[0] ;
00667 m_stride = msk_stride[0] ;
00668 mask_p = mask_b ;
00669
00670 while (counter[src_rank] < src_extent[src_rank] ) {
00671
00672 for ( i = 0 ; i < a_size ; i ++ ) {
00673 if (*mask_p) {
00674 accum += *(i8 *)array_p ;
00675
00676 }
00677 array_p += a_stride ;
00678 mask_p += m_stride ;
00679 }
00680 counter[0] = a_size ;
00681 j = 0 ;
00682 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00683 array_p += src_offset[j] ;
00684 mask_p += msk_offset[j] ;
00685 counter[j+1]++ ;
00686 counter[j] = 0 ;
00687 j ++ ;
00688 }
00689 }
00690 }
00691 return accum ;
00692 }
00693 r4
00694 _SUM0__S4(
00695 DopeVectorType *array,
00696 DopeVectorType *dim,
00697 DopeVectorType *mask)
00698 {
00699 char * array_p, * array_b ;
00700 char * dim_p, * dim_b ;
00701 char * mask_p, * mask_b ;
00702
00703 size_t src_extent [MAX_NARY_DIMS] ;
00704 size_t counter [MAX_NARY_DIMS] ;
00705 size_t src_offset [MAX_NARY_DIMS] ;
00706 size_t src_stride [MAX_NARY_DIMS] ;
00707 size_t src_size ;
00708
00709 size_t msk_stride [MAX_NARY_DIMS] ;
00710 size_t msk_offset [MAX_NARY_DIMS] ;
00711
00712 int32_t ddim ;
00713 uint32_t src_rank ;
00714 uint32_t res_rank ;
00715 uint32_t jrank ;
00716
00717 size_t j,k,i ;
00718 size_t typ_sz;
00719 size_t msk_typ_sz;
00720
00721 r4 accum ;
00722 r4 const initv = 0.0 ;
00723 size_t a_size,a_stride;
00724 size_t m_stride ;
00725
00726 r4 temp,new ;
00727
00728 if (mask == NULL) {
00729 if (dim != NULL) {
00730 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00731 mask = (DopeVectorType *) dim ;
00732 dim = NULL;
00733 }
00734 }
00735 }
00736
00737 if (dim != NULL) {
00738 ddim = read_dim(dim);
00739 } else
00740 ddim = 0 ;
00741
00742 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00743 src_rank = GET_RANK_FROM_DESC(array) - 1;
00744 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00745
00746 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00747
00748 for (i = 0 ; i <= src_rank ; i ++)
00749 counter[i] = 0 ;
00750
00751 if (mask != NULL) {
00752
00753 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00754 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00755
00756 if (GET_RANK_FROM_DESC(mask) == 0) {
00757 if (*mask_b) {
00758 mask = NULL;
00759 } else {
00760 src_size = 0;
00761 for (j = 0 ; j <= src_rank ; j ++) {
00762 msk_stride[j] = 0 ;
00763 msk_offset[j] = 0 ;
00764 }
00765 }
00766
00767 } else {
00768
00769 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00770 }
00771 }
00772
00773 accum = initv ;
00774
00775 if (src_size == 0 ) {
00776 return accum;
00777 }
00778 array_p = array_b ;
00779 if (mask == NULL) {
00780
00781 {
00782 size_t *p1 = NULL;
00783 size_t *p2 = NULL;
00784 size_t p3 = 0;
00785
00786 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00787 }
00788
00789 a_size = src_extent[0] ;
00790 a_stride = src_stride[0] ;
00791
00792 while (counter[src_rank] < src_extent[src_rank] ) {
00793
00794 for ( i = 0 ; i < a_size ; i ++ ) {
00795 accum += *(r4 *)array_p ;
00796
00797 array_p += a_stride ;
00798 }
00799 counter[0] = a_size ;
00800 j = 0 ;
00801 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00802 array_p += src_offset[j] ;
00803 counter[j+1]++ ;
00804 counter[j] = 0 ;
00805 j ++ ;
00806 }
00807 }
00808 } else {
00809
00810 {
00811 size_t *p1 = NULL;
00812 size_t *p2 = NULL;
00813 size_t p3 = 0;
00814
00815 p1 = msk_stride ;
00816 p2 = msk_offset ;
00817 p3 = msk_typ_sz ;
00818
00819 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00820 }
00821
00822 a_size = src_extent[0] ;
00823 a_stride = src_stride[0] ;
00824 m_stride = msk_stride[0] ;
00825 mask_p = mask_b ;
00826
00827 while (counter[src_rank] < src_extent[src_rank] ) {
00828
00829 for ( i = 0 ; i < a_size ; i ++ ) {
00830 if (*mask_p) {
00831 accum += *(r4 *)array_p ;
00832
00833 }
00834 array_p += a_stride ;
00835 mask_p += m_stride ;
00836 }
00837 counter[0] = a_size ;
00838 j = 0 ;
00839 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00840 array_p += src_offset[j] ;
00841 mask_p += msk_offset[j] ;
00842 counter[j+1]++ ;
00843 counter[j] = 0 ;
00844 j ++ ;
00845 }
00846 }
00847 }
00848 return accum ;
00849 }
00850 r8
00851 _SUM0__S(
00852 DopeVectorType *array,
00853 DopeVectorType *dim,
00854 DopeVectorType *mask)
00855 {
00856 char * array_p, * array_b ;
00857 char * dim_p, * dim_b ;
00858 char * mask_p, * mask_b ;
00859
00860 size_t src_extent [MAX_NARY_DIMS] ;
00861 size_t counter [MAX_NARY_DIMS] ;
00862 size_t src_offset [MAX_NARY_DIMS] ;
00863 size_t src_stride [MAX_NARY_DIMS] ;
00864 size_t src_size ;
00865
00866 size_t msk_stride [MAX_NARY_DIMS] ;
00867 size_t msk_offset [MAX_NARY_DIMS] ;
00868
00869 int32_t ddim ;
00870 uint32_t src_rank ;
00871 uint32_t res_rank ;
00872 uint32_t jrank ;
00873
00874 size_t j,k,i ;
00875 size_t typ_sz;
00876 size_t msk_typ_sz;
00877
00878 r8 accum ;
00879 r8 const initv = 0.0 ;
00880 size_t a_size,a_stride;
00881 size_t m_stride ;
00882
00883 r8 temp,new ;
00884
00885 if (mask == NULL) {
00886 if (dim != NULL) {
00887 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
00888 mask = (DopeVectorType *) dim ;
00889 dim = NULL;
00890 }
00891 }
00892 }
00893
00894 if (dim != NULL) {
00895 ddim = read_dim(dim);
00896 } else
00897 ddim = 0 ;
00898
00899 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
00900 src_rank = GET_RANK_FROM_DESC(array) - 1;
00901 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
00902
00903 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
00904
00905 for (i = 0 ; i <= src_rank ; i ++)
00906 counter[i] = 0 ;
00907
00908 if (mask != NULL) {
00909
00910 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
00911 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
00912
00913 if (GET_RANK_FROM_DESC(mask) == 0) {
00914 if (*mask_b) {
00915 mask = NULL;
00916 } else {
00917 src_size = 0;
00918 for (j = 0 ; j <= src_rank ; j ++) {
00919 msk_stride[j] = 0 ;
00920 msk_offset[j] = 0 ;
00921 }
00922 }
00923
00924 } else {
00925
00926 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
00927 }
00928 }
00929
00930 accum = initv ;
00931
00932 if (src_size == 0 ) {
00933 return accum;
00934 }
00935 array_p = array_b ;
00936 if (mask == NULL) {
00937
00938 {
00939 size_t *p1 = NULL;
00940 size_t *p2 = NULL;
00941 size_t p3 = 0;
00942
00943 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00944 }
00945
00946 a_size = src_extent[0] ;
00947 a_stride = src_stride[0] ;
00948
00949 while (counter[src_rank] < src_extent[src_rank] ) {
00950
00951 for ( i = 0 ; i < a_size ; i ++ ) {
00952 accum += *(r8 *)array_p ;
00953
00954 array_p += a_stride ;
00955 }
00956 counter[0] = a_size ;
00957 j = 0 ;
00958 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00959 array_p += src_offset[j] ;
00960 counter[j+1]++ ;
00961 counter[j] = 0 ;
00962 j ++ ;
00963 }
00964 }
00965 } else {
00966
00967 {
00968 size_t *p1 = NULL;
00969 size_t *p2 = NULL;
00970 size_t p3 = 0;
00971
00972 p1 = msk_stride ;
00973 p2 = msk_offset ;
00974 p3 = msk_typ_sz ;
00975
00976 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
00977 }
00978
00979 a_size = src_extent[0] ;
00980 a_stride = src_stride[0] ;
00981 m_stride = msk_stride[0] ;
00982 mask_p = mask_b ;
00983
00984 while (counter[src_rank] < src_extent[src_rank] ) {
00985
00986 for ( i = 0 ; i < a_size ; i ++ ) {
00987 if (*mask_p) {
00988 accum += *(r8 *)array_p ;
00989
00990 }
00991 array_p += a_stride ;
00992 mask_p += m_stride ;
00993 }
00994 counter[0] = a_size ;
00995 j = 0 ;
00996 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
00997 array_p += src_offset[j] ;
00998 mask_p += msk_offset[j] ;
00999 counter[j+1]++ ;
01000 counter[j] = 0 ;
01001 j ++ ;
01002 }
01003 }
01004 }
01005 return accum ;
01006 }
01007 r16
01008 _SUM0__D(
01009 DopeVectorType *array,
01010 DopeVectorType *dim,
01011 DopeVectorType *mask)
01012 {
01013 char * array_p, * array_b ;
01014 char * dim_p, * dim_b ;
01015 char * mask_p, * mask_b ;
01016
01017 size_t src_extent [MAX_NARY_DIMS] ;
01018 size_t counter [MAX_NARY_DIMS] ;
01019 size_t src_offset [MAX_NARY_DIMS] ;
01020 size_t src_stride [MAX_NARY_DIMS] ;
01021 size_t src_size ;
01022
01023 size_t msk_stride [MAX_NARY_DIMS] ;
01024 size_t msk_offset [MAX_NARY_DIMS] ;
01025
01026 int32_t ddim ;
01027 uint32_t src_rank ;
01028 uint32_t res_rank ;
01029 uint32_t jrank ;
01030
01031 size_t j,k,i ;
01032 size_t typ_sz;
01033 size_t msk_typ_sz;
01034
01035 r16 accum ;
01036 r16 const initv = 0.0 ;
01037 size_t a_size,a_stride;
01038 size_t m_stride ;
01039
01040 r16 temp,new ;
01041
01042 if (mask == NULL) {
01043 if (dim != NULL) {
01044 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01045 mask = (DopeVectorType *) dim ;
01046 dim = NULL;
01047 }
01048 }
01049 }
01050
01051 if (dim != NULL) {
01052 ddim = read_dim(dim);
01053 } else
01054 ddim = 0 ;
01055
01056 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01057 src_rank = GET_RANK_FROM_DESC(array) - 1;
01058 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
01059
01060 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01061
01062 for (i = 0 ; i <= src_rank ; i ++)
01063 counter[i] = 0 ;
01064
01065 if (mask != NULL) {
01066
01067 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01068 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01069
01070 if (GET_RANK_FROM_DESC(mask) == 0) {
01071 if (*mask_b) {
01072 mask = NULL;
01073 } else {
01074 src_size = 0;
01075 for (j = 0 ; j <= src_rank ; j ++) {
01076 msk_stride[j] = 0 ;
01077 msk_offset[j] = 0 ;
01078 }
01079 }
01080
01081 } else {
01082
01083 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01084 }
01085 }
01086
01087 accum = initv ;
01088
01089 if (src_size == 0 ) {
01090 return accum;
01091 }
01092 array_p = array_b ;
01093 if (mask == NULL) {
01094
01095 {
01096 size_t *p1 = NULL;
01097 size_t *p2 = NULL;
01098 size_t p3 = 0;
01099
01100 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01101 }
01102
01103 a_size = src_extent[0] ;
01104 a_stride = src_stride[0] ;
01105
01106 while (counter[src_rank] < src_extent[src_rank] ) {
01107
01108 for ( i = 0 ; i < a_size ; i ++ ) {
01109 accum += *(r16 *)array_p ;
01110
01111 array_p += a_stride ;
01112 }
01113 counter[0] = a_size ;
01114 j = 0 ;
01115 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01116 array_p += src_offset[j] ;
01117 counter[j+1]++ ;
01118 counter[j] = 0 ;
01119 j ++ ;
01120 }
01121 }
01122 } else {
01123
01124 {
01125 size_t *p1 = NULL;
01126 size_t *p2 = NULL;
01127 size_t p3 = 0;
01128
01129 p1 = msk_stride ;
01130 p2 = msk_offset ;
01131 p3 = msk_typ_sz ;
01132
01133 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01134 }
01135
01136 a_size = src_extent[0] ;
01137 a_stride = src_stride[0] ;
01138 m_stride = msk_stride[0] ;
01139 mask_p = mask_b ;
01140
01141 while (counter[src_rank] < src_extent[src_rank] ) {
01142
01143 for ( i = 0 ; i < a_size ; i ++ ) {
01144 if (*mask_p) {
01145 accum += *(r16 *)array_p ;
01146
01147 }
01148 array_p += a_stride ;
01149 mask_p += m_stride ;
01150 }
01151 counter[0] = a_size ;
01152 j = 0 ;
01153 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01154 array_p += src_offset[j] ;
01155 mask_p += msk_offset[j] ;
01156 counter[j+1]++ ;
01157 counter[j] = 0 ;
01158 j ++ ;
01159 }
01160 }
01161 }
01162 return accum ;
01163 }
01164 c8
01165 _SUM0__C4(
01166 DopeVectorType *array,
01167 DopeVectorType *dim,
01168 DopeVectorType *mask)
01169 {
01170 char * array_p, * array_b ;
01171 char * dim_p, * dim_b ;
01172 char * mask_p, * mask_b ;
01173
01174 size_t src_extent [MAX_NARY_DIMS] ;
01175 size_t counter [MAX_NARY_DIMS] ;
01176 size_t src_offset [MAX_NARY_DIMS] ;
01177 size_t src_stride [MAX_NARY_DIMS] ;
01178 size_t src_size ;
01179
01180 size_t msk_stride [MAX_NARY_DIMS] ;
01181 size_t msk_offset [MAX_NARY_DIMS] ;
01182
01183 int32_t ddim ;
01184 uint32_t src_rank ;
01185 uint32_t res_rank ;
01186 uint32_t jrank ;
01187
01188 size_t j,k,i ;
01189 size_t typ_sz;
01190 size_t msk_typ_sz;
01191
01192 c8 accum ;
01193 c8 const initv = {
01194 0.0,0.0 };
01195 size_t a_size,a_stride;
01196 size_t m_stride ;
01197
01198 c8 temp,new ;
01199
01200 if (mask == NULL) {
01201 if (dim != NULL) {
01202 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01203 mask = (DopeVectorType *) dim ;
01204 dim = NULL;
01205 }
01206 }
01207 }
01208
01209 if (dim != NULL) {
01210 ddim = read_dim(dim);
01211 } else
01212 ddim = 0 ;
01213
01214 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01215 src_rank = GET_RANK_FROM_DESC(array) - 1;
01216 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
01217
01218 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01219
01220 for (i = 0 ; i <= src_rank ; i ++)
01221 counter[i] = 0 ;
01222
01223 if (mask != NULL) {
01224
01225 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01226 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01227
01228 if (GET_RANK_FROM_DESC(mask) == 0) {
01229 if (*mask_b) {
01230 mask = NULL;
01231 } else {
01232 src_size = 0;
01233 for (j = 0 ; j <= src_rank ; j ++) {
01234 msk_stride[j] = 0 ;
01235 msk_offset[j] = 0 ;
01236 }
01237 }
01238
01239 } else {
01240
01241 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01242 }
01243 }
01244
01245 accum = initv ;
01246
01247 if (src_size == 0 ) {
01248 return accum;
01249 }
01250 array_p = array_b ;
01251 if (mask == NULL) {
01252
01253 {
01254 size_t *p1 = NULL;
01255 size_t *p2 = NULL;
01256 size_t p3 = 0;
01257
01258 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01259 }
01260
01261 a_size = src_extent[0] ;
01262 a_stride = src_stride[0] ;
01263
01264 while (counter[src_rank] < src_extent[src_rank] ) {
01265
01266 for ( i = 0 ; i < a_size ; i ++ ) {
01267 accum.r += (*(c8 *)array_p).r ;
01268 accum.i +=(*(c8 *)array_p).i ;
01269
01270 array_p += a_stride ;
01271 }
01272 counter[0] = a_size ;
01273 j = 0 ;
01274 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01275 array_p += src_offset[j] ;
01276 counter[j+1]++ ;
01277 counter[j] = 0 ;
01278 j ++ ;
01279 }
01280 }
01281 } else {
01282
01283 {
01284 size_t *p1 = NULL;
01285 size_t *p2 = NULL;
01286 size_t p3 = 0;
01287
01288 p1 = msk_stride ;
01289 p2 = msk_offset ;
01290 p3 = msk_typ_sz ;
01291
01292 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01293 }
01294
01295 a_size = src_extent[0] ;
01296 a_stride = src_stride[0] ;
01297 m_stride = msk_stride[0] ;
01298 mask_p = mask_b ;
01299
01300 while (counter[src_rank] < src_extent[src_rank] ) {
01301
01302 for ( i = 0 ; i < a_size ; i ++ ) {
01303 if (*mask_p) {
01304 accum.r += (*(c8 *)array_p).r ;
01305 accum.i +=(*(c8 *)array_p).i ;
01306
01307 }
01308 array_p += a_stride ;
01309 mask_p += m_stride ;
01310 }
01311 counter[0] = a_size ;
01312 j = 0 ;
01313 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01314 array_p += src_offset[j] ;
01315 mask_p += msk_offset[j] ;
01316 counter[j+1]++ ;
01317 counter[j] = 0 ;
01318 j ++ ;
01319 }
01320 }
01321 }
01322 return accum ;
01323 }
01324 c16
01325 _SUM0__C(
01326 DopeVectorType *array,
01327 DopeVectorType *dim,
01328 DopeVectorType *mask)
01329 {
01330 char * array_p, * array_b ;
01331 char * dim_p, * dim_b ;
01332 char * mask_p, * mask_b ;
01333
01334 size_t src_extent [MAX_NARY_DIMS] ;
01335 size_t counter [MAX_NARY_DIMS] ;
01336 size_t src_offset [MAX_NARY_DIMS] ;
01337 size_t src_stride [MAX_NARY_DIMS] ;
01338 size_t src_size ;
01339
01340 size_t msk_stride [MAX_NARY_DIMS] ;
01341 size_t msk_offset [MAX_NARY_DIMS] ;
01342
01343 int32_t ddim ;
01344 uint32_t src_rank ;
01345 uint32_t res_rank ;
01346 uint32_t jrank ;
01347
01348 size_t j,k,i ;
01349 size_t typ_sz;
01350 size_t msk_typ_sz;
01351
01352 c16 accum ;
01353 c16 const initv = {
01354 0.0,0.0 };
01355 size_t a_size,a_stride;
01356 size_t m_stride ;
01357
01358 c16 temp,new ;
01359
01360 if (mask == NULL) {
01361 if (dim != NULL) {
01362 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01363 mask = (DopeVectorType *) dim ;
01364 dim = NULL;
01365 }
01366 }
01367 }
01368
01369 if (dim != NULL) {
01370 ddim = read_dim(dim);
01371 } else
01372 ddim = 0 ;
01373
01374 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01375 src_rank = GET_RANK_FROM_DESC(array) - 1;
01376 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
01377
01378 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01379
01380 for (i = 0 ; i <= src_rank ; i ++)
01381 counter[i] = 0 ;
01382
01383 if (mask != NULL) {
01384
01385 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01386 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01387
01388 if (GET_RANK_FROM_DESC(mask) == 0) {
01389 if (*mask_b) {
01390 mask = NULL;
01391 } else {
01392 src_size = 0;
01393 for (j = 0 ; j <= src_rank ; j ++) {
01394 msk_stride[j] = 0 ;
01395 msk_offset[j] = 0 ;
01396 }
01397 }
01398
01399 } else {
01400
01401 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01402 }
01403 }
01404
01405 accum = initv ;
01406
01407 if (src_size == 0 ) {
01408 return accum;
01409 }
01410 array_p = array_b ;
01411 if (mask == NULL) {
01412
01413 {
01414 size_t *p1 = NULL;
01415 size_t *p2 = NULL;
01416 size_t p3 = 0;
01417
01418 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01419 }
01420
01421 a_size = src_extent[0] ;
01422 a_stride = src_stride[0] ;
01423
01424 while (counter[src_rank] < src_extent[src_rank] ) {
01425
01426 for ( i = 0 ; i < a_size ; i ++ ) {
01427 accum.r += (*(c16 *)array_p).r ;
01428 accum.i +=(*(c16 *)array_p).i ;
01429
01430 array_p += a_stride ;
01431 }
01432 counter[0] = a_size ;
01433 j = 0 ;
01434 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01435 array_p += src_offset[j] ;
01436 counter[j+1]++ ;
01437 counter[j] = 0 ;
01438 j ++ ;
01439 }
01440 }
01441 } else {
01442
01443 {
01444 size_t *p1 = NULL;
01445 size_t *p2 = NULL;
01446 size_t p3 = 0;
01447
01448 p1 = msk_stride ;
01449 p2 = msk_offset ;
01450 p3 = msk_typ_sz ;
01451
01452 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01453 }
01454
01455 a_size = src_extent[0] ;
01456 a_stride = src_stride[0] ;
01457 m_stride = msk_stride[0] ;
01458 mask_p = mask_b ;
01459
01460 while (counter[src_rank] < src_extent[src_rank] ) {
01461
01462 for ( i = 0 ; i < a_size ; i ++ ) {
01463 if (*mask_p) {
01464 accum.r += (*(c16 *)array_p).r ;
01465 accum.i +=(*(c16 *)array_p).i ;
01466
01467 }
01468 array_p += a_stride ;
01469 mask_p += m_stride ;
01470 }
01471 counter[0] = a_size ;
01472 j = 0 ;
01473 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01474 array_p += src_offset[j] ;
01475 mask_p += msk_offset[j] ;
01476 counter[j+1]++ ;
01477 counter[j] = 0 ;
01478 j ++ ;
01479 }
01480 }
01481 }
01482 return accum ;
01483 }
01484 c32
01485 _SUM0__Z(
01486 DopeVectorType *array,
01487 DopeVectorType *dim,
01488 DopeVectorType *mask)
01489 {
01490 char * array_p, * array_b ;
01491 char * dim_p, * dim_b ;
01492 char * mask_p, * mask_b ;
01493
01494 size_t src_extent [MAX_NARY_DIMS] ;
01495 size_t counter [MAX_NARY_DIMS] ;
01496 size_t src_offset [MAX_NARY_DIMS] ;
01497 size_t src_stride [MAX_NARY_DIMS] ;
01498 size_t src_size ;
01499
01500 size_t msk_stride [MAX_NARY_DIMS] ;
01501 size_t msk_offset [MAX_NARY_DIMS] ;
01502
01503 int32_t ddim ;
01504 uint32_t src_rank ;
01505 uint32_t res_rank ;
01506 uint32_t jrank ;
01507
01508 size_t j,k,i ;
01509 size_t typ_sz;
01510 size_t msk_typ_sz;
01511
01512 c32 accum ;
01513 c32 const initv = {
01514 0.0,0.0 };
01515 size_t a_size,a_stride;
01516 size_t m_stride ;
01517
01518 c32 temp,new ;
01519
01520 if (mask == NULL) {
01521 if (dim != NULL) {
01522 if (GET_DV_LOGICAL_FROM_DESC(dim)) {
01523 mask = (DopeVectorType *) dim ;
01524 dim = NULL;
01525 }
01526 }
01527 }
01528
01529 if (dim != NULL) {
01530 ddim = read_dim(dim);
01531 } else
01532 ddim = 0 ;
01533
01534 array_b = (char *) GET_ADDRESS_FROM_DESC(array) ;
01535 src_rank = GET_RANK_FROM_DESC(array) - 1;
01536 typ_sz = GET_ELEMENT_SZ_FROM_DESC(array);
01537
01538 src_size = read_source_desc(array, src_extent, src_stride, src_offset, ddim);
01539
01540 for (i = 0 ; i <= src_rank ; i ++)
01541 counter[i] = 0 ;
01542
01543 if (mask != NULL) {
01544
01545 msk_typ_sz = GET_ELEMENT_SZ_FROM_DESC(mask);
01546 mask_b = (char *) GET_ADDRESS_FROM_DESC(mask) + OFFSET_TO_TF_BYTE(msk_typ_sz) ;
01547
01548 if (GET_RANK_FROM_DESC(mask) == 0) {
01549 if (*mask_b) {
01550 mask = NULL;
01551 } else {
01552 src_size = 0;
01553 for (j = 0 ; j <= src_rank ; j ++) {
01554 msk_stride[j] = 0 ;
01555 msk_offset[j] = 0 ;
01556 }
01557 }
01558
01559 } else {
01560
01561 get_offset_and_stride(mask, src_extent, msk_stride, msk_offset, ddim);
01562 }
01563 }
01564
01565 accum = initv ;
01566
01567 if (src_size == 0 ) {
01568 return accum;
01569 }
01570 array_p = array_b ;
01571 if (mask == NULL) {
01572
01573 {
01574 size_t *p1 = NULL;
01575 size_t *p2 = NULL;
01576 size_t p3 = 0;
01577
01578 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01579 }
01580
01581 a_size = src_extent[0] ;
01582 a_stride = src_stride[0] ;
01583
01584 while (counter[src_rank] < src_extent[src_rank] ) {
01585
01586 for ( i = 0 ; i < a_size ; i ++ ) {
01587 accum.r += (*(c32 *)array_p).r ;
01588 accum.i +=(*(c32 *)array_p).i ;
01589
01590 array_p += a_stride ;
01591 }
01592 counter[0] = a_size ;
01593 j = 0 ;
01594 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01595 array_p += src_offset[j] ;
01596 counter[j+1]++ ;
01597 counter[j] = 0 ;
01598 j ++ ;
01599 }
01600 }
01601 } else {
01602
01603 {
01604 size_t *p1 = NULL;
01605 size_t *p2 = NULL;
01606 size_t p3 = 0;
01607
01608 p1 = msk_stride ;
01609 p2 = msk_offset ;
01610 p3 = msk_typ_sz ;
01611
01612 src_rank = find_contig_axes(src_extent, src_stride, src_offset, src_rank, typ_sz, p1,p2,p3) ;
01613 }
01614
01615 a_size = src_extent[0] ;
01616 a_stride = src_stride[0] ;
01617 m_stride = msk_stride[0] ;
01618 mask_p = mask_b ;
01619
01620 while (counter[src_rank] < src_extent[src_rank] ) {
01621
01622 for ( i = 0 ; i < a_size ; i ++ ) {
01623 if (*mask_p) {
01624 accum.r += (*(c32 *)array_p).r ;
01625 accum.i +=(*(c32 *)array_p).i ;
01626
01627 }
01628 array_p += a_stride ;
01629 mask_p += m_stride ;
01630 }
01631 counter[0] = a_size ;
01632 j = 0 ;
01633 while ((counter[j] == src_extent[j]) && (j < src_rank)) {
01634 array_p += src_offset[j] ;
01635 mask_p += msk_offset[j] ;
01636 counter[j+1]++ ;
01637 counter[j] = 0 ;
01638 j ++ ;
01639 }
01640 }
01641 }
01642 return accum ;
01643 }
01644
01645 static int32_t
01646 read_dim(DopeVectorType * dim)
01647 {
01648 int32_t ddim ;
01649 char * dim_p ;
01650
01651 dim_p = (char *) GET_ADDRESS_FROM_DESC(dim) ;
01652
01653 switch (GET_ELEMENT_SZ_FROM_DESC(dim)) {
01654 case sizeof(int8_t):
01655 ddim = * (int8_t *) dim_p ;
01656 break;
01657
01658 case sizeof(int16_t):
01659 ddim = * (int16_t *) dim_p ;
01660 break;
01661
01662 case sizeof(int32_t):
01663 ddim = * (int32_t *) dim_p ;
01664 break;
01665
01666 case sizeof(int64_t):
01667 ddim = * (int64_t *) dim_p ;
01668 break;
01669 }
01670
01671 return (ddim - 1) ;
01672 }
01673
01674 static size_t
01675 read_source_desc(DopeVectorType * array,
01676 size_t src_extent[MAX_NARY_DIMS],
01677 size_t src_stride[MAX_NARY_DIMS],
01678 size_t src_offset[MAX_NARY_DIMS],
01679 int32_t ddim)
01680 {
01681 int32_t src_rank ,k,j ;
01682 size_t src_size ;
01683
01684 src_extent[0] = GET_EXTENT_FROM_DESC(array,ddim) ;
01685 src_rank = GET_RANK_FROM_DESC(array);
01686
01687 src_size = src_extent[0];
01688
01689 for ( k = 1, j = 0 ; j < src_rank ; j ++ ) {
01690 if (j != ddim ) {
01691 src_extent[k] = GET_EXTENT_FROM_DESC(array,j) ;
01692 src_size *= src_extent[k];
01693 k++ ;
01694 }
01695 }
01696 get_offset_and_stride(array, src_extent, src_stride, src_offset, ddim);
01697
01698 return src_size;
01699 }
01700
01701 static void
01702 get_offset_and_stride(DopeVectorType * array,
01703 size_t src_extent[MAX_NARY_DIMS],
01704 size_t src_stride[MAX_NARY_DIMS],
01705 size_t src_offset[MAX_NARY_DIMS],
01706 int32_t ddim)
01707 {
01708
01709 int32_t src_rank ,k,j ;
01710
01711 src_stride[0] = GET_STRIDE_FROM_DESC(array,ddim) ;
01712 src_offset[0] = 0;
01713 src_rank = GET_RANK_FROM_DESC(array);
01714
01715 for ( k = 1, j = 0 ; j < src_rank ; j ++ ) {
01716 if (j != ddim ) {
01717 src_stride[k] = GET_STRIDE_FROM_DESC(array,j) ;
01718 src_offset[k-1] = src_stride[k] - (src_stride [k-1] * (src_extent[k-1])) ;
01719 k++ ;
01720 }
01721 }
01722 }
01723
/*
 * find_contig_axes
 *
 * Merge leading axes that are laid out back-to-back in memory into axis 0,
 * so the callers' inner loop covers the longest possible run.
 *
 *   src_extent, src_stride, src_offset
 *                 per-axis traversal description (slots 0..src_rank),
 *                 updated in place; each array is expected to hold
 *                 MAX_NARY_DIMS entries, as in the prototype at the top
 *                 of the file
 *   src_rank      index of the outermost axis on entry
 *   typ_sz        size of one source element, in the same units as the
 *                 strides
 *   msk_stride, msk_offset
 *                 the same description for the mask, or NULL when there is
 *                 no mask
 *   msk_typ_sz    size of one mask element (ignored without a mask)
 *
 * Axis j is merged while src_extent[0] * typ_sz == src_stride[j] (and the
 * equivalent holds for the mask).  That test only proves contiguity when
 * axis 0 itself advances by exactly one element, so merging is skipped
 * altogether unless src_stride[0] == typ_sz (and msk_stride[0] ==
 * msk_typ_sz).  Without this guard a reversed leading axis (stride of
 * -typ_sz) or an extent-1 leading axis with a non-unit stride passed the
 * test and the merged run walked the wrong memory.
 *
 * After merging, the surviving outer axes are shifted down to slots 1..n.
 *
 * Returns the new index of the outermost axis.
 */
static uint32_t
find_contig_axes(size_t *src_extent,
                 size_t *src_stride,
                 size_t *src_offset,
                 uint32_t src_rank,
                 size_t typ_sz,
                 size_t *msk_stride,
                 size_t *msk_offset,
                 size_t msk_typ_sz)
{
    const int have_mask = (msk_stride != NULL);
    const uint32_t last = src_rank;     /* outermost axis before merging */
    uint32_t i, j;

    /* Merging requires unit-stride traversal of axis 0. */
    if (src_stride[0] != typ_sz)
        return src_rank;
    if (have_mask && msk_stride[0] != msk_typ_sz)
        return src_rank;

    j = 1;
    while (j <= last
           && (src_extent[0] * typ_sz) == src_stride[j]
           && (!have_mask || (src_extent[0] * msk_typ_sz) == msk_stride[j])) {
        src_extent[0] *= src_extent[j];
        /* The merged run inherits the roll-over fix-up of the absorbed axis. */
        src_offset[0] = src_offset[j];
        if (have_mask)
            msk_offset[0] = msk_offset[j];
        src_rank--;
        j++;
    }

    /* Close the gap left by the absorbed axes (nothing to do if none). */
    if (j > 1) {
        for (i = j; i <= last; i++) {
            const uint32_t dst = i - j + 1;

            src_stride[dst] = src_stride[i];
            src_offset[dst] = src_offset[i];
            src_extent[dst] = src_extent[i];
            if (have_mask) {
                msk_stride[dst] = msk_stride[i];
                msk_offset[dst] = msk_offset[i];
            }
        }
    }

    return src_rank;
}