00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #define __STDC_LIMIT_MACROS
00041 #include <stdint.h>
00042 #ifdef USE_PCH
00043 #include "lno_pch.h"
00044 #endif // USE_PCH
00045 #pragma hdrstop
00046
00047 #include <math.h>
00048 #ifdef KEY // to get DBL_MAX
00049 #include <float.h>
00050 #endif
00051 #include <sys/types.h>
00052 #include <limits.h>
00053 #include "pu_info.h"
00054 #include "lnoutils.h"
00055 #include "lnopt_main.h"
00056 #include "stdlib.h"
00057 #include "fiz_fuse.h"
00058 #include "ara.h"
00059 #include "cross_snl.h"
00060 #include "snl_utils.h"
00061 #include "cross_cache.h"
00062 #define NOMINAL_PROCS 8
00063
00064 static INT cross_loop_debug_level = 2;
00065 BOOL running_cross_loop_analysis = FALSE;
00066
00067
00068 ARA_REF_INFO::ARA_REF_INFO(ARA_REF* ref, ARA_LOOP_INFO* leaf)
00069 {
00070
00071 _is_messy = ref->Is_Messy();
00072
00073 _ref = CXX_NEW(ARA_REF(*ref), &LNO_local_pool);
00074 _proj_ref = CXX_NEW(ARA_REF(*ref), &LNO_local_pool);
00075
00076 if (!ref->Is_Too_Messy()) {
00077 REGION_ITER iter(&_ref->Image());
00078 _dim = iter.First()->Num_Dim();
00079 } else {
00080 _dim = -1;
00081 }
00082
00083 ARA_LOOP_INFO *node = leaf;
00084
00085 while (node) {
00086 _proj_ref->Image().RegionUN_Projection(node->Depth(), *node);
00087 node = node->Parent();
00088 }
00089
00090 if (cross_loop_debug_level >= 3) {
00091 fprintf(stdout, "Before : \n");
00092 _ref->Print(stdout);
00093 fprintf(stdout, "After : \n");
00094 _proj_ref->Print(stdout);
00095 }
00096 }
00097
00098
00099 void ARA_REF_INFO::Print(FILE *file)
00100 {
00101 fprintf(file, "IS_MESSY : %s\n", (_is_messy) ? "TRUE" : "FALSE");
00102 fprintf(file, "Before Projection :\n");
00103 _ref->Print(file);
00104 fprintf(file, "After Projection :\n");
00105 _proj_ref->Print(file);
00106 fprintf(file, "Dimensions : %d\n", _dim);
00107
00108 }
00109
00110
00111 ARRAY_SNL_INFO::ARRAY_SNL_INFO(WN* wn_outer, INT nloops,
00112 ARA_LOOP_INFO *ara_root) :
00113 _rd_ref_list(&LNO_local_pool),
00114 _wr_ref_list(&LNO_local_pool)
00115 {
00116 _snl_root = wn_outer;
00117 _depth = nloops;
00118 _ara_root = ara_root;
00119
00120 _snl_leaf = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00121 DO_LOOP_INFO *dli = Get_Do_Loop_Info(_snl_leaf);
00122 _ara_leaf = dli->ARA_Info;
00123 }
00124
00125 void ARRAY_SNL_INFO::Add_Reference(ARA_REF_INFO_ST * ref_list, ARA_REF *ref)
00126 {
00127
00128 REGION_ITER iter(&ref->Image());
00129
00130
00131 for (REGION *curr = iter.First(); !iter.Is_Empty(); curr = iter.Next()) {
00132 ARA_REF * new_ref = CXX_NEW(ARA_REF(*ref), &LNO_local_pool);
00133
00134
00135
00136 while (!new_ref->Image().Is_Empty()) {
00137 REGION * tmp = new_ref->Image().Remove_Headnode();
00138 }
00139
00140
00141
00142 if (cross_loop_debug_level >= 3) {
00143 fprintf(stdout, "Before Inserting :\n");
00144 curr->Print(stdout);
00145 new_ref->Print(stdout);
00146 }
00147
00148 new_ref->Image().Append(CXX_NEW(REGION(*curr), &LNO_local_pool));
00149 if (cross_loop_debug_level >= 3) {
00150 fprintf(stdout, "After Inserting :\n");
00151 new_ref->Print(stdout);
00152 }
00153
00154
00155 ref_list->Push(CXX_NEW(ARA_REF_INFO(new_ref, _ara_leaf), &LNO_local_pool));
00156 }
00157 }
00158
00159 void ARRAY_SNL_INFO::Add_Write_Reference(ARA_REF *ref)
00160 {
00161 Add_Reference(&_wr_ref_list, ref);
00162 }
00163
00164 void ARRAY_SNL_INFO::Add_Read_Reference(ARA_REF *ref)
00165 {
00166 Add_Reference(&_rd_ref_list, ref);
00167 }
00168
00169
00170
00171
00172
00173
00174
00175
00176 void ARRAY_SNL_INFO::Walk_SNL(void)
00177 {
00178
00179
00180
00181
00182 WN* inner_most = _snl_leaf;
00183 ARA_LOOP_INFO *ali = _ara_leaf;
00184
00185 if (ali == NULL) {
00186 return;
00187 }
00188
00189 ali->Walk_Block(WN_do_body(inner_most));
00190
00191 if (cross_loop_debug_level >= 3) {
00192 fprintf(stdout, "References :\n");
00193 ali->Print(stdout, FALSE);
00194 fprintf(stdout,"\n");
00195 }
00196
00197
00198
00199
00200 ARA_REF_ST & def_stack = ali->DEF();
00201 INT i;
00202 for (i = 0; i < def_stack.Elements(); ++i) {
00203 Add_Write_Reference(def_stack.Bottom_nth(i));
00204 }
00205
00206 ARA_REF_ST & use_stack = ali->USE();
00207 for (i = 0; i < use_stack.Elements(); ++i) {
00208 Add_Read_Reference(use_stack.Bottom_nth(i));
00209 }
00210 }
00211
00212 void ARRAY_SNL_INFO::Print(FILE *file)
00213 {
00214 fprintf(file, "Read References : \n");
00215 INT i;
00216 for (i = 0; i < _rd_ref_list.Elements(); ++i) {
00217 _rd_ref_list.Bottom_nth(i)->Print(file);
00218 }
00219
00220 fprintf(file, "Write References : \n");
00221 for (i = 0; i < _wr_ref_list.Elements(); ++i) {
00222 _wr_ref_list.Bottom_nth(i)->Print(file);
00223 }
00224 fprintf(file, "_snl_root = %p\n _snl_leaf = %p\n _depth = %d\n"
00225 "_ara_root = %p\n _ara_leaf = %p\n",
00226 _snl_root, _snl_leaf, _depth, _ara_root, _ara_leaf);
00227
00228 }
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 void Gather_Array_References (ARRAY_SNL_INFO *asi)
00239 {
00240 asi->Walk_SNL();
00241 }
00242
00243
00244
00245
00246
00247
00248
00249
00250 void SNL_Array_Analysis(WN* wn_outer, INT nloops)
00251 {
00252
00253
00254 ARA_LOOP_INFO *ara_root =
00255 CXX_NEW(ARA_LOOP_INFO(wn_outer, NULL, TRUE), &ARA_memory_pool);
00256 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_outer);
00257 ARA_Initialize_Loops(wn_outer, ara_root);
00258
00259 ARRAY_SNL_INFO *asi = CXX_NEW(ARRAY_SNL_INFO(wn_outer, nloops, ara_root),
00260 &LNO_local_pool);
00261
00262
00263 Gather_Array_References(asi);
00264
00265 if (cross_loop_debug_level >= 2) {
00266 asi->Print(stdout);
00267 }
00268
00269 ARA_Cleanup(wn_outer);
00270 }
00271
00272 #if 0
00273
00274
00275
00276
00277
00278 extern void Cross_Loop_Cache_Analysis(PU_Info* current_pu, WN* func_nd)
00279 {
00280 #if CROSS_TODO_FIXED
00281 cross_loop_debug_level = Get_Trace(TP_LNOPT2, TT_CROSS_LOOP_DEBUG)
00282 ? 1 : 0;
00283 #else
00284 cross_loop_debug_level = 1;
00285 #endif
00286
00287 MEM_POOL_Push(&LNO_local_pool);
00288
00289 if (cross_loop_debug_level >= 1) {
00290 fprintf(stdout, "### Cross Loop Analysis (Begin)\n");
00291 fprintf(TFile, "### Cross Loop Analysis (Begin)\n");
00292 }
00293
00294
00295 FIZ_FUSE_INFO* ffi=
00296 CXX_NEW(FIZ_FUSE_INFO(&LNO_local_pool), &LNO_local_pool);
00297 ffi->Build(func_nd, TRUE);
00298
00299 for (INT i = 0; i < ffi->Num_Snl(); i++) {
00300
00301 if (ffi->Get_Type(i) == Invalid || ffi->Get_Type(i) == Non_SNL)
00302 continue;
00303
00304 WN* wn_outer_loop = ffi->Get_Wn(i);
00305 INT nloops = ffi->Get_Depth(i);
00306 SNL_Array_Analysis(wn_outer_loop, nloops);
00307 }
00308
00309 MEM_POOL_Pop(&LNO_local_pool);
00310
00311 if (cross_loop_debug_level >= 1) {
00312 fprintf(stdout, "### Cross Loop Analysis (End)\n");
00313 fprintf(TFile, "### Cross Loop Analysis (End)\n");
00314 }
00315 }
00316
00317 #endif
00318
00319 INT Intersect_References(CACHE_CONTENTS *cc, ARRAY_SNL_INFO *asi, INT32 parallel_loop,
00320 ARA_REF_INFO_ST *refs, double *denom, double *numer)
00321 {
00322 *denom = 0.0;
00323 *numer = 0.0;
00324
00325 for (INT i = 0; i < refs->Elements(); ++i) {
00326 ARA_REF_INFO *ari = refs->Bottom_nth(i);
00327
00328 FmtAssert(!ari->Is_Messy(), ("Determiner_Intersection : messy reference"));
00329 CACHE_REGION *cr = CXX_NEW(CACHE_REGION(ari, asi, parallel_loop), &LNO_local_pool);
00330 if (cr->Is_Messy()) {
00331 return 0;
00332 }
00333
00334 if (cross_loop_debug_level >= 3) {
00335 fprintf(stdout, "Cache Region : \n");
00336 cr->Print(stdout);
00337 }
00338
00339 INT32 isize = cc->Intersect_Region(cr);
00340 INT32 rsize = cr->Region_Size();
00341
00342 if (rsize == -1 || isize == -1) {
00343 return 0;
00344 }
00345
00346 FmtAssert(isize != -1, ("Determine_Intersection : intersection failed"));
00347 FmtAssert(rsize != -1, ("Determine_Intersection : region size failed"));
00348
00349 if (cross_loop_debug_level >= 3) {
00350 fprintf(stdout, "Intersection size : %d\n", isize);
00351 fprintf(stdout, "Region size : %d\n", rsize);
00352 }
00353
00354 *numer += isize;
00355 *denom += rsize;
00356
00357 CXX_DELETE(cr, &LNO_local_pool);
00358 }
00359
00360 return 1;
00361 }
00362
00363 void Add_References(CACHE_CONTENTS *cc, ARRAY_SNL_INFO *asi, INT32 parallel_loop,
00364 ARA_REF_INFO_ST *refs, ACCESS_TYPE atype)
00365 {
00366 for (INT i = 0; i < refs->Elements(); ++i) {
00367 ARA_REF_INFO *ari = refs->Bottom_nth(i);
00368
00369 FmtAssert(!ari->Is_Messy(), ("Add_References : messy reference"));
00370 CACHE_REGION *cr = CXX_NEW(CACHE_REGION(ari, asi, parallel_loop), &LNO_local_pool);
00371
00372 if (cross_loop_debug_level >= 3) {
00373 fprintf(stdout, "Cache Region : \n");
00374 cr->Print(stdout);
00375 }
00376
00377 cc->Add_Region(cr, atype);
00378 }
00379 }
00380
00381
00382
00383 double Determine_Intersection(CACHE_CONTENTS *cc, ARRAY_SNL_INFO *asi, INT32 parallel_loop)
00384 {
00385 double rd_numer = 0.0;
00386 double rd_denom = 0.0;
00387 double wr_numer = 0.0;
00388 double wr_denom = 0.0;
00389
00390 ARA_REF_INFO_ST *rd_refs = & asi->Read_Refs();
00391 ARA_REF_INFO_ST *wr_refs = & asi->Write_Refs();
00392
00393 if (!Intersect_References(cc, asi, parallel_loop, rd_refs, &rd_denom, &rd_numer)) {
00394 return 0.0;
00395 }
00396
00397 if (!Intersect_References(cc, asi, parallel_loop, wr_refs, &wr_denom, &wr_numer)) {
00398 return 0.0;
00399 }
00400
00401 if (rd_denom + wr_denom == 0.0) {
00402 return 1.0;
00403 } else {
00404 double ratio = (rd_numer + wr_numer) / (rd_denom + wr_denom);
00405 return MIN(ratio, 1.0);
00406 }
00407 }
00408
00409
00410
00411 void Update_Cache_Contents(CACHE_CONTENTS *cc, ARRAY_SNL_INFO *asi, INT32 parallel_loop)
00412 {
00413 ARA_REF_INFO_ST *rd_refs = & asi->Read_Refs();
00414 ARA_REF_INFO_ST *wr_refs = & asi->Write_Refs();
00415
00416
00417
00418
00419 Add_References(cc, asi, parallel_loop, wr_refs, CACHE_WRITE_ONLY);
00420 Add_References(cc, asi, parallel_loop, rd_refs, CACHE_READ_ONLY);
00421 }
00422
00423
00424
00425 static double Explore_Path(SNL_STREAM *sst, INT32* curr_path, double curr_min)
00426 {
00427 if (cross_loop_debug_level >= 1) {
00428 fprintf(stdout, "Execution path : [");
00429 for (INT i = 0; i < sst->Num_SNL(); ++i) {
00430 fprintf(stdout, " %d ", curr_path[i]);
00431 }
00432 fprintf(stdout, "]\n");
00433 }
00434
00435 CACHE_CONTENTS *cc = CXX_NEW(CACHE_CONTENTS(FULLY_ASSOCIATIVE, LONG_MAX, NOMINAL_PROCS,
00436 sst->Get_Ali()), &LNO_local_pool);
00437
00438 double cost = 0.0;
00439 for (INT i = 0; i < sst->Num_SNL(); ++i) {
00440 CROSS_SNL_INFO *csi = sst->Get_SNL(i);
00441 double stage_cost = 0.0;
00442
00443
00444 INT parallel_loop = 0;
00445 PARALLEL_INFO *pi;
00446 if (curr_path[i] != -1) {
00447 pi = csi->Get_Parallel_Option(curr_path[i]);
00448 parallel_loop = pi->Parallel_Loop() + 1;
00449 FmtAssert(parallel_loop >= 1 && parallel_loop <= csi->SNL_Depth(),
00450 ("Illegal parallel loop"));
00451 if (cross_loop_debug_level >= 2) {
00452 pi->Print(stdout);
00453 }
00454
00455 stage_cost = pi->Machine_Cost() + pi->Reduction_Cost()
00456 + pi->Parallel_Overhead_Cost();
00457
00458 if (pi->Is_Doacross()) {
00459 stage_cost += pi->Doacross_Overhead();
00460 }
00461 } else {
00462 stage_cost = csi->Get_Seq_Machine_Cost();
00463
00464
00465 if (cross_loop_debug_level >= 2) {
00466 fprintf(stdout, "seq machine cost = %lf seq cache cost = %lf\n",
00467 csi->Get_Seq_Machine_Cost(),csi->Get_Seq_Cache_Cost());
00468 }
00469 }
00470
00471 double iratio = Determine_Intersection(cc, csi->Get_Array_References(),
00472 parallel_loop);
00473 if (curr_path[i] != -1) {
00474 stage_cost = stage_cost + (1.0 - iratio) * pi->Cache_Cost();
00475 } else {
00476 stage_cost = stage_cost + (1.0 - iratio) * csi->Get_Seq_Cache_Cost();
00477 }
00478
00479 cost = cost + stage_cost;
00480
00481 if (cross_loop_debug_level >= 2) {
00482 fprintf(stdout, "Intersection ratio : %lf\n", iratio);
00483 fprintf(stdout, "stage cost = %lf curr cost = %lf \n", stage_cost, cost);
00484 }
00485
00486 if (cross_loop_debug_level >= 3) {
00487 fprintf(stdout, "CACHE_CONTENTS before updating : \n");
00488 cc->Print(stdout);
00489 fprintf(stdout, "References of current SNL : \n");
00490 csi->Get_Array_References()->Print(stdout);
00491 }
00492
00493 Update_Cache_Contents(cc, csi->Get_Array_References(), parallel_loop);
00494
00495 if (cross_loop_debug_level >= 3) {
00496 fprintf(stdout, "CACHE_CONTENTS after updating : \n");
00497 cc->Print(stdout);
00498 }
00499
00500 cc->Compact_Cache();
00501
00502 if (cross_loop_debug_level >= 3) {
00503 fprintf(stdout, "CACHE_CONTENTS after compacting : \n");
00504 cc->Print(stdout);
00505 }
00506
00507 if (cost > curr_min) {
00508 break;
00509 }
00510 }
00511
00512 if (cross_loop_debug_level >= 1) {
00513 if (cost > curr_min) {
00514 fprintf(stdout, "Cost of path >= %lf\n", cost);
00515 } else {
00516 fprintf(stdout, "Cost of path : %lf\n", cost);
00517 }
00518 }
00519
00520 CXX_DELETE(cc, &LNO_local_pool);
00521 return cost;
00522 }
00523
00524
00525
00526 static void Evaluate_Parallel_Paths(SNL_STREAM *sst)
00527 {
00528 double min = DBL_MAX;
00529 for (sst->Stream_Init(FALSE); !sst->Stream_Over(); sst->Stream_Next()) {
00530 INT32* curr_path = sst->Stream_Curr();
00531 double cost = Explore_Path(sst, curr_path, min);
00532 if (cost < min) {
00533 min = cost;
00534 sst->Set_Min_Path(min);
00535 }
00536 }
00537 }
00538
00539
00540
00541 static void Stream_Analysis(SNL_STREAM *sst)
00542 {
00543 INT i;
00544 for (i = 0; i < sst->Num_SNL(); ++i) {
00545 CROSS_SNL_INFO *csi = sst->Get_SNL(i);
00546
00547
00548
00549 double seq_mc_cost;
00550 double seq_cc_cost;
00551 SNL_Parallelization_Costs(csi->SNL_Root(), csi->SNL_Depth(),
00552 csi->Parallel_Options(),& seq_cc_cost, & seq_mc_cost);
00553 csi->Set_Seq_Machine_Cost(seq_mc_cost);
00554 csi->Set_Seq_Cache_Cost(seq_cc_cost);
00555
00556 ARA_LOOP_INFO *ara_root =
00557 CXX_NEW(ARA_LOOP_INFO(csi->SNL_Root(), NULL, TRUE), &ARA_memory_pool);
00558 DO_LOOP_INFO* dli = Get_Do_Loop_Info(csi->SNL_Root());
00559 ARA_Initialize_Loops(csi->SNL_Root(), ara_root);
00560
00561 ARRAY_SNL_INFO *asi = CXX_NEW(ARRAY_SNL_INFO(csi->SNL_Root(), csi->SNL_Depth(), ara_root),
00562 &LNO_local_pool);
00563
00564
00565 Gather_Array_References(asi);
00566 csi->Set_Array_References(asi);
00567
00568 if (cross_loop_debug_level >= 1) {
00569 PARALLEL_INFO_ST *pist = csi->Parallel_Options();
00570
00571 fprintf(stdout, "SNL : %d\n", i);
00572 for (INT j = 0; j < pist->Elements(); ++j) {
00573 pist->Bottom_nth(j)->Print(stdout);
00574 }
00575
00576 if (cross_loop_debug_level >= 2) {
00577 asi->Print(stdout);
00578 }
00579 }
00580
00581
00582 csi->Weed_Out_Inner();
00583 csi->Sort_Parallel_Options();
00584 }
00585
00586
00587 Evaluate_Parallel_Paths(sst);
00588
00589 if (cross_loop_debug_level >= 1) {
00590 sst->Print(stdout);
00591 }
00592
00593 for (i = 0; i < sst->Num_SNL(); ++i) {
00594 CROSS_SNL_INFO *csi = sst->Get_SNL(i);
00595 ARA_Cleanup(csi->SNL_Root());
00596 }
00597 }
00598
00599
00600
00601
00602
00603 WN * Get_Parent_Loop(WN *wn)
00604 {
00605 if (wn) {
00606 WN * parent = LWN_Get_Parent(wn);
00607 if (parent == NULL) {
00608 return NULL;
00609 }
00610 if (WN_opcode(parent) == OPC_DO_LOOP) {
00611 return parent;
00612 } else {
00613 return Get_Parent_Loop(parent);
00614 }
00615 }
00616 return NULL;
00617 }
00618
00619
00620
00621
00622 extern void Cross_Loop_Cache_Analysis(PU_Info* current_pu, WN* func_nd)
00623 {
00624 cross_loop_debug_level = Get_Trace(TP_LNOPT2, TT_CROSS_LOOP);
00625
00626 MEM_POOL_Push(&LNO_local_pool);
00627
00628 if (cross_loop_debug_level >= 1) {
00629 fprintf(stdout, "### Cross Loop Analysis (Begin)\n");
00630 }
00631
00632 FIZ_FUSE_INFO* ffi=
00633 CXX_NEW(FIZ_FUSE_INFO(&LNO_local_pool), &LNO_local_pool);
00634 ffi->Build(func_nd, TRUE);
00635
00636
00637
00638
00639
00640
00641
00642 SNL_STREAM_ST snl_streams(&LNO_local_pool);
00643 WN_MAP snl_map = WN_MAP_Create(&LNO_local_pool);
00644
00645 INT i;
00646 for (i = 0; i < ffi->Num_Snl(); i++) {
00647
00648 if (ffi->Get_Type(i) == Invalid || ffi->Get_Type(i) == Non_SNL)
00649 continue;
00650
00651 if (cross_loop_debug_level >= 1) {
00652 fprintf(stdout, "SNL : %d\n", i);
00653 }
00654
00655 WN* wn_outer_loop = ffi->Get_Wn(i);
00656 INT nloops = ffi->Get_Depth(i);
00657 WN* parent = Get_Parent_Loop(wn_outer_loop);
00658 CROSS_SNL_INFO *csi = CXX_NEW(CROSS_SNL_INFO(wn_outer_loop, nloops), &LNO_local_pool);
00659 WN * key = (parent == NULL) ? func_nd : parent;
00660 SNL_STREAM *sst = (SNL_STREAM *) WN_MAP_Get(snl_map, key);
00661 if (sst != NULL) {
00662 sst->Add_SNL(csi);
00663 } else {
00664 SNL_STREAM * sst = CXX_NEW(SNL_STREAM(parent), & LNO_local_pool);
00665 sst->Add_SNL(csi);
00666 WN_MAP_Set(snl_map, key, sst);
00667 snl_streams.Push(sst);
00668 }
00669 }
00670
00671 for (i = 0; i < snl_streams.Elements(); ++i) {
00672 SNL_STREAM *sst = snl_streams.Bottom_nth(i);
00673 Stream_Analysis(sst);
00674 sst->Cleanup();
00675 }
00676
00677 MEM_POOL_Pop(&LNO_local_pool);
00678
00679 if (cross_loop_debug_level >= 1) {
00680 fprintf(TFile, "### Cross Loop Analysis (End)\n");
00681 }
00682 }
00683
00684
00685
00686
00687 CROSS_SNL_INFO::CROSS_SNL_INFO(WN* parent, INT32 nloops)
00688 : _pist(&LNO_local_pool)
00689 {
00690 _snl_root = parent;
00691 _depth = nloops;
00692 _asi = NULL;
00693 }
00694
00695
00696 void CROSS_SNL_INFO::Weed_Out_Minimum(INT nmin)
00697 {
00698 if (nmin > _pist.Elements())
00699 return;
00700
00701
00702
00703
00704 PARALLEL_INFO_ST tmp_pist(&LNO_local_pool);
00705
00706
00707 while (tmp_pist.Elements() != nmin) {
00708 double min_cost = DBL_MAX;
00709 PARALLEL_INFO *min_pi = NULL;
00710
00711 for (INT i = 0; i < _pist.Elements(); ++i) {
00712 PARALLEL_INFO *pi = _pist.Bottom_nth(i);
00713
00714
00715 BOOL found = false;
00716 for (INT j = 0; j < tmp_pist.Elements(); ++j ) {
00717 if (tmp_pist.Bottom_nth(j) == pi) {
00718 found = TRUE;
00719 break;
00720 }
00721 }
00722
00723 if (found) {
00724 continue;
00725 }
00726
00727 if (pi->Cost() < min_cost) {
00728 min_pi = pi;
00729 min_cost = pi->Cost();
00730 }
00731 }
00732
00733 FmtAssert(min_pi != NULL, ("Could not find the minimum costs"));
00734 tmp_pist.Push(min_pi);
00735 }
00736
00737 _pist.Clear();
00738
00739
00740 for (INT j = 0; j < tmp_pist.Elements(); ++j) {
00741 _pist.Push(tmp_pist.Bottom_nth(j));
00742 }
00743 }
00744
00745
00746
00747 void CROSS_SNL_INFO::Sort_Parallel_Options(void)
00748 {
00749 INT nmin = _pist.Elements();
00750
00751
00752
00753
00754 PARALLEL_INFO_ST tmp_pist(&LNO_local_pool);
00755
00756
00757 while (tmp_pist.Elements() != nmin) {
00758 double min_cost = DBL_MAX;
00759 PARALLEL_INFO *min_pi = NULL;
00760
00761 for (INT i = 0; i < _pist.Elements(); ++i) {
00762 PARALLEL_INFO *pi = _pist.Bottom_nth(i);
00763
00764
00765 BOOL found = false;
00766 for (INT j = 0; j < tmp_pist.Elements(); ++j ) {
00767 if (tmp_pist.Bottom_nth(j) == pi) {
00768 found = TRUE;
00769 break;
00770 }
00771 }
00772
00773 if (found) {
00774 continue;
00775 }
00776
00777 if (pi->Cost() < min_cost) {
00778 min_pi = pi;
00779 min_cost = pi->Cost();
00780 }
00781 }
00782
00783 FmtAssert(min_pi != NULL, ("Could not find the minimum costs"));
00784 tmp_pist.Push(min_pi);
00785 }
00786
00787 _pist.Clear();
00788
00789
00790 for (INT j = 0; j < tmp_pist.Elements(); ++j) {
00791 _pist.Push(tmp_pist.Bottom_nth(nmin-j-1));
00792 }
00793 }
00794
00795
00796 void CROSS_SNL_INFO::Weed_Out_Inner(void)
00797 {
00798 PARALLEL_INFO_ST tmp_pist(&LNO_local_pool);
00799
00800
00801
00802 for (INT i = 0; i < _depth; ++i) {
00803 PARALLEL_INFO *min_pi = NULL;
00804 INT min_depth = INT_MAX;
00805
00806 for (INT j = 0; j < _pist.Elements(); ++j) {
00807 PARALLEL_INFO *pi = _pist.Bottom_nth(j);
00808 if (pi->Parallel_Loop() != i) {
00809 continue;
00810 }
00811
00812 if (pi->Parallel_Depth() <= min_depth) {
00813 if (pi->Parallel_Depth() == min_depth && pi->Cost() > min_pi->Cost()) {
00814 continue;
00815 }
00816 min_pi = pi;
00817 min_depth = pi->Parallel_Depth();
00818 }
00819 }
00820
00821 if (min_pi != NULL) {
00822 tmp_pist.Push(min_pi);
00823 }
00824 }
00825
00826
00827 _pist.Clear();
00828
00829
00830 for (INT j = 0; j < tmp_pist.Elements(); ++j) {
00831 _pist.Push(tmp_pist.Bottom_nth(j));
00832 }
00833 }
00834
00835 void CROSS_SNL_INFO::Print(FILE *f)
00836 {
00837 fprintf(f, "number of loops (_depth) = %d\n", _depth);
00838 fprintf(f, "sequential machine cost = %lf\n", _seq_mch_cost);
00839 fprintf(f, "sequential cache cost = %lf\n", _seq_cache_cost);
00840
00841 for (INT i = 0; i < _pist.Elements(); ++i) {
00842 fprintf(f, "Parallel option %d : \n", i);
00843 _pist.Bottom_nth(i)->Print(f);
00844 }
00845
00846 fprintf(f, "\n_snl_root = %p\n _asi = %p\n", _snl_root, _asi);
00847 }
00848
00849
00850
00851 SNL_STREAM::SNL_STREAM(WN *parent)
00852 : _st(&LNO_local_pool)
00853 {
00854 _parent = parent;
00855 _ara_info = CXX_NEW(ARA_LOOP_INFO(parent, NULL, TRUE), &LNO_local_pool);
00856
00857 _parallel_only = TRUE;
00858 _options = NULL;
00859 _min_path = NULL;
00860 _finished = TRUE;
00861 _min_cost = DBL_MAX;
00862 }
00863
00864 SNL_STREAM::~SNL_STREAM(void)
00865 {
00866 Cleanup();
00867
00868 if (_options) {
00869 CXX_DELETE_ARRAY(_options, &LNO_local_pool);
00870 }
00871 if (_min_path) {
00872 CXX_DELETE_ARRAY(_min_path, &LNO_local_pool);
00873 }
00874 }
00875
00876
00877 void SNL_STREAM::Print(FILE *f)
00878 {
00879 fprintf(f, "_parent = %p\n _ara_info = %p\n", _parent, _ara_info);
00880
00881 if (_min_path) {
00882 fprintf(f, "Minimum Cost : %lf\n", _min_cost);
00883
00884 fprintf (f, "Minimum Path : [");
00885
00886 INT i;
00887 for (i = 0; i < _st.Elements(); ++i) {
00888 fprintf(f, " %d", _min_path[i]);
00889 }
00890 fprintf(f, "]\n");
00891
00892 for (i = 0; i < _st.Elements(); ++i) {
00893 fprintf(f, "%d :\n", i);
00894 if (_min_path[i] != -1) {
00895 _st.Bottom_nth(i)->Get_Parallel_Option(_min_path[i])->Print(f);
00896 } else {
00897 fprintf(f, "Sequential\n");
00898 }
00899 }
00900 }
00901 }
00902
00903 void SNL_STREAM::Cleanup(void)
00904 {
00905 if (_ara_info) {
00906 CXX_DELETE(_ara_info, &LNO_local_pool);
00907 _ara_info = NULL;
00908 }
00909 if (_parent != NULL) {
00910 DO_LOOP_INFO *dli = Get_Do_Loop_Info(_parent);
00911 dli->ARA_Info = NULL;
00912 _parent = NULL;
00913 }
00914 }
00915
00916
00917 void SNL_STREAM::Stream_Init(BOOL parallel_only)
00918 {
00919 _parallel_only = parallel_only;
00920
00921 if (_options == NULL) {
00922 _options = CXX_NEW_ARRAY(INT32, _st.Elements(), &LNO_local_pool);
00923 }
00924
00925 for (INT i = 0; i < _st.Elements(); ++i) {
00926 CROSS_SNL_INFO *csi = _st.Bottom_nth(i);
00927 _options[i] = csi->Num_Parallel_Options() - 1;
00928 }
00929
00930 _finished = FALSE;
00931 }
00932
00933
00934 INT32 *SNL_STREAM::Stream_Curr(void)
00935 {
00936 FmtAssert(_options != NULL && !_finished, ("Illegal SNL_Stream operation : curr"));
00937
00938 return _options;
00939 }
00940
00941
00942 void SNL_STREAM::Set_Min_Path(double cost)
00943 {
00944 if (_min_path == NULL) {
00945 _min_path = CXX_NEW_ARRAY(INT32, _st.Elements(), &LNO_local_pool);
00946 }
00947
00948 for (INT i = 0; i < _st.Elements(); ++i) {
00949 _min_path[i] = _options[i];
00950 }
00951 _min_cost = cost;
00952 }
00953
00954
00955
00956 void SNL_STREAM::Stream_Next(void)
00957 {
00958 FmtAssert(_options != NULL,("Illegal SNL_Stream operation : next")) ;
00959
00960 INT i = _st.Elements() - 1;
00961 if (!_finished) {
00962 while (i >= 0) {
00963 if (_parallel_only) {
00964
00965 if (_options[i] > 0) {
00966 _options[i] = _options[i] - 1;
00967 break;
00968 } else {
00969 _options[i] = _st.Bottom_nth(i)->Num_Parallel_Options() - 1;
00970 i--;
00971 }
00972 } else {
00973
00974 if (_options[i] > -1) {
00975 _options[i] = _options[i] - 1;
00976 break;
00977 } else {
00978 _options[i] = _st.Bottom_nth(i)->Num_Parallel_Options() - 1;
00979 i--;
00980 }
00981 }
00982 }
00983
00984 if (i < 0) {
00985 _finished = TRUE;
00986 }
00987 }
00988 }
00989
00990
00991 BOOL SNL_STREAM::Stream_Over(void)
00992 {
00993 return _finished;
00994 }
00995
00996
00997
00998