00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #if 0
00043
00044 #include <math.h>
00045 #include <sys/types.h>
00046 #include <limits.h>
00047 #include "pu_info.h"
00048 #include "lnoutils.h"
00049 #include "lnopt_main.h"
00050 #include "stab.h"
00051 #include "targ_const.h"
00052 #include "wn_simp.h"
00053 #include "stdlib.h"
00054 #include "lwn_util.h"
00055 #include "strtab.h"
00056 #include "config.h"
00057 #include "optimizer.h"
00058 #include "opt_du.h"
00059 #include "name.h"
00060 #include "wintrinsic.h"
00061 #include "lno_bv.h"
00062 #include "dep_graph.h"
00063 #include "debug.h"
00064 #include "scalar_expand.h"
00065 #include "cxx_memory.h"
00066 #include "reduc.h"
00067 #include "snl_utils.h"
00068 #include "sxlist.h"
00069 #include "snl_dist.h"
00070 #include "permute.h"
00071 #include "sxlimit.h"
00072 #include "parallel.h"
00073 #include "fiz_fuse.h"
00074 #include "ara.h"
00075 #include "snl_deps.h"
00076 #include "lego_util.h"
00077 #include "tile.h"
00078 #include "model.h"
00079 #include "cache_model.h"
00080 #include "config_cache.h"
00081 #include "parmodel.h"
00082 #include "sdlist.h"
00083 #include "doacross.h"
00084 #include "prompf.h"
00085 #include "anl_driver.h"
00086 #include "parids.h"
00087 #include "cond.h"
00088 #include "move.h"
00089 #include "tlog.h"
00090 #include "call_info.h"
00091 #include "cross_snl.h"
00092
00093 #endif
00094
00095 static double Parallel_Cost(WN* wn_outer,
00096 INT permutation[],
00097 INT nloops,
00098 INT parallel_depth,
00099 INT sd_split_depth,
00100 INT split_depth,
00101 SX_PLIST* plist,
00102 double machine_cycles,
00103 double *cache_cycles_per_iter,
00104 double *loop_cycles,
00105 double *reduction_cycles,
00106 double *parallel_cycles,
00107 double *cache_cycles)
00108 {
00109 *cache_cycles_per_iter = 0.0;
00110 *loop_cycles = 0.0;
00111 *reduction_cycles = 0.0;
00112 *parallel_cycles = 0.0;
00113 *cache_cycles = 0.0;
00114
00115 if (parallel_depth == -1)
00116 return (double) DBL_MAX;
00117 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
00118 ? Parallel_Debug_Level : 0;
00119 PAR_STAT::id_count = 0;
00120 PAR_STAT* ps = CXX_NEW(PAR_STAT(wn_outer, nloops, &LNO_local_pool),
00121 &LNO_local_pool);
00122 #ifdef Is_True_On
00123 ps->Sanity_Check(stdout);
00124 #endif
00125 if (parallel_debug_level >= 3) {
00126 fprintf(stdout, "Before:\n");
00127 ps->Print(stdout);
00128 }
00129
00130 ps = ps->Parallel_Interchange(wn_outer, permutation, nloops,
00131 parallel_depth, sd_split_depth, split_depth);
00132 #ifdef Is_True_On
00133 ps->Sanity_Check(stdout);
00134 #endif
00135 double cost = ps->Cycle_Count(wn_outer, permutation, nloops,
00136 parallel_depth, plist, split_depth, machine_cycles,
00137 cache_cycles_per_iter);
00138
00139 *loop_cycles = ps->Loop_Overhead_Cost();
00140 *parallel_cycles = ps->Parallel_Overhead_Cost();
00141 *reduction_cycles = ps->Reduction_Cost();
00142 *cache_cycles = ps->Num_Estimated_Iters() * (*cache_cycles_per_iter) / NOMINAL_PROCS;
00143
00144 if (parallel_debug_level >= 3) {
00145 ps->Sanity_Check(stdout);
00146 fprintf(stdout, "After:\n");
00147 ps->Print(stdout);
00148 }
00149
00150 return cost;
00151 }
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 static double Doacross_Cost(WN* wn_outer,
00175 INT permutation[],
00176 INT nloops,
00177 INT parallel_depth,
00178 SNL_DEP_MATRIX** sdm_inv,
00179 BOOL sdm_scl[],
00180 SX_INFO* sx_info,
00181 SD_INFO* sd_info,
00182 INT sd_split_depth,
00183 double machine_cycles,
00184 double *cache_cycles_per_iter,
00185 double work_estimate,
00186 INT* doacross_tile_size_p,
00187 INT sync_distances[],
00188 INT* doacross_overhead_p,
00189 double *loop_cycles,
00190 double *reduction_cycles,
00191 double *parallel_cycles,
00192 double *cache_cycles)
00193 {
00194 *cache_cycles_per_iter = 0.0;
00195 *loop_cycles = 0.0;
00196 *reduction_cycles = 0.0;
00197 *parallel_cycles = 0.0;
00198 *cache_cycles = 0.0;
00199
00200 INT outer_depth=Do_Loop_Depth(wn_outer);
00201
00202
00203 if (parallel_depth < outer_depth)
00204 return (double)DBL_MAX;
00205
00206 if (parallel_depth >= outer_depth+nloops-1)
00207 return DBL_MAX;
00208
00209
00210
00211 if (!Is_Perfectly_Nested(wn_outer, permutation, nloops, parallel_depth))
00212 return DBL_MAX;
00213
00214 MEM_POOL_Push(&LNO_local_pool);
00215
00216
00217
00218 DOLOOP_STACK* loop_stack = CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00219 &LNO_local_pool);
00220 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_outer, nloops);
00221 Build_Doloop_Stack(wn_inner, loop_stack);
00222 INT outer=MIN(outer_depth+permutation[parallel_depth-outer_depth],
00223 outer_depth+permutation[parallel_depth-outer_depth+1]);
00224 INT inner=MAX(outer_depth+permutation[parallel_depth-outer_depth],
00225 outer_depth+permutation[parallel_depth-outer_depth+1]);
00226 if (!SNL_Is_Invariant(loop_stack, outer, inner)) {
00227 MEM_POOL_Pop(&LNO_local_pool);
00228 return DBL_MAX;
00229 }
00230
00231
00232
00233 SNL_DEP_MATRIX** sdm_red = Red_Dep_Info(wn_outer, permutation, nloops,
00234 parallel_depth, TRUE, FALSE);
00235 INT red_split_depth = Parallelizable_At_Depth(wn_outer, nloops,
00236 permutation, sdm_red, sdm_scl, sx_info, sd_info, sd_split_depth,
00237 parallel_depth);
00238
00239 MEM_POOL_Pop(&LNO_local_pool);
00240
00241
00242 if (red_split_depth == Do_Loop_Depth(wn_outer) + nloops)
00243 return DBL_MAX;
00244
00245 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
00246 ? Parallel_Debug_Level : 0;
00247
00248 if (parallel_debug_level >= 2)
00249 Print_Permutation_Vector(stdout,permutation,nloops,parallel_depth,TRUE);
00250
00251
00252
00253 BOOL *retained=CXX_NEW_ARRAY(INT, nloops, &LNO_local_pool);
00254 for (INT i=0; i<nloops; i++)
00255 retained[i]=SNL_Perm_Retained_Section(i,permutation,nloops);
00256 Compute_Sync_Distances(wn_outer,nloops,permutation,parallel_depth,
00257 sdm_inv,retained,sync_distances);
00258
00259
00260
00261 INT doacross_tile_size=
00262 Get_Doacross_Tile_Size(sync_distances,wn_outer, permutation, nloops,
00263 parallel_depth,NOMINAL_PROCS,work_estimate);
00264 (*doacross_tile_size_p) = doacross_tile_size;
00265
00266
00267
00268 double doall_cycle;
00269 {
00270 MEM_POOL_Push(&LNO_local_pool);
00271 PAR_STAT::id_count = 0;
00272 PAR_STAT* ps = CXX_NEW(PAR_STAT(wn_outer, nloops, &LNO_local_pool),
00273 &LNO_local_pool);
00274 ps = ps->Parallel_Interchange(wn_outer, permutation, nloops,
00275 parallel_depth, sd_split_depth, red_split_depth);
00276 doall_cycle = ps->Cycle_Count(wn_outer, permutation, nloops,
00277 parallel_depth, &sx_info->Plist, red_split_depth, machine_cycles,
00278 cache_cycles_per_iter, TRUE);
00279 *loop_cycles = ps->Loop_Overhead_Cost();
00280 *parallel_cycles = ps->Parallel_Overhead_Cost();
00281 *reduction_cycles = ps->Reduction_Cost();
00282 *cache_cycles = ps->Num_Estimated_Iters() * (*cache_cycles_per_iter) / NOMINAL_PROCS;
00283 MEM_POOL_Pop(&LNO_local_pool);
00284 }
00285
00286
00287 double doacross_delay_cycle=
00288 Compute_Doacross_Delay_Cycle(wn_outer, permutation, parallel_depth,
00289 NOMINAL_PROCS, doacross_tile_size,
00290 sync_distances, machine_cycles);
00291
00292 double doacross_sync_cycle=
00293 Compute_Doacross_Sync_Cycle(wn_outer, permutation, parallel_depth,
00294 doacross_tile_size, sync_distances);
00295
00296
00297
00298 double cost;
00299 if (doacross_delay_cycle == DBL_MAX)
00300 cost = DBL_MAX;
00301 else
00302 cost = doall_cycle + doacross_delay_cycle + doacross_sync_cycle;
00303
00304 (*doacross_overhead_p) = int(doacross_delay_cycle + doacross_sync_cycle);
00305 if (parallel_debug_level >= 2) {
00306 printf(" sync vectors = ");
00307 if (sync_distances[0]!= NULL_DIST)
00308 printf("(%d -1) ",sync_distances[0]);
00309 if (sync_distances[1]!= NULL_DIST)
00310 printf("(%d 1)",sync_distances[1]);
00311 printf("\n");
00312 if (doacross_delay_cycle == DBL_MAX) {
00313 printf(" delay cycles = inf\n");
00314 printf(" sync cycles = inf\n");
00315 printf(" *doacross cycles = inf\n");
00316 } else {
00317 printf(" delay cycles = %13.2f\n", doacross_delay_cycle);
00318 printf(" sync cycles = %13.2f\n", doacross_sync_cycle);
00319 printf(" *doacross cycles = %13.2f\n", cost);
00320 }
00321 }
00322
00323 return cost;
00324 }
00325
00326
00327 PARALLEL_INFO::PARALLEL_INFO(WN* wn_outer,
00328 INT permutation[],
00329 INT nloops,
00330 INT parallel_depth,
00331 INTERCHANGE_TYPE int_type,
00332 SNL_DEP_MATRIX** sdm_inv,
00333 BOOL sdm_scl[],
00334 SX_INFO* sx_info,
00335 SD_INFO* sd_info,
00336 INT sd_split_depth,
00337 double machine_cycles,
00338 double work_estimate,
00339 BOOL dummy)
00340 {
00341 _wn_outer = wn_outer;
00342 _nloops = nloops;
00343 INT i;
00344 for (i = 0; i < nloops; i++)
00345 _permutation[i] = permutation[i];
00346 _int_type = int_type;
00347 _is_doacross = FALSE;
00348 _doacross_overhead = 0;
00349 _preferred_concurrent = FALSE;
00350 for (i = 0; i < 2; i++)
00351 _sync_distances[i] = NULL_DIST;
00352 _sd_split_depth = sd_split_depth;
00353 _split_depth = Parallelizable(wn_outer, permutation, nloops,
00354 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info, _sd_split_depth);
00355 BOOL is_doall = (_split_depth != Do_Loop_Depth(wn_outer) + nloops);
00356 double doall_cost=DBL_MAX;
00357 double doacross_cost=DBL_MAX;
00358
00359
00360
00361
00362
00363
00364
00365 _machine_cycles = machine_cycles;
00366
00367 switch (LNO_Run_Doacross) {
00368 case 0: if (is_doall) {
00369 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00370 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00371 machine_cycles, &_cache_cycles_per_iter, &_loop_cycles,
00372 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00373 }
00374 break;
00375 case 1: if (is_doall) {
00376 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00377 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00378 machine_cycles, &_cache_cycles_per_iter, &_loop_cycles,
00379 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00380 } else if (LNO_Pseudo_Lower) {
00381 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00382 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00383 sd_split_depth, machine_cycles, &_cache_cycles_per_iter,
00384 work_estimate, &_doacross_tile_size, _sync_distances,
00385 &_doacross_overhead, &_loop_cycles,
00386 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00387 }
00388 break;
00389 case 2: if (is_doall)
00390 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00391 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00392 machine_cycles, &_cache_cycles_per_iter,&_loop_cycles,
00393 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00394 if (LNO_Pseudo_Lower)
00395 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00396 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00397 sd_split_depth, machine_cycles, &_cache_cycles_per_iter,
00398 work_estimate, &_doacross_tile_size, _sync_distances,
00399 &_doacross_overhead, &_loop_cycles,
00400 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00401 break;
00402 case 3: if (LNO_Pseudo_Lower)
00403 {
00404 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00405 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00406 sd_split_depth, machine_cycles, &_cache_cycles_per_iter,
00407 work_estimate, &_doacross_tile_size, _sync_distances,
00408 &_doacross_overhead, &_loop_cycles,
00409 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00410 BOOL is_doacross= (doacross_cost != DBL_MAX);
00411 if (!is_doacross && is_doall) {
00412 doall_cost = Parallel_Cost(wn_outer, permutation, nloops,
00413 parallel_depth, _sd_split_depth, _split_depth, &sx_info->Plist,
00414 machine_cycles, &_cache_cycles_per_iter, &_loop_cycles,
00415 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00416 }
00417 }
00418 break;
00419 case 4: if (LNO_Pseudo_Lower)
00420 doacross_cost=Doacross_Cost(wn_outer, permutation, nloops,
00421 parallel_depth, sdm_inv, sdm_scl, sx_info, sd_info,
00422 sd_split_depth, machine_cycles, &_cache_cycles_per_iter,
00423 work_estimate, &_doacross_tile_size, _sync_distances,
00424 &_doacross_overhead, &_loop_cycles,
00425 &_reduction_cycles, &_parallel_cycles, &_cache_cycles);
00426 break;
00427 default: FmtAssert(0,("Invalid -LNO:doacross value"));
00428 }
00429
00430 if (doall_cost==DBL_MAX && doacross_cost==DBL_MAX) {
00431
00432 _parallel_depth = -1;
00433 _work_estimate = 0;
00434 _cost = DBL_MAX;
00435 _is_doacross = FALSE;
00436 _doacross_tile_size = 0;
00437 _sync_distances[0] = 0;
00438 _sync_distances[1] = 0;
00439 _doacross_overhead = 0;
00440 } else if (doall_cost<doacross_cost) {
00441
00442 _parallel_depth = parallel_depth;
00443 _work_estimate = (int) Compute_Work_Estimate(work_estimate,
00444 _cache_cycles_per_iter);
00445 _cost = doall_cost;
00446 _is_doacross = FALSE;
00447 _doacross_tile_size = 0;
00448 _sync_distances[0] = 0;
00449 _sync_distances[1] = 0;
00450 _doacross_overhead = 0;
00451 } else {
00452
00453 _parallel_depth = parallel_depth;
00454 _work_estimate = (int) Compute_Work_Estimate(work_estimate,
00455 _cache_cycles_per_iter);
00456 _cost = doacross_cost;
00457 _is_doacross = TRUE;
00458 _split_depth = -1;
00459 }
00460 }
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470 void SNL_Parallelization_Costs(WN* wn_outer, INT nloops, PARALLEL_INFO_ST *pist,
00471 double *min_seq_cache_cost, double *min_seq_machine_cost)
00472 {
00473
00474 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
00475 ? Parallel_Debug_Level : 0;
00476
00477
00478 if (Outermore_Parallel_Construct_Or_Lego_Loop(wn_outer)
00479 || Innermore_Parallel_Or_Lego_Loop(wn_outer))
00480 return;
00481
00482
00483 WN* wn_new_outer = Minimal_Kernel(wn_outer, nloops);
00484 if (wn_new_outer == NULL)
00485 return;
00486 INT new_nloops =
00487 nloops - (Do_Loop_Depth(wn_new_outer) - Do_Loop_Depth(wn_outer));
00488
00489
00490 new_nloops -= SNL_Inner_Exit_Count(wn_new_outer, new_nloops);
00491 if (new_nloops == 0)
00492 return;
00493
00494
00495 if (new_nloops > MAX_PARALLEL_NLOOPS) {
00496 INT outer_depth = Do_Loop_Depth(wn_new_outer);
00497 WN* wn_inner = SNL_Get_Inner_Snl_Loop(wn_new_outer, new_nloops);
00498 DOLOOP_STACK stack(&LNO_local_pool);
00499 Build_Doloop_Stack(wn_inner, &stack);
00500 INT extra_nloops = new_nloops;
00501 for (INT i = 0; i < new_nloops; i += MAX_PARALLEL_NLOOPS) {
00502 WN* wn_local_outer = stack.Bottom_nth(outer_depth + i);
00503 INT local_nloops = extra_nloops >= MAX_PARALLEL_NLOOPS
00504 ? MAX_PARALLEL_NLOOPS : extra_nloops;
00505 SNL_Parallelization_Costs(wn_local_outer, local_nloops, pist,
00506 min_seq_cache_cost, min_seq_machine_cost);
00507 extra_nloops -= MAX_PARALLEL_NLOOPS;
00508 }
00509 return;
00510 }
00511
00512
00513 INT* permutation = CXX_NEW_ARRAY(INT, new_nloops, &LNO_local_pool);
00514
00515
00516 ARA_LOOP_INFO *ara_root =
00517 CXX_NEW(ARA_LOOP_INFO(wn_new_outer, NULL, TRUE), &ARA_memory_pool);
00518 DO_LOOP_INFO* dli = Get_Do_Loop_Info(wn_new_outer);
00519 ARA_Initialize_Loops(wn_new_outer, ara_root);
00520 dli->ARA_Info->Walk_Loop();
00521
00522
00523 SX_INFO sx_info(&LNO_local_pool);
00524 sx_info.Make_Sx_Info(wn_new_outer, new_nloops, TRUE);
00525 SX_PLIST* plist = &sx_info.Plist;
00526 SD_INFO sd_info(&LNO_local_pool);
00527 sd_info.Make_Sd_Info(wn_new_outer, new_nloops);
00528 SD_PLIST* sd_plist = &sd_info.Plist;
00529
00530
00531 BOOL *sdm_scl = Scl_Dep_Info(wn_new_outer, new_nloops);
00532 SNL_DEP_MATRIX** sdm_inv = Inv_Dep_Info(wn_new_outer, new_nloops, TRUE,
00533 FALSE);
00534
00535
00536 if (sdm_inv[new_nloops - 1] == NULL) {
00537 ARA_Cleanup(wn_new_outer);
00538 return;
00539 }
00540
00541
00542 double machine_cycles = 0.0;
00543 double work_estimate = 0.0;
00544 double min_parallel_cycles = 0.0;
00545
00546
00547 double min_seq_cost = DBL_MAX;
00548 *min_seq_machine_cost = DBL_MAX;
00549 *min_seq_cache_cost = DBL_MAX;
00550
00551 INT outer_depth = Do_Loop_Depth(wn_new_outer);
00552 BOOL is_fully_permutable = Fully_Permutable_Permutation(wn_new_outer,
00553 new_nloops);
00554 SNL_DEP_MATRIX** sdm_inv_np = Inv_Dep_Info(wn_new_outer, new_nloops,
00555 is_fully_permutable, TRUE);
00556 PAR_DIR_TYPE* par_directive = CXX_NEW_ARRAY(PAR_DIR_TYPE, new_nloops,
00557 &LNO_local_pool);
00558 BOOL par_pref = Parallel_Directive_Class(wn_new_outer, new_nloops,
00559 par_directive);
00560 for (INT i = outer_depth; i < outer_depth + new_nloops; i++) {
00561 INT ii = i - outer_depth;
00562 machine_cycles = SNL_Machine_Cost(wn_new_outer, new_nloops, i, plist,
00563 &work_estimate, TRUE);
00564 if (work_estimate == 0.0)
00565 DevWarn("Work Estimate for loop %s at %d is 0",
00566 WB_Whirl_Symbol(wn_new_outer), (INT) WN_linenum(wn_new_outer));
00567 min_parallel_cycles = SNL_Min_Parallel_Overhead_Cost(wn_new_outer,
00568 new_nloops, i);
00569
00570
00571 for (INT j = 0; j < new_nloops; j++) {
00572 if (par_directive[j] == PD_NO_CONCURRENT)
00573 continue;
00574
00575 for (INT k = 0; k < Choose(new_nloops - 1, ii); k++) {
00576 Permutation_Vector(ii, j, k, new_nloops, permutation);
00577 INT sd_split_depth = -1;
00578 INTERCHANGE_TYPE int_type = Is_Legal_Permutation_Class(wn_new_outer,
00579 permutation, new_nloops, i, &sx_info, &sd_info, sdm_inv_np,
00580 &sd_split_depth, FALSE,
00581 is_fully_permutable);
00582 if (int_type == INT_NONE)
00583 continue;
00584
00585 PARALLEL_INFO* pi = CXX_NEW(PARALLEL_INFO(wn_new_outer, permutation,
00586 new_nloops, i, int_type, sdm_inv, sdm_scl, &sx_info, &sd_info,
00587 sd_split_depth, machine_cycles, work_estimate, TRUE), &LNO_local_pool);
00588
00589 if (pi->Parallel_Depth() >= 0) {
00590
00591
00592 double seq_cost = machine_cycles * NOMINAL_PROCS + pi->Cache_Cost() * NOMINAL_PROCS;
00593 if (seq_cost < min_seq_cost) {
00594 min_seq_cost = seq_cost;
00595 *min_seq_cache_cost = pi->Cache_Cost() * NOMINAL_PROCS;
00596 *min_seq_machine_cost = machine_cycles * NOMINAL_PROCS;
00597 }
00598
00599
00600 pist->Push(pi);
00601
00602 if (par_directive[j] == PD_PREFER_CONCURRENT) {
00603 pi->Set_Preferred();
00604 }
00605 } else {
00606 CXX_DELETE(pi,&LNO_local_pool);
00607 }
00608 }
00609 }
00610 }
00611
00612 if (*min_seq_cache_cost == DBL_MAX) {
00613 *min_seq_cache_cost = 0.0;
00614 }
00615
00616 if (*min_seq_machine_cost == DBL_MAX) {
00617 *min_seq_machine_cost = 0.0;
00618 }
00619
00620 ARA_Cleanup(wn_new_outer);
00621 }
00622
00623 void PARALLEL_INFO::Print(FILE *file)
00624 {
00625 Print_Permutation_Vector(file, _permutation, _nloops, _parallel_depth, _is_doacross);
00626 fprintf(file, "parallel_depth : %d\n", _parallel_depth);
00627 fprintf(file, "cost : %lf\n", _cost);
00628 fprintf(file, "rc = %lf lc = %lf pc = %lf mc = %lf cci = %lf cc= %lf\n",
00629 _reduction_cycles, _loop_cycles, _parallel_cycles,
00630 _machine_cycles, _cache_cycles_per_iter, _cache_cycles);
00631
00632 if (_is_doacross) {
00633 fprintf(file, "doacross overhead = %lf\n", (double) _doacross_overhead);
00634 }
00635 }