00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322 #define __STDC_LIMIT_MACROS
00323 #include <stdint.h>
00324 #ifdef USE_PCH
00325 #include "lno_pch.h"
00326 #endif // USE_PCH
00327 #pragma hdrstop
00328
00329 const static char *source_file = __FILE__;
00330 const static char *rcs_id = "$Source: ../../be/lno/SCCS/s.model.cxx $ $Revision: 1.21 $";
00331
00332 #include <sys/types.h>
00333 #include <alloca.h>
00334 #include <math.h>
00335
00336 #include "model.h"
00337 #include "cxx_memory.h"
00338 #include "lnopt_main.h"
00339 #include "stab.h"
00340 #include "lnotarget.h"
00341 #include "dep_graph.h"
00342 #include "lwn_util.h"
00343 #include "cache_model.h"
00344 #include "reduc.h"
00345 #include "config.h"
00346 #include "config_targ_opt.h"
00347 #include "config_cache.h"
00348 #include "config_lno.h"
00349 #include "config_opt.h"
00350 #include "tlog.h"
00351
00352 typedef HASH_TABLE<WN*,INT> WN2INT;
00353
00354 #ifdef TARG_MIPS
00355 #define Reserved_Int_Regs 9 // $0, $26-$29, loop ub, fudge (3)
00356 #endif
00357 #ifdef TARG_IA64
00358 #define Reserved_Int_Regs 10 // r0, r1, r12, r13, loop ub, fudge (5)
00359 #endif
00360 #ifdef TARG_IA32
00361 #define Reserved_Int_Regs 3 // $sp, $bp, fudge(1)
00362 #endif
00363 #ifdef TARG_X8664
00364 #define Reserved_Int_Regs 3 // $sp, $bp, fudge(1)
00365 #endif
00366
00367 #define MTYPE_is_double(m) (MTYPE_size_reg(m)==MTYPE_size_reg(MTYPE_I8))
00368
00369 static MEM_POOL Model_Local_Pool;
00370 static MEM_POOL Model_Lat_Pool;
00371 static BOOL model_mempool_initialized;
00372
00373
00374 static INT Num_Good(WN *wn)
00375 {
00376 if (!wn) return(0);
00377 if (WN_operator(wn) == OPR_DO_LOOP) {
00378 if (Do_Loop_Is_Good(wn)) {
00379 return(1+Num_Good(LWN_Get_Parent(wn)));
00380 } else {
00381 return(0);
00382 }
00383 } else {
00384 return(Num_Good(LWN_Get_Parent(wn)));
00385 }
00386 }
00387
00388
00389
00390 static INT Find_Step(WN *wn, INT i)
00391 {
00392 if (!wn) return(0);
00393 if (WN_operator(wn) == OPR_DO_LOOP) {
00394 DO_LOOP_INFO *dli = (DO_LOOP_INFO *) WN_MAP_Get(LNO_Info_Map,wn);
00395 if (dli->Depth < i) {
00396 return(0);
00397 } else if (dli->Depth > i) {
00398 return(Find_Step(LWN_Get_Parent(wn),i));
00399 } else {
00400 ACCESS_VECTOR *Step = dli->Step;
00401 if (Step->Is_Const()) {
00402 return(Step->Const_Offset);
00403 } else {
00404 return 0;
00405 }
00406 }
00407 } else {
00408 return(Find_Step(LWN_Get_Parent(wn),i));
00409 }
00410 }
00411
00412
00413
00414
00415
00416 void ARRAY_REF_NODE::Print(FILE *fp) const
00417 {
00418 fprintf(fp,"(size=%d) ", _element_size);
00419 fprintf(fp, "Wn = 0x%p ", Wn);
00420 Array->Print(fp);
00421 }
00422
00423 INT Max_Unroll_Prod=16;
00424 INT Max_Cse_Dist = 6;
00425
00426 INT LOOP_MODEL::_model_no = 0;
00427
00428 LOOP_MODEL::~LOOP_MODEL()
00429 {
00430 CXX_DELETE_ARRAY(_block_number,Malloc_Mem_Pool);
00431 CXX_DELETE_ARRAY(_block_number_inner,Malloc_Mem_Pool);
00432 CXX_DELETE_ARRAY(_iloop,Malloc_Mem_Pool);
00433 CXX_DELETE_ARRAY(_iloop_inner,Malloc_Mem_Pool);
00434 CXX_DELETE_ARRAY(_stripsz,Malloc_Mem_Pool);
00435 CXX_DELETE_ARRAY(_stripsz_inner,Malloc_Mem_Pool);
00436 CXX_DELETE_ARRAY(_striplevel,Malloc_Mem_Pool);
00437 CXX_DELETE_ARRAY(_striplevel_inner,Malloc_Mem_Pool);
00438 CXX_DELETE_ARRAY(_new_order,Malloc_Mem_Pool);
00439 CXX_DELETE_ARRAY(_new_order_inner,Malloc_Mem_Pool);
00440 }
00441
00442 static BOOL debug_model;
00443
00444 static INT Inner_Invariant_Refs;
00445 static double Invariant_Ref_Coeff;
00446 static INT Num_Complex_Mpy;
00447
00448 INT64 *SNL_Line_Numbers;
00449
00450 extern INT Debug_Cache_Model;
00451
00452 void
00453 LOOP_MODEL::Model(WN* wn,
00454 BOOL* can_be_inner,
00455 BOOL* can_be_unrolled,
00456 INT outermost_can_be_tiled,
00457 ARRAY_DIRECTED_GRAPH16* array_graph,
00458 SX_INFO* pi,
00459 INT SNL_Depth,
00460 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
00461 {
00462 if (!model_mempool_initialized) {
00463 MEM_POOL_Initialize(&Model_Local_Pool,"Model_Local_Pool",FALSE);
00464 MEM_POOL_Initialize(&Model_Lat_Pool,"Model_Local_Pool",FALSE);
00465 model_mempool_initialized = TRUE;
00466 }
00467
00468 MEM_POOL_Push(&Model_Local_Pool);
00469
00470 INT wndepth = Do_Loop_Depth(wn);
00471 _num_loops = wndepth+1;
00472
00473 Is_True(WN_operator(wn)==OPR_DO_LOOP, ("non DO loop passed to LOOP_MODEL"));
00474 Is_True(Do_Loop_Is_Inner(wn),("non inner loop passed to LOOP_MODEL"));
00475 _est_num_iterations=CXX_NEW_ARRAY(INT64, wndepth+1,&Model_Local_Pool);
00476 _required_unroll=CXX_NEW_ARRAY(INT, wndepth+1,&Model_Local_Pool);
00477 _required_blocksize=CXX_NEW_ARRAY(INT,
00478 (wndepth+1)*MHD_MAX_LEVELS,&Model_Local_Pool);
00479 _required_permutation=CXX_NEW_ARRAY(INT, wndepth+1,&Model_Local_Pool);
00480 for (INT iii = 0; iii <= wndepth; iii++)
00481 _required_permutation[iii] = -1;
00482 _block_number = CXX_NEW_ARRAY(INT, Do_Loop_Depth(wn)+1,Malloc_Mem_Pool);
00483 _block_number_inner = CXX_NEW_ARRAY(INT, wndepth+1,Malloc_Mem_Pool);
00484 _iloop = CXX_NEW_ARRAY(INT, (wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00485 _iloop_inner = CXX_NEW_ARRAY(INT,(wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00486 _stripsz = CXX_NEW_ARRAY(INT,(wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00487 _stripsz_inner = CXX_NEW_ARRAY(INT,(wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00488 _striplevel = CXX_NEW_ARRAY(INT,(wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00489 _striplevel_inner = CXX_NEW_ARRAY(INT,(wndepth+1)*MHD_MAX_LEVELS,Malloc_Mem_Pool);
00490 _new_order = CXX_NEW_ARRAY(INT,wndepth+1,Malloc_Mem_Pool);
00491 _new_order_inner = CXX_NEW_ARRAY(INT,wndepth+1,Malloc_Mem_Pool);
00492 _num_cycles = -1.0;
00493 _wn = wn;
00494 _can_be_inner = can_be_inner;
00495 _num_evaluations = 0;
00496 _num_fp_regs = Target_FPRs + 1;
00497 _num_fp_refs = 0;
00498 _model_no++;
00499 _lat_graph = NULL;
00500
00501 debug_model = Get_Trace(TP_LNOPT, TT_LNO_MODEL);
00502 if (debug_model) {
00503 fprintf(TFile,"Modeling an SNL\n");
00504 }
00505
00506
00507 INT i;
00508 _inner_loop = wndepth;
00509 for (i = 0; i <= wndepth; i++) {
00510 _block_number[i] = 1;
00511 _new_order[i] = i;
00512 }
00513 _nstrips = 0;
00514
00515 if (LNO_Analysis || LNO_Tlog) {
00516 SNL_Line_Numbers = CXX_NEW_ARRAY(INT64,1+wndepth,Malloc_Mem_Pool);
00517 }
00518 i = wndepth;
00519 WN *tmp = wn;
00520 _blocking_disabled = LNO_Blocking == 0;
00521
00522 #if 0
00523
00524
00525
00526 #ifdef TARG_X8664
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536 #endif
00537 #endif
00538
00539 #ifdef TARG_X8664
00540
00541
00542 if(Is_Aggressive_Vintr_Loop(wn))
00543 _blocking_disabled = TRUE;
00544 #endif
00545
00546 if (LNO_Interchange == FALSE) {
00547 for (INT j = 0; j <= wndepth; j++)
00548 _required_permutation[j] = j;
00549 }
00550 #ifdef TARG_X8664
00551
00552
00553
00554
00555 else if(Is_Vectorizable_Loop(wn) && Is_Vectorization_Beneficial(WN_do_body(wn))){
00556 _required_permutation[_inner_loop] = _inner_loop;
00557 for (INT j = 0; j <= wndepth; j++)
00558 _can_be_inner[j] = (j == _inner_loop);
00559 }
00560 #endif
00561
00562 INT loop_count = 0;
00563 while (tmp) {
00564 if (WN_operator(tmp) == OPR_DO_LOOP) {
00565 loop_count++;
00566 if (LNO_Analysis || LNO_Tlog) {
00567 SNL_Line_Numbers[i] = WN_Get_Linenum(tmp);
00568 }
00569 DO_LOOP_INFO *dli = Get_Do_Loop_Info(tmp);
00570 _required_unroll[i] = dli->Required_Unroll;
00571 for (INT ll = 0; ll < MHD_MAX_LEVELS; ll++) {
00572 Is_True(dli->Required_Blocksize[ll] >= -1 &&
00573 dli->Required_Blocksize[ll] <= 10000,
00574 ("Suspicious required blocksize %d for loopno=%d level=%d",
00575 dli->Required_Blocksize[ll], i, ll));
00576 _required_blocksize[i*MHD_MAX_LEVELS+ll] =
00577 dli->Required_Blocksize[ll];
00578 }
00579 if (_blocking_disabled == FALSE && loop_count <= SNL_Depth)
00580 _blocking_disabled = dli->Cannot_Block;
00581 if (dli->Permutation_Spec_Count > 0) {
00582 for (INT j = 0; j < dli->Permutation_Spec_Count; j++) {
00583 _required_permutation[i+j] = dli->Permutation_Spec_Array[j] + i;
00584 }
00585 }
00586 _est_num_iterations[i--] = dli->Est_Num_Iterations;
00587 }
00588 tmp = LWN_Get_Parent(tmp);
00589 }
00590 Is_True(i == -1, ("Bad loop depth"));
00591
00592 BOOL error = FALSE;
00593
00594 INT jj;
00595 for (jj = 0; !error && jj <= wndepth; jj++) {
00596 if (_required_permutation[jj] >= 0) {
00597
00598 for (INT jjj = 0; jjj < jj; jjj++)
00599 if (_required_permutation[jj] == _required_permutation[jjj])
00600 error = TRUE;
00601
00602 if ( ! LNO_Apply_Illegal_Transformation_Directives ) {
00603 if (jj < outermost_can_be_tiled ||
00604 _required_permutation[jj] < outermost_can_be_tiled)
00605 error = TRUE;
00606 }
00607 }
00608 }
00609 if (error) {
00610
00611 for (INT jj = 0; jj <= wndepth; jj++)
00612 _required_permutation[jj] = -1;
00613 }
00614 for (jj = 0; jj <= wndepth; jj++)
00615 if (_required_permutation[jj] != -1)
00616 _can_be_inner[_required_permutation[jj]] = (jj == wndepth);
00617
00618
00619 _OP_issue = 0.0;
00620 INT num_good = Num_Good(wn);
00621 INT num_bad = Do_Loop_Depth(wn)+1-num_good;
00622
00623 Num_Complex_Mpy = 0;
00624
00625 _OP_resource_count = OP_Resources(WN_do_body(wn), &_OP_issue, invar_table);
00626 if (_OP_resource_count) {
00627 if (debug_model) {
00628 fprintf(TFile, "OP_Resource_Count:\n");
00629 TI_RES_COUNT_Print(TFile, _OP_resource_count);
00630 fprintf(TFile, "\n");
00631 }
00632 TI_RES_COUNT* resource_count = TI_RES_COUNT_Alloc(&Model_Local_Pool);
00633 _loop_rcycles_unroll_by =
00634 CXX_NEW_ARRAY(double, Max_Unroll_Prod, &Model_Local_Pool);
00635 for (i = 0; i < Max_Unroll_Prod; i++) {
00636 TI_RES_COUNT* tmp_resource_count = TI_RES_COUNT_Alloc(&Model_Local_Pool);
00637 TI_RES_COUNT_Add(tmp_resource_count, resource_count, _OP_resource_count);
00638 LNOTARGET_Loop_Inc_Test_Res(tmp_resource_count);
00639 _loop_rcycles_unroll_by[i] =
00640 TI_RES_COUNT_Min_Cycles(tmp_resource_count)/(i+1);
00641 TI_RES_COUNT_Add(resource_count, resource_count, _OP_resource_count);
00642 }
00643 }
00644
00645 _LOOP_INIT_issue = 2.0;
00646 _base_int_regs = Reserved_Int_Regs;
00647
00648 if (Is_Target_R8K()) {
00649 _issue_rate = 4.0;
00650 _base_fp_regs = 18;
00651 _num_mem_units = 2.0;
00652 } else if (Is_Target_R10K()) {
00653 _issue_rate = 4.0;
00654 _base_fp_regs = 14;
00655 _num_mem_units = 1.0;
00656 } else if (Is_Target_R4K()) {
00657 _issue_rate = 1.0;
00658 _base_fp_regs = 12;
00659 _num_mem_units = 1.0;
00660 } else if (Is_Target_R5K()) {
00661 _issue_rate = 2.0;
00662 _base_fp_regs = 14;
00663 _num_mem_units = 1.0;
00664 #ifdef TARG_MIPS
00665 } else if (Is_Target_Sb1()) {
00666 _issue_rate = 4.0;
00667 _base_fp_regs = 18;
00668 _num_mem_units = 2.0;
00669 #endif
00670 #ifdef TARG_IA64
00671 } else if (Is_Target_Itanium()) {
00672
00673 _issue_rate = 6.0;
00674 _base_fp_regs = 32;
00675 _num_mem_units = 2.0;
00676 #endif
00677 #ifdef TARG_X8664
00678 } else if (Is_Target_x86_64()) {
00679 _issue_rate = 3.0;
00680 _base_fp_regs = 16;
00681 _num_mem_units = 2.0;
00682 #endif
00683 } else {
00684 Lmt_DevWarn(1, ("TODO: LNO machine model parameters are just wild guesses"));
00685 _issue_rate = 4.0;
00686 _base_fp_regs = 4;
00687 _num_mem_units = 2.0;
00688 }
00689
00690 _arl = CXX_NEW(ARRAY_REF(WN_do_body(wn),
00691 SNL_Depth,
00692 &Model_Local_Pool,
00693 invar_table),
00694 &Model_Local_Pool);
00695
00696 _num_tlb = _arl->Elements();
00697
00698 if (_OP_resource_count) {
00699
00700 MEM_POOL_Push(&Model_Lat_Pool);
00701 _lat_graph = CXX_NEW(LAT_DIRECTED_GRAPH16(50,200,num_good,num_bad,
00702 &Model_Lat_Pool,array_graph),
00703 &Model_Lat_Pool);
00704 if (_lat_graph->Add_Vertices_Op_Edges(WN_do_body(wn),invar_table) == -1)
00705 _OP_resource_count = NULL;
00706 if (_lat_graph->Add_Flow_Edges() == -1)
00707 _OP_resource_count = NULL;
00708
00709
00710 _scalar_fp_regs = Unique_Unstored_Fp_Scalar_Refs(WN_do_body(wn),_arl,pi);
00711 _scalar_int_regs = Unique_Unstored_Int_Scalar_Refs(WN_do_body(wn),_arl,pi);
00712 }
00713
00714 _num_fp_array_refs = _arl->Num_Fp_Refs();
00715 _num_int_array_refs = _arl->Num_Int_Refs();
00716
00717
00718
00719
00720 if (_num_fp_array_refs > 0) {
00721 Invariant_Ref_Coeff = 1 / (double)(_num_fp_array_refs * Max_Unroll_Prod);
00722 }
00723 else {
00724 Invariant_Ref_Coeff = 0.0;
00725 }
00726 if (debug_model) {
00727 fprintf(TFile, "Invariant_Ref_Coeff = 1 / (%d * %d) = %f\n",
00728 _num_fp_array_refs, Max_Unroll_Prod, Invariant_Ref_Coeff);
00729 }
00730
00731
00732 for (i=num_good+num_bad-1; i>=0; i--) {
00733 if (can_be_inner[i]) {
00734 Try_Inner(can_be_unrolled,outermost_can_be_tiled,
00735 i,num_good+num_bad);
00736 }
00737 }
00738
00739 if (_lat_graph) {
00740 CXX_DELETE(_lat_graph, &Model_Lat_Pool);
00741 MEM_POOL_Pop(&Model_Lat_Pool);
00742 _lat_graph = NULL;
00743 }
00744
00745 CXX_DELETE_ARRAY(_est_num_iterations,&Model_Local_Pool);
00746 CXX_DELETE_ARRAY(_required_unroll,&Model_Local_Pool);
00747 CXX_DELETE_ARRAY(_required_blocksize,&Model_Local_Pool);
00748 CXX_DELETE_ARRAY(_required_permutation,&Model_Local_Pool);
00749 MEM_POOL_Pop(&Model_Local_Pool);
00750 if (debug_model) {
00751 fprintf(TFile,"Evaluated %d combinations\n",_num_evaluations);
00752 fprintf(TFile,"And the results are:\n");
00753 fprintf(TFile,"Transform<loop,reg> = (");
00754 for (INT b=0; b<num_good+num_bad; b++)
00755 fprintf(TFile,"<%d,%d>", _new_order[b], _block_number[b]);
00756 if (_nstrips > 0) {
00757 fprintf(TFile," cblk[stripdepth=%d]=",_stripdepth);
00758 for (INT s=0; s<_nstrips; s++)
00759 fprintf(TFile,"(%d,%d)",_iloop[s],_stripsz[s]);
00760 }
00761 fprintf(TFile,")\n");
00762 if (_OP_resource_count==NULL)
00763 fprintf(TFile,"Couldn't accurately evaluate ops\n");
00764 else if (_num_fp_regs > Target_FPRs)
00765 fprintf(TFile,"Couldn't FP register allocate\n");
00766 else if (_num_int_regs > Target_INTRs)
00767 fprintf(TFile,"Couldn't INT register allocate (%d)\n", _num_int_regs);
00768 else {
00769 fprintf(TFile,"The number of cycles is %f \n", _num_cycles);
00770 fprintf(TFile,"The number of fp registers needed is %d\n", _num_fp_regs);
00771 fprintf(TFile,"The number of int registers needed is %d\n", _num_int_regs);
00772 }
00773 }
00774 if (LNO_Verbose) {
00775 printf("Evaluated %d combinations\n",_num_evaluations);
00776 #if 0 // the snl code prints this out later in an easier to read format
00777 printf("And the results are:\n");
00778 printf("Transform<loop,reg> = (");
00779 for (INT b=0; b<num_good+num_bad; b++)
00780 printf("<%d,%d>", _new_order[b], _block_number[b]);
00781 if (_nstrips > 0) {
00782 printf("cblk(sd=%d)=",_stripdepth);
00783 for (INT s=0; s<_nstrips; s++)
00784 printf("(%d,%d)",_iloop[s],_stripsz[s]);
00785 }
00786 printf(")\n");
00787 #endif
00788 if (_OP_resource_count==NULL)
00789 printf("Couldn't accurately evaluate ops\n");
00790 else if (_num_fp_regs > Target_FPRs)
00791 printf("Couldn't FP register allocate\n");
00792 else if (_num_int_regs > Target_INTRs)
00793 printf("Couldn't INT register allocate (%d)\n", _num_int_regs);
00794 else {
00795 printf("The number of cycles is %f \n", _num_cycles);
00796 printf("The number of fp registers needed is %d\n", _num_fp_regs);
00797 printf("The number of int registers needed is %d\n", _num_int_regs);
00798 }
00799
00800 }
00801 if ( LNO_Tlog ) {
00802 #define TLOG_MSG_LENGTH 640 // 30 chars * 16 loops + misc.
00803
00804 char message[TLOG_MSG_LENGTH];
00805 char buffer[32];
00806 UINT16 char_count=0;
00807 message[0]='\0';
00808
00809 sprintf(buffer,"comb=%d ",_num_evaluations);
00810 if (char_count+strlen(buffer)>TLOG_MSG_LENGTH) {
00811 DevWarn("Tlog message buffer overflow");
00812 char_count=0;
00813 message[0]='\0';
00814 } else {
00815 strcpy(message+char_count, buffer);
00816 char_count+=strlen(buffer);
00817 }
00818
00819 for (INT b=0; b<num_good+num_bad; b++) {
00820 sprintf(buffer,"<%d,%d>", _new_order[b], _block_number[b]);
00821 if (char_count+strlen(buffer)>TLOG_MSG_LENGTH) {
00822 DevWarn("Tlog message buffer overflow");
00823 char_count=0;
00824 message[0]='\0';
00825 } else {
00826 strcpy(message+char_count, buffer);
00827 char_count+=strlen(buffer);
00828 }
00829 }
00830 if (_nstrips > 0) {
00831 sprintf(buffer," sd=%d ",_stripdepth);
00832 if (char_count+strlen(buffer)>TLOG_MSG_LENGTH) {
00833 DevWarn("Tlog message buffer overflow");
00834 char_count=0;
00835 message[0]='\0';
00836 } else {
00837 strcpy(message+char_count, buffer);
00838 char_count+=strlen(buffer);
00839 }
00840 for (INT s=0; s<_nstrips; s++) {
00841 sprintf(buffer,"(%d,%d)",_iloop[s],_stripsz[s]);
00842 if (char_count+strlen(buffer)>TLOG_MSG_LENGTH) {
00843 DevWarn("Tlog message buffer overflow");
00844 char_count=0;
00845 message[0]='\0';
00846 } else {
00847 strcpy(message+char_count, buffer);
00848 char_count+=strlen(buffer);
00849 }
00850 }
00851 }
00852 sprintf(buffer," cycles=%f fpreg=%d", _num_cycles, _num_fp_regs);
00853 if (char_count+strlen(buffer)>TLOG_MSG_LENGTH) {
00854 DevWarn("Tlog message buffer overflow");
00855 char_count=0;
00856 message[0]='\0';
00857 } else {
00858 strcpy(message+char_count, buffer);
00859 char_count+=strlen(buffer);
00860 }
00861 sprintf(buffer,"INNER_LOOP %d",
00862 Srcpos_To_Line(SNL_Line_Numbers[_new_order[wndepth]]));
00863
00864 SRCPOS srcpos=WN_Get_Linenum(wn);
00865 Generate_Tlog("LNO","snl", Srcpos_To_Line(srcpos),
00866 ST_name(WN_st(WN_index(wn))), message, buffer, "");
00867
00868 }
00869
00870 if (LNO_Analysis) {
00871 fprintf(LNO_Analysis," (INNER_LOOP %d)\n",
00872 Srcpos_To_Line(SNL_Line_Numbers[_new_order[wndepth]]));
00873 }
00874 if (LNO_Analysis) {
00875 CXX_DELETE_ARRAY(SNL_Line_Numbers, Malloc_Mem_Pool);
00876 }
00877 if (_num_tlb > Mhd.L[0].TLB_Entries) {
00878 _num_cycles += Mhd.L[0].TLB_Miss_Penalty * (Mhd.L[0].TLB_Entries-_num_tlb);
00879 }
00880 }
00881
00882
00883
00884
00885
00886
00887
00888 static INT
00889 Num_Invariant_Refs(ARRAY_REF_LIST* arl, INT loop)
00890 {
00891 if (debug_model) {
00892 arl->Base_Array->Print(TFile);
00893 fprintf(TFile, "\n");
00894 }
00895
00896 INT num_invar_refs = 0;
00897 arl->Remove_Cse(loop, 0, 1);
00898 arl->Mark_Invariants(loop);
00899 ARRAY_REF_ITER iter(arl);
00900 for (ARRAY_REF_NODE* node = iter.First(); node; node = iter.Next()) {
00901 if (node->_is_invariant) {
00902
00903 INT regs_per_ref = 1;
00904 if (arl->_is_scalar_expanded) {
00905 if (MTYPE_is_complex(arl->Base_Array->Type))
00906 regs_per_ref *= 2;
00907 if (MTYPE_is_quad(arl->Base_Array->Type))
00908 regs_per_ref *= 2;
00909 }
00910 else {
00911 WN* parent = LWN_Get_Parent(node->Wn);
00912 if (MTYPE_is_complex(WN_desc(parent)) ||
00913 MTYPE_is_complex(WN_rtype(parent)))
00914 regs_per_ref *= 2;
00915 if (MTYPE_is_quad(WN_desc(parent)) ||
00916 MTYPE_is_quad(WN_rtype(parent)))
00917 regs_per_ref *= 2;
00918 }
00919
00920 if (node->_has_store) {
00921 num_invar_refs += regs_per_ref;
00922 }
00923 if (node->_has_load) {
00924 num_invar_refs += regs_per_ref;
00925 }
00926 if (debug_model) {
00927 fprintf(TFile, " INV: ");
00928 if (node->_has_load) fprintf(TFile, "LOAD ");
00929 if (node->_has_store) fprintf(TFile, "STORE");
00930 node->Print(TFile);
00931 }
00932 }
00933 else {
00934 if (debug_model) {
00935 fprintf(TFile, " MIXED INV/NOINV -> Assume INV = 0\n");
00936 }
00937 return 0;
00938 }
00939 }
00940 return num_invar_refs;
00941 }
00942
00943 static INT
00944 Num_Invariant_Refs(ARRAY_REF* ar, INT loop)
00945 {
00946 INT num_invar_refs = 0;
00947 ARRAY_REF local_ar(ar, &Model_Local_Pool);
00948 for (INT i = 0; i < local_ar.Elements(); i++) {
00949 num_invar_refs += Num_Invariant_Refs(local_ar.Array_Ref_List(i), loop);
00950 }
00951 return num_invar_refs;
00952 }
00953
00954
00955
00956 void
00957 LOOP_MODEL::Try_Inner(BOOL* can_be_unrolled,
00958 INT outermost_can_be_tiled,
00959 INT inner,
00960 INT num_loops)
00961 {
00962 INT i, j;
00963 double machine_cycles;
00964
00965 if (debug_model) {
00966 fprintf(TFile,"Trying loop %d for inner \n",inner);
00967 }
00968
00969 _model_limit = MODEL_LIMIT_UNSET;
00970
00971 MEM_POOL_Push(&Model_Local_Pool);
00972
00973 INT* unroll_factors = CXX_NEW_ARRAY(INT, num_loops, &Model_Local_Pool);
00974 for (i = 0; i < num_loops; i++) {
00975 unroll_factors[i] = 1;
00976 _block_number_inner[i] = 1;
00977 }
00978
00979 _num_int_regs_inner = Target_INTRs + 1;
00980 _num_int_refs_inner = 0;
00981 _num_fp_regs_inner = Target_FPRs + 1;
00982 _num_fp_refs_inner = 0;
00983
00984 ARRAY_REF* arl_for_cache =
00985 CXX_NEW(ARRAY_REF(_arl, &Model_Local_Pool), &Model_Local_Pool);
00986
00987 if (_OP_resource_count &&
00988 _base_fp_regs + _scalar_fp_regs <= Target_FPRs &&
00989 _base_int_regs + _scalar_int_regs <= Target_INTRs) {
00990
00991 _latency_cycles =
00992 _lat_graph->Max_Cycle(inner, _loop_rcycles_unroll_by[Max_Unroll_Prod-1]);
00993
00994 if (debug_model) {
00995 fprintf(TFile, "Latency cycles for inner loop %d: %7.2f\n",
00996 inner, _latency_cycles);
00997 }
00998
00999 BOOL can_reg_allocate;
01000 BOOL try_further_unroll = TRUE;
01001
01002 _num_cycles_inner = -1.0;
01003
01004 Inner_Invariant_Refs = Num_Invariant_Refs(_arl, inner);
01005 if (debug_model) {
01006 fprintf(TFile, "For inner loop %d there are %d invariant refs\n",
01007 inner, Inner_Invariant_Refs);
01008 }
01009
01010 ARRAY_REF* new_arl =
01011 CXX_NEW(ARRAY_REF(_arl, &Model_Local_Pool), &Model_Local_Pool);
01012
01013 Try_Unroll(can_be_unrolled, inner, num_loops, unroll_factors, 0, 1,
01014 &can_reg_allocate, &try_further_unroll, new_arl);
01015
01016 for (i = 0; i < num_loops; i++)
01017 if (_block_number_inner[i] > 1)
01018 arl_for_cache->Unroll(i, _block_number_inner[i]);
01019 }
01020 else {
01021
01022 _num_cycles_inner = 0.01;
01023 _latency_cycles = 0;
01024 machine_cycles = -1.0;
01025 }
01026
01027 arl_for_cache->Remove_Cse(inner, 0, 1);
01028
01029 double cycles_per_iter;
01030 double overhead_cycles;
01031
01032 machine_cycles = _num_cycles_inner;
01033
01034 FmtAssert(num_loops < 64,
01035 ("Impossibly large number of loops %d", num_loops));
01036
01037 INT legal_inners[64];
01038 INT legal_tiles[64];
01039 INT* pinners = legal_inners;
01040 INT* ptiles = legal_tiles;
01041
01042
01043 for (i = num_loops - 1; i >= 0; i--) {
01044 if (_can_be_inner[i])
01045 *pinners++ = i;
01046 else if (i >= outermost_can_be_tiled)
01047 *ptiles++ = i;
01048 _new_order_inner[i] = _required_permutation[i];
01049 }
01050 DOLOOP_STACK stack(&LNO_local_pool);
01051 Build_Doloop_Stack(_wn, &stack);
01052 Cache_Model(arl_for_cache, num_loops-1, inner,
01053 legal_inners, pinners - legal_inners,
01054 legal_tiles, ptiles - legal_tiles,
01055 _block_number_inner, &stack,
01056 _blocking_disabled, _required_blocksize,
01057 _num_cycles_inner, _num_fp_refs_inner+_num_int_refs_inner,
01058 _new_order_inner,
01059 &_nstrips_inner, &_stripdepth_inner,
01060 _iloop_inner, _stripsz_inner, _striplevel_inner,
01061 &cycles_per_iter, &overhead_cycles);
01062
01063 if (LNO_Verbose || Debug_Cache_Model)
01064 printf("inner = %d -> cycle est %g (before cache) ",
01065 inner, _num_cycles_inner);
01066 if (debug_model)
01067 fprintf(TFile,"inner = %d -> cycle est %g (before cache) ",
01068 inner, _num_cycles_inner);
01069
01070 _num_cycles_inner += cycles_per_iter;
01071
01072 double cache_cycles = _num_cycles_inner - machine_cycles - overhead_cycles;
01073
01074 if (LNO_Verbose || Debug_Cache_Model)
01075 printf(" %g (after cache)\n", _num_cycles_inner);
01076 if (debug_model)
01077 fprintf(TFile," %g (after cache)\n", _num_cycles_inner);
01078
01079 if (LNO_Verbose || Debug_Cache_Model)
01080 printf(" %g cache cycles %g overhead cycles\n", cache_cycles,
01081 overhead_cycles);
01082 if (debug_model)
01083 fprintf(TFile, " %g cache cycles %g overhead cycles\n", cache_cycles,
01084 overhead_cycles);
01085
01086 if (_num_cycles == -1.0 ||
01087 _num_cycles_inner < _num_cycles ||
01088 (_num_cycles_inner == _num_cycles &&
01089 (_num_fp_regs_inner+_num_fp_regs_inner < _num_int_regs+_num_fp_regs ||
01090 _unroll_prod_inner < _unroll_prod))) {
01091
01092 _num_cycles = _num_cycles_inner;
01093 _num_int_regs = _num_int_regs_inner;
01094 _num_int_refs = _num_int_refs_inner;
01095 _num_fp_regs = _num_fp_regs_inner;
01096 _num_fp_refs = _num_fp_refs_inner;
01097 _unroll_prod = _unroll_prod_inner;
01098 _nstrips = _nstrips_inner;
01099 _stripdepth = _stripdepth_inner;
01100 _inner_loop = _inner_loop_inner;
01101
01102 for (i = 0; i < num_loops; i++) {
01103 _new_order[i] = _new_order_inner[i];
01104 _block_number[i] = _block_number_inner[i];
01105 }
01106
01107 for (j = 0; j < _nstrips; j++) {
01108 _iloop[j] = _iloop_inner[j];
01109 _stripsz[j] = _stripsz_inner[j];
01110 _striplevel[j] = _striplevel_inner[j];
01111 }
01112
01113 if (debug_model) {
01114 fprintf(TFile,"An overall best\n");
01115 }
01116 }
01117
01118 if (LNO_Analysis) {
01119 Model_Results_Analysis(inner, num_loops, outermost_can_be_tiled,
01120 machine_cycles, cache_cycles, overhead_cycles);
01121 }
01122 CXX_DELETE_ARRAY(unroll_factors, &Model_Local_Pool);
01123 MEM_POOL_Pop(&Model_Local_Pool);
01124 }
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136 void
01137 LOOP_MODEL::Try_Unroll(BOOL* can_be_unrolled, INT inner, INT num_loops,
01138 INT* unroll_factors, INT l, INT unroll_product,
01139 BOOL* can_reg_allocate, BOOL* try_further_unroll,
01140 ARRAY_REF* arl)
01141 {
01142 if (l >= num_loops) {
01143 Evaluate(inner, num_loops, unroll_factors, unroll_product,
01144 can_reg_allocate, try_further_unroll, arl, can_be_unrolled);
01145 }
01146 else if ((l != inner) && can_be_unrolled[l]) {
01147 INT known_unroll =
01148 _required_unroll[l] ? _required_unroll[l] : LNO_Outer_Unroll;
01149 INT u = known_unroll ? known_unroll : 1;
01150 double prod = unroll_product;
01151
01152
01153
01154
01155
01156
01157 BOOL can_allocate = TRUE;
01158 INT max_unroll_prod = 999999;
01159 INT max_unroll = 999999;
01160 if (known_unroll) {
01161 max_unroll = known_unroll;
01162 }
01163 else {
01164 if (LNO_Outer_Unroll_Max)
01165 max_unroll = LNO_Outer_Unroll_Max;
01166 if (LNO_Outer_Unroll_Prod_Max)
01167 max_unroll_prod = LNO_Outer_Unroll_Prod_Max;
01168 if (_est_num_iterations[l] > 0)
01169 max_unroll = MIN(max_unroll,_est_num_iterations[l]);
01170 }
01171
01172 while (can_allocate &&
01173 *try_further_unroll &&
01174 (unroll_product * u) <= max_unroll_prod &&
01175 u <= max_unroll) {
01176 if (!known_unroll &&
01177 _est_num_iterations[l] > 0 &&
01178 u != max_unroll &&
01179 u*2 > _est_num_iterations[l]) {
01180
01181
01182
01183
01184
01185 u = max_unroll;
01186 continue;
01187 }
01188 MEM_POOL_Push(&Model_Local_Pool);
01189 ARRAY_REF* new_arl;
01190 unroll_factors[l] = u;
01191 if (u > 1) {
01192 new_arl = CXX_NEW(ARRAY_REF(arl,&Model_Local_Pool),&Model_Local_Pool);
01193 new_arl->Unroll(l,u);
01194 prod = unroll_product*u;
01195 }
01196 else {
01197 new_arl = arl;
01198 }
01199 new_arl->Remove_Cse(inner, Max_Cse_Dist, Find_Step(_wn,inner));
01200 new_arl->Mark_Invariants(inner);
01201 Try_Unroll(can_be_unrolled,inner,num_loops,unroll_factors,l+1,int(prod),
01202 &can_allocate, try_further_unroll, new_arl);
01203 if ((u == 1) && !can_allocate) {
01204 *can_reg_allocate = FALSE;
01205 }
01206
01207 u++;
01208 MEM_POOL_Pop(&Model_Local_Pool);
01209 }
01210 unroll_factors[l] = 1;
01211 }
01212 else {
01213 Try_Unroll(can_be_unrolled, inner, num_loops, unroll_factors, l+1,
01214 unroll_product, can_reg_allocate, try_further_unroll, arl);
01215 }
01216 }
01217
01218
01219
01220
01221
01222
01223
01224 void
01225 LOOP_MODEL::Evaluate(INT inner, INT num_loops, INT* unroll_factors,
01226 INT unroll_product, BOOL* can_reg_allocate,
01227 BOOL* try_further_unroll, ARRAY_REF* arl,
01228 BOOL* can_be_unrolled)
01229 {
01230 _num_evaluations++;
01231
01232 if (debug_model) {
01233 fprintf(TFile, "\nDoing an evaluation ");
01234 fprintf(TFile, "unroll_factors = (");
01235 for (INT b = 0; b < num_loops; b++) {
01236 fprintf(TFile, " %d ", unroll_factors[b]);
01237 }
01238 fprintf(TFile, ") \n");
01239 }
01240
01241 BOOL did_unroll = FALSE;
01242 for (INT b = 0; b < num_loops && !did_unroll; b++) {
01243 if (unroll_factors[b] > 1 &&
01244 !(unroll_factors[b] == _required_unroll[b]) &&
01245 !(_required_unroll[b] <= 0 && unroll_factors[b] == LNO_Outer_Unroll))
01246 did_unroll = TRUE;
01247 }
01248
01249 MODEL_LIMIT this_limit = MODEL_LIMIT_UNSET;
01250
01251
01252 INT num_fp_regs, num_fp_refs, num_fp_variant_stores, num_fp_invariant_stores;
01253 INT num_int_regs, num_int_refs, num_int_variant_stores, num_int_invariant_stores;
01254 INT new_base_regs = _base_fp_regs;
01255 INT num_fp_spills = 0;
01256 INT num_int_spills = 0;
01257 arl->Calc_Regs_And_Refs(&num_fp_regs, &num_fp_refs,
01258 &num_fp_variant_stores, &num_fp_invariant_stores,
01259 &num_int_regs, &num_int_refs,
01260 &num_int_variant_stores, &num_int_invariant_stores);
01261
01262
01263
01264
01265
01266 double unequal_unroll_penalty = 1.0;
01267 if (num_loops >= 2) {
01268 INT un1, un2;
01269 if (unroll_factors[0] > unroll_factors[1]) {
01270 un1 = unroll_factors[0], un2 = unroll_factors[1];
01271 }
01272 else {
01273 un1 = unroll_factors[1], un2 = unroll_factors[0];
01274 }
01275 for (INT i = 2; i < num_loops; i++) {
01276 if (unroll_factors[i] > un1) {
01277 un2 = un1;
01278 un1 = unroll_factors[i];
01279 }
01280 else if (unroll_factors[i] > un2) {
01281 un2 = unroll_factors[i];
01282 }
01283 }
01284 unequal_unroll_penalty = pow(((double)un2)/un1, 0.3);
01285 }
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300 #ifndef KEY
01301 #define MINVAR_MAGIC_COEFF (0.20 * _loop_rcycles_unroll_by[Max_Unroll_Prod-1])
01302 #else
01303
01304 #define MINVAR_MAGIC_COEFF 0.20
01305 #endif
01306 double minvar_benefit = MINVAR_MAGIC_COEFF
01307 * Inner_Invariant_Refs
01308 * unroll_product
01309 * unequal_unroll_penalty
01310 * Invariant_Ref_Coeff;
01311
01312 if (debug_model) {
01313 fprintf(TFile,
01314 "Minvar benefit: %.3f * %d * %d * %.3f * %.3f = %.3f\n",
01315 MINVAR_MAGIC_COEFF,
01316 Inner_Invariant_Refs,
01317 unroll_product,
01318 unequal_unroll_penalty,
01319 Invariant_Ref_Coeff,
01320 minvar_benefit);
01321 }
01322
01323
01324 if (num_fp_invariant_stores > 4*num_fp_variant_stores) {
01325 new_base_regs /= 3;
01326 }
01327
01328
01329 if (_est_num_iterations[inner] > LNO_Small_Trip_Count) {
01330 num_fp_regs += (Num_Complex_Mpy * unroll_product);
01331 }
01332 else {
01333
01334 new_base_regs = 0;
01335 num_fp_regs = 0;
01336 }
01337
01338 *can_reg_allocate = TRUE;
01339 if (num_fp_regs + new_base_regs +_scalar_fp_regs > Target_FPRs) {
01340 if (!did_unroll) {
01341 double fp_refs_per_reg = (_num_fp_array_refs + _num_fp_scalar_refs)
01342 / (num_fp_regs + _scalar_fp_regs);
01343 num_fp_spills =
01344 (INT)((num_fp_regs+new_base_regs+_scalar_fp_regs-Target_FPRs)
01345 * fp_refs_per_reg);
01346 num_fp_refs += num_fp_spills;
01347 }
01348 if (debug_model) {
01349 fprintf(TFile,"Can't FP register allocate \n");
01350 }
01351 *can_reg_allocate = FALSE;
01352 }
01353 if (num_int_regs + _base_int_regs + _scalar_int_regs > Target_INTRs) {
01354 if (!did_unroll) {
01355 double int_refs_per_reg = (_num_int_array_refs + _num_int_scalar_refs)
01356 / (num_int_regs + _scalar_int_regs);
01357 num_int_spills =
01358 (INT)((num_int_regs + _base_int_regs + _scalar_int_regs - Target_INTRs)
01359 * int_refs_per_reg);
01360 num_int_refs += num_int_spills;
01361 }
01362 if (debug_model) {
01363 fprintf(TFile, "Couldn't INT register allocate (%d)\n",
01364 num_int_regs + _base_int_regs + _scalar_int_regs);
01365 }
01366 *can_reg_allocate = FALSE;
01367 }
01368
01369
01370 double MEM_issue = ((double) (num_fp_refs+num_int_refs)) / unroll_product;
01371 double MEM_rcycles = MEM_issue/_num_mem_units;
01372 if (debug_model) {
01373 fprintf(TFile, "MEM_rcycles: %.2f\n", MEM_rcycles);
01374 fprintf(TFile, "Issue: %.0f + %.0f + %.0f = %.0f\n",
01375 _OP_issue * unroll_product,
01376 _LOOP_INIT_issue,
01377 MEM_issue * unroll_product,
01378 _OP_issue * unroll_product +
01379 _LOOP_INIT_issue +
01380 MEM_issue * unroll_product);
01381 }
01382
01383 double MEM_issue_minus_spills =
01384 ((double)(num_fp_refs - num_fp_spills + num_int_refs - num_int_spills))
01385 / unroll_product;
01386 double MEM_rcycles_minus_spills = MEM_issue_minus_spills/_num_mem_units;
01387
01388 double issue_limit =
01389 (_OP_issue + _LOOP_INIT_issue/unroll_product + MEM_issue) / _issue_rate;
01390 double issue_limit_minus_spills =
01391 (_OP_issue + _LOOP_INIT_issue/unroll_product + MEM_issue_minus_spills)
01392 / _issue_rate;
01393 double ideal_resource_cycles = _loop_rcycles_unroll_by[Max_Unroll_Prod-1];
01394 double resource_cycles =
01395 MAX(_loop_rcycles_unroll_by[unroll_product-1],
01396 MAX(MEM_rcycles, issue_limit)) - minvar_benefit;
01397 double resource_cycles_minus_spills =
01398 MAX(_loop_rcycles_unroll_by[unroll_product-1],
01399 MAX(MEM_rcycles_minus_spills, issue_limit_minus_spills));
01400 #ifdef KEY
01401
01402
01403
01404 resource_cycles_minus_spills -= minvar_benefit;
01405 #endif
01406
01407
01408 if (!(*can_reg_allocate) ||
01409 (resource_cycles > ideal_resource_cycles) ||
01410 (minvar_benefit > 0.0) ||
01411 (_latency_cycles/unroll_product > resource_cycles)) {
01412 *try_further_unroll = TRUE;
01413 }
01414 else {
01415 *try_further_unroll = FALSE;
01416 this_limit = MODEL_LIMIT_IDEAL;
01417 }
01418
01419 #define SWP_INSTR_LIMIT 80
01420
01421
01422 if (unroll_product > 1 &&
01423 issue_limit * _issue_rate * unroll_product > SWP_INSTR_LIMIT) {
01424 if (debug_model) {
01425 fprintf(TFile, "Unrolled loop would be too large for SWP\n");
01426 }
01427 *try_further_unroll = FALSE;
01428 return;
01429 }
01430
01431 double cycles = MAX(resource_cycles, _latency_cycles/unroll_product);
01432 double cycles_minus_spills = MAX(resource_cycles_minus_spills,
01433 _latency_cycles/unroll_product);
01434
01435 if (!(*can_reg_allocate) &&
01436 !did_unroll &&
01437 cycles == cycles_minus_spills) {
01438
01439 *can_reg_allocate = TRUE;
01440 num_fp_regs = Target_FPRs - new_base_regs - _scalar_fp_regs;
01441 num_int_regs = Target_INTRs - _base_int_regs - _scalar_int_regs;
01442 }
01443
01444 INT fp_reg_usage = num_fp_regs + new_base_regs +_scalar_fp_regs;
01445 INT int_reg_usage = num_int_regs + _base_int_regs +_scalar_int_regs;
01446
01447 if (!*can_reg_allocate) {
01448 if (did_unroll) {
01449 if (debug_model) {
01450 fprintf(TFile,"Can't register allocate\n");
01451 }
01452 return;
01453 }
01454 }
01455 else if (fp_reg_usage > Target_FPRs-2) {
01456 cycles *= 1.1;
01457 }
01458 else if (int_reg_usage > Target_INTRs-2){
01459 cycles *= 1.1;
01460 }
01461
01462
01463
01464
01465
01466 double worst_mod = 0.0;
01467 for (INT l = 0; l < num_loops; l++) {
01468 if(unroll_factors[l] &&
01469 _est_num_iterations[l] > 0 &&
01470 _est_num_iterations[l] < LNO_Outer_Unroll_Max) {
01471 double this_mod = (_est_num_iterations[l] % unroll_factors[l]);
01472 if (this_mod > _est_num_iterations[l]*worst_mod) {
01473 worst_mod = this_mod/_est_num_iterations[l];
01474 }
01475 }
01476 }
01477 if (worst_mod > 0.1) {
01478 cycles *= 1.05;
01479 }
01480
01481 #ifdef TARG_IA64
01482
01483
01484 INT num_fp_ops = (INT)(_OP_issue * unroll_product);
01485 if ((num_fp_ops & 1) &&
01486 issue_limit * _issue_rate * unroll_product > SWP_INSTR_LIMIT / 2) {
01487 cycles *= 1.05;
01488 }
01489 #endif
01490
01491 if (debug_model) {
01492 fprintf(TFile,"Ignoring cache effects ");
01493 fprintf(TFile,"this loop takes %f cycles\n",cycles);
01494 fprintf(TFile,"Uses %d int and %d fp regs\n", int_reg_usage, fp_reg_usage);
01495 }
01496 if (this_limit == MODEL_LIMIT_IDEAL) {
01497 if (debug_model) fprintf(TFile,"Ideal schedule \n");
01498 }
01499 else if (resource_cycles > _latency_cycles/unroll_product) {
01500 this_limit = MODEL_LIMIT_RES;
01501 if (debug_model) fprintf(TFile,"Number of cycles limited by resources \n");
01502 }
01503 else {
01504 this_limit = MODEL_LIMIT_LAT;
01505 if (debug_model) fprintf(TFile,"Number of cycles limited by latency \n");
01506 }
01507
01508 if (_num_cycles_inner == -1.0 ||
01509 1.01*cycles < _num_cycles_inner ||
01510 (cycles <= _num_cycles_inner &&
01511 (unroll_product < _unroll_prod_inner ||
01512 fp_reg_usage+int_reg_usage<_num_fp_regs_inner+_num_int_regs_inner))) {
01513
01514 _inner_loop_inner = inner;
01515 for (INT i=0; i<num_loops; i++) {
01516 _block_number_inner[i] = unroll_factors[i];
01517 }
01518 _num_cycles_inner = cycles;
01519 _num_fp_regs_inner = fp_reg_usage;
01520 _num_fp_refs_inner = num_fp_refs;
01521 _num_int_regs_inner = int_reg_usage;
01522 _num_int_refs_inner = num_int_refs;
01523 _unroll_prod_inner = unroll_product;
01524 _model_limit = this_limit;
01525
01526 if (debug_model) {
01527 fprintf(TFile,
01528 "Found a new best for unrolling factors for this inner loop\n");
01529 }
01530 }
01531 }
01532
01533
01534
01535
01536
01537
01538 TI_RES_COUNT*
01539 LOOP_MODEL::OP_Resources(WN* wn,
01540 double* num_instr,
01541 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
01542 {
01543 TI_RES_COUNT* resource_count = TI_RES_COUNT_Alloc(&Model_Local_Pool);
01544 if (OP_Resources_R(wn, resource_count, num_instr, invar_table) == -1) {
01545 return(NULL);
01546 }
01547 return(resource_count);
01548 }
01549
01550
01551
01552
01553 double
01554 LOOP_MODEL::OP_Cycles(REGISTER_MODEL* rmodel,
01555 double* num_instr,
01556 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table,
01557 MEM_POOL* pool)
01558 {
01559 TI_RES_COUNT* resource_count = TI_RES_COUNT_Alloc(pool);
01560 for (INT i = 0; i < rmodel->Num_Statements(); i++) {
01561 if (OP_Resources_R(rmodel->Statement(i),
01562 resource_count,
01563 num_instr,
01564 invar_table) == -1) {
01565 return -1.0;
01566 }
01567 }
01568 LNOTARGET_Loop_Inc_Test_Res(resource_count);
01569 return TI_RES_COUNT_Min_Cycles(resource_count);
01570 }
01571
01572
01573
01574 static BOOL
01575 Multiply_Will_Be_Strength_Reduced(WN* wn)
01576 {
01577 Is_True(WN_operator(wn) == OPR_MPY, ("Expected an OPR_MPY node"));
01578 WN* ldid;
01579 if (WN_operator(WN_kid0(wn)) == OPR_INTCONST) {
01580 ldid = WN_kid1(wn);
01581 }
01582 else if (WN_operator(WN_kid1(wn)) == OPR_INTCONST) {
01583 ldid = WN_kid0(wn);
01584 }
01585 else {
01586 return FALSE;
01587 }
01588 if (WN_operator(ldid) != OPR_LDID) {
01589 return FALSE;
01590 }
01591 #ifdef TARG_X8664
01592
01593
01594 return TRUE;
01595 #endif
01596 for (WN* parent = ldid; parent; parent = LWN_Get_Parent(parent)) {
01597 if (WN_operator(parent) == OPR_DO_LOOP) {
01598 WN* idx_var = WN_index(parent);
01599 if (WN_st_idx(idx_var) == WN_st_idx(ldid)) {
01600 return TRUE;
01601 }
01602 }
01603 }
01604 return FALSE;
01605 }
01606
01607
01608
01609 static inline BOOL
01610 Target_ISA_Has_Madd()
01611 {
01612 return (Is_Target_ISA_M4Plus() || Is_Target_ISA_I1Plus());
01613 }
01614
01615
01616
01617
01618
01619
01620 INT
01621 LOOP_MODEL::OP_Resources_R(WN* wn,
01622 TI_RES_COUNT* resource_count,
01623 double* num_instr,
01624 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
01625 {
01626 TOP top;
01627 OPERATOR oper = WN_operator(wn);
01628 TYPE_ID rtype = WN_rtype(wn);
01629 TYPE_ID desc = WN_desc(wn);
01630
01631 if (OPERATOR_is_leaf(oper)) {
01632 return (1);
01633 }
01634
01635
01636
01637 #ifdef TARG_X8664
01638 if(MTYPE_is_vector(rtype) || MTYPE_is_vector(desc))
01639 return -1;
01640 #endif
01641
01642 if (oper == OPR_BLOCK) {
01643 WN *kid = WN_first (wn);
01644 while (kid) {
01645 if (OP_Resources_R(kid,resource_count,num_instr,invar_table) == -1) {
01646 return -1;
01647 }
01648 kid = WN_next(kid);
01649 }
01650 return 1;
01651 }
01652
01653 if (invar_table) {
01654 BIT_VECTOR* bv = invar_table->Find(wn);
01655 if (bv && bv->Pop_Count()) {
01656 return 1;
01657 }
01658 }
01659
01660 if (oper == OPR_CVT ||
01661 oper == OPR_RND ||
01662 oper == OPR_CEIL ||
01663 oper == OPR_TRUNC ||
01664 #ifdef TARG_X8664
01665
01666 oper == OPR_FLOOR &&
01667 !(desc == MTYPE_F4 && rtype == MTYPE_F4) &&
01668 !(desc == MTYPE_F8 && rtype == MTYPE_F8)
01669 #else
01670 oper == OPR_FLOOR
01671 #endif
01672 ) {
01673 if (OP_Cycles_Cvt(WN_opcode(wn), resource_count, num_instr) == -1) {
01674 return -1;
01675 }
01676 }
01677 else if (oper == OPR_INTRINSIC_OP) {
01678 if (FP_Cycles_Intrinsic(wn, resource_count, num_instr) == -1) {
01679 return -1;
01680 }
01681 }
01682 else if (oper == OPR_REALPART ||
01683 oper == OPR_IMAGPART ||
01684 oper == OPR_PAREN ||
01685 oper == OPR_PARM) {
01686
01687 }
01688 else if (OPERATOR_is_expression(oper) &&
01689 !OPERATOR_is_load(oper) &&
01690 oper != OPR_CONST) {
01691
01692 if (desc == MTYPE_FQ ||
01693 desc == MTYPE_CQ ||
01694 rtype == MTYPE_FQ ||
01695 rtype == MTYPE_CQ) {
01696 return -1;
01697 }
01698
01699 else if (desc == MTYPE_F4 ||
01700 desc == MTYPE_F8 ||
01701 #if defined(TARG_IA64)
01702 desc == MTYPE_F10 ||
01703 rtype == MTYPE_F10 ||
01704 #endif
01705 rtype == MTYPE_F4 ||
01706 rtype == MTYPE_F8) {
01707
01708 if (Target_ISA_Has_Madd() &&
01709 (oper == OPR_ADD || oper == OPR_SUB) &&
01710 (WN_operator(WN_kid0(wn)) == OPR_MPY ||
01711 WN_operator(WN_kid1(wn)) == OPR_MPY)) {
01712 return FP_Cycles_Madd(wn, resource_count, num_instr, invar_table);
01713 }
01714 else if (oper == OPR_MAX || oper == OPR_MIN) {
01715 *num_instr += LNOTARGET_FP_Min_Max_Res(resource_count, rtype);
01716 }
01717 else if (oper == OPR_SQRT) {
01718 *num_instr += LNOTARGET_FP_Sqrt_Res(resource_count, rtype);
01719 }
01720 else if ((top = LNOTARGET_Whirl_To_Top(wn)) != TOP_UNDEFINED) {
01721 *num_instr += 1.0;
01722 TI_RES_COUNT_Add_Op_Resources(resource_count, top);
01723 }
01724 else if (oper == OPR_DIV) {
01725 *num_instr += LNOTARGET_FP_Div_Res(resource_count, rtype);
01726 }
01727 else if (oper == OPR_RECIP) {
01728 *num_instr += LNOTARGET_FP_Recip_Res(resource_count, rtype);
01729 }
01730 else if (oper == OPR_RSQRT
01731 #ifdef TARG_X8664
01732 || oper == OPR_ATOMIC_RSQRT
01733 #endif
01734 ) {
01735 *num_instr += LNOTARGET_FP_Rsqrt_Res(resource_count, rtype);
01736 }
01737 #ifdef TARG_X8664
01738 else if (oper == OPR_FLOOR) {
01739 *num_instr += LNOTARGET_FP_Floor_Res(resource_count, rtype);
01740 }
01741 else if (oper == OPR_SELECT) {
01742 *num_instr += LNOTARGET_Fp_Select_Res(resource_count, rtype);
01743 }
01744 else if (OPCODE_is_compare(WN_opcode(wn))) {
01745 *num_instr += LNOTARGET_Fp_Compare_Res(resource_count, rtype);
01746 }
01747 #endif
01748 else {
01749 return -1;
01750 }
01751 }
01752 else if (desc == MTYPE_C4 ||
01753 desc == MTYPE_C8 ||
01754 #if defined(TARG_IA64)
01755 desc == MTYPE_C10 ||
01756 rtype == MTYPE_C10 ||
01757 #endif
01758 rtype == MTYPE_C4 ||
01759 rtype == MTYPE_C8) {
01760 if (oper == OPR_ADD || oper== OPR_SUB) {
01761 *num_instr += LNOTARGET_Complex_Add_Res(resource_count, rtype);
01762 }
01763 else if (oper == OPR_MPY) {
01764 *num_instr += LNOTARGET_Complex_Mult_Res(resource_count, rtype);
01765
01766
01767
01768 WN* kid0 = WN_kid0(wn);
01769 WN* kid1 = WN_kid1(wn);
01770 if ((WN_operator(kid0) != OPR_COMPLEX
01771 || ((WN_operator(WN_kid0(kid0)) != OPR_CONST
01772 || !Targ_Is_Zero(STC_val(WN_st(WN_kid0(kid0)))))
01773 && (WN_operator(WN_kid1(kid0)) != OPR_CONST
01774 || !Targ_Is_Zero(STC_val(WN_st(WN_kid1(kid0)))))))
01775 &&
01776 (WN_operator(kid1) != OPR_COMPLEX
01777 || ((WN_operator(WN_kid0(kid1)) != OPR_CONST
01778 || !Targ_Is_Zero(STC_val(WN_st(WN_kid0(kid1)))))
01779 && (WN_operator(WN_kid1(kid1)) != OPR_CONST
01780 || !Targ_Is_Zero(STC_val(WN_st(WN_kid1(kid1))))))))
01781 {
01782 Num_Complex_Mpy++;
01783 }
01784 }
01785 else if (oper == OPR_NEG) {
01786 *num_instr += LNOTARGET_Complex_Neg_Res(resource_count, rtype);
01787 }
01788 else if (oper == OPR_COMPLEX) {
01789 ;
01790 }
01791 else {
01792 return -1;
01793 }
01794
01795 }
01796 else if (desc == MTYPE_B ||
01797 desc == MTYPE_I1 ||
01798 desc == MTYPE_I2 ||
01799 desc == MTYPE_I4 ||
01800 desc == MTYPE_I8 ||
01801 desc == MTYPE_U1 ||
01802 desc == MTYPE_U2 ||
01803 desc == MTYPE_U4 ||
01804 desc == MTYPE_U8 ||
01805 rtype == MTYPE_B ||
01806 rtype == MTYPE_I1 ||
01807 rtype == MTYPE_I2 ||
01808 rtype == MTYPE_I4 ||
01809 rtype == MTYPE_I8 ||
01810 rtype == MTYPE_U1 ||
01811 rtype == MTYPE_U2 ||
01812 rtype == MTYPE_U4 ||
01813 rtype == MTYPE_U8) {
01814
01815 BOOL double_word = (MTYPE_is_double(desc) || MTYPE_is_double(rtype));
01816
01817 switch (oper) {
01818 case OPR_ARRAY:
01819 return 1;
01820 case OPR_INTRINSIC_OP:
01821 return -1;
01822 case OPR_TAS:
01823 (*num_instr)++;
01824 break;
01825 #ifdef TARG_X8664
01826 case OPR_SELECT:
01827 *num_instr += LNOTARGET_Int_Select_Res(resource_count, rtype);
01828 break;
01829 #else
01830 case OPR_SELECT:
01831 *num_instr += LNOTARGET_Int_Select_Res(resource_count);
01832 break;
01833 #endif
01834 case OPR_CVTL:
01835 #ifdef TARG_X8664
01836 *num_instr += LNOTARGET_Int_Cvtl_Res(resource_count, rtype,
01837 WN_cvtl_bits(wn));
01838 #else
01839 *num_instr += LNOTARGET_Int_Cvtl_Res(resource_count);
01840 #endif
01841 break;
01842 case OPR_NEG:
01843 *num_instr += LNOTARGET_Int_Neg_Res(resource_count, double_word);
01844 break;
01845 case OPR_ABS:
01846 *num_instr += LNOTARGET_Int_Abs_Res(resource_count, double_word);
01847 break;
01848 case OPR_PAREN:
01849 break;
01850 #ifdef TARG_X8664
01851 case OPR_BNOT:
01852 *num_instr += LNOTARGET_Int_Bnot_Res(resource_count, rtype);
01853 break;
01854 case OPR_LNOT:
01855 *num_instr += LNOTARGET_Int_Lnot_Res(resource_count, rtype);
01856 break;
01857 #else
01858 case OPR_BNOT:
01859 *num_instr += LNOTARGET_Int_Bnot_Res(resource_count);
01860 break;
01861 case OPR_LNOT:
01862 *num_instr += LNOTARGET_Int_Lnot_Res(resource_count);
01863 break;
01864 #endif
01865 case OPR_MPY:
01866 #ifdef TARG_X8664
01867
01868 if (WN_operator_is(WN_kid0(wn), OPR_INTCONST))
01869 *num_instr += LNOTARGET_Int_Mult_Str_Red_Res(resource_count,
01870 rtype,
01871 WN_const_val(WN_kid0(wn)));
01872 else if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01873 *num_instr += LNOTARGET_Int_Mult_Str_Red_Res(resource_count,
01874 rtype,
01875 WN_const_val(WN_kid1(wn)));
01876 else
01877 *num_instr += LNOTARGET_Int_Mult_Res(resource_count, double_word);
01878 break;
01879 #else
01880 if (!Multiply_Will_Be_Strength_Reduced(wn)) {
01881 *num_instr += LNOTARGET_Int_Mult_Res(resource_count, double_word);
01882 break;
01883 }
01884 #endif
01885
01886
01887 case OPR_ADD:
01888 *num_instr += LNOTARGET_Int_Add_Res(resource_count, double_word);
01889 break;
01890 case OPR_SUB:
01891 *num_instr += LNOTARGET_Int_Sub_Res(resource_count, double_word);
01892 break;
01893 #ifdef TARG_X8664
01894 case OPR_DIV:
01895 if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01896 *num_instr += LNOTARGET_Int_Div_Str_Red_Res(resource_count,
01897 rtype,
01898 WN_const_val(WN_kid1(wn)));
01899 else
01900 *num_instr += LNOTARGET_Int_Div_Res(resource_count, rtype);
01901 break;
01902 case OPR_MOD:
01903 if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01904 *num_instr += LNOTARGET_Int_Mod_Str_Red_Res(resource_count,
01905 rtype,
01906 WN_const_val(WN_kid1(wn)));
01907 else
01908 *num_instr += LNOTARGET_Int_Mod_Res(resource_count, rtype);
01909 break;
01910 case OPR_REM:
01911 if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01912 *num_instr += LNOTARGET_Int_Rem_Str_Red_Res(resource_count,
01913 rtype,
01914 WN_const_val(WN_kid1(wn)));
01915 else
01916 *num_instr += LNOTARGET_Int_Rem_Res(resource_count, rtype);
01917 break;
01918 case OPR_DIVREM:
01919 if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01920 *num_instr += LNOTARGET_Int_DivRem_Str_Red_Res(resource_count,
01921 rtype,
01922 WN_const_val(WN_kid1(wn)));
01923 else
01924 *num_instr += LNOTARGET_Int_DivRem_Res(resource_count, rtype);
01925 break;
01926 case OPR_MAX:
01927 *num_instr += LNOTARGET_Int_Min_Res(resource_count, rtype);
01928 break;
01929 case OPR_MIN:
01930 *num_instr += LNOTARGET_Int_Max_Res(resource_count, rtype);
01931 break;
01932 case OPR_MINMAX:
01933 *num_instr += LNOTARGET_Int_Min_Max_Res(resource_count, rtype);
01934 break;
01935 case OPR_BAND:
01936 if (WN_operator_is(WN_kid0(wn), OPR_INTCONST))
01937 *num_instr += LNOTARGET_Int_Band_Str_Red_Res(resource_count,
01938 rtype,
01939 WN_const_val(WN_kid0(wn)));
01940 else if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01941 *num_instr += LNOTARGET_Int_Band_Str_Red_Res(resource_count,
01942 rtype,
01943 WN_const_val(WN_kid1(wn)));
01944 else
01945 *num_instr += LNOTARGET_Int_Band_Res(resource_count, rtype);
01946 break;
01947 case OPR_BIOR:
01948 if (WN_operator_is(WN_kid0(wn), OPR_INTCONST))
01949 *num_instr += LNOTARGET_Int_Bior_Str_Red_Res(resource_count,
01950 rtype,
01951 WN_const_val(WN_kid0(wn)));
01952 else if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01953 *num_instr += LNOTARGET_Int_Bior_Str_Red_Res(resource_count,
01954 rtype,
01955 WN_const_val(WN_kid1(wn)));
01956 else
01957 *num_instr += LNOTARGET_Int_Bior_Res(resource_count, rtype);
01958 break;
01959 case OPR_BNOR:
01960 *num_instr += LNOTARGET_Int_Bnor_Res(resource_count, rtype);
01961 break;
01962 case OPR_BXOR:
01963 if (WN_operator_is(WN_kid0(wn), OPR_INTCONST))
01964 *num_instr += LNOTARGET_Int_Bxor_Str_Red_Res(resource_count,
01965 rtype,
01966 WN_const_val(WN_kid0(wn)));
01967 else if (WN_operator_is(WN_kid1(wn), OPR_INTCONST))
01968 *num_instr += LNOTARGET_Int_Bxor_Str_Red_Res(resource_count,
01969 rtype,
01970 WN_const_val(WN_kid1(wn)));
01971 else
01972 *num_instr += LNOTARGET_Int_Bxor_Res(resource_count, rtype);
01973 break;
01974 case OPR_LAND:
01975 *num_instr += LNOTARGET_Int_Land_Res(resource_count, rtype);
01976 break;
01977 case OPR_CAND:
01978 *num_instr += LNOTARGET_Int_Cand_Res(resource_count, rtype);
01979 break;
01980 case OPR_LIOR:
01981 *num_instr += LNOTARGET_Int_Lior_Res(resource_count, rtype);
01982 break;
01983 case OPR_CIOR:
01984 *num_instr += LNOTARGET_Int_Cior_Res(resource_count, rtype);
01985 break;
01986 #else
01987 case OPR_DIV:
01988 #ifndef TARG_MIPS
01989 *num_instr += LNOTARGET_Int_Div_Res(resource_count, double_word);
01990 #else
01991 *num_instr += LNOTARGET_Int_Div_Res(resource_count, double_word,
01992 MTYPE_signed(WN_desc(wn)));
01993 #endif
01994 break;
01995 case OPR_MOD:
01996 *num_instr += LNOTARGET_Int_Mod_Res(resource_count, double_word);
01997 break;
01998 case OPR_REM:
01999 *num_instr += LNOTARGET_Int_Rem_Res(resource_count, double_word);
02000 break;
02001 case OPR_DIVREM:
02002 *num_instr += LNOTARGET_Int_DivRem_Res(resource_count, double_word);
02003 break;
02004 case OPR_MAX:
02005 case OPR_MIN:
02006 case OPR_MINMAX:
02007 *num_instr += LNOTARGET_Int_Min_Max_Res(resource_count,
02008 oper == OPR_MINMAX);
02009 break;
02010 case OPR_BAND:
02011 *num_instr += LNOTARGET_Int_Band_Res(resource_count);
02012 break;
02013 case OPR_BIOR:
02014 *num_instr += LNOTARGET_Int_Bior_Res(resource_count);
02015 break;
02016 case OPR_BNOR:
02017 *num_instr += LNOTARGET_Int_Bnor_Res(resource_count);
02018 break;
02019 case OPR_BXOR:
02020 *num_instr += LNOTARGET_Int_Bxor_Res(resource_count);
02021 break;
02022 case OPR_LAND:
02023 *num_instr += LNOTARGET_Int_Land_Res(resource_count);
02024 break;
02025 case OPR_CAND:
02026 *num_instr += LNOTARGET_Int_Cand_Res(resource_count);
02027 break;
02028 case OPR_LIOR:
02029 *num_instr += LNOTARGET_Int_Lior_Res(resource_count);
02030 break;
02031 case OPR_CIOR:
02032 *num_instr += LNOTARGET_Int_Cior_Res(resource_count);
02033 break;
02034 #endif
02035 case OPR_SHL:
02036 *num_instr += LNOTARGET_Int_Shl_Res(resource_count, double_word);
02037 break;
02038 case OPR_ASHR:
02039 *num_instr += LNOTARGET_Int_Ashr_Res(resource_count, double_word);
02040 break;
02041 case OPR_LSHR:
02042 *num_instr += LNOTARGET_Int_Lshr_Res(resource_count, double_word);
02043 break;
02044 #ifdef TARG_X8664
02045 case OPR_EQ:
02046 *num_instr += LNOTARGET_Int_Eq_Res(resource_count, desc);
02047 break;
02048 case OPR_NE:
02049 *num_instr += LNOTARGET_Int_Ne_Res(resource_count, desc);
02050 break;
02051 case OPR_GT:
02052 *num_instr += LNOTARGET_Int_Gt_Res(resource_count, desc);
02053 break;
02054 case OPR_GE:
02055 *num_instr += LNOTARGET_Int_Ge_Res(resource_count, desc);
02056 break;
02057 case OPR_LT:
02058 *num_instr += LNOTARGET_Int_Lt_Res(resource_count, desc);
02059 break;
02060 case OPR_LE:
02061 *num_instr += LNOTARGET_Int_Le_Res(resource_count, desc);
02062 break;
02063 #else
02064 case OPR_EQ:
02065 *num_instr += LNOTARGET_Int_Eq_Res(resource_count);
02066 break;
02067 case OPR_NE:
02068 *num_instr += LNOTARGET_Int_Ne_Res(resource_count);
02069 break;
02070 case OPR_GT:
02071 *num_instr += LNOTARGET_Int_Gt_Res(resource_count);
02072 break;
02073 case OPR_GE:
02074 *num_instr += LNOTARGET_Int_Ge_Res(resource_count);
02075 break;
02076 case OPR_LT:
02077 *num_instr += LNOTARGET_Int_Lt_Res(resource_count);
02078 break;
02079 case OPR_LE:
02080 *num_instr += LNOTARGET_Int_Le_Res(resource_count);
02081 break;
02082 #endif
02083 case OPR_LDA:
02084 *num_instr += LNOTARGET_Int_Lda_Res(resource_count);
02085 break;
02086 case OPR_INTCONST:
02087 break;
02088 default:
02089 DevWarn("Unknown whirl in LOOP_MODEL::OP_Resources_R");
02090 return -1;
02091 }
02092 }
02093 }
02094
02095 for (INT kidno = 0; kidno < WN_kid_count(wn); kidno++) {
02096 WN* kid = WN_kid(wn,kidno);
02097 if (OP_Resources_R(kid, resource_count, num_instr, invar_table) == -1) {
02098 return -1;
02099 }
02100 }
02101 return 1;
02102 }
02103
02104
02105
02106
02107
02108 INT
02109 LOOP_MODEL::FP_Cycles_Madd(WN* wn,
02110 TI_RES_COUNT* resource_count,
02111 double* num_fp_instr,
02112 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
02113 {
02114
02115 *num_fp_instr += LNOTARGET_FP_Madd_Res(resource_count, WN_rtype(wn));
02116
02117 WN *kid0 = WN_kid0(wn);
02118 WN *kid1 = WN_kid1(wn);
02119
02120 WN *non_mult_kid;
02121 WN *mult_kid0;
02122 WN *mult_kid1;
02123
02124
02125
02126 if (WN_operator(kid0) == OPR_MPY) {
02127 non_mult_kid = kid1;
02128 mult_kid0 = WN_kid0(kid0);
02129 mult_kid1 = WN_kid1(kid0);
02130 }
02131 else {
02132 non_mult_kid = kid0;
02133 mult_kid0 = WN_kid0(kid1);
02134 mult_kid1 = WN_kid1(kid1);
02135 }
02136
02137 if (OP_Resources_R(non_mult_kid,
02138 resource_count,
02139 num_fp_instr,
02140 invar_table) == -1) {
02141 return -1;
02142 }
02143
02144 if (OP_Resources_R(mult_kid0,
02145 resource_count,
02146 num_fp_instr,
02147 invar_table) == -1) {
02148 return -1;
02149 }
02150 if (OP_Resources_R(mult_kid1,
02151 resource_count,
02152 num_fp_instr,
02153 invar_table) == -1) {
02154 return -1;
02155 }
02156 return 1;
02157 }
02158
02159
02160
02161 INT
02162 LOOP_MODEL::OP_Cycles_Cvt(OPCODE opcode,
02163 TI_RES_COUNT* resource_count,
02164 double* num_fp_instr)
02165 {
02166 double instr = LNOTARGET_Cvt_Res(resource_count, opcode);
02167 if (instr < 0.1) {
02168 return -1;
02169 }
02170 *num_fp_instr += instr;
02171 return 1;
02172 }
02173
02174
02175
02176
02177 INT
02178 LOOP_MODEL::FP_Cycles_Intrinsic(WN* wn,
02179 TI_RES_COUNT* resource_count,
02180 double* num_fp_instr)
02181 {
02182 if (WN_kid_count(wn) != 2) {
02183 return -1;
02184 }
02185 WN *const_kid = WN_kid1(wn);
02186 if (WN_operator(const_kid) == OPR_PARM) {
02187 const_kid = WN_kid0(const_kid);
02188 }
02189 if (WN_operator(const_kid) != OPR_INTCONST) {
02190 return -1;
02191 }
02192 INT num_multiplies = WN_const_val(const_kid) - 1;
02193 if (num_multiplies == 0) {
02194 return 1;
02195 }
02196 if (num_multiplies < 0 || num_multiplies > 3) {
02197 return -1;
02198 }
02199 double instr = LNOTARGET_FP_Exp_Res(resource_count,
02200 (INTRINSIC) WN_intrinsic(wn),
02201 num_multiplies);
02202 if (instr < 0.1) {
02203 return -1;
02204 }
02205 *num_fp_instr += instr;
02206 return 1;
02207 }
02208
02209
02210
02211 BOOL
02212 ARRAY_REF_NODE::Lexically_Before(ARRAY_REF_NODE *node2)
02213 {
02214 INT num_loops = Array->Dim(0)->Nest_Depth();
02215 for (INT i=0; i<num_loops; i++) {
02216 if (_unroll_copy[i] < node2->_unroll_copy[i]) return TRUE;
02217 }
02218 return (_lex_number < node2->_lex_number);
02219 }
02220
02221 void
02222 ARRAY_REF_LIST::Print(FILE *fp) const
02223 {
02224 fprintf(fp,"The base array is \"");
02225 Base_Array->Print(fp);
02226 if (_is_scalar_expanded) fprintf(fp," (scalar expanded) ");
02227 fprintf(fp,"\" and the references are \n");
02228 ARRAY_REF_CONST_ITER iter(this);
02229 const ARRAY_REF_NODE *first = iter.First();
02230 for (const ARRAY_REF_NODE *n = first; !iter.Is_Empty(); n = iter.Next()) {
02231 fprintf(fp, " ");
02232 n->Print(fp);
02233 }
02234 }
02235
02236
02237
02238
02239 ARRAY_REF_LIST::~ARRAY_REF_LIST()
02240 {
02241 MEM_POOL_Set_Default(_pool);
02242 while (!Is_Empty())
02243 CXX_DELETE(Remove_Headnode(),_pool);
02244 }
02245
02246
02247 ARRAY_REF_LIST::ARRAY_REF_LIST(ARRAY_REF_LIST *orig, MEM_POOL *pool)
02248 {
02249 _pool = pool;
02250 _is_scalar_expanded = orig->_is_scalar_expanded;
02251 Base_Array = orig->Base_Array;
02252 ARRAY_REF_ITER iter(orig);
02253 for (ARRAY_REF_NODE *node = iter.First(); !iter.Is_Empty();
02254 node=iter.Next()) {
02255 Append(CXX_NEW(ARRAY_REF_NODE(node,pool),pool));
02256 }
02257 }
02258
02259 void
02260 ARRAY_REF_LIST::Remove_Invariants(INT loopno)
02261 {
02262 ARRAY_REF_ITER iter(this);
02263 ARRAY_REF_NODE *first = iter.First();
02264 ARRAY_REF_NODE *prev_node = NULL;
02265 ARRAY_REF_NODE *next_node = NULL;
02266 for (ARRAY_REF_NODE *node=first; node; node = next_node) {
02267 next_node = iter.Next();
02268 ACCESS_ARRAY *array = node->Array;
02269 BOOL is_invar = TRUE;
02270 for (INT i=0; i<array->Num_Vec(); i++) {
02271 ACCESS_VECTOR *av = array->Dim(i);
02272 if ((av->Non_Const_Loops() > loopno) || (av->Loop_Coeff(loopno) != 0)) {
02273 is_invar = FALSE;
02274 }
02275 }
02276 if (is_invar) {
02277 Remove(prev_node,node);
02278 } else {
02279 prev_node = node;
02280 }
02281 }
02282 }
02283
02284
02285
02286
02287
02288
02289
02290
02291
02292 static BOOL
02293 Cse_Or_Dup(ACCESS_ARRAY* array, ACCESS_ARRAY* array2,
02294 INT inner, INT max_dist, INT step,
02295 BOOL* is_dup, mINT16* max_inner_offset, mINT16* min_inner_offset)
02296 {
02297 *min_inner_offset = INT16_MAX;
02298 *max_inner_offset = INT16_MIN;
02299 if(!step) return((*array) == (*array2));
02300
02301 if (array->Too_Messy || array2->Too_Messy) return(FALSE);
02302 if (array->Num_Vec() != array2->Num_Vec()) return(FALSE);
02303 BOOL seen_mult = FALSE;
02304 INT diff=0;
02305 for (INT i=0; i<array->Num_Vec(); i++) {
02306 ACCESS_VECTOR *av1 = array->Dim(i);
02307 ACCESS_VECTOR *av2 = array2->Dim(i);
02308 if (av1->Too_Messy || av2->Too_Messy) return(FALSE);
02309 if (av1->Nest_Depth() != av2->Nest_Depth()) return(FALSE);
02310
02311 INT dist = av1->Const_Offset - av2->Const_Offset;
02312
02313
02314 if (!av1->Has_Loop_Coeff() || !av2->Has_Loop_Coeff()) {
02315 if (dist) return(FALSE);
02316 }
02317
02318
02319 if (av1->Lin_Symb != NULL && !av1->Lin_Symb->Is_Empty()) {
02320 if (av2->Lin_Symb == NULL || av2->Lin_Symb->Is_Empty() ||
02321 !(*av1->Lin_Symb == *av2->Lin_Symb)) {
02322 return(FALSE);
02323 }
02324 } else if (av2->Lin_Symb != NULL && !av2->Lin_Symb->Is_Empty()) {
02325 return(FALSE);
02326 }
02327 if (av1->Non_Lin_Symb != NULL && !av1->Non_Lin_Symb->Is_Empty()) {
02328 if (av2->Non_Lin_Symb == NULL || av2->Non_Lin_Symb->Is_Empty() ||
02329 !(*av1->Non_Lin_Symb == *av2->Non_Lin_Symb)) {
02330 return(FALSE);
02331 }
02332 } else if (av2->Non_Lin_Symb != NULL && !av2->Non_Lin_Symb->Is_Empty()) {
02333 return(FALSE);
02334 }
02335
02336
02337 for (INT i=0; i<av1->Nest_Depth(); i++) {
02338 if (av1->Loop_Coeff(i) != av2->Loop_Coeff(i)) {
02339 return(FALSE);
02340 }
02341 }
02342 INT mult = av1->Loop_Coeff(inner);
02343 if (mult) {
02344 if (abs(dist) > abs(step*mult*max_dist)) {
02345 return(FALSE);
02346 }
02347 if ((dist % (step*mult)) != 0) {
02348 return(FALSE);
02349 }
02350 INT this_diff = (dist / (step*mult));
02351 if (seen_mult && (this_diff != diff)) {
02352 return(FALSE);
02353 }
02354 seen_mult = TRUE;
02355 diff = this_diff;
02356 *max_inner_offset = MAX(*max_inner_offset,
02357 MAX(av1->Const_Offset,av2->Const_Offset)/(step*mult));
02358 *min_inner_offset = MIN(*min_inner_offset,
02359 MIN(av1->Const_Offset,av2->Const_Offset)/(step*mult));
02360 } else if (dist != 0) return(FALSE);
02361 }
02362 *is_dup = (diff == 0);
02363 return(TRUE);
02364 }
02365
02366
02367
02368
02369
02370
02371 void
02372 ARRAY_REF_LIST::Remove_Cse(INT inner, INT max_dist, INT step)
02373 {
02374 mINT16 max_inner_offset, min_inner_offset;
02375 ARRAY_REF_ITER iter(this);
02376 for (ARRAY_REF_NODE *node=iter.First(); node; node = iter.Next()) {
02377 ACCESS_ARRAY *array = node->Array;
02378
02379
02380 ARRAY_REF_ITER iter2(node);
02381 ARRAY_REF_NODE *first = iter2.First();
02382 first = iter2.Next();
02383 ARRAY_REF_NODE *prev_node = node;
02384 ARRAY_REF_NODE *next_node = NULL;
02385 for (ARRAY_REF_NODE *node2=first; node2; node2 = next_node) {
02386 next_node = iter2.Next();
02387 ACCESS_ARRAY *array2 = node2->Array;
02388 BOOL is_dup;
02389 if (Cse_Or_Dup(array,array2,inner,max_dist,step,&is_dup,
02390 &max_inner_offset,&min_inner_offset)) {
02391 if (is_dup) {
02392 node->_is_dup = TRUE;
02393 if (node2->_has_dup_loads ||
02394 (node->_has_load && node2->_has_load)) {
02395 node->_has_dup_loads = TRUE;
02396 }
02397 } else {
02398 node->_is_cse = TRUE;
02399 node->_max_inner_offset =
02400 MAX(node->_max_inner_offset,max_inner_offset);
02401 node->_min_inner_offset =
02402 MIN(node->_min_inner_offset,min_inner_offset);
02403 }
02404 node->_has_store |= node2->_has_store;
02405 node->_has_load |= node2->_has_load;
02406 if (node->_first_ref_store != node2->_first_ref_store) {
02407 if (node2->Lexically_Before(node)) {
02408 node->_first_ref_store = node2->_first_ref_store;
02409 }
02410 }
02411 Remove(prev_node,node2);
02412 } else {
02413 prev_node = node2;
02414 }
02415 }
02416 }
02417 }
02418
02419 void
02420 ARRAY_REF_LIST::Unroll(INT loop_no, INT num_copies)
02421 {
02422 ARRAY_REF_ITER iter(this);
02423 ARRAY_REF_NODE *next_node = NULL;
02424 for (ARRAY_REF_NODE *node=iter.First(); node; node = next_node) {
02425 next_node = node->Next();
02426 ACCESS_ARRAY *array = node->Array;
02427
02428
02429
02430
02431 BOOL varies = FALSE;
02432 if (array->Too_Messy) varies = TRUE;
02433 for (INT i=0; i<array->Num_Vec() && !varies; i++) {
02434 ACCESS_VECTOR *av = array->Dim(i);
02435 if (av->Too_Messy || (av->Loop_Coeff(loop_no) != 0)) {
02436 varies = TRUE;
02437 }
02438 }
02439
02440 if (!varies) {
02441 node->_is_dup = TRUE;
02442 if (node->_has_load) node->_has_dup_loads = TRUE;
02443 } else {
02444 INT orig_copy_num = node->_unroll_copy[loop_no];
02445 for (INT i=num_copies-1; i>=0; i--) {
02446 if (i != 0) {
02447 ARRAY_REF_NODE *new_node=CXX_NEW(ARRAY_REF_NODE(node,_pool),_pool);
02448 if (orig_copy_num) {
02449 new_node->_unroll_copy[loop_no] = orig_copy_num*num_copies+i;
02450 } else {
02451 new_node->_unroll_copy[loop_no] = i;
02452 }
02453 array = new_node->Array;
02454 Prepend(new_node,node);
02455 } else {
02456 array = node->Array;
02457 if (orig_copy_num) {
02458 node->_unroll_copy[loop_no] = orig_copy_num*num_copies+i;
02459 } else {
02460 node->_unroll_copy[loop_no] = i;
02461 }
02462 }
02463 for (INT j=0; j<array->Num_Vec(); j++) {
02464 ACCESS_VECTOR *av = array->Dim(j);
02465 if (!av->Too_Messy) {
02466 INT mult = av->Loop_Coeff(loop_no);
02467 if (mult) {
02468 av->Const_Offset += mult*i;
02469 av->Set_Loop_Coeff(loop_no,num_copies*mult);
02470 }
02471 }
02472 }
02473 }
02474 }
02475 }
02476 }
02477
02478
02479 void
02480 ARRAY_REF_LIST::Mark_Invariants(INT loopno)
02481 {
02482 ARRAY_REF_ITER iter(this);
02483 ARRAY_REF_NODE *first = iter.First();
02484 ARRAY_REF_NODE *next_node = NULL;
02485 for (ARRAY_REF_NODE *node=first; node; node = next_node) {
02486 next_node = iter.Next();
02487 ACCESS_ARRAY *array = node->Array;
02488 BOOL is_invar = TRUE;
02489 for (INT i=0; i<array->Num_Vec(); i++) {
02490 ACCESS_VECTOR *av = array->Dim(i);
02491 if ((av->Non_Const_Loops() > loopno) || (av->Loop_Coeff(loopno) != 0)) {
02492 is_invar = FALSE;
02493 }
02494 }
02495 node->_is_invariant = is_invar;
02496 }
02497 }
02498
02499
02500 INT
02501 ARRAY_REF_LIST::Num_Invariants(INT loopno)
02502 {
02503 INT result=0;
02504 Mark_Invariants(loopno);
02505
02506 ARRAY_REF_ITER iter(this);
02507 for (ARRAY_REF_NODE *node=iter.First(); node; node = iter.Next()) {
02508 if (node->_is_invariant) result++;
02509 }
02510 return result;
02511 }
02512
02513
02514 INT
02515 ARRAY_REF_LIST::Num_Fp_Refs() const
02516 {
02517 INT result=0;
02518
02519 if (_is_scalar_expanded) {
02520 if (MTYPE_float(Base_Array->Type))
02521 result += this->Len();
02522 } else {
02523 if (MTYPE_float(WN_desc(LWN_Get_Parent(Head()->Wn))) ||
02524 MTYPE_float(WN_rtype(LWN_Get_Parent(Head()->Wn))))
02525 result += this->Len();
02526 }
02527 return result;
02528 }
02529
02530
02531 INT
02532 ARRAY_REF_LIST::Num_Int_Refs() const
02533 {
02534 INT result=0;
02535
02536 if (_is_scalar_expanded) {
02537 if (!MTYPE_float(Base_Array->Type))
02538 result += this->Len();
02539 } else {
02540 if (!MTYPE_float(WN_desc(LWN_Get_Parent(Head()->Wn))) &&
02541 !MTYPE_float(WN_rtype(LWN_Get_Parent(Head()->Wn))))
02542 result += this->Len();
02543 }
02544 return result;
02545 }
02546
02547 void
02548 ARRAY_REF_LIST::Calc_Regs_And_Refs(INT* num_fp_regs,
02549 INT* num_fp_refs,
02550 INT* num_fp_variant_stores,
02551 INT* num_fp_invariant_stores,
02552 INT* num_int_regs,
02553 INT* num_int_refs,
02554 INT* num_int_variant_stores,
02555 INT* num_int_invariant_stores)
02556 {
02557 INT fp_regs = 0;
02558 INT fp_refs = 0;
02559 INT fp_variant_stores = 0;
02560 INT fp_invariant_stores = 0;
02561 INT int_regs = 0;
02562 INT int_refs = 0;
02563 INT int_variant_stores = 0;
02564 INT int_invariant_stores = 0;
02565
02566 ARRAY_REF_ITER iter(this);
02567 for (ARRAY_REF_NODE* node=iter.First(); node; node = iter.Next()) {
02568
02569 INT tmp_regs = 0;
02570 INT tmp_refs = 0;
02571 INT tmp_variant_stores = 0;
02572 INT tmp_invariant_stores = 0;
02573 INT tmp_base_regs = 0;
02574
02575 if (!node->_is_invariant) {
02576
02577
02578 BOOL found = FALSE;
02579 ARRAY_REF_ITER iter1(this);
02580 for (ARRAY_REF_NODE* node1=iter1.First();
02581 node1 != node;
02582 node1 = iter1.Next()) {
02583 ACCESS_ARRAY* ar = node->Array;
02584 ACCESS_ARRAY* ar1 = node1->Array;
02585 if (ar->Too_Messy ||
02586 ar1->Too_Messy ||
02587 ar->Num_Vec() != ar1->Num_Vec()) {
02588 continue;
02589 }
02590 BOOL need_diff_base = FALSE;
02591 for (INT i = 0; i < ar->Num_Vec(); i++) {
02592 ACCESS_VECTOR* av = ar->Dim(i);
02593 ACCESS_VECTOR* av1 = ar1->Dim(i);
02594 if (av->Too_Messy || av1->Too_Messy) {
02595 need_diff_base=TRUE;
02596 break;
02597 }
02598 ACCESS_VECTOR* diff=Subtract(av, av1, _pool);
02599 if (!diff->Is_Const()) {
02600 need_diff_base=TRUE;
02601 break;
02602 }
02603 else if (i < ar->Num_Vec()-1) {
02604 if (diff->Const_Offset != 0) {
02605 need_diff_base=TRUE;
02606 break;
02607 }
02608 else {
02609 continue;
02610 }
02611 }
02612 else if (diff->Const_Offset < 0x10000 &&
02613 - diff->Const_Offset < 0x10000) {
02614 need_diff_base=FALSE;
02615 break;
02616 }
02617 else {
02618 need_diff_base=TRUE;
02619 break;
02620 }
02621 }
02622 if (!need_diff_base) {
02623 found = TRUE;
02624 break;
02625 }
02626 }
02627 if (!found) {
02628 tmp_base_regs++;
02629 }
02630 }
02631
02632 BOOL is_fp = FALSE;
02633 INT regs_per_ref = 1;
02634 if (_is_scalar_expanded) {
02635 if (MTYPE_float(Base_Array->Type)) {
02636 is_fp = TRUE;
02637 }
02638 if (MTYPE_is_complex(Base_Array->Type)) {
02639 regs_per_ref *= 2;
02640 }
02641 if (MTYPE_is_quad(Base_Array->Type)) {
02642 regs_per_ref *= 2;
02643 }
02644 }
02645 else {
02646 WN* parent = LWN_Get_Parent(node->Wn);
02647 if (MTYPE_float(WN_desc(parent)) ||
02648 MTYPE_float(WN_rtype(parent))) {
02649 is_fp = TRUE;
02650 }
02651 if (MTYPE_is_complex(WN_desc(parent)) ||
02652 MTYPE_is_complex(WN_rtype(parent))) {
02653 regs_per_ref *= 2;
02654 }
02655 if (MTYPE_is_quad(WN_desc(parent)) ||
02656 MTYPE_is_quad(WN_rtype(parent))) {
02657 regs_per_ref *= 2;
02658 }
02659 }
02660 if (node->_is_invariant) {
02661 tmp_regs += regs_per_ref;
02662 }
02663 else if (node->_is_cse) {
02664 if (node->_max_inner_offset > node->_min_inner_offset) {
02665 tmp_regs +=
02666 MIN(Max_Cse_Dist,(node->_max_inner_offset-node->_min_inner_offset))
02667 * regs_per_ref;
02668 }
02669 else {
02670 tmp_regs += regs_per_ref;
02671 }
02672 tmp_refs += regs_per_ref;
02673 }
02674 else if (node->_is_dup) {
02675 if (node->_has_dup_loads) {
02676 tmp_regs += regs_per_ref;
02677 }
02678 if (node->_has_store) {
02679 tmp_refs += regs_per_ref;
02680 }
02681 if (node->_has_load && !node->_first_ref_store) {
02682 tmp_refs += regs_per_ref;
02683 }
02684 }
02685 else {
02686 tmp_refs += regs_per_ref;
02687 }
02688 if (node->_has_store) {
02689 if (node->_is_invariant) {
02690 tmp_invariant_stores += regs_per_ref;
02691 }
02692 else {
02693 tmp_variant_stores += regs_per_ref;
02694 }
02695 }
02696 if (is_fp) {
02697 fp_regs += tmp_regs;
02698 fp_refs += tmp_refs;
02699 fp_invariant_stores += tmp_invariant_stores;
02700 fp_variant_stores += tmp_variant_stores;
02701 }
02702 else {
02703 int_regs += tmp_regs;
02704 int_refs += tmp_refs;
02705 int_invariant_stores += tmp_invariant_stores;
02706 int_variant_stores += tmp_variant_stores;
02707 }
02708 int_regs += tmp_base_regs;
02709 }
02710
02711 *num_fp_regs = fp_regs;
02712 *num_fp_refs = fp_refs;
02713 *num_fp_variant_stores = fp_variant_stores;
02714 *num_fp_invariant_stores = fp_invariant_stores;
02715 *num_int_regs = int_regs;
02716 *num_int_refs = int_refs;
02717 *num_int_variant_stores = int_variant_stores;
02718 *num_int_invariant_stores = int_invariant_stores;
02719 }
02720
02721
02722
02723 INT
02724 ARRAY_REF_LIST::Conflict_Refs(INT max_dim,
02725 BOOL* can_be_unrolled,
02726 INT num_loops)
02727 {
02728 INT result = 0;
02729
02730 MEM_POOL_Push(&LNO_local_pool);
02731 BOOL *invar = CXX_NEW_ARRAY(BOOL,num_loops,&LNO_local_pool);
02732 ARRAY_REF_ITER iter1(this);
02733 for (ARRAY_REF_NODE *node1=iter1.First(); node1; node1 = iter1.Next()) {
02734 if (!node1->_is_invariant) {
02735 ACCESS_ARRAY *array=node1->Array;
02736 UINT num_vec = array->Num_Vec();
02737 if (num_vec == max_dim) {
02738 INT i;
02739 for (i=0; i<num_loops; i++) {
02740 invar[i] = can_be_unrolled[i];
02741 }
02742 BOOL too_messy = FALSE;
02743 for (i=0; i<num_vec; i++) {
02744 ACCESS_VECTOR *av = array->Dim(i);
02745 if (!av->Too_Messy) {
02746 for (INT j=0; j<av->Nest_Depth(); j++) {
02747 if ((av->Non_Const_Loops() > j) || (av->Loop_Coeff(j) != 0)) {
02748 invar[j] = FALSE;
02749 }
02750 }
02751 } else {
02752 too_messy = TRUE;
02753 }
02754 }
02755 if (!too_messy) {
02756 BOOL is_invar = FALSE;
02757 for (i=0; i<num_loops && !is_invar; i++) {
02758 if (invar[i]) {
02759 is_invar = TRUE;
02760 }
02761 }
02762 if (is_invar) {
02763 result++;
02764 }
02765 }
02766 }
02767 }
02768 }
02769 MEM_POOL_Pop(&LNO_local_pool);
02770 return result;
02771 }
02772
02773
02774
02775 ARRAY_REF::ARRAY_REF(ARRAY_REF *orig, MEM_POOL *pool) : _stack(pool)
02776 {
02777 _pool = pool;
02778 _num_bad_fp = orig->_num_bad_fp;
02779 _num_bad_int = orig->_num_bad_int;
02780 for (INT i=0; i<orig->Elements(); i++) {
02781 Push(CXX_NEW(ARRAY_REF_LIST(orig->Array_Ref_List(i),pool),pool));
02782 }
02783 }
02784
02785 void
02786 ARRAY_REF::Print(FILE *fp) const
02787 {
02788 fprintf(fp,"The number of bad references is %d fp and %d int\n",
02789 Num_Fp_Bad(), Num_Int_Bad());
02790 for (INT i=0; i<Elements(); i++) {
02791 Array_Ref_List(i)->Print(fp);
02792 }
02793 }
02794
02795
02796 INT
02797 ARRAY_REF::Num_Fp_Refs() const
02798 {
02799 INT result = Num_Fp_Bad();
02800 for (INT i=0; i<Elements(); i++) {
02801 result += Array_Ref_List(i)->Num_Fp_Refs();
02802 }
02803 return result;
02804 }
02805
02806 INT
02807 ARRAY_REF::Num_Int_Refs() const
02808 {
02809 INT result = Num_Int_Bad();
02810 for (INT i=0; i<Elements(); i++) {
02811 result += Array_Ref_List(i)->Num_Int_Refs();
02812 }
02813 return result;
02814 }
02815
02816 INT
02817 ARRAY_REF::Num_Invariants(INT loopno)
02818 {
02819 INT result = 0;
02820 for (INT i=0; i<Elements(); i++) {
02821 result += Array_Ref_List(i)->Num_Invariants(loopno);
02822 }
02823 return result;
02824 }
02825
02826 INT
02827 ARRAY_REF::Conflict_Refs(BOOL *can_be_unrolled, INT num_loops)
02828 {
02829 INT max_dim = 0;
02830 INT result = 0;
02831 for (INT i=0; i<Elements(); i++) {
02832 ARRAY_REF_ITER iter(Array_Ref_List(i));
02833 ARRAY_REF_NODE *node = iter.First();
02834 ACCESS_ARRAY *ar=node->Array;
02835 max_dim = MAX(max_dim,ar->Num_Vec());
02836 }
02837 if (max_dim >= 2) {
02838 for (INT i=0; i<Elements(); i++) {
02839 result +=
02840 Array_Ref_List(i)->Conflict_Refs(max_dim,can_be_unrolled,num_loops);
02841 }
02842 }
02843 return result;
02844 }
02845
02846 void
02847 ARRAY_REF::Calc_Regs_And_Refs(INT* num_fp_regs,
02848 INT* num_fp_refs,
02849 INT* num_fp_variant_stores,
02850 INT* num_fp_invariant_stores,
02851 INT* num_int_regs,
02852 INT* num_int_refs,
02853 INT* num_int_variant_stores,
02854 INT* num_int_invariant_stores)
02855 {
02856 *num_fp_regs = 0;
02857 *num_fp_refs = Num_Fp_Bad();
02858 *num_fp_variant_stores = 0;
02859 *num_fp_invariant_stores = 0;
02860 *num_int_regs = 0;
02861 *num_int_refs = Num_Int_Bad();
02862 *num_int_variant_stores = 0;
02863 *num_int_invariant_stores = 0;
02864 for (INT i=0; i<Elements(); i++) {
02865 INT this_fp_refs = 0;
02866 INT this_fp_regs = 0;
02867 INT fp_variant_stores = 0;
02868 INT fp_invariant_stores = 0;
02869 INT this_int_refs = 0;
02870 INT this_int_regs = 0;
02871 INT int_variant_stores = 0;
02872 INT int_invariant_stores = 0;
02873 Array_Ref_List(i)->Calc_Regs_And_Refs(
02874 &this_fp_regs, &this_fp_refs, &fp_variant_stores, &fp_invariant_stores,
02875 &this_int_regs, &this_int_refs, &int_variant_stores,
02876 &int_invariant_stores);
02877 *num_fp_regs += this_fp_regs;
02878 *num_fp_refs += this_fp_refs;
02879 *num_fp_variant_stores += fp_variant_stores;
02880 *num_fp_invariant_stores += fp_invariant_stores;
02881 *num_int_regs += this_int_regs;
02882 *num_int_refs += this_int_refs;
02883 *num_int_variant_stores += int_variant_stores;
02884 *num_int_invariant_stores += int_invariant_stores;
02885 }
02886
02887 *num_int_regs += (_num_bad_fp+_num_bad_int);
02888 }
02889
02890
02891 void
02892 ARRAY_REF::Remove_Invariants(INT loopno)
02893 {
02894 for (INT i=0; i<Elements(); i++) {
02895 Array_Ref_List(i)->Remove_Invariants(loopno);
02896 }
02897 }
02898
02899 void
02900 ARRAY_REF::Remove_Cse(INT inner, INT max_dist, INT step)
02901 {
02902 for (INT i=0; i<Elements(); i++) {
02903 Array_Ref_List(i)->Remove_Cse(inner, max_dist, step);
02904 }
02905 }
02906
02907 void
02908 ARRAY_REF::Mark_Invariants(INT loopno)
02909 {
02910 for (INT i=0; i<Elements(); i++) {
02911 Array_Ref_List(i)->Mark_Invariants(loopno);
02912 }
02913 }
02914
02915 void
02916 ARRAY_REF::Unroll(INT loop_no, INT num_copies)
02917 {
02918 _num_bad_fp *= num_copies;
02919 _num_bad_int *= num_copies;
02920 for (INT i=0; i<Elements(); i++) {
02921 Array_Ref_List(i)->Unroll(loop_no,num_copies);
02922 }
02923 }
02924
02925 static void
02926 Build_DLI_Stack(WN *wn, DLI_STACK *stack)
02927 {
02928 if (wn) {
02929 Build_DLI_Stack(LWN_Get_Parent(wn), stack);
02930 if (WN_opcode(wn) == OPC_DO_LOOP) {
02931 DO_LOOP_INFO *dli = Get_Do_Loop_Info(wn);
02932 stack->Push(dli);
02933 }
02934 }
02935 }
02936
02937
02938
02939
02940 BOOL
02941 Weird_Triangular(ACCESS_VECTOR* av, DLI_STACK* dli_stack, INT SNL_Depth)
02942 {
02943 for (INT i=0; i<av->Nest_Depth(); i++) {
02944 if (av->Loop_Coeff(i)) {
02945 ACCESS_ARRAY *array = dli_stack->Bottom_nth(i)->LB;
02946 INT j;
02947 for (j=0; j<array->Num_Vec(); j++) {
02948 ACCESS_VECTOR *bound = array->Dim(j);
02949 INT lb = av->Nest_Depth() - SNL_Depth;
02950 for (INT k=lb; k<bound->Nest_Depth()-1; k++) {
02951 if (abs(bound->Loop_Coeff(k)) > 5) {
02952 return TRUE;
02953 }
02954 }
02955 }
02956 array = dli_stack->Bottom_nth(i)->UB;
02957 for (j=0; j<array->Num_Vec(); j++) {
02958 ACCESS_VECTOR *bound = array->Dim(j);
02959 INT lb = av->Nest_Depth() - SNL_Depth;
02960 for (INT k=lb; k<bound->Nest_Depth()-1; k++) {
02961 if (abs(bound->Loop_Coeff(k)) > 5) {
02962 return TRUE;
02963 }
02964 }
02965 }
02966 }
02967 }
02968 return FALSE;
02969 }
02970
02971 extern BOOL
02972 Is_Bad_Array(WN* wn_ref, INT nloops)
02973 {
02974 OPCODE op = WN_opcode(wn_ref);
02975 OPERATOR opr = OPCODE_operator(op);
02976 if (!OPCODE_is_load(op) && !OPCODE_is_store(op))
02977 return FALSE;
02978 if (opr == OPR_LDID || opr == OPR_STID)
02979 return FALSE;
02980 WN* wn_array = OPCODE_is_load(op) ? WN_kid0(wn_ref) : WN_kid1(wn_ref);
02981 if (WN_operator(wn_array) != OPR_ARRAY)
02982 return FALSE;
02983 ACCESS_ARRAY* aa = (ACCESS_ARRAY *) WN_MAP_Get(LNO_Info_Map, wn_array);
02984 WN* wn_base = WN_array_base(wn_array);
02985 if (WN_operator(wn_base) != OPR_LDA
02986 && WN_operator(wn_base) != OPR_LDID)
02987 return TRUE;
02988 if (aa == NULL || aa->Too_Messy
02989 || Do_Depth(wn_array) + 1 - aa->Non_Const_Loops() < nloops)
02990 return TRUE;
02991 DLI_STACK *dli_stack =
02992 CXX_NEW(DLI_STACK(&LNO_local_pool), &LNO_local_pool);
02993 Build_DLI_Stack(wn_ref, dli_stack);
02994 for (INT i = 0; i < aa->Num_Vec(); i++) {
02995 ACCESS_VECTOR *av = aa->Dim(i);
02996 if (av->Too_Messy || av->Non_Lin_Symb
02997 || Weird_Triangular(av, dli_stack, nloops))
02998 return TRUE;
02999 }
03000 return FALSE;
03001 }
03002
03003 void
03004 ARRAY_REF::Build(WN* wn,
03005 INT SNL_Depth,
03006 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
03007 {
03008 DLI_STACK *dli_stack = CXX_NEW(DLI_STACK(_pool),_pool);
03009 Build_DLI_Stack(wn,dli_stack);
03010 Build_Rec(wn,dli_stack,SNL_Depth,invar_table);
03011 CXX_DELETE(dli_stack,_pool);
03012 }
03013
03014 void
03015 ARRAY_REF::Build_Rec(WN* wn,
03016 DLI_STACK* dli_stack,
03017 INT SNL_Depth,
03018 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table)
03019 {
03020 if (!wn) return;
03021
03022 OPCODE opcode = WN_opcode(wn);
03023 OPERATOR opr = OPCODE_operator(opcode);
03024
03025 if (opcode == OPC_BLOCK) {
03026 WN *kid = WN_first (wn);
03027 while (kid) {
03028 Build(kid, SNL_Depth,invar_table);
03029 kid = WN_next(kid);
03030 }
03031 return;
03032 } else if (opcode == OPC_DO_LOOP) {
03033 DO_LOOP_INFO *dli = Get_Do_Loop_Info(wn);
03034 dli_stack->Push(dli);
03035 }
03036
03037 if (invar_table && !OPCODE_is_load(opcode) && !OPCODE_is_store(opcode)) {
03038 BIT_VECTOR *bv = invar_table->Find(wn);
03039 if (bv && bv->Pop_Count()) {
03040 Enter_Scalar_Expand(bv,wn);
03041 return;
03042 }
03043 }
03044
03045 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
03046 WN *kid = WN_kid(wn,kidno);
03047 Build(kid, SNL_Depth,invar_table);
03048 }
03049
03050
03051 if (OPCODE_is_load(opcode) && opr != OPR_LDID) {
03052 if (WN_operator(WN_kid0(wn)) == OPR_ARRAY) {
03053 Build_Array(WN_kid0(wn),FALSE,dli_stack,SNL_Depth);
03054 } else {
03055 if (MTYPE_is_float(WN_desc(wn)))
03056 _num_bad_fp++;
03057 else
03058 _num_bad_int++;
03059 }
03060 } else if (OPCODE_is_store(opcode) && opr != OPR_STID) {
03061 if (WN_operator(WN_kid1(wn)) == OPR_ARRAY) {
03062 Build_Array(WN_kid1(wn),TRUE,dli_stack,SNL_Depth);
03063 } else {
03064 if (MTYPE_is_float(WN_desc(wn)))
03065 _num_bad_fp++;
03066 else
03067 _num_bad_int++;
03068 }
03069 } else if (opcode == OPC_DO_LOOP) {
03070 dli_stack->Pop();
03071 }
03072
03073 }
03074
03075 void
03076 ARRAY_REF::Build_Array(WN* wn_array,
03077 BOOL is_store,
03078 DLI_STACK* dli_stack,
03079 INT SNL_Depth)
03080 {
03081 TYPE_ID type = WN_desc(LWN_Get_Parent(wn_array));
03082 INT esz = MTYPE_size_min(type) >> 3;
03083 #ifdef KEY
03084
03085
03086 if (esz == 0 && type == MTYPE_BS)
03087 esz = 1;
03088 #endif
03089
03090 ACCESS_ARRAY *array = (ACCESS_ARRAY *) WN_MAP_Get(LNO_Info_Map,wn_array);
03091 if (!array || array->Too_Messy
03092 || Do_Depth(wn_array) + 1 - array->Non_Const_Loops() < SNL_Depth) {
03093 if (MTYPE_is_float(type))
03094 _num_bad_fp++;
03095 else
03096 _num_bad_int++;
03097 return;
03098 }
03099
03100 INT i;
03101 for (i=0; i<array->Num_Vec(); i++) {
03102 ACCESS_VECTOR *av = array->Dim(i);
03103 if (av->Too_Messy || av->Non_Lin_Symb
03104 || Weird_Triangular(av,dli_stack,SNL_Depth)) {
03105 if (MTYPE_is_float(type))
03106 _num_bad_fp++;
03107 else
03108 _num_bad_int++;
03109 return;
03110 }
03111 }
03112
03113
03114 WN *base = WN_array_base(wn_array);
03115 if ((WN_operator(base) != OPR_LDA) &&
03116 (WN_operator(base) != OPR_LDID)) {
03117 if (MTYPE_is_float(type))
03118 _num_bad_fp++;
03119 else
03120 _num_bad_int++;
03121 return;
03122 }
03123 SYMBOL symb(base);
03124
03125 ARRAY_REF_NODE* arn = CXX_NEW(ARRAY_REF_NODE(array, wn_array, is_store, esz,
03126 _lex_number++), _pool);
03127 for (i=0; i<Elements(); i++) {
03128 if (symb == *Array_Ref_List(i)->Base_Array) {
03129 Array_Ref_List(i)->Append(arn);
03130 return;
03131 }
03132 }
03133 SYMBOL* tmp_symb = CXX_NEW (SYMBOL(&symb),_pool);
03134 Push(CXX_NEW(ARRAY_REF_LIST(_pool,tmp_symb), _pool));
03135 Array_Ref_List(Elements()-1)->Append(arn);
03136 }
03137
03138
03139
03140
03141 void
03142 ARRAY_REF::Enter_Scalar_Expand(WN* wn,
03143 SX_PNODE* pnode,
03144 BOOL* can_be_inner,
03145 INT num_loops)
03146 {
03147 BOOL is_store = OPCODE_is_store(WN_opcode(wn));
03148 TYPE_ID type = WN_desc(wn);
03149 INT esz = MTYPE_size_min(type) >> 3;
03150
03151 INT count=0;
03152 INT i;
03153 for (i=0; i<num_loops; i++) {
03154 if (can_be_inner[i] && pnode->Transformable(i)==SX_PNODE::SE_REQD) {
03155 count++;
03156 }
03157 }
03158
03159 ACCESS_ARRAY *a = CXX_NEW(ACCESS_ARRAY(MAX(1,count),num_loops,_pool),_pool);
03160 a->Too_Messy = FALSE;
03161 if (count == 0) {
03162 a->Dim(0)->Too_Messy = FALSE;
03163 } else {
03164 INT c=0;
03165 for (INT i=0; i<num_loops; i++) {
03166 if (can_be_inner[i]&&pnode->Transformable(i)==SX_PNODE::SE_REQD) {
03167 a->Dim(c)->Too_Messy = FALSE;
03168 a->Dim(c)->Set_Loop_Coeff(i,1);
03169 c++;
03170 }
03171 }
03172 }
03173
03174 ARRAY_REF_NODE *arn =
03175 CXX_NEW(ARRAY_REF_NODE(a,NULL,is_store,esz,_lex_number++),_pool);
03176 for (i=0; i<Elements(); i++) {
03177 if (pnode->Symbol() == *Array_Ref_List(i)->Base_Array) {
03178 Array_Ref_List(i)->Append(arn);
03179 Array_Ref_List(i)->_is_scalar_expanded = TRUE;
03180 return;
03181 }
03182 }
03183 SYMBOL* tmp_symb = CXX_NEW (SYMBOL(pnode->Symbol()),_pool);
03184 Push(CXX_NEW(ARRAY_REF_LIST(_pool,tmp_symb), _pool));
03185 Array_Ref_List(Elements()-1)->Append(arn);
03186 Array_Ref_List(Elements()-1)->_is_scalar_expanded = TRUE;
03187 }
03188
03189
03190
03191 void
03192 ARRAY_REF::Enter_Scalar_Expand(BIT_VECTOR* bv, WN* wn)
03193 {
03194 INT num_loops = bv->Size();
03195 INT num_invar = bv->Pop_Count();
03196 INT num_var = num_loops - num_invar;
03197
03198 ACCESS_ARRAY* a=CXX_NEW(ACCESS_ARRAY(MAX(1,num_var),num_loops,_pool),_pool);
03199 a->Too_Messy = FALSE;
03200 if (num_var == 0) {
03201 a->Dim(0)->Too_Messy = FALSE;
03202 } else {
03203 INT count=0;
03204 for (INT i=0; i<num_loops; i++) {
03205 if (!bv->Test(i)) {
03206 a->Dim(count)->Too_Messy = FALSE;
03207 a->Dim(count)->Set_Loop_Coeff(i,1);
03208 count++;
03209 }
03210 }
03211 }
03212
03213 TYPE_ID type = WN_rtype(wn);
03214 INT esz = MTYPE_size_min(type) >> 3;
03215
03216 ARRAY_REF_NODE *arn =
03217 CXX_NEW(ARRAY_REF_NODE(a,wn,FALSE,esz,_lex_number++),_pool);
03218 SYMBOL* tmp_symb = CXX_NEW (SYMBOL(),_pool);
03219 tmp_symb->Type = type;
03220 Push(CXX_NEW(ARRAY_REF_LIST(_pool,tmp_symb), _pool));
03221 Array_Ref_List(Elements()-1)->Append(arn);
03222 Array_Ref_List(Elements()-1)->_is_scalar_expanded = TRUE;
03223 }
03224
03225
03226
03227
03228 void
03229 ARRAY_REF::Enter_Innermost_Scalar_Expand(WN* wn)
03230 {
03231 BOOL is_store = OPCODE_is_store(WN_opcode(wn));
03232 TYPE_ID type = WN_desc(wn);
03233 INT esz = MTYPE_size_min(type) >> 3;
03234 SYMBOL sym(wn);
03235
03236 ACCESS_ARRAY *a = CXX_NEW(ACCESS_ARRAY(1,1,_pool),_pool);
03237 a->Too_Messy = FALSE;
03238 a->Dim(0)->Too_Messy = FALSE;
03239
03240 ARRAY_REF_NODE *arn =
03241 CXX_NEW(ARRAY_REF_NODE(a,NULL,is_store,esz,_lex_number++),_pool);
03242 for (INT i=0; i<Elements(); i++) {
03243 if (sym == *Array_Ref_List(i)->Base_Array) {
03244 Array_Ref_List(i)->Append(arn);
03245 Array_Ref_List(i)->_is_scalar_expanded = TRUE;
03246 return;
03247 }
03248 }
03249 SYMBOL* tmp_symb = CXX_NEW (SYMBOL(sym),_pool);
03250 Push(CXX_NEW(ARRAY_REF_LIST(_pool,tmp_symb), _pool));
03251 Array_Ref_List(Elements()-1)->Append(arn);
03252 Array_Ref_List(Elements()-1)->_is_scalar_expanded = TRUE;
03253 }
03254
03255
03256
03257
03258
03259
03260
03261
03262
03263
03264
03265
03266
03267
03268
03269
03270
03271
03272
03273
03274
03275
03276 INT LAT_DIRECTED_GRAPH16::Add_Vertices_Op_Edges(WN *wn,HASH_TABLE<WN *,BIT_VECTOR *> *invar_table)
03277 {
03278 OPCODE opcode = WN_opcode(wn);
03279 if (opcode == OPC_BLOCK) {
03280 WN *kid = WN_first(wn);
03281 while (kid) {
03282 if (Add_Vertices_Op_Edges(kid,invar_table) == -1) return(-1);
03283 kid = WN_next(kid);
03284 }
03285 return (1);
03286 }
03287
03288 VINDEX16 v;
03289 if (OPCODE_is_store(opcode) && (v =_array_graph->Get_Vertex(wn))) {
03290 VINDEX16 this_v = Add_Vertex(wn);
03291 if (!this_v) return(-1);
03292 Map_Vertex(v,this_v);
03293
03294 if (Add_Vertices_Op_Edges_Rec(this_v,WN_kid0(wn),0,invar_table) == -1) return -1;
03295
03296 } else if (!OPCODE_is_stmt(opcode)) {
03297 for (INT kidno=0; kidno < WN_kid_count(wn); kidno++) {
03298 if (Add_Vertices_Op_Edges(WN_kid(wn,kidno),invar_table) == -1) return(-1);
03299 }
03300 }
03301 return(1);
03302 }
03303
03304
03305
03306 INT
03307 LAT_DIRECTED_GRAPH16::Add_Vertices_Op_Edges_Rec(VINDEX16 store,
03308 WN *wn,
03309 INT latency,
03310 HASH_TABLE<WN *,BIT_VECTOR *>
03311 *invar_table)
03312 {
03313 TOP top;
03314 OPERATOR oper = WN_operator(wn);
03315 TYPE_ID rtype = WN_rtype(wn);
03316 TYPE_ID desc = WN_desc(wn);
03317
03318 VINDEX16 v;
03319 if (OPERATOR_is_load(oper) && (v = _array_graph->Get_Vertex(wn))) {
03320 VINDEX16 this_v = Add_Vertex(wn);
03321 if (!this_v) {
03322 return -1;
03323 }
03324 Map_Vertex(v,this_v);
03325 EINDEX16 e = Add_Edge(this_v,store,0,latency,0);
03326 if (!e) {
03327 return -1;
03328 }
03329 }
03330
03331 if (invar_table) {
03332 BIT_VECTOR *bv = invar_table->Find(wn);
03333 if (bv && bv->Pop_Count()) {
03334 return 1;
03335 }
03336 }
03337
03338 INT op_latency = 0;
03339
03340 if (oper == OPR_CVT ||
03341 oper == OPR_RND ||
03342 oper == OPR_CEIL ||
03343 oper == OPR_TRUNC ||
03344 #ifdef TARG_X8664
03345
03346 oper == OPR_FLOOR &&
03347 !(desc == MTYPE_F4 && rtype == MTYPE_F4) &&
03348 !(desc == MTYPE_F8 && rtype == MTYPE_F8)
03349 #else
03350 oper == OPR_FLOOR
03351 #endif
03352 ) {
03353 op_latency = LNOTARGET_Cvt_Lat(WN_opcode(wn));
03354 if (op_latency == -1) {
03355 return -1;
03356 }
03357 }
03358 else if (oper == OPR_INTRINSIC_OP) {
03359 op_latency = FP_Latency_Intrinsic(wn);
03360 if (op_latency == -1) {
03361 return -1;
03362 }
03363 }
03364 else if (oper == OPR_REALPART ||
03365 oper == OPR_IMAGPART ||
03366 oper == OPR_PAREN ||
03367 oper == OPR_PARM) {
03368 op_latency = 0;
03369 }
03370 else if (OPERATOR_is_expression(oper) &&
03371 !OPERATOR_is_load(oper) &&
03372 oper != OPR_CONST) {
03373
03374 if (desc == MTYPE_FQ || rtype == MTYPE_FQ) {
03375 return -1;
03376 }
03377
03378 if (desc == MTYPE_FQ ||
03379 desc == MTYPE_CQ ||
03380 rtype == MTYPE_FQ ||
03381 rtype == MTYPE_CQ) {
03382 return -1;
03383 }
03384
03385 else if (desc == MTYPE_F4 ||
03386 desc == MTYPE_F8 ||
03387 #if defined(TARG_IA64)
03388 desc == MTYPE_F10 ||
03389 rtype == MTYPE_F10 ||
03390 #endif
03391 rtype == MTYPE_F4 ||
03392 rtype == MTYPE_F8) {
03393
03394 if (Target_ISA_Has_Madd() &&
03395 (oper == OPR_ADD || oper == OPR_SUB) &&
03396 (WN_operator(WN_kid0(wn)) == OPR_MPY ||
03397 WN_operator(WN_kid1(wn)) == OPR_MPY)) {
03398 return FP_Latency_Madd(store, wn, latency, invar_table);
03399 }
03400 else if (oper == OPR_MAX || oper == OPR_MIN) {
03401 op_latency = LNOTARGET_FP_Min_Max_Lat(rtype);
03402 }
03403 else if (oper == OPR_SQRT) {
03404 op_latency = LNOTARGET_FP_Sqrt_Lat(rtype);
03405 }
03406 else if ((top = LNOTARGET_Whirl_To_Top(wn)) != TOP_UNDEFINED) {
03407 op_latency = LNOTARGET_Top_Latency(top);
03408 }
03409 else if (oper == OPR_DIV) {
03410 op_latency = LNOTARGET_FP_Div_Lat(rtype);
03411 }
03412 else if (oper == OPR_RECIP) {
03413 op_latency = LNOTARGET_FP_Recip_Lat(rtype);
03414 }
03415 else if (oper == OPR_RSQRT
03416 #ifdef TARG_X8664
03417 || oper == OPR_ATOMIC_RSQRT
03418 #endif
03419 ) {
03420 op_latency = LNOTARGET_FP_Rsqrt_Lat(rtype);
03421 }
03422 #ifdef TARG_X8664
03423 else if (oper == OPR_FLOOR) {
03424 op_latency = LNOTARGET_FP_Floor_Lat(rtype);
03425 }
03426 else if (oper == OPR_SELECT) {
03427 op_latency = LNOTARGET_FP_Select_Lat(rtype);
03428 }
03429 else if (OPCODE_is_compare(WN_opcode(wn))) {
03430 op_latency = LNOTARGET_FP_Compare_Lat(rtype);
03431 }
03432 #endif
03433 else {
03434 return -1;
03435 }
03436 }
03437 else if (desc == MTYPE_C4 ||
03438 #if defined(TARG_IA64)
03439 desc == MTYPE_C10 ||
03440 rtype == MTYPE_C10 ||
03441 #endif
03442 desc == MTYPE_C8 ||
03443 rtype == MTYPE_C4 ||
03444 rtype == MTYPE_C8) {
03445 if (oper == OPR_ADD || oper == OPR_SUB) {
03446 op_latency = LNOTARGET_Complex_Add_Lat(rtype);
03447 }
03448 else if (oper == OPR_MPY) {
03449 op_latency = LNOTARGET_Complex_Mult_Lat(rtype);
03450 }
03451 else if (oper == OPR_NEG) {
03452 op_latency = LNOTARGET_Complex_Neg_Lat(rtype);
03453 }
03454 }
03455 }
03456
03457 for (INT kidno = 0; kidno < WN_kid_count(wn); kidno++) {
03458 if (Add_Vertices_Op_Edges_Rec(store,
03459 WN_kid(wn,kidno),
03460 latency+op_latency,
03461 invar_table) == -1) {
03462 return -1;
03463 }
03464 }
03465 return 1;
03466 }
03467
03468
03469 INT
03470 LAT_DIRECTED_GRAPH16::FP_Latency_Madd(VINDEX16 store,
03471 WN *wn,
03472 INT latency,
03473 HASH_TABLE<WN *,BIT_VECTOR *>
03474 *invar_table)
03475 {
03476 TYPE_ID rtype = WN_rtype(wn);
03477 INT add_op_latency = LNOTARGET_FP_Madd_Add_Lat(rtype);
03478 INT mult_op_latency = LNOTARGET_FP_Madd_Mult_Lat(rtype);
03479
03480 WN *kid0 = WN_kid0(wn);
03481 WN *kid1 = WN_kid1(wn);
03482
03483 WN *non_mult_kid;
03484 WN *mult_kid0;
03485 WN *mult_kid1;
03486
03487
03488 if (WN_operator(kid0) == OPR_MPY) {
03489 non_mult_kid = kid1;
03490 mult_kid0 = WN_kid0(kid0);
03491 mult_kid1 = WN_kid1(kid0);
03492 } else {
03493 non_mult_kid = kid0;
03494 mult_kid0 = WN_kid0(kid1);
03495 mult_kid1 = WN_kid1(kid1);
03496 }
03497
03498 if (Add_Vertices_Op_Edges_Rec(store,
03499 non_mult_kid,
03500 latency+add_op_latency,
03501 invar_table) == -1) {
03502 return -1;
03503 }
03504 if (Add_Vertices_Op_Edges_Rec(store,
03505 mult_kid0,
03506 latency+mult_op_latency,
03507 invar_table) == -1) {
03508 return -1;
03509 }
03510 if (Add_Vertices_Op_Edges_Rec(store,
03511 mult_kid1,
03512 latency+mult_op_latency,
03513 invar_table) == -1) {
03514 return -1;
03515 }
03516
03517 return 1;
03518 }
03519
03520
03521
03522
03523 INT LAT_DIRECTED_GRAPH16::FP_Latency_Intrinsic(WN *wn)
03524 {
03525 if (WN_kid_count(wn) != 2) {
03526 return -1;
03527 }
03528 WN *const_kid = WN_kid1(wn);
03529 if (WN_operator(const_kid) == OPR_PARM) {
03530 const_kid = WN_kid0(const_kid);
03531 }
03532 if (WN_operator(const_kid) != OPR_INTCONST) {
03533 return -1;
03534 }
03535 INT num_multiplies = WN_const_val(const_kid)-1;
03536 if (num_multiplies == 0) {
03537 return 0;
03538 }
03539 if (num_multiplies < 0 || num_multiplies > 3) {
03540 return -1;
03541 }
03542 return LNOTARGET_FP_Exp_Lat((INTRINSIC) WN_intrinsic(wn), num_multiplies);
03543 }
03544
03545
03546
03547
03548
03549
03550
03551 INT LAT_DIRECTED_GRAPH16::Add_Flow_Edges()
03552 {
03553 for (VINDEX16 i = Get_Vertex(); i; i=Get_Next_Vertex(i)) {
03554 WN *wn = _v[i]._wn;
03555 if (OPCODE_is_store(WN_opcode(wn))) {
03556 REDUCTION_TYPE source_red =
03557 red_manager == NULL? RED_NONE : red_manager->Which_Reduction(wn);
03558 VINDEX16 array_source = _array_graph->Get_Vertex(wn);
03559 EINDEX16 e = _array_graph->Get_Out_Edge(array_source);
03560 while (e) {
03561 REDUCTION_TYPE sink_red =
03562 red_manager == NULL? RED_NONE : red_manager->Which_Reduction(wn);
03563 if ((source_red != sink_red) || (source_red == RED_NONE)) {
03564 VINDEX16 array_sink = _array_graph->Get_Sink(e);
03565
03566 VINDEX16 sink = _hash_table.Find(array_sink);
03567 if (sink && OPCODE_is_load(WN_opcode(_v[sink]._wn))) {
03568 EINDEX16 newe=
03569 Add_Edge(i,sink,_array_graph->Depv_Array(e)->Union(_pool),0,
03570 _array_graph->Depv_Array(e)->Num_Unused_Dim());
03571 if (!newe) return(-1);
03572 }
03573 }
03574 e = _array_graph->Get_Next_Out_Edge(e);
03575 }
03576 }
03577 }
03578 return(1);
03579 }
03580
03581
03582
03583
03584
03585
03586
03587 double LAT_DIRECTED_GRAPH16::Max_Cycle(INT inner, double lower_bound)
03588 {
03589 double result=0.0;
03590
03591
03592
03593
03594
03595 SCC_DIRECTED_GRAPH16 *scc_graph = CXX_NEW(SCC_DIRECTED_GRAPH16(
03596 Get_Vertex_Count(),Get_Vertex_Count()),_pool);
03597 Set_Scc_Graph(scc_graph,inner);
03598 INT num_scc = scc_graph->Get_Scc_Count();
03599
03600
03601 INT *scc_counts = CXX_NEW_ARRAY(INT,num_scc+1,_pool);
03602
03603
03604
03605 INT *scc_pos = CXX_NEW_ARRAY(INT,scc_graph->Get_Vertex_Count()+1,_pool);
03606 INT i;
03607 for (i=1; i<=num_scc; i++) {
03608 scc_counts[i] = 0;
03609 scc_pos[i] = 0;
03610 }
03611 for (i=1; i<=scc_graph->Get_Vertex_Count(); i++) {
03612 INT id = scc_graph->Get_Scc_Id(i);
03613 scc_pos[i] = scc_counts[id];
03614 scc_counts[id]++;
03615 }
03616
03617 COST_TABLE *ct = NULL;
03618
03619 for (i=1; i<=num_scc; i++) {
03620 if (scc_counts[i] > 1) {
03621
03622
03623 if (!ct) {
03624 ct = CXX_NEW(COST_TABLE(scc_counts[i],_pool),_pool);
03625 } else {
03626 ct->Realloc(scc_counts[i]);
03627 }
03628
03629
03630 double upper_bound = ct->Init(inner,this,scc_graph,i,scc_pos);
03631 if (upper_bound > result) {
03632
03633 INT solve = (INT)(ct->Solve(lower_bound));
03634 result = MAX(result,solve);
03635 }
03636 }
03637 }
03638 CXX_DELETE(scc_graph,_pool);
03639 CXX_DELETE(ct,_pool);
03640 CXX_DELETE_ARRAY(scc_pos,_pool);
03641 CXX_DELETE_ARRAY(scc_counts,_pool);
03642 return result;
03643 }
03644
03645
03646
03647
03648
03649 void LAT_DIRECTED_GRAPH16::Set_Scc_Graph(SCC_DIRECTED_GRAPH16 *scc_graph,
03650 INT inner)
03651 {
03652 Lat_scc_vertex_map= CXX_NEW_ARRAY(VINDEX16,_v.Lastidx()+1,_pool);
03653 Scc_lat_vertex_map= CXX_NEW_ARRAY(VINDEX16,_v.Lastidx()+1,_pool);
03654 for (INT i=Get_Vertex(); i; i = Get_Next_Vertex(i)) {
03655 VINDEX16 scc_vertex = scc_graph->Add_Vertex();
03656 Is_True(scc_vertex,("Impossible overflow in Set_Scc_Graph"));
03657 Lat_scc_vertex_map[i] = scc_vertex;
03658 Scc_lat_vertex_map[scc_vertex] = i;
03659 }
03660 EINDEX16 e = Get_Edge();
03661 while (e) {
03662 if (Is_Valid(inner,e)) {
03663 scc_graph->Add_Edge(Lat_scc_vertex_map[Get_Source(e)],
03664 Lat_scc_vertex_map[Get_Sink(e)]);
03665 }
03666 e = Get_Next_Edge(e);
03667 }
03668 CXX_DELETE_ARRAY(Lat_scc_vertex_map,_pool);
03669 }
03670
03671 COST_V::COST_V() {
03672 _alloc_length = 4;
03673 _length = 0;
03674 _costs = CXX_NEW_ARRAY(COST,(INT) _alloc_length,Default_Mem_Pool);
03675 }
03676
03677
03678 void COST_V::Push(UINT16 latency, UINT16 distance, MEM_POOL *pool) {
03679 if (_length == _alloc_length) {
03680 COST *tmp = CXX_NEW_ARRAY(COST,((INT) 2*_alloc_length),pool);
03681 bcopy(_costs,tmp,_length*sizeof(COST));
03682 CXX_DELETE_ARRAY(_costs,pool);
03683 _costs = tmp;
03684 _alloc_length *= 2;
03685 }
03686 _costs[_length].Distance = distance;
03687 _costs[_length++].Latency = latency;
03688 }
03689
03690
03691
03692 COST_TABLE::COST_TABLE(UINT16 num_vertex, MEM_POOL *pool)
03693 {
03694 _pool = pool;
03695 MEM_POOL_Set_Default(_pool);
03696 _data = CXX_NEW_ARRAY(COST_V,((INT) num_vertex*num_vertex),pool);
03697 _n = num_vertex;
03698 _maxn = _n;
03699 }
03700
03701
03702 void COST_TABLE::Realloc(UINT16 num_vertex)
03703 {
03704 if (num_vertex <= _maxn) {
03705 for (INT i=0; i<num_vertex; i++) {
03706 for (INT j=0; j<num_vertex; j++) {
03707 _data[num_vertex*i+j].Init();
03708 }
03709 }
03710 _n = num_vertex;
03711 } else {
03712 MEM_POOL_Set_Default(_pool);
03713 CXX_DELETE_ARRAY(_data,_pool);
03714 _data = CXX_NEW_ARRAY(COST_V,((INT) num_vertex*num_vertex),_pool);
03715 _n = _maxn = num_vertex;
03716 }
03717 }
03718
03719
03720
03721
03722
03723
03724
03725
03726
03727 double COST_TABLE::Init(INT inner, LAT_DIRECTED_GRAPH16 *graph,
03728 SCC_DIRECTED_GRAPH16 *scc_graph, INT scc_id, INT *scc_pos)
03729 {
03730 double result = 0.0;
03731 BOOL pos_distance = TRUE;
03732 EINDEX16 scc_e = scc_graph->Get_Edge();
03733 while (scc_e) {
03734 VINDEX16 source = scc_graph->Get_Source(scc_e);
03735 INT source_id = scc_graph->Get_Scc_Id(source);
03736 if (source_id == scc_id) {
03737 VINDEX16 sink = scc_graph->Get_Sink(scc_e);
03738 INT sink_id = scc_graph->Get_Scc_Id(sink);
03739 if (sink_id == scc_id) {
03740 EINDEX16 e = graph->Get_Edge(graph->Scc_lat_vertex_map[source],
03741 graph->Scc_lat_vertex_map[sink]);
03742 UINT latency = graph->Latency(e);
03743 result = result + latency;
03744 UINT distance=0;
03745 DEPV *depv = graph->Depv(e);
03746 if (depv) {
03747
03748 DEP dep = DEPV_Dep(depv,inner-graph->Num_Unused_Dim(e));
03749 if (DEP_IsDistance(dep)) {
03750 if (DEP_Distance(dep) >= 0) {
03751 distance = DEP_Distance(dep);
03752 } else {
03753 pos_distance = FALSE;
03754 }
03755 } else {
03756 DIRECTION dir = DEP_Direction(dep);
03757 if ((dir == DIR_POS) || (dir == DIR_POSEQ) || (dir == DIR_POSNEG)
03758 ||(dir == DIR_STAR)) {
03759 distance = 1;
03760 } else if ((dir == DIR_NEGEQ) || (dir == DIR_EQ)) {
03761 distance = 0;
03762 } else {
03763 pos_distance = FALSE;
03764 }
03765 }
03766 }
03767 if (pos_distance) {
03768 Push(scc_pos[source],scc_pos[sink],latency,distance);
03769 }
03770 }
03771 }
03772 scc_e = scc_graph->Get_Next_Edge(scc_e);
03773 }
03774 return (double) result;
03775 }
03776
03777
03778
03779 double COST_TABLE::Solve(double init_min_ii)
03780 {
03781 _min_ii = init_min_ii;
03782
03783 for (INT k=0; k<_n; k++) {
03784 for (INT i=0; i<_n; i++) {
03785 for (INT j=0; j<_n; j++) {
03786
03787
03788
03789
03790 Add_Maximal_Costs(Cost_V(i,j),Cost_V(i,k),Cost_V(k,j));
03791
03792
03793 Update_Min_II(Cost_V(i,j),Cost_V(j,i));
03794 }
03795 }
03796 }
03797 return(_min_ii);
03798 }
03799
03800
03801
03802
03803
03804
03805
03806
03807 void COST_TABLE::Add_Maximal_Costs(COST_V *cvij, COST_V *cvik, COST_V *cvkj)
03808 {
03809 COST *cvik_costs = cvik->Costs();
03810 COST *cvkj_costs = cvkj->Costs();
03811 UINT16 cvik_length = cvik->Length();
03812 UINT16 cvkj_length = cvkj->Length();
03813
03814
03815
03816
03817 INT i;
03818 for ( i = 0; i < cvik_length; ++i ) {
03819 COST *cpik = cvik_costs + i;
03820 INT ikdist = cpik->Distance;
03821 INT iklatency = cpik->Latency;
03822 for (INT j = 0; j < cvkj_length; ++j ) {
03823 COST *cpkj = cvkj_costs + j;
03824 INT kjdist = cpkj->Distance;
03825 INT kjlatency = cpkj->Latency;
03826 INT ikjdist = ikdist + kjdist;
03827 INT ikjlatency = iklatency + kjlatency;
03828 if ( Is_Max_Cost(ikjdist,ikjlatency,cvij,0)) {
03829 cvij->Push(ikjlatency,ikjdist,_pool);
03830 }
03831 }
03832 }
03833
03834
03835
03836
03837
03838
03839
03840
03841
03842
03843 COST *cvij_costs = cvij->Costs();
03844 UINT16 cvij_length = cvij->Length();
03845 for ( i = cvij_length - 1; i >= 0; --i ) {
03846 COST *cpij = cvij_costs + i;
03847 INT dist = cpij->Distance;
03848 INT latency = cpij->Latency;
03849 if (!Is_Max_Cost(dist,latency,cvij,i+1)) {
03850
03851 if ( i != cvij_length - 1 ) {
03852 COST *ij_last = cvij_costs + (cvij_length - 1);
03853 *cpij = *ij_last;
03854 }
03855 --cvij_length;
03856 }
03857 }
03858 cvij->Set_Length(cvij_length);
03859 }
03860
03861
03862
03863
03864
03865
03866
03867 BOOL COST_TABLE::Is_Max_Cost(INT dist, INT latency, COST_V *cv, INT offset)
03868 {
03869 INT len = cv->Length();
03870 COST *cp = cv->Costs();
03871 for (INT i = offset; i < len; ++i ) {
03872 INT cvdist = cp[i].Distance;
03873 INT cvlatency = cp[i].Latency;
03874
03875
03876 if ((dist == cvdist && latency <= cvlatency) ||
03877 dist > cvdist && (latency - cvlatency) <= ((dist-cvdist) * _min_ii)) {
03878 return FALSE;
03879 }
03880 }
03881 return TRUE;
03882 }
03883
03884
03885
03886
03887 void COST_TABLE::Update_Min_II(COST_V *cv1, COST_V *cv2)
03888 {
03889 COST *cp1 = cv1->Costs();
03890 COST *cp2 = cv2->Costs();
03891 INT len1 = cv1->Length();
03892 INT len2 = cv2->Length();
03893 for (INT i = 0; i < len1; ++i ) {
03894 INT dist1 = cp1[i].Distance;
03895 INT latency1 = cp1[i].Latency;
03896 for (INT j = 0; j < len2; ++j) {
03897 INT dist2 = cp2[j].Distance;
03898 INT latency2 = cp2[j].Latency;
03899 if ((dist1 + dist2) != 0) {
03900 INT path_mii = (latency1+latency2)/(dist1+dist2);
03901 _min_ii = MAX(_min_ii,path_mii);
03902 }
03903 }
03904 }
03905 }
03906
03907
03908
03909
03910 void COST_TABLE::Print(FILE *fp)
03911 {
03912 fprintf(fp,"Printing a table \n");
03913 for (INT i = 0; i<_n; i++) {
03914 for (INT j = 0; j<_n; j++) {
03915 COST_V cv = _data[_n*i+j];
03916 if (cv.Length()) {
03917 fprintf(fp,"Point[%d][%d]: ",i,j);
03918 for (INT l = 0; l<cv.Length(); l++) {
03919 fprintf(fp," (L:%d, D:%d) ",cv.Costs()[l].Latency,
03920 cv.Costs()[l].Distance);
03921 }
03922 fprintf(fp,"\n");
03923 }
03924 }
03925 }
03926 }
03927
03928
03929
03930
03931 BOOL LAT_DIRECTED_GRAPH16::Is_Valid(INT inner,EINDEX16 e)
03932 {
03933 DEPV *depv = _e[e].Depv;
03934 if (!depv) return TRUE;
03935 for (INT i=0; i<_num_dim; i++) {
03936 if ((i!=(inner-_num_bad)) &&
03937 (DEP_Direction(DEPV_Dep(depv,i)) != DIR_EQ) &&
03938 (DEP_Direction(DEPV_Dep(depv,i)) != DIR_POSEQ) &&
03939 (DEP_Direction(DEPV_Dep(depv,i)) != DIR_NEGEQ) &&
03940 (DEP_Direction(DEPV_Dep(depv,i)) != DIR_STAR)) {
03941 return(FALSE);
03942 }
03943 }
03944 return(TRUE);
03945 }
03946
03947
03948 void LAT_DIRECTED_GRAPH16::Print(FILE *fp)
03949 {
03950 VINDEX16 i;
03951 EINDEX16 e;
03952 fprintf(fp,"Printing a LAT_DIRECTED_GRAPH16 \n");
03953 for (i=Get_Vertex(); i; i = Get_Next_Vertex(i)) {
03954 fprintf(fp,"Vertex %d for Wn = %s\n",i,OPCODE_name(WN_opcode(_v[i]._wn)));
03955
03956 e = _v[i].Get_Out_Edge();
03957 while (e) {
03958 fprintf(fp,"Edge to vertex %d ",_e[e].Get_Sink());
03959 fprintf(fp," has latency = %d ",_e[e].Latency);
03960 if (_e[e].Depv) {
03961 fprintf(fp," and dependence ");
03962 DEPV_Print(_e[e].Depv,fp,_num_dim);
03963 fprintf(fp,"\n");
03964 } else {
03965 fprintf(fp," and an all equals dependence \n");
03966 }
03967 e = _e[e].Get_Next_Out_Edge();
03968 }
03969 }
03970
03971 }
03972
03973
03974
03975
03976
03977
03978
03979
03980
03981
03982
03983
03984
03985
03986
03987
03988
03989
03990
03991
03992
03993
03994 INT LOOP_MODEL::Unique_Unstored_Fp_Scalar_Refs(WN *wn, ARRAY_REF *ar,
03995 SX_INFO *pi)
03996 {
03997 MEM_POOL_Push(&LNO_local_pool);
03998 SYMBOL_TREE *symbol_tree = CXX_NEW(SYMBOL_TREE(
03999 TRUE,&LNO_local_pool), &LNO_local_pool);
04000 INT outer=0;
04001 _num_fp_scalar_refs = 0;
04002 while (!_can_be_inner[outer]) outer++;
04003 symbol_tree->Enter_Scalar_Refs(wn,ar,pi,_can_be_inner,_num_loops,outer,
04004 &_num_fp_scalar_refs);
04005 INT result = symbol_tree->Num_Fp_Unstored();
04006 CXX_DELETE(symbol_tree,&LNO_local_pool);
04007 MEM_POOL_Pop(&LNO_local_pool);
04008 return result;
04009 }
04010
04011 INT LOOP_MODEL::Unique_Unstored_Int_Scalar_Refs(WN *wn, ARRAY_REF *ar,
04012 SX_INFO *pi)
04013 {
04014 MEM_POOL_Push(&LNO_local_pool);
04015 SYMBOL_TREE *symbol_tree = CXX_NEW(SYMBOL_TREE(
04016 FALSE,&LNO_local_pool), &LNO_local_pool);
04017 INT outer=0;
04018 _num_int_scalar_refs = 0;
04019 while (!_can_be_inner[outer]) outer++;
04020 symbol_tree->Initialize_Innermost_Loop_Var_Symbol(wn);
04021 symbol_tree->Enter_Scalar_Refs(wn,ar,pi,_can_be_inner,_num_loops,outer,
04022 &_num_int_scalar_refs);
04023 INT result = symbol_tree->Num_Int_Unstored();
04024 CXX_DELETE(symbol_tree,&LNO_local_pool);
04025 MEM_POOL_Pop(&LNO_local_pool);
04026 return result;
04027 }
04028
04029 void SYMBOL_TREE::Initialize_Innermost_Loop_Var_Symbol(WN* wn) {
04030 WN* parent=wn;
04031 while (WN_opcode(parent)!=OPC_DO_LOOP)
04032 parent=LWN_Get_Parent(parent);
04033 _innermost_loop_var_symb.Init(WN_index(parent));
04034 }
04035
04036 BOOL SYMBOL_TREE::Integer_Ref_Needs_Reg(WN* wn) {
04037
04038 SYMBOL symb(wn);
04039 WN* wn1=wn;
04040 WN* parent=LWN_Get_Parent(wn1);
04041 while (WN_operator(parent)!=OPR_ARRAY &&
04042 OPCODE_is_expression(WN_opcode(parent))) {
04043 wn1=LWN_Get_Parent(wn1);
04044 parent=LWN_Get_Parent(wn1);
04045 }
04046
04047 if (WN_operator(parent)==OPR_ARRAY) {
04048 INT kid_id=0;
04049 INT num_dim=WN_num_dim(parent);
04050 while (WN_kid(parent,kid_id)!=wn1) kid_id++;
04051
04052
04053
04054 if (1<kid_id && kid_id<=num_dim) {
04055 ACCESS_ARRAY *ar=(ACCESS_ARRAY*)WN_MAP_Get(LNO_Info_Map,parent);
04056 for (INT i=kid_id+1; i<=num_dim; i++) {
04057 ACCESS_VECTOR *av=ar->Dim(i-1);
04058
04059 if (av->Loop_Coeff(av->Nest_Depth()-1)!=0) {
04060 return TRUE;
04061 }
04062 }
04063 } else if (num_dim<kid_id) {
04064 if (symb!=_innermost_loop_var_symb) {
04065 return FALSE;
04066 }
04067 }
04068 } else if (symb!=_innermost_loop_var_symb) {
04069 return TRUE;
04070 }
04071 return FALSE;
04072 }
04073
04074 void SYMBOL_TREE::Enter_Scalar_Refs(
04075 WN *wn, INT *num_scalar_refs,
04076 WN2INT *se_needed, ARRAY_REF *ar)
04077 {
04078 OPCODE opcode = WN_opcode(wn);
04079 BOOL is_store = FALSE;
04080
04081 if (opcode == OPC_BLOCK) {
04082 WN *kid = WN_first (wn);
04083 while (kid) {
04084 Enter_Scalar_Refs(kid,num_scalar_refs,se_needed,ar);
04085 kid = WN_next(kid);
04086 }
04087 return;
04088 }
04089
04090 OPERATOR oper = OPCODE_operator(opcode);
04091 if ((oper == OPR_LDID) || (oper == OPR_CONST) ||
04092 (is_store=(OPCODE_operator(opcode) == OPR_STID))) {
04093
04094 if (se_needed && se_needed->Find(wn)==1)
04095 ar->Enter_Innermost_Scalar_Expand(wn);
04096 else {
04097
04098 TYPE_ID type;
04099 if (is_store) {
04100 type = OPCODE_desc(opcode);
04101 } else {
04102 type = OPCODE_rtype(opcode);
04103 }
04104 if (_is_floating_point && MTYPE_float(type)) {
04105 if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
04106 SYMBOL symb(wn);
04107 (*num_scalar_refs)++;
04108 Enter(&symb, is_store, 1);
04109 } else if ((type == MTYPE_C4) || (type==MTYPE_C8) ||
04110 #if defined(TARG_IA64)
04111 (type == MTYPE_F10) ||
04112 #endif
04113 (type == MTYPE_FQ)) {
04114 SYMBOL symb(wn);
04115 (*num_scalar_refs)+=2;
04116 Enter(&symb, is_store, 2);
04117 } else if (type == MTYPE_CQ
04118 #if defined(TARG_IA64)
04119 || type == MTYPE_CQ
04120 #endif
04121 ) {
04122 SYMBOL symb(wn);
04123 (*num_scalar_refs)+=4;
04124 Enter(&symb, is_store, 4);
04125 }
04126 } else if ( !_is_floating_point && MTYPE_float(type)==FALSE ) {
04127 SYMBOL symb(wn);
04128
04129 if (Integer_Ref_Needs_Reg(wn)) {
04130 (*num_scalar_refs)++;
04131 Enter(&symb, is_store, 1);
04132 }
04133 }
04134 }
04135 } else if (OPCODE_is_store(opcode)) {
04136 Enter_Scalar_Refs(WN_kid0(wn),num_scalar_refs,se_needed,ar);
04137 } else if (!OPCODE_is_load(opcode)) {
04138 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
04139 WN *kid = WN_kid(wn,kidno);
04140 Enter_Scalar_Refs(kid,num_scalar_refs,se_needed,ar);
04141 }
04142 }
04143 }
04144
04145
04146
04147 void SYMBOL_TREE::Enter_Scalar_Refs(WN *wn, ARRAY_REF *ar,
04148 SX_INFO *pi, BOOL *can_be_inner, INT num_loops,
04149 INT outer, INT *num_scalar_refs)
04150 {
04151 OPCODE opcode = WN_opcode(wn);
04152 BOOL is_store = FALSE;
04153
04154 if (opcode == OPC_BLOCK) {
04155 WN *kid = WN_first (wn);
04156 while (kid) {
04157 Enter_Scalar_Refs(kid,ar,pi,can_be_inner,num_loops,outer,
04158 num_scalar_refs);
04159 kid = WN_next(kid);
04160 }
04161 return;
04162 }
04163
04164 if (OPCODE_is_store(opcode)) {
04165 Enter_Scalar_Refs(WN_kid0(wn),ar,pi,can_be_inner,num_loops,outer,
04166 num_scalar_refs);
04167 } else if (!OPCODE_is_load(opcode)) {
04168 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
04169 WN *kid = WN_kid(wn,kidno);
04170 Enter_Scalar_Refs(kid,ar,pi,can_be_inner,num_loops,outer,
04171 num_scalar_refs);
04172 }
04173 }
04174
04175 OPERATOR oper = OPCODE_operator(opcode);
04176 if ((oper == OPR_LDID) || (oper == OPR_CONST) ||
04177 (is_store=(OPCODE_operator(opcode) == OPR_STID))) {
04178 TYPE_ID type;
04179 if (is_store) {
04180 type = OPCODE_desc(opcode);
04181 } else {
04182 type = OPCODE_rtype(opcode);
04183 }
04184 if (_is_floating_point != MTYPE_float(type)) return;
04185 SYMBOL symb(wn);
04186 SX_PITER ii(&pi->Plist);
04187 BOOL found = FALSE;
04188 SX_PNODE *n,*found_n=NULL;
04189 for (n = ii.First(); n && !found; n = ii.Next()) {
04190 if (n->Symbol() == symb) {
04191 found = TRUE;
04192 SX_PNODE::STATUS status = n->Transformable(outer);
04193 if (status != SX_PNODE::SE_NOT_REQD) {
04194 found_n = n;
04195 }
04196 }
04197 }
04198
04199 if (found_n) {
04200 ar->Enter_Scalar_Expand(wn,found_n, can_be_inner, num_loops);
04201 } else if (_is_floating_point && MTYPE_float(type)) {
04202 if ((type == MTYPE_F4) || (type == MTYPE_F8)) {
04203 Enter(&symb, is_store, 1);
04204 (*num_scalar_refs)++;
04205 } else if ((type == MTYPE_C4) || (type==MTYPE_C8) ||
04206 #if defined(TARG_IA64)
04207 (type == MTYPE_F10) ||
04208 #endif
04209 (type == MTYPE_FQ)) {
04210 Enter(&symb, is_store, 2);
04211 (*num_scalar_refs)+=2;
04212 } else if (type == MTYPE_CQ
04213 #if defined(TARG_IA64)
04214 || type == MTYPE_C10
04215 #endif
04216 ) {
04217 Enter(&symb, is_store, 4);
04218 (*num_scalar_refs)+=4;
04219 }
04220 } else if (_is_floating_point==FALSE && MTYPE_float(type)==FALSE) {
04221 if (Integer_Ref_Needs_Reg(wn)) {
04222 (*num_scalar_refs)++;
04223 Enter(&symb, is_store, 1);
04224 }
04225 }
04226 }
04227 }
04228
04229
04230
04231
04232
04233 void SYMBOL_TREE_NODE::Enter(SYMBOL *symbol, MEM_POOL *pool, BOOL is_store,
04234 INT weight)
04235 {
04236 INT result = Symbol_Compare(symbol);
04237 if (result == 0) {
04238 if (is_store) _is_store = TRUE;
04239 } else if (result < 0) {
04240 if (_left) {
04241 _left->Enter(symbol,pool,is_store,weight);
04242 } else {
04243 _left = CXX_NEW(SYMBOL_TREE_NODE(*symbol,is_store,weight),pool);
04244 }
04245 } else {
04246 if (_right) {
04247 _right->Enter(symbol,pool,is_store,weight);
04248 } else {
04249 _right = CXX_NEW(SYMBOL_TREE_NODE(*symbol,is_store,weight),pool);
04250 }
04251 }
04252 }
04253
04254
04255 INT SYMBOL_TREE_NODE::Num_Fp_Unstored() const
04256 {
04257 INT result=0;
04258 if (!_is_store && MTYPE_float(_symbol.Type))
04259 result+=_weight;
04260 if (_left) result += _left->Num_Fp_Unstored();
04261 if (_right) result += _right->Num_Fp_Unstored();
04262 return result;
04263 }
04264
04265
04266 INT SYMBOL_TREE_NODE::Num_Int_Unstored() const
04267 {
04268 INT result=0;
04269 if (!_is_store && !MTYPE_float(_symbol.Type))
04270 result+=_weight;
04271 if (_left) result += _left->Num_Int_Unstored();
04272 if (_right) result += _right->Num_Int_Unstored();
04273 return result;
04274 }
04275
04276
04277
04278
04279 void
04280 REGISTER_MODEL::Calculate_Register_Usage(WN* inner,
04281 INT* fp_regs_used_out,
04282 INT* int_regs_used_out,
04283 INT* tlb_out)
04284 {
04285 Evaluate(inner,NULL,NULL,NULL,fp_regs_used_out,int_regs_used_out,tlb_out);
04286 }
04287
04288 void
04289 REGISTER_MODEL::Evaluate(WN* inner,
04290 WN2INT* se_needed,
04291 HASH_TABLE<WN*,BIT_VECTOR*>* invar_table,
04292 double* loop_cycles,
04293 INT* fp_regs_used_out,
04294 INT* int_regs_used_out,
04295 INT* tlb_out)
04296 {
04297 INT base_fp_regs;
04298 INT32 fp_regs_used;
04299 INT32 int_regs_used;
04300 INT num_fp_scalar_refs = 0;
04301 INT num_int_scalar_refs = 0;
04302 INT issue_rate;
04303 INT num_mem_units;
04304 INT num_fp_regs, num_fp_array_refs;
04305 INT num_fp_variant_stores, num_fp_invariant_stores;
04306 INT num_int_regs, num_int_array_refs;
04307 INT num_int_variant_stores, num_int_invariant_stores;
04308
04309 BOOL register_only = (se_needed == NULL && loop_cycles == NULL);
04310
04311 MEM_POOL_Push(_pool);
04312
04313 INT base_int_regs = Reserved_Int_Regs;
04314
04315
04316 if (Is_Target_R8K()) {
04317 issue_rate = 4;
04318 base_fp_regs = 18;
04319 num_mem_units = 2;
04320 } else if (Is_Target_R10K()) {
04321 issue_rate = 4;
04322 base_fp_regs = 14;
04323 num_mem_units = 1;
04324 } else if (Is_Target_R4K()) {
04325 issue_rate = 1;
04326 base_fp_regs = 12;
04327 num_mem_units = 1;
04328 } else if (Is_Target_R5K()) {
04329 issue_rate = 2;
04330 base_fp_regs = 14;
04331 num_mem_units = 1;
04332 #ifdef TARG_MIPS
04333 } else if (Is_Target_Sb1()) {
04334 issue_rate = 4;
04335 base_fp_regs = 18;
04336 num_mem_units = 2;
04337 #endif
04338 #ifdef TARG_X8664
04339 } else if (Is_Target_x86_64()) {
04340 issue_rate = 3;
04341 base_fp_regs = 16;
04342 num_mem_units = 2;
04343 #endif
04344 #ifdef TARG_IA64
04345 } else if (Is_Target_Itanium()) {
04346 Lmt_DevWarn(1, ("TODO: Tune LNO machine model parameters for IA-64"));
04347 issue_rate = 6;
04348 base_fp_regs = 32;
04349 num_mem_units = 2;
04350 #endif
04351 } else {
04352 Lmt_DevWarn(1, ("TODO: LNO machine model parameters are just wild guesses"));
04353 issue_rate = 4;
04354 base_fp_regs = 4;
04355 num_mem_units = 2;
04356 }
04357
04358
04359 ARRAY_REF *array_ref = CXX_NEW(ARRAY_REF(_pool),_pool);
04360 INT i;
04361 for (i=0; i<_statement_stack->Elements(); i++) {
04362 array_ref->Add_References(_statement_stack->Bottom_nth(i), 1,NULL);
04363 }
04364
04365 *tlb_out = array_ref->Elements();
04366
04367
04368 SYMBOL_TREE *fp_symbol_tree =
04369 CXX_NEW(SYMBOL_TREE(TRUE,_pool),_pool);
04370 SYMBOL_TREE *int_symbol_tree =
04371 CXX_NEW(SYMBOL_TREE(FALSE,_pool),_pool);
04372 for (i=0; i<_statement_stack->Elements(); i++) {
04373 fp_symbol_tree->Enter_Scalar_Refs(_statement_stack->Bottom_nth(i),
04374 &num_fp_scalar_refs,
04375 se_needed,array_ref);
04376 int_symbol_tree->Enter_Scalar_Refs(_statement_stack->Bottom_nth(i),
04377 &num_int_scalar_refs,
04378 se_needed,array_ref);
04379 }
04380 INT scalar_fp_regs = fp_symbol_tree->Num_Fp_Unstored();
04381 INT scalar_int_regs = int_symbol_tree->Num_Int_Unstored();
04382 CXX_DELETE(fp_symbol_tree,_pool);
04383 CXX_DELETE(int_symbol_tree,_pool);
04384
04385 INT inner_number = Do_Loop_Depth(inner);
04386 num_fp_array_refs = array_ref->Num_Fp_Refs();
04387 num_int_array_refs = array_ref->Num_Int_Refs();
04388 array_ref->Remove_Cse(inner_number, Max_Cse_Dist,
04389 Find_Step(inner,inner_number));
04390 array_ref->Mark_Invariants(inner_number);
04391
04392 INT num_fp_refs;
04393 INT num_int_refs;
04394 INT num_fp_spills=0;
04395 INT num_int_spills=0;
04396
04397 array_ref->Calc_Regs_And_Refs(
04398 &num_fp_regs,&num_fp_refs,&num_fp_variant_stores,
04399 &num_fp_invariant_stores,
04400 &num_int_regs,&num_int_refs,&num_int_variant_stores,
04401 &num_int_invariant_stores);
04402
04403 if (num_fp_invariant_stores > 4*num_fp_variant_stores) {
04404 base_fp_regs /= 3;
04405
04406 }
04407
04408 fp_regs_used = base_fp_regs + scalar_fp_regs + num_fp_regs;
04409 int_regs_used = base_int_regs + scalar_int_regs + num_int_regs;
04410
04411 BOOL can_reg_allocate = TRUE;
04412 if (fp_regs_used > Target_FPRs) {
04413 double fp_refs_per_reg = (num_fp_array_refs + num_fp_scalar_refs)/
04414 (num_fp_regs +scalar_fp_regs);
04415 num_fp_spills =
04416 (INT)((num_fp_regs+base_fp_regs+scalar_fp_regs - Target_FPRs) *
04417 fp_refs_per_reg);
04418 num_fp_refs += num_fp_spills;
04419 can_reg_allocate = FALSE;
04420 }
04421 if (int_regs_used > Target_INTRs) {
04422
04423 double int_refs_per_reg = (num_int_array_refs + num_int_scalar_refs)/
04424 (num_int_regs +scalar_int_regs);
04425 num_int_spills =
04426 (INT)((num_int_regs+base_int_regs+scalar_int_regs - Target_INTRs)*
04427 int_refs_per_reg);
04428 num_int_refs += num_int_spills;
04429 can_reg_allocate = FALSE;
04430 }
04431
04432 if (register_only && can_reg_allocate) {
04433 CXX_DELETE(array_ref,_pool);
04434 MEM_POOL_Pop(_pool);
04435 *fp_regs_used_out=fp_regs_used;
04436 *int_regs_used_out=int_regs_used;
04437 return;
04438 }
04439
04440
04441 double OP_issue = 0.0, op_cycles;
04442 op_cycles = LOOP_MODEL::OP_Cycles(this, &OP_issue, invar_table, _pool);
04443 if (op_cycles == -1.0) {
04444 OP_issue = Count_Op();
04445 }
04446
04447 double LOOP_INIT_issue = 2.0;
04448
04449
04450 double MEM_issue = ((double) (num_fp_refs+num_int_refs));
04451 double MEM_rcycles = MEM_issue/num_mem_units;
04452
04453 double MEM_issue_minus_spills = ((double)
04454 (num_fp_refs-num_fp_spills + num_int_refs-num_int_spills));
04455 double MEM_rcycles_minus_spills = MEM_issue_minus_spills/num_mem_units;
04456
04457 double issue_limit =
04458 (OP_issue+LOOP_INIT_issue+MEM_issue) / issue_rate;
04459 double issue_limit_minus_spills =
04460 (OP_issue+LOOP_INIT_issue+MEM_issue_minus_spills)/ issue_rate;
04461 double resource_cycles = MAX(op_cycles,
04462 MAX(MEM_rcycles,issue_limit));
04463 double resource_cycles_minus_spills =
04464 MAX(op_cycles,
04465 MAX(MEM_rcycles_minus_spills,issue_limit_minus_spills));
04466
04467 double cycles = resource_cycles;
04468 double cycles_minus_spills = resource_cycles_minus_spills;
04469
04470 if (can_reg_allocate==FALSE &&
04471 (cycles == cycles_minus_spills)) {
04472
04473
04474 can_reg_allocate = TRUE;
04475 num_fp_regs = Target_FPRs - scalar_fp_regs - base_fp_regs;
04476 num_int_regs = Target_INTRs - scalar_int_regs - base_int_regs;
04477 }
04478
04479 if (can_reg_allocate) {
04480 if (num_fp_regs + base_fp_regs + scalar_fp_regs > Target_FPRs-2) {
04481 cycles *= 1.1;
04482 } else if (num_int_regs + base_int_regs + scalar_int_regs >
04483 Target_INTRs-2){
04484 cycles *= 1.1;
04485 }
04486 }
04487
04488 fp_regs_used = base_fp_regs + scalar_fp_regs + num_fp_regs;
04489 int_regs_used = base_int_regs + scalar_int_regs + num_int_regs;
04490
04491 CXX_DELETE(array_ref,_pool);
04492 MEM_POOL_Pop(_pool);
04493 *fp_regs_used_out=fp_regs_used;
04494 *int_regs_used_out=int_regs_used;
04495
04496 if (*tlb_out > Mhd.L[0].TLB_Entries) {
04497 cycles += Mhd.L[0].TLB_Miss_Penalty * (Mhd.L[0].TLB_Entries-*tlb_out);
04498 }
04499
04500 if (!register_only)
04501 *loop_cycles=cycles;
04502
04503 }
04504
04505
04506
04507
04508
04509 double
04510 REGISTER_MODEL::Count_Op()
04511 {
04512 double result = 0.0;
04513 for (INT i = 0; i < _statement_stack->Elements(); i++) {
04514 result += Count_Op(_statement_stack->Bottom_nth(0));
04515 }
04516 return result;
04517 }
04518
04519 double
04520 REGISTER_MODEL::Count_Op(WN* wn)
04521 {
04522 double result = 0.0;
04523 OPCODE opcode = WN_opcode(wn);
04524 if (opcode == OPC_BLOCK) {
04525 WN *kid = WN_first (wn);
04526 while (kid) {
04527 result += Count_Op(kid);
04528 kid = WN_next(kid);
04529 }
04530 } else if (!OPCODE_is_leaf(opcode)) {
04531 TYPE_ID ti = OPCODE_rtype(opcode);
04532 TYPE_ID ti2 = OPCODE_desc(opcode);
04533 if ((ti == MTYPE_C4) || (ti == MTYPE_C8) || (ti == MTYPE_CQ) ||
04534 (ti2 == MTYPE_C4) || (ti2 == MTYPE_C8) || (ti2 == MTYPE_CQ) ||
04535 #if defined(TARG_IA64)
04536 ti == MTYPE_C10 || ti2 == MTYPE_C10 ||
04537 ti == MTYPE_F10 || ti2 == MTYPE_F10 ||
04538 #endif
04539 (ti == MTYPE_FQ) || (ti2 == MTYPE_FQ)) {
04540 result = 2.0;
04541 } else if ((ti == MTYPE_F4) || (ti == MTYPE_F8) ||
04542 (ti2 == MTYPE_F4) || (ti2 == MTYPE_F8)) {
04543 result = 1.0;
04544 } else if ((ti == MTYPE_B) || (ti == MTYPE_I1) || (ti == MTYPE_I2) ||
04545 (ti == MTYPE_I4) || (ti == MTYPE_I8) || (ti == MTYPE_U1) ||
04546 (ti == MTYPE_U2) || (ti == MTYPE_U4) || (ti == MTYPE_U8) ||
04547 (ti2 == MTYPE_B) || (ti2 == MTYPE_I1) || (ti2 == MTYPE_I2) ||
04548 (ti2 == MTYPE_I4) || (ti2 == MTYPE_I8) || (ti2 == MTYPE_U1) ||
04549 (ti2 == MTYPE_U2) || (ti2 == MTYPE_U4) || (ti2 == MTYPE_U8)) {
04550 result = 1.0;
04551 }
04552 for (INT kidno=0; kidno<WN_kid_count(wn); kidno++) {
04553 WN *kid = WN_kid(wn,kidno);
04554 result += Count_Op(kid);
04555 }
04556 }
04557 return result;
04558 }
04559
04560
04561
04562 void
04563 LOOP_MODEL::Model_Results_Analysis(INT inner,
04564 INT num_loops,
04565 INT outermost_can_be_tiled,
04566 double machine_cycles,
04567 double cache_cycles,
04568 double overhead_cycles)
04569 {
04570 fprintf(LNO_Analysis," (IF_INNER %d \n",
04571 Srcpos_To_Line(SNL_Line_Numbers[inner]));
04572 fprintf(LNO_Analysis," (CYCLES %g \n",_num_cycles_inner);
04573 if (machine_cycles < 0.0) {
04574 if (_OP_resource_count==NULL) {
04575 fprintf(LNO_Analysis," (0 \"Can't model fp resources\")");
04576 } else {
04577 fprintf(LNO_Analysis," (0 \"Requires too many registers\")");
04578 }
04579 } else {
04580 switch (_model_limit) {
04581 case MODEL_LIMIT_UNSET:
04582 fprintf(LNO_Analysis," (%g \"\")\n",machine_cycles);
04583 break;
04584 case MODEL_LIMIT_IDEAL:
04585 fprintf(LNO_Analysis," (%g \"Ideal Schedule\")\n",
04586 machine_cycles);
04587 break;
04588 case MODEL_LIMIT_RES:
04589 fprintf(LNO_Analysis," ");
04590 fprintf(LNO_Analysis,"(%g \"Resource Limited Schedule\")\n",
04591 machine_cycles);
04592 break;
04593 case MODEL_LIMIT_LAT:
04594 fprintf(LNO_Analysis," ");
04595 fprintf(LNO_Analysis,"(%g \"Latency Limited Schedule\")\n",
04596 machine_cycles);
04597 break;
04598 }
04599 }
04600 fprintf(LNO_Analysis," %g\n",cache_cycles);
04601 fprintf(LNO_Analysis," %g)\n",overhead_cycles);
04602 fprintf(LNO_Analysis," (FP_REGISTERS %d) \n",_num_fp_regs_inner);
04603
04604 fprintf(LNO_Analysis," (TRANSFORMATIONS\n");
04605
04606 fprintf(LNO_Analysis," (UNTILED_ORDER");
04607 INT i;
04608 for (i=outermost_can_be_tiled; i<num_loops; i++) {
04609 fprintf(LNO_Analysis," %d",
04610 Srcpos_To_Line(SNL_Line_Numbers[_new_order_inner[i]]));
04611 }
04612 fprintf(LNO_Analysis,")");
04613
04614 INT unroll_entries = 0;
04615 for (i=outermost_can_be_tiled; i<num_loops; i++) {
04616 if (_block_number_inner[i] > 1)
04617 unroll_entries++;
04618 }
04619 if (unroll_entries) {
04620 fprintf(LNO_Analysis,"\n (UNROLL");
04621 for (i=outermost_can_be_tiled; i<num_loops; i++) {
04622 if (_block_number_inner[i] > 1)
04623 fprintf(LNO_Analysis," (%d %d)",
04624 Srcpos_To_Line(SNL_Line_Numbers[i]),_block_number_inner[i]);
04625 }
04626 fprintf(LNO_Analysis,")");
04627 }
04628 if (_nstrips_inner) {
04629 fprintf(LNO_Analysis,"\n (BLOCKING");
04630 for (INT s = 0; s < _nstrips_inner; s++) {
04631 INT i = _iloop_inner[s];
04632 fprintf(LNO_Analysis," (%d %d L%d %d)",
04633 Srcpos_To_Line(SNL_Line_Numbers[_new_order_inner[i]]),
04634 _stripsz_inner[s],
04635 _striplevel_inner[s],
04636 Srcpos_To_Line(SNL_Line_Numbers[_new_order_inner[_stripdepth_inner]]));
04637 }
04638 fprintf(LNO_Analysis,")");
04639 }
04640 fprintf(LNO_Analysis,"))\n");
04641 }
04642