00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "cgir.h"
00032 #include "glob.h"
00033 #include "tn_map.h"
00034 #include "cgtarget.h"
00035 #include "cg_vector.h"
00036 #include "cg_loop.h"
00037 #include "gra_live.h"
00038 #include "freq.h"
00039 #include "findloops.h"
00040 #include "register.h"
00041 #include "ti_res_res.h"
00042 #include "ti_res_count.h"
00043 #include "tracing.h"
00044 #include "config_asm.h"
00045 #include "note.h"
00046 #include "cgexp.h"
00047 #include "lra.h"
00048 #include "wn_util.h"
00049 #include "cg_swp_options.h"
00050 #include <unistd.h>
00051 #include <sys/types.h>
00052 #include <sys/wait.h>
00053 #include <math.h>
00054
00055 #include "cg_sas.h"
00056
00057 #define TN_has_register(t) ( TN_register(t) != REGISTER_UNDEFINED )
00058
00059
00060
00061 void Emit_KEY_SWP_Note( BB* bb, FILE* file )
00062 {
00063 const ANNOTATION* ant = ANNOT_Get( BB_annotations(bb), ANNOT_ROTATING_KERNEL );
00064 const ROTATING_KERNEL_INFO* info = ANNOT_rotating_kernel(ant);
00065 char prefix[20];
00066
00067 if( ROTATING_KERNEL_INFO_succeeded(info) ){
00068 const int Kmin = ROTATING_KERNEL_INFO_min_ii(info);
00069 sprintf( prefix, "%s<swps> ", ASM_CMNT_LINE );
00070 fprintf( file, "%s\n", prefix );
00071 fprintf( file, "%s%3d cycles per 1 iteration in steady state\n",
00072 prefix, ROTATING_KERNEL_INFO_ii(info) );
00073 fprintf( file, "%s%3d pipeline stages\n",
00074 prefix, ROTATING_KERNEL_INFO_stage_count(info) );
00075 fprintf( file, "%s%3d %s of the kernel\n",
00076 prefix, Kmin, Kmin == 1 ? "copy" : "copies" );
00077 fprintf( file, "%s\n", prefix );
00078
00079 sprintf( prefix, "%s<swps> ", ASM_CMNT_LINE );
00080
00081 ISA_REGISTER_CLASS rc;
00082 FOR_ALL_ISA_REGISTER_CLASS( rc ){
00083 const ISA_REGISTER_CLASS_INFO* icinfo = ISA_REGISTER_CLASS_Info( rc );
00084 const REGISTER_SET tmp = ROTATING_KERNEL_INFO_live_in(info)[rc];
00085 const int size = REGISTER_SET_Size( tmp );
00086 if( size > 0 ){
00087 fprintf( file, "%s%d %s registers are required\n",
00088 prefix, size, ISA_REGISTER_CLASS_INFO_Name( icinfo ) );
00089 }
00090 }
00091
00092 fprintf( file, "%s\n", prefix );
00093
00094 const int mii = std::max( ROTATING_KERNEL_INFO_res_min_ii(info),
00095 ROTATING_KERNEL_INFO_rec_min_ii(info) );
00096
00097 fprintf( file, "%smin %d cycles required by resources\n",
00098 prefix, ROTATING_KERNEL_INFO_res_min_ii(info) );
00099 fprintf( file, "%smin %d cycles required by recurrences\n",
00100 prefix, ROTATING_KERNEL_INFO_rec_min_ii(info) );
00101 fprintf( file, "%smin %d cycles required by resources/recurrence\n",
00102 prefix, mii );
00103 fprintf( file, "%smin %d cycles (actual %d cycles) required to schedule one iteration\n",
00104 prefix, ROTATING_KERNEL_INFO_min_sched_len(info),
00105 ROTATING_KERNEL_INFO_sched_len(info) );
00106 fprintf( file, "%s\n", prefix );
00107 TI_RES_COUNT_Emit_Note( prefix, file, ROTATING_KERNEL_INFO_res_counts(info),
00108 ROTATING_KERNEL_INFO_ii(info) );
00109 fprintf( file, "%s\n", prefix );
00110
00111 } else {
00112 sprintf( prefix, "%s<swpf> ", ASM_CMNT_LINE );
00113 fprintf( file, "%s\n", prefix );
00114 char* failure_msg = NULL;
00115
00116 switch( ROTATING_KERNEL_INFO_failure_code(info) ){
00117 case MODULO_SCHED_FAILED:
00118 failure_msg = "unable to find a modulo schedule";
00119 break;
00120 case REGISTER_ALLOC_FAILED:
00121 failure_msg = "not enough registers";
00122 break;
00123 case ONE_STAGE_COUNT_ONLY:
00124 failure_msg = "loop has only one stage count";
00125 break;
00126 case SKIP_FDIV_SQRT:
00127 failure_msg = "loop has fdiv or sqrt operation";
00128 break;
00129 default:
00130
00131 failure_msg = "unknown failure code";
00132 }
00133
00134 fprintf( file, "%s %s\n", prefix, failure_msg );
00135 fprintf( file, "%s\n", prefix );
00136 }
00137 }
00138
00139
00140 #define is_power_of_two(i) (((i) & ((i)-1)) == 0)
00141
00142 extern ARC* new_arc( CG_DEP_KIND, OP*, OP*, UINT8, UINT8, BOOL );
00143 extern void Exp_COPY( TN*, TN*, OPS* );
00144 extern void Emit_SWP_Note( BB*, FILE* );
00145 extern void Unroll_Make_Remainder_Loop( CG_LOOP&, INT32 );
00146 extern void Unroll_Do_Loop_guard( LOOP_DESCR*, LOOPINFO*, TN* );
00147 extern void append_to_prolog( BB* );
00148 extern void Unroll_Do_Loop( CG_LOOP&, UINT32 );
00149 extern void Fix_Backpatches( CG_LOOP&, bool );
00150 extern void extend_prolog();
00151 extern void extend_epilog( LOOP_DESCR* );
00152
00153 typedef struct {
00154 uint16_t ntimes;
00155 bool const_trip;
00156 } NOTE_SWP_HEAD;
00157
00158
00159
00160
00161
00162
00163 struct OP_info {
00164 OP* op;
00165 OP* last_use;
00166 OP* addr_op;
00167 bool is_rv;
00168 int indx;
00169 int cycle;
00170 int order;
00171 int8_t stage;
00172 uint8_t omega[OP_MAX_FIXED_OPNDS];
00173
00174 void Init( OP* opr, bool swp ) {
00175 indx = cycle = order = stage = -1;
00176 op = opr;
00177 is_rv = false;
00178 addr_op = last_use = NULL;
00179
00180 if( swp ){
00181 for( int opnd = 0; opnd < OP_opnds(op); opnd++ ){
00182 TN* tn = OP_opnd( op, opnd );
00183 omega[opnd] = OP_omega( op, opnd );
00184 }
00185 }
00186 }
00187
00188 }* op_info = NULL;
00189
00190 static int OP_info_size = 0;
00191
00192 #define OP_INFO(op) ( &op_info[OP_map_idx(op)] )
00193 #define OP_info_init(op,s) ( (OP_INFO(op))->Init(op,s) )
00194 #define OP_info_op(op) ( (OP_INFO(op))->op )
00195 #define OP_info_verify(op) ( Is_True( (OP_INFO(op))->op == (op), ("") ) )
00196 #define OP_info_indx(op) ( (OP_INFO(op))->indx )
00197 #define OP_info_cycle(op) ( (OP_INFO(op))->cycle )
00198 #define OP_info_order(op) ( (OP_INFO(op))->order )
00199 #define OP_info_stage(op) ( (OP_INFO(op))->stage )
00200 #define OP_info_last_use(op) ( (OP_INFO(op))->last_use )
00201 #define OP_info_omega(op,i) ( (OP_INFO(op))->omega[i] )
00202 #define OP_info_reset() (bzero( op_info, sizeof(op_info[0])*OP_info_size ));
00203 #define OP_info_is_rv(op) ( (OP_INFO(op))->is_rv )
00204 #define OP_info_addr_op(op) ( (OP_INFO(op))->addr_op )
00205
00206
00207 inline static void OP_info_move( OP* from, OP* to )
00208 {
00209 int to_idx = OP_map_idx( to );
00210 int from_idx = OP_map_idx( from );
00211
00212 Is_True( to_idx < OP_info_size, ("") );
00213 Is_True( op_info[to_idx].op == NULL, ("") );
00214
00215 op_info[to_idx] = op_info[from_idx];
00216 op_info[to_idx].op = to;
00217 op_info[from_idx].op = NULL;
00218 }
00219
00220 #ifndef cg_swp_INCLUDED
00221
00222
00223
00224 inline bool operator<(CLASS_REG_PAIR x, CLASS_REG_PAIR y) {
00225 return CLASS_REG_PAIR_class_n_reg(x) < CLASS_REG_PAIR_class_n_reg(y);
00226 }
00227
00228 inline bool operator==(CLASS_REG_PAIR x, CLASS_REG_PAIR y) {
00229 return memcmp(&x, &y, sizeof(CLASS_REG_PAIR)) == 0;
00230 }
00231 #endif
00232
00233
00234 static unsigned int bb_count = 0;
00235 static const char* ddg_file = tempnam( "/tmp/", "key_sch" );
00236 static const char* sched_file = tempnam( "/tmp/", "q" );
00237 static char func_name[128];
00238 static const bool dump_ddg = false;
00239
00240 inline static uint16_t log2( uint32_t n )
00241 {
00242 uint16_t result = 0;
00243 Is_True( n > 0, ("") );
00244
00245 while( ( 1 << result ) <= n ){
00246 result++;
00247 }
00248
00249 return result-1;
00250 }
00251
00252
00253 static void preconditioning_head_note_handler( NOTE_ACTION action, NOTE_INFO* info,
00254 FILE *file )
00255 {
00256 const NOTE_SWP_HEAD* info_u = (const NOTE_SWP_HEAD*)info;
00257 const int ntimes = info_u->ntimes;
00258 const bool const_trip = info_u->const_trip;
00259
00260 switch( action ){
00261 case NOTE_PRINT_TO_FILE:
00262 if( const_trip ){
00263 fprintf(file,
00264 "%s<loop> Preconditioning loop (%d iteration%s)\n",
00265 ASM_CMNT_LINE, ntimes, ntimes == 1 ? "" : "s");
00266 } else {
00267 fprintf(file,
00268 "%s<loop> Preconditioning loop (at most %d iteration%s)\n",
00269 ASM_CMNT_LINE, ntimes-1, ntimes-1 == 1 ? "" : "s");
00270 }
00271 if (ntimes == 0)
00272 DevWarn( "Found Preconditioning head note with ntimes = 0" );
00273 break;
00274 case NOTE_PRINT_TO_ANL_FILE:
00275
00276 break;
00277 case NOTE_PRINT_HANDLER_NAME_TO_FILE:
00278 fprintf(file, "preconditioning_head_note_handler");
00279 break;
00280 default:
00281 Is_True(FALSE, ("Didn't recognize action"));
00282 }
00283 }
00284
00285
00286
00287 static void prolog_note_handler( NOTE_ACTION action, NOTE_INFO* info, FILE *file )
00288 {
00289 const NOTE_SWP_HEAD* info_u = (const NOTE_SWP_HEAD*)info;
00290 const int ntimes = info_u->ntimes;
00291 const bool const_trip = info_u->const_trip;
00292
00293 switch( action ){
00294 case NOTE_PRINT_TO_FILE:
00295 if( const_trip ){
00296 fprintf( file,
00297 "%s<loop> SWP Prolog (%d stage%s)\n",
00298 ASM_CMNT_LINE, ntimes, ntimes == 1 ? "" : "s" );
00299 } else {
00300 fprintf( file,
00301 "%s<loop> SWP Prolog (at most %d stage%s)\n",
00302 ASM_CMNT_LINE, ntimes-1, ntimes-1 == 1 ? "" : "s" );
00303 }
00304
00305 break;
00306
00307 case NOTE_PRINT_HANDLER_NAME_TO_FILE:
00308 fprintf( file, "prolog_note_handler" );
00309 break;
00310
00311 default:
00312 Is_True( FALSE, ("Didn't recognize action") );
00313 }
00314 }
00315
00316
00317 static int sort_by_scycle( const void* a, const void* b )
00318 {
00319 OP* op1 = *(OP**)a;
00320 OP* op2 = *(OP**)b;
00321
00322 if( OP_info_cycle( op1 ) < OP_info_cycle( op2 ) )
00323 return -1;
00324
00325 if( OP_info_cycle( op1 ) > OP_info_cycle( op2 ) )
00326 return 1;
00327
00328
00329 if( OP_info_stage( op1 ) > OP_info_stage( op2 ) )
00330 return -1;
00331
00332 if( OP_info_stage( op1 ) < OP_info_stage( op2 ) )
00333 return 1;
00334
00335
00336 if( OP_info_order( op1 ) < OP_info_order( op2 ) )
00337 return -1;
00338
00339 return 1;
00340 }
00341
00342
00343 void KEY_SCH::Emit_TN( char* buf, int size, TN* tn ) const
00344 {
00345 char *result = buf;
00346
00347 FmtAssert( tn != NULL, ("") );
00348
00349 if( TN_is_constant(tn) ){
00350 if( TN_has_value(tn) ){
00351 buf += sprintf( buf, "(0x%llx)", TN_value(tn) );
00352 if (TN_size(tn) == 4 &&
00353 TN_value(tn) >> 32 != 0 &&
00354 TN_value(tn) >> 31 != -1)
00355 buf += sprintf( buf, "!!! TN_value=0x%llx is too big to fit in a word",
00356 TN_value(tn) );
00357 }
00358 else if (TN_is_enum(tn)) {
00359 buf += sprintf( buf, "(enum:%s)", ISA_ECV_Name(TN_enum(tn)) );
00360 }
00361 else if ( TN_is_label(tn) ) {
00362 LABEL_IDX lab = TN_label(tn);
00363 const char *name = LABEL_name(lab);
00364 int64_t offset = TN_offset(tn);
00365 if ( offset == 0 ) {
00366 buf += sprintf( buf, "(lab:%s)", name );
00367 }
00368 else {
00369 buf += sprintf( buf, "(lab:%s+%lld)", name, offset );
00370 }
00371 }
00372 else if ( TN_is_tag(tn) ) {
00373 LABEL_IDX lab = TN_label(tn);
00374 const char *name = LABEL_name(lab);
00375 buf += sprintf( buf, "(tag:%s)", name );
00376 }
00377 else if ( TN_is_symbol(tn) ) {
00378 ST *var = TN_var(tn);
00379 buf += sprintf( buf, "(sym" );
00380 char* p = buf;
00381 if (ST_class(var) == CLASS_CONST)
00382 buf += sprintf( buf, ":%s)", Targ_Print(NULL, ST_tcon_val(var)));
00383 else
00384 buf += sprintf( buf, ":%s%+lld)", ST_name(var), TN_offset(tn) );
00385
00386
00387 while( p < buf - 1 ){
00388 if( *p == '(' )
00389 *p = '[';
00390 else if( *p == ')' )
00391 *p = ']';
00392 p++;
00393 }
00394 }
00395 else {
00396 FmtAssert( false, ("") );
00397 }
00398
00399 } else {
00400 if( TN_is_global_reg(tn) ){
00401 buf += sprintf( buf, "GTN%d", TN_number(tn) );
00402 } else {
00403 buf += sprintf( buf, "TN%d", TN_number(tn) );
00404 }
00405
00406 if( TN_register(tn) != REGISTER_UNDEFINED ){
00407 if (TN_register(tn) <= REGISTER_CLASS_last_register(TN_register_class(tn))) {
00408 buf += sprintf(buf, "(%s)",
00409 REGISTER_name(TN_register_class(tn), TN_register(tn)));
00410 } else {
00411 buf += sprintf( buf, "(%d,%d)", TN_register_class(tn), TN_register(tn) );
00412 }
00413 }
00414
00415 if( TN_is_save_reg(tn) ){
00416 buf += sprintf( buf, "(sv:%s)",
00417 REGISTER_name(TN_save_rclass(tn), TN_save_reg(tn)) );
00418 }
00419 }
00420
00421 FmtAssert( buf - result < size, ("") );
00422 }
00423
00424
00425 void KEY_SCH::Emit_Src_DDG() const
00426 {
00427 OP* op;
00428 char buf[1024];
00429 int size = sizeof(buf);
00430 FILE* t = fopen( ddg_file, dump_ddg ? "a" : "w" );
00431 FmtAssert( t != NULL, ("Emit_Src_DDG: fail to open %s",ddg_file) );
00432
00433 fprintf( t, "\n{\n" );
00434
00435
00436 fprintf( t, "\tbb: %s:%s:%d\n", Obj_File_Name, func_name, bb_count );
00437 fprintf( t, "\tops: %d\n", nOps );
00438 if( perform_swp )
00439 fprintf( t, "\tswp\n" );
00440 else
00441 fprintf( t, "\tlist\n" );
00442
00443 FOR_ALL_BB_OPs( kernel, op ){
00444 FmtAssert( BB_id(OP_bb(op)) == BB_id(kernel), ("") );
00445
00446
00447
00448 bool cg_loop_op = Is_CG_LOOP_Op(op);
00449
00450 fprintf( t, "\top%d: ", OP_info_indx(op) );
00451
00452 for( int i = 0; i < OP_results(op); i++ ){
00453 Emit_TN( buf, size, OP_result(op,i) );
00454 fprintf( t , "%s ", buf );
00455 }
00456
00457 fprintf(t, "= ");
00458 fprintf(t, "%s ", TOP_Name(OP_code(op)));
00459 for( int i=0; i<OP_opnds(op); i++ ){
00460 TN *tn = OP_opnd(op,i);
00461 Emit_TN( buf, size, tn );
00462 fprintf( t, "%s ", buf );
00463 if ( cg_loop_op ) {
00464 int omega = TN_is_symbol(tn) ? OP_restore_omega(op) : OP_omega(op,i);
00465 if (omega) fprintf(t, "[%d]", omega);
00466 }
00467
00468 fprintf(t, " ");
00469 }
00470
00471 #if 0
00472
00473 if (OP_glue(op)) fprintf (t, " glue");
00474 if (OP_no_alias(op)) fprintf (t, " noalias");
00475 if (OP_copy(op)) fprintf (t, " copy");
00476 if (OP_volatile(op)) fprintf (t, " volatile");
00477 if (OP_side_effects(op)) fprintf (t, " side_effects");
00478 if (OP_hoisted(op)) fprintf (t, " hoisted");
00479 if (OP_cond_def(op)) fprintf (t, " cond_def");
00480 if (OP_end_group(op)) fprintf (t, " end_group");
00481 if (OP_tail_call(op)) fprintf (t, " tail_call");
00482 if (OP_no_move_before_gra(op)) fprintf (t, " no_move");
00483 if (OP_spadjust_plus(op)) fprintf (t, " spadjust_plus");
00484 if (OP_spadjust_minus(op)) fprintf (t, " spadjust_minus");
00485
00486 if (OP_unrolling(op)) {
00487 uint16_t unr = OP_unrolling(op);
00488 fprintf(t, " %d%s unrolling", unr,
00489 unr == 1 ? "st" : unr == 2 ? "nd" : unr == 3 ? "rd" : "th");
00490 }
00491 #endif
00492 fprintf( t, " {" );
00493
00494
00495
00496 for( ARC_LIST* arcs = OP_succs(op); arcs != NULL; arcs = ARC_LIST_rest(arcs) ){
00497 ARC* arc = ARC_LIST_first(arcs);
00498 OP* succ_op = ARC_succ(arc);
00499 uint16_t succ_id = OP_info_indx(succ_op);
00500 CG_DEP_KIND kind = ARC_kind(arc);
00501
00502 fprintf( t, "\n\t\top%d: ", succ_id );
00503
00504 if( kind == CG_DEP_REGIN )
00505 fprintf( t, "raw opnd(%d) ", ARC_opnd(arc) );
00506 else if( kind == CG_DEP_REGANTI )
00507 fprintf( t, "war opnd(%d) ", ARC_opnd(arc) );
00508 else if ( kind == CG_DEP_REGOUT )
00509 fprintf( t, "waw " );
00510 else
00511 fprintf( t, "misc " );
00512
00513 fprintf( t, "latency(%d) omega(%d) ", ARC_latency(arc), ARC_omega(arc) );
00514 }
00515
00516 if( OP_succs(op) != NULL )
00517 fprintf( t, "\n\t" );
00518
00519 fprintf( t, "}\n" );
00520 }
00521
00522 fprintf( t, "}\n" );
00523 fclose( t );
00524 }
00525
00526
00527 void KEY_SCH::Schedule_DDG()
00528 {
00529 static char* solver = NULL;
00530
00531 if( solver == NULL ){
00532 (void*)solver = (char*)malloc( sizeof(solver[0]) * 256 );
00533
00534 if( CG_Path == NULL ){
00535 char* path = getenv( "LD_LIBRARY_PATH" );
00536 Is_True( path != NULL, ("environment variable LD_LIBRARY_PATH is not set") );
00537 strcpy( solver, path );
00538 path = strchr( solver, ':' );
00539 if( path == NULL )
00540 strcat( solver, "/sas" );
00541 else
00542 strcpy( path, "/sas" );
00543
00544 } else {
00545 sprintf( solver, "%s/sas", CG_Path );
00546 }
00547 }
00548
00549 #if 0
00550 if( system( solver ) < 0 ){
00551 FmtAssert( false, ("") );
00552 }
00553
00554 return;
00555 #else
00556 pid_t childpid;
00557
00558
00559 if( ( childpid = fork() ) == 0 ){
00560 close(0); close(1);
00561
00562 execlp( solver, solver, ddg_file, "-o", sched_file,(char*)0 );
00563 FmtAssert( FALSE, ("KEY_SCH: fail to execute %s", solver) );
00564
00565 } else {
00566
00567 int status;
00568 FmtAssert( childpid > 0, ("") );
00569
00570 if( waitpid( childpid, &status, 0 ) != childpid ){
00571 FmtAssert( false, ("") );
00572 }
00573
00574 if( status != 0 ){
00575 Gen_Kernel_Fail_Info( MODULO_SCHED_FAILED );
00576 Is_True( false, ("KEY_SAS FAILED to SCHEDULE %s", ddg_file) );
00577 }
00578 }
00579 #endif
00580 }
00581
00582
00583 void KEY_SCH::GetLine( FILE* f, char* line )
00584 {
00585 int len = 0, C = EOF;
00586
00587 while( ( C = fgetc( f ) ) != EOF ){
00588 char c = (char)C;
00589 if( c == '\n' || c == '\r' ){
00590 break;
00591 }
00592
00593 if( c != ' ' && c != '\t' ){
00594 line[len++] = c;
00595 }
00596 }
00597
00598 is_eof = C == EOF;
00599
00600 line[len] = '\0';
00601 }
00602
00603
00604 void KEY_SCH::Collect_Sched_Info()
00605 {
00606 char line[1024];
00607 FILE* f = fopen( sched_file, "r" );
00608 FmtAssert( f != NULL, ("") );
00609 OP* op = BB_first_op( kernel );
00610 int indx = 0;
00611 uint32_t big = 0;
00612
00613 is_eof = false;
00614 min_sched_len = sched_len = sc = res_mii = rec_mii = mii = -1;
00615
00616 while( !is_eof ){
00617 char* p = NULL;
00618
00619 GetLine( f, line );
00620
00621
00622 if( perform_swp &&
00623 mii < 0 &&
00624 ( p = strstr( line, "swp:" ) ) != NULL ){
00625 p = strstr( p, "mii" );
00626 if( p == NULL )
00627 continue;
00628
00629 mii = atoi( &p[3] );
00630 FmtAssert( mii > 0, ("") );
00631 p = strstr( p, "res_mii" );
00632 res_mii = atoi( &p[7] );
00633 p = strstr( p, "rec_mii" );
00634 rec_mii = atoi( &p[7] );
00635
00636 continue;
00637 }
00638
00639 if( !( p = strstr( line, "cycle" ) ) )
00640 continue;
00641 if( p[5] > '9' || p[5] < '0' )
00642 continue;
00643
00644 OP_info_cycle( op ) = atoi( &p[5] );
00645 sched_len = MAX( sched_len, OP_info_cycle(op) );
00646 if( !OP_br( op ) ){
00647 min_sched_len = MAX( min_sched_len, OP_info_cycle(op) );
00648 }
00649
00650 if( perform_swp ){
00651 OP_info_stage( op ) = OP_info_cycle( op ) / mii;
00652
00653 if( OP_info_stage( op ) > 0 &&
00654 !OP_br( op ) ){
00655 for( int opnd = 0; opnd < OP_opnds(op); opnd++ ){
00656 TN* tn = OP_opnd( op, opnd );
00657 if( TN_is_register(tn) &&
00658 !TN_is_const_reg(tn) ){
00659 int new_omega = OP_info_omega( op, opnd ) + OP_info_stage( op );
00660
00661
00662 OP_info_omega( op, opnd ) = new_omega;
00663 }
00664 }
00665 }
00666 }
00667
00668 p = strstr( p, "order" );
00669 FmtAssert( p != NULL, ("") );
00670 OP_info_order( op ) = atoi( &p[5] );
00671 #if 0
00672 if( trace ){
00673 fprintf( TFile, "op%d: cycle%d stage%d order%d\n",
00674 OP_info_indx(op), OP_info_cycle(op),
00675 OP_info_stage(op), OP_info_order(op) );
00676 }
00677 #endif
00678 FmtAssert( OP_info_indx(op) == indx, ("") );
00679 op = OP_next( op );
00680 indx++;
00681 }
00682
00683 if( perform_swp ){
00684 sched_len++;
00685 min_sched_len++;
00686 sc = (int)ceil( (double)sched_len / mii );
00687 if( sc < 2 ){
00688 Gen_Kernel_Fail_Info( ONE_STAGE_COUNT_ONLY );
00689 }
00690 }
00691
00692 FmtAssert( indx == nOps, ("") );
00693
00694 fclose( f );
00695 }
00696
00697
00698 void KEY_SCH::Reorder_Kernel( ARRAY_ELEMENT_CMP_FUNC cmp_func )
00699 {
00700 OP* op = NULL;
00701
00702 VECTOR_Reset( schedule );
00703
00704 int indx = 0;
00705 FOR_ALL_BB_OPs( kernel, op ){
00706
00707 OP_info_verify( op );
00708 Is_True( OP_info_indx( op ) == indx, ("") );
00709
00710 VECTOR_Add_Element( schedule, op );
00711 indx++;
00712 }
00713
00714 VECTOR_Sort( schedule, cmp_func );
00715
00716
00717 Is_True( BB_length(kernel) == nOps, ("") );
00718 BB_Remove_All( kernel );
00719
00720 for( int i = 0; i < nOps; i++ ){
00721 OP* op = (OP*)VECTOR_element( schedule, i );
00722 int old_map_idx = OP_map_idx( op );
00723
00724 OP_info_order( op ) = i;
00725 OP_scycle( op ) = OP_info_cycle( op );
00726 BB_Append_Op( kernel, op );
00727
00728 if( old_map_idx != OP_map_idx(op) ){
00729 Is_True( OP_map_idx(op) < OP_info_size, ("") );
00730 struct OP_info tmp = op_info[OP_map_idx(op)];
00731
00732 op_info[OP_map_idx(op)] = op_info[old_map_idx];
00733 op_info[old_map_idx] = tmp;
00734
00735 if( tmp.op != NULL ){
00736 (tmp.op)->map_idx = old_map_idx;
00737 }
00738 }
00739 }
00740
00741
00742
00743 return;
00744 }
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756
00757 void KEY_SCH::Handle_Ldst_Addiu()
00758 {
00759
00760
00761
00762 return;
00763
00764 OP* mem_op = NULL;
00765
00766 FOR_ALL_BB_OPs_REV( kernel, mem_op ){
00767
00768 if( !OP_load( mem_op ) )
00769 continue;
00770
00771
00772
00773
00774 const INT ofst_idx = TOP_Find_Operand_Use( OP_code(mem_op), OU_offset );
00775 if( !TN_has_value( OP_opnd( mem_op, ofst_idx ) ) )
00776 continue;
00777
00778 const INT base_idx = TOP_Find_Operand_Use( OP_code(mem_op), OU_base );
00779 OP* addr_op = NULL;
00780 ARC* addr_arc = NULL;
00781
00782 for( ARC_LIST* arcs = OP_preds(mem_op); arcs != NULL; arcs = ARC_LIST_rest(arcs) ){
00783 ARC* arc = ARC_LIST_first(arcs);
00784 OP* op = ARC_pred( arc );
00785
00786 if( ARC_kind(arc) == CG_DEP_REGIN ){
00787 if( ARC_opnd(arc) == base_idx &&
00788 CGTARG_Is_OP_Addr_Incr( op ) ){
00789 addr_op = op;
00790 addr_arc = arc;
00791 }
00792 break;
00793 }
00794 }
00795
00796 if( addr_op == NULL )
00797 continue;
00798
00799
00800 const INT64 addiu_const = TN_value( OP_opnd( addr_op, 1 ) );
00801 const INT64 ld_const = TN_value( OP_opnd( mem_op, ofst_idx ) ) + addiu_const;
00802
00803 if( !TOP_Can_Have_Immediate( ld_const, OP_code(mem_op) ) )
00804 continue;
00805
00806 for( ARC_LIST* arcs = OP_succs(addr_op); arcs != NULL; arcs = ARC_LIST_rest(arcs)) {
00807 ARC* arc = ARC_LIST_first(arcs);
00808 FmtAssert( ARC_kind(arc) != CG_DEP_REGOUT, ("") );
00809 }
00810
00811
00812 TN* old_ofst_tn = OP_opnd( mem_op, ofst_idx );
00813 TN* new_ofst_tn = Gen_Literal_TN( ld_const, TN_size( old_ofst_tn ) );
00814
00815 Set_OP_opnd( mem_op, ofst_idx, new_ofst_tn );
00816
00817
00818 CG_DEP_Detach_Arc( addr_arc );
00819
00820
00821 for( ARC_LIST* arcs = OP_preds(addr_op); arcs != NULL; arcs = ARC_LIST_rest(arcs)) {
00822 ARC* arc = ARC_LIST_first(arcs);
00823 if( ARC_kind(arc) == CG_DEP_REGIN ){
00824 new_arc( CG_DEP_REGIN, ARC_pred(arc), mem_op,
00825 ARC_omega(arc), base_idx, FALSE );
00826 break;
00827 }
00828 }
00829
00830
00831 if( OP_opnd( mem_op, base_idx ) == OP_result( addr_op, 0 ) ){
00832 new_arc( CG_DEP_REGOUT, mem_op, addr_op, 0, 0, FALSE );
00833
00834 }
00835 }
00836 }
00837
00838
00839
00840
00841
00842
00843
00844 void KEY_SCH::Construct_addr_vector()
00845 {
00846 OP* addr_op = NULL;
00847
00848 FOR_ALL_BB_OPs( kernel, addr_op ){
00849
00850 if( !CGTARG_Is_OP_Addr_Incr( addr_op ) )
00851 continue;
00852
00853 Is_True( OP_code(addr_op) == TOP_addiu, ("") );
00854
00855 ARC_LIST* arcs = NULL;
00856
00857 for( arcs = OP_preds(addr_op); arcs != NULL; arcs = ARC_LIST_rest(arcs) ){
00858 ARC* arc = ARC_LIST_first(arcs);
00859 OP* pred_op = ARC_pred(arc);
00860 if( pred_op != addr_op )
00861 break;
00862 }
00863
00864 if( arcs != NULL )
00865 continue;
00866
00867 OP_info_is_rv( addr_op ) = true;
00868
00869 for( arcs = OP_succs(addr_op); arcs != NULL; ){
00870 ARC* arc = ARC_LIST_first(arcs);
00871 OP* succ_op = ARC_succ(arc);
00872
00873 arcs = ARC_LIST_rest(arcs);
00874
00875 if( ARC_kind(arc) != CG_DEP_REGIN ){
00876 FmtAssert( false, ("") );
00877 continue;
00878 }
00879
00880 if( succ_op == addr_op ){
00881 ;
00882
00883 } else if( OP_memory( succ_op ) ){
00884 const int ofst_idx = TOP_Find_Operand_Use( OP_code(succ_op), OU_offset );
00885 const int base_idx = TOP_Find_Operand_Use( OP_code(succ_op), OU_base );
00886
00887
00888
00889 if( OP_info_omega( succ_op, base_idx ) == 0 )
00890 continue;
00891
00892 if( !TN_has_value( OP_opnd( succ_op, ofst_idx ) ) ||
00893 ARC_opnd(arc) != base_idx )
00894 continue;
00895
00896 const INT64 addiu_const = TN_value( OP_opnd( addr_op, 1 ) );
00897
00898 const INT64 ofst_const =
00899 TN_value( OP_opnd( succ_op, ofst_idx ) ) - addiu_const * 8;
00900
00901 if( !TOP_Can_Have_Immediate( ofst_const, OP_code(succ_op) ) )
00902 continue;
00903
00904 OP_info_addr_op( succ_op ) = addr_op;
00905
00906 } else if( OP_br( succ_op ) ){
00907 ;
00908 } else
00909 continue;
00910
00911
00912 CG_DEP_Detach_Arc( arc );
00913 }
00914
00915
00916
00917 for( ARC_LIST* arcs = OP_preds(addr_op); arcs != NULL; ){
00918 ARC* arc = ARC_LIST_first(arcs);
00919
00920
00921 Is_True( ARC_kind(arc) != CG_DEP_REGANTI, ("") );
00922 arcs = ARC_LIST_rest(arcs);
00923 CG_DEP_Detach_Arc( arc );
00924 }
00925 }
00926 }
00927
00928
00929 void KEY_SCH::Adjust_incr( OP* addr_op )
00930 {
00931 const TN* old_incr_tn = OP_opnd( addr_op, 1 );
00932 const INT64 incr_const = TN_value( old_incr_tn ) * Kmin;
00933
00934 TN* new_incr_tn = Gen_Literal_TN( incr_const, TN_size( old_incr_tn ) );
00935 Set_OP_opnd( addr_op, 1, new_incr_tn );
00936 }
00937
00938
00939
00940
00941 void KEY_SCH::Adjust_ofst( const OP* addr_op, OP* mem_op ) const
00942 {
00943
00944 const INT64 addiu_const = TN_value( OP_opnd( addr_op, 1 ) );
00945 const int base_idx = TOP_Find_Operand_Use( OP_code(mem_op), OU_base );
00946
00947
00948
00949
00950 const int ofst_idx = TOP_Find_Operand_Use( OP_code(mem_op), OU_offset );
00951 const TN* old_ofst_tn = OP_opnd( mem_op, ofst_idx );
00952
00953
00954
00955 int omega = 0;
00956 const int unrollings = Kmin + sc - 1;
00957
00958 int start = OP_info_cycle( addr_op ) + ( 1 + OP_unrolling( mem_op ) ) * mii;
00959
00960 for( int unrolling = OP_unrolling( mem_op) + 1;
00961 unrolling < unrollings;
00962 unrolling++ ){
00963 if( start >= OP_scycle( mem_op) )
00964 break;
00965
00966 omega++;
00967 start += mii;
00968 }
00969
00970 if( OP_info_order( addr_op ) < OP_info_order( mem_op ) ){
00971 if( OP_info_indx( addr_op ) > OP_info_indx( mem_op ) )
00972 omega++;
00973
00974 } else {
00975 if( OP_info_indx( addr_op ) < OP_info_indx( mem_op ) )
00976 omega--;
00977 }
00978
00979
00980
00981 if( omega == 0 )
00982 return;
00983
00984 const INT64 ofst_const = TN_value( old_ofst_tn ) - addiu_const * omega;
00985 Is_True( TOP_Can_Have_Immediate( ofst_const, OP_code(mem_op) ), ("") );
00986 TN* new_ofst_tn = Gen_Literal_TN( ofst_const, TN_size( old_ofst_tn ) );
00987
00988
00989 Set_OP_opnd( mem_op, ofst_idx, new_ofst_tn );
00990
00991 if( trace ){
00992 fprintf( TFile, "old: %lld, new: %lld\n",
00993 TN_value(old_ofst_tn), TN_value(new_ofst_tn) );
00994 fprintf( TFile, "offset changed:" );
00995 Print_OP_No_SrcLine( mem_op );
00996 }
00997 }
00998
00999
01000 void KEY_SCH::Schedule_Kernel()
01001 {
01002
01003 int max_indx = 0;
01004 OP* op;
01005
01006 FOR_ALL_BB_OPs( kernel, op ){
01007 max_indx = std::max( max_indx, OP_map_idx(op) );
01008 }
01009
01010 OP_info_size = max_indx + 1 + 1;
01011 (void*)op_info = (OP_info *)MEM_POOL_Alloc( mem_pool, ( sizeof(op_info[0]) * OP_info_size ) );
01012 OP_info_reset();
01013 max_indx = 0;
01014 FOR_ALL_BB_OPs( kernel, op ){
01015 OP_info_init( op, perform_swp );
01016 OP_info_indx(op) = max_indx++;
01017 }
01018
01019 nOps = max_indx;
01020 FmtAssert( nOps == BB_length(kernel), ("") );
01021
01022 schedule = VECTOR_Init( nOps, mem_pool );
01023
01024 if( perform_swp )
01025 Construct_addr_vector();
01026 else
01027 Handle_Ldst_Addiu();
01028
01029 Emit_Src_DDG();
01030
01031 Schedule_DDG();
01032
01033 Collect_Sched_Info();
01034
01035 bb_count++;
01036
01037 if( unlink( ddg_file ) != 0 ||
01038 unlink( sched_file ) != 0 ){
01039 FmtAssert( false, ("") );
01040 }
01041 }
01042
01043
01044 KEY_SCH::KEY_SCH( BB* bb, MEM_POOL* pool )
01045 : kernel( bb ), mem_pool( pool ), perform_swp( false )
01046 {
01047 success = true;
01048
01049 if( BB_entry( kernel ) ){
01050 ANNOTATION* ant = ANNOT_Get( BB_annotations(kernel), ANNOT_ENTRYINFO );
01051 ENTRYINFO* ent = ANNOT_entryinfo( ant );
01052 strcpy( func_name, ST_name( ENTRYINFO_name(ent) ) );
01053 }
01054
01055 if( func_name[0] == '\0' ){
01056 strcpy( func_name, "unknown" );
01057 }
01058
01059 Schedule_Kernel();
01060 Reorder_Kernel( sort_by_scycle );
01061 }
01062
01063
01064 void KEY_SCH::Assign_Register( TN* tn )
01065 {
01066 Is_True( TN_is_register( tn ) && !TN_has_register( tn ), ("") );
01067
01068 if( reg_allocation.find(tn) != reg_allocation.end() )
01069 return;
01070
01071 CLASS_REG_PAIR rp;
01072 ISA_REGISTER_CLASS rc = TN_register_class(tn);
01073
01074 REGISTER r = REGISTER_SET_Choose_Intersection( avail_reg_set[rc],
01075 REGISTER_CLASS_caller_saves(rc) );
01076 if( r == REGISTER_UNDEFINED ){
01077 r = REGISTER_SET_Choose( avail_reg_set[rc] );
01078 if( r == REGISTER_UNDEFINED ){
01079 FmtAssert( false, ("SWP_KEY: run out of registers unexpectedly.") );
01080 return;
01081 }
01082 }
01083
01084 avail_reg_set[rc] = REGISTER_SET_Difference1( avail_reg_set[rc], r );
01085 Set_CLASS_REG_PAIR( rp, rc, r );
01086 reg_allocation[tn] = rp;
01087
01088 Is_True( CLASS_REG_PAIR_rclass(rp) >= ISA_REGISTER_CLASS_MIN &&
01089 CLASS_REG_PAIR_rclass(rp) <= ISA_REGISTER_CLASS_MAX,
01090 ("SWP_KEY: invalid register class %d\n", CLASS_REG_PAIR_rclass(rp)));
01091
01092 Set_TN_class_reg( tn, rp );
01093 Set_TN_is_dedicated( tn );
01094 Is_True( reg2tn_map.find(rp) == reg2tn_map.end(), ("") );
01095 reg2tn_map[rp] = tn;
01096
01097 if( trace ){
01098 fPrint_TN( TFile, "KEY_SWP REG ALLOC: %s\n", tn );
01099 }
01100 }
01101
01102
01103 void KEY_SCH::Add_Glue( TN* result, TN* opnd, BB* bb, GLUE_DIRECTION dir )
01104 {
01105 if( result == opnd )
01106 return;
01107
01108 OPS ops = OPS_EMPTY;
01109 Exp_COPY( result, opnd, &ops );
01110
01111 OP* op = NULL;
01112
01113 FOR_ALL_OPS_OPs( &ops, op ){
01114 Set_OP_glue( op );
01115 }
01116
01117 if( trace ){
01118 fprintf( TFile, "Add_Glue: " );
01119 Print_OPS_No_SrcLines( &ops );
01120 }
01121
01122 if( dir == APPEND )
01123 BB_Append_Ops( bb, &ops );
01124 else
01125 BB_Prepend_Ops( bb, &ops );
01126 }
01127
01128
01129 void KEY_SCH::register_allocation_init()
01130 {
01131 bool enough_reg = true;
01132
01133 static unsigned int swp_count = 0;
01134
01135 ISA_REGISTER_CLASS rc;
01136 int reg_num_required[ISA_REGISTER_CLASS_MAX+1];
01137
01138 FOR_ALL_ISA_REGISTER_CLASS( rc ){
01139 avail_reg_set[rc] = REGISTER_CLASS_allocatable( rc );
01140 reg_num_required[rc] = 0;
01141 }
01142
01143
01144 TN_SET* tn_defs = TN_SET_Create_Empty( Last_TN + 1, mem_pool );
01145 TN_SET* tn_uses = TN_SET_Create_Empty( Last_TN + 1, mem_pool );
01146
01147 tn_live_ins = TN_SET_Create_Empty( Last_TN + 1, mem_pool );
01148
01149 OP* op = NULL;
01150
01151 const TN_SET* live_outs = BB_live_in( epilog );
01152
01153 FOR_ALL_BB_OPs( kernel, op ){
01154 for( INT j = 0; j < OP_opnds(op); j++ ){
01155 TN* tn = OP_opnd(op, j);
01156 if( TN_is_register(tn) && !TN_is_dedicated(tn) ){
01157 tn_uses = TN_SET_Union1D( tn_uses, tn, mem_pool );
01158 if( OP_omega( op, j ) > 0 &&
01159 !TN_SET_MemberP( tn_defs, tn ) ){
01160 tn_live_ins = TN_SET_Union1D( tn_live_ins, tn, mem_pool );
01161 }
01162 }
01163 }
01164
01165 for( INT i = 0; i < OP_results(op); i++ ){
01166 TN* tn = OP_result(op, i);
01167 if( TN_is_register(tn) && !TN_is_dedicated(tn) ){
01168 tn_defs = TN_SET_Union1D( tn_defs, tn, mem_pool );
01169 }
01170 }
01171 }
01172
01173
01174 tn_invariants = TN_SET_Difference( tn_uses, tn_defs, mem_pool );
01175 tn_live_ins = TN_SET_Difference( tn_live_ins, tn_invariants, mem_pool );
01176 tn_live_outs = TN_SET_Difference( tn_defs, BB_live_in(epilog), mem_pool );
01177
01178
01179
01180 for( TN* tn = TN_SET_Choose(tn_invariants);
01181 tn != TN_SET_CHOOSE_FAILURE;
01182 tn = TN_SET_Choose_Next(tn_invariants,tn) ){
01183 if( TN_is_dedicated( tn ) ){
01184 Is_True( false, ("step thru it") );
01185 tn_invariants = TN_SET_Difference1( tn_invariants, tn, mem_pool );
01186 } else {
01187 const ISA_REGISTER_CLASS rc = TN_register_class(tn);
01188 reg_num_required[rc]++;
01189 }
01190 }
01191
01192 if( trace ){
01193 if( BS_Size( tn_live_ins ) > 0 ){
01194 fprintf( TFile, "%d live_ins: GTN", BS_Size( tn_live_ins ) );
01195 TN_SET_Print( tn_live_ins, TFile );
01196 fprintf( TFile, "\n" );
01197 }
01198
01199 if( BS_Size( tn_invariants ) > 0 ){
01200 fprintf( TFile, "%d invariants: GTN", BS_Size( tn_invariants ) );
01201 TN_SET_Print( tn_invariants, TFile );
01202 fprintf( TFile, "\n" );
01203 }
01204
01205 if( BS_Size( tn_live_outs ) > 0 ){
01206 fprintf( TFile, "%d live_outs: GTN", BS_Size( tn_live_outs ) );
01207 TN_SET_Print( tn_live_outs, TFile );
01208 fprintf( TFile, "\n" );
01209 }
01210 }
01211
01212
01213
01214 FOR_ALL_BB_OPs( kernel, op ){
01215 for( int i = 0; i < OP_results(op); i++ ){
01216 TN* result = OP_result( op, i );
01217 if( TN_is_dedicated( result ) ||
01218 OP_info_last_use( op ) == NULL ||
01219 TN_SET_MemberP( tn_invariants, result ) )
01220 continue;
01221
01222 const ISA_REGISTER_CLASS rc = TN_register_class( result );
01223
01224 const int start = OP_info_cycle( op );
01225 const int end = OP_info_cycle( OP_info_last_use( op ) );
01226
01227
01228
01229 int k_min = 1 + ( end - start - 1 ) / mii;
01230
01231 while( ( Kmin % k_min ) != 0 ){
01232 k_min++;
01233 }
01234
01235 reg_num_required[rc] += k_min;
01236 }
01237 }
01238
01239 FOR_ALL_ISA_REGISTER_CLASS( rc ){
01240 if( reg_num_required[rc] == 0 )
01241 continue;
01242
01243 int avail_reg_num = REGISTER_SET_Size( avail_reg_set[rc] );
01244
01245 if( trace ){
01246 const ISA_REGISTER_CLASS_INFO* icinfo = ISA_REGISTER_CLASS_Info( rc );
01247 const char* rcname = ISA_REGISTER_CLASS_INFO_Name( icinfo );
01248 fprintf( TFile, "(BB:%d, rc:%s)\tAvail:%d\tRequired:%d",
01249 BB_id(kernel), rcname, avail_reg_num, reg_num_required[rc] );
01250
01251 if( avail_reg_num < reg_num_required[rc] ){
01252 fprintf( TFile, " (%d more registers are needed)",
01253 reg_num_required[rc] - avail_reg_num );
01254 }
01255
01256 fprintf( TFile, "\n" );
01257 #if 0
01258 REGISTER_SET_Print( avail_reg_set[rc], TFile );
01259 fprintf( TFile, "\n\tAvail(Registers):" );
01260 REGISTER reg;
01261 FOR_ALL_REGISTER_SET_members( avail_reg_set[rc], reg ){
01262 fprintf( TFile, " %s", REGISTER_name( rc, reg ) );
01263 }
01264 #endif
01265 }
01266
01267 if( avail_reg_num < reg_num_required[rc] ){
01268 enough_reg = false;
01269 }
01270 }
01271
01272 if( !enough_reg )
01273 Gen_Kernel_Fail_Info( REGISTER_ALLOC_FAILED );
01274 else
01275 swp_count++;
01276
01277 return;
01278 }
01279
01280
01281 TN* KEY_SCH::New_live_out_tn( TN* live_out )
01282 {
01283
01284 TN* new_live_out = CG_LOOP_Backpatch_Find_Body_TN( prolog, live_out, NULL );
01285
01286 if( new_live_out != NULL ){
01287 CG_LOOP_Backpatch_Add( epilog, live_out, new_live_out, 0 );
01288 }
01289
01290 return new_live_out;
01291 }
01292
01293
01294 TN* KEY_SCH::New_live_in_tn( TN* live_in )
01295 {
01296 TN* new_live_in = CG_LOOP_Backpatch_Find_Body_TN( prolog, live_in, NULL );
01297
01298 if( new_live_in == NULL ){
01299 new_live_in = Dup_TN( live_in );
01300 CG_LOOP_Backpatch_Add( prolog, live_in, new_live_in, 0 );
01301
01302 if( TN_SET_MemberP( tn_invariants, live_in ) ){
01303 CG_LOOP_Backpatch_Add( epilog, live_in, new_live_in, 0 );
01304 }
01305
01306 if( trace ){
01307 if( TN_SET_MemberP( tn_invariants, live_in ) )
01308 fPrint_TN( TFile, "KEY_SWP: invariant %s ", live_in );
01309 else
01310 fPrint_TN( TFile, "KEY_SWP: live_in %s ", live_in );
01311
01312 fPrint_TN( TFile, "is renamed as %s\n", new_live_in );
01313 }
01314
01315 Assign_Register( new_live_in );
01316 }
01317
01318 return new_live_in;
01319 }
01320
01321
01322
01323 void KEY_SCH::rename_invariants()
01324 {
01325 OP* op = NULL;
01326
01327 FOR_ALL_BB_OPs( kernel, op ){
01328 for( int i = 0; i < OP_opnds(op); i++ ){
01329 TN* opnd = OP_opnd( op, i );
01330 if( !TN_is_register( opnd ) ||
01331 TN_is_dedicated( opnd ) )
01332 continue;
01333
01334 if( TN_SET_MemberP( tn_invariants, opnd ) ||
01335 TN_SET_MemberP( tn_live_ins, opnd ) ){
01336 TN* new_body_tn = New_live_in_tn( opnd );
01337 Set_OP_opnd( op, i, new_body_tn );
01338 }
01339 }
01340
01341
01342 for( int i = 0; i < OP_results(op); i++ ){
01343 TN* result = OP_result( op, i );
01344 if( TN_is_dedicated( result ) )
01345 continue;
01346
01347 if( TN_SET_MemberP( tn_live_outs, result ) ){
01348 TN* new_result_tn = New_live_out_tn( result );
01349
01350 if( new_result_tn != NULL )
01351 Set_OP_result( op, i, new_result_tn );
01352 }
01353 }
01354 }
01355 }
01356
01357
01358 void KEY_SCH::Gen_Kernel_Fail_Info( SWP_FAILURE_CODE failure_code )
01359 {
01360
01361 ROTATING_KERNEL_INFO* info = TYPE_PU_ALLOC( ROTATING_KERNEL_INFO );
01362
01363 bzero( info, sizeof(info[0]) );
01364 ROTATING_KERNEL_INFO_succeeded(info) = false;
01365 ROTATING_KERNEL_INFO_failure_code(info) = failure_code;
01366 BB_Add_Annotation( kernel, ANNOT_ROTATING_KERNEL, (void*)info );
01367 Reset_BB_rotating_kernel( kernel );
01368
01369 success = false;
01370 }
01371
01372
01373 void KEY_SCH::Gen_Kernel_Info()
01374 {
01375 OP* op = NULL;
01376
01377
01378 ROTATING_KERNEL_INFO* info = TYPE_PU_ALLOC( ROTATING_KERNEL_INFO );
01379 bzero( info, sizeof(info[0]) );
01380
01381 ISA_REGISTER_CLASS rc;
01382
01383 FOR_ALL_ISA_REGISTER_CLASS( rc ){
01384 REGISTER_SET tmp = REGISTER_CLASS_allocatable(rc);
01385 tmp = REGISTER_SET_Difference( tmp, avail_reg_set[rc] );
01386 ROTATING_KERNEL_INFO_live_in(info)[rc] = tmp;
01387 ROTATING_KERNEL_INFO_kill(info)[rc] = tmp;
01388 }
01389
01390
01391 TI_RES_COUNT* res_counts = TI_RES_COUNT_Alloc( MEM_pu_pool_ptr );
01392
01393
01394 for( op = BB_first_op( kernel ); op != NULL; op = OP_next( op ) ){
01395 if( OP_scycle( op ) < mii )
01396 TI_RES_COUNT_Add_Op_Resources( res_counts, OP_code(op) );
01397 }
01398
01399
01400 ROTATING_KERNEL_INFO_succeeded(info) = success;
01401 ROTATING_KERNEL_INFO_ii(info) = mii;
01402 ROTATING_KERNEL_INFO_stage_count(info) = sc;
01403
01404 ROTATING_KERNEL_INFO_min_ii(info) = Kmin;
01405 ROTATING_KERNEL_INFO_res_min_ii(info) = res_mii;
01406 ROTATING_KERNEL_INFO_rec_min_ii(info) = rec_mii;
01407 ROTATING_KERNEL_INFO_sched_len(info) = sched_len;
01408 ROTATING_KERNEL_INFO_min_sched_len(info) = min_sched_len;
01409 ROTATING_KERNEL_INFO_res_counts(info) = res_counts;
01410
01411 FOR_ALL_BB_OPs( glue_prolog, op ){
01412 if( OP_glue(op) ){
01413 for( INT k = 0; k < OP_results(op); k++ ){
01414 TN* tn = OP_result( op, k );
01415 if( !TN_is_const_reg(tn) )
01416 ROTATING_KERNEL_INFO_copyin(info).push_back(tn);
01417 }
01418 }
01419 }
01420
01421 FOR_ALL_BB_OPs( glue_epilog, op ){
01422 if( OP_glue(op) ){
01423 for( INT j = 0; j < OP_opnds(op); j++ ){
01424 TN* tn = OP_opnd( op, j );
01425 if( !TN_is_const_reg(tn) )
01426 ROTATING_KERNEL_INFO_copyout(info).push_back(tn);
01427 }
01428 }
01429 }
01430
01431 BB_Add_Annotation( kernel, ANNOT_ROTATING_KERNEL, (void *)info );
01432
01433 if( trace ){
01434 Emit_SWP_Note( kernel, TFile );
01435 }
01436 }
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
01449 void KEY_SCH::tn_renaming( const int unrollings )
01450 {
01451 OP* op = NULL;
01452
01453 tn_renames = TN_MAP_Create();
01454
01455 FOR_ALL_BB_OPs( kernel, op ){
01456 for( int i = 0; i < OP_results(op); i++ ){
01457 TN* result = OP_result( op, i );
01458
01459 if( TN_MAP_Get( tn_renames, result ) != NULL )
01460 continue;
01461
01462 TN** entry = TYPE_MEM_POOL_ALLOC_N( TN*, mem_pool, unrollings );
01463 TN_MAP_Set( tn_renames, result, entry );
01464
01465 if( TN_is_dedicated( result ) ||
01466 OP_info_last_use( op ) == NULL ){
01467 for( int unrolling = 0; unrolling < unrollings; unrolling++ ){
01468 entry[unrolling] = result;
01469 }
01470
01471 } else {
01472 entry[0] = result;
01473
01474
01475 const int start = OP_info_cycle( op );
01476 const int end = OP_info_cycle( OP_info_last_use( op ) );
01477
01478
01479
01480 int k_min = 1 + ( end - start - 1 ) / mii;
01481
01482 while( ( Kmin % k_min ) != 0 ){
01483 k_min++;
01484 }
01485
01486 for( int unrolling = 1; unrolling < unrollings; unrolling++ ){
01487 TN* tn = result;
01488
01489 if( unrolling < k_min ){
01490
01491
01492 tn = Dup_TN( result );
01493
01494 } else {
01495 int pos = unrolling % k_min;
01496 tn = entry[pos];
01497 }
01498
01499 entry[unrolling] = tn;
01500 }
01501 }
01502
01503
01504 for( int unrolling = 0; unrolling < unrollings; unrolling++ ){
01505 TN* tn = entry[unrolling];
01506 if( !TN_has_register( tn ) )
01507 Assign_Register( tn );
01508 }
01509 }
01510 }
01511 }
01512
01513
01514
01515
01516
01517 TN* KEY_SCH::rename_tn( TN* tn, const int unrolling, const int omega ) const
01518 {
01519 if( TN_is_register(tn) ){
01520 const int indx = unrolling - omega;
01521 Is_True( indx >= -1, ("") );
01522
01523 if( indx == -1 ){
01524 return tn;
01525 }
01526
01527 TN** entry = (TN**)TN_MAP_Get( tn_renames, tn );
01528 return ( entry == NULL ? tn : entry[indx] );
01529 }
01530
01531 return tn;
01532 }
01533
01534
01535 static int sort_swp_kernel( const void* a, const void* b )
01536 {
01537 OP* op1 = *(OP**)a;
01538 OP* op2 = *(OP**)b;
01539
01540 if( OP_scycle( op1 ) < OP_scycle( op2 ) )
01541 return -1;
01542
01543 if( OP_scycle( op1 ) > OP_scycle( op2 ) )
01544 return 1;
01545
01546
01547
01548
01549 if( OP_unrolling( op1 ) < OP_unrolling( op2 ) )
01550 return -1;
01551
01552 if( OP_unrolling( op1 ) > OP_unrolling( op2 ) )
01553 return 1;
01554
01555 if( OP_info_order( op1 ) < OP_info_order( op2 ) )
01556 return -1;
01557
01558 return 1;
01559 }
01560
01561
01562 static void Create_Region( BB* bb )
01563 {
01564 RID* r = RID_Create(New_Region_Id(), 0, NULL);
01565 RID_has_reg_alloc_Set(r);
01566 RID_level(r) = RL_CG;
01567 RID_type(r) = RID_TYPE_swp;
01568 RID_bounds_exist(r) = REGION_BOUND_UNKNOWN;
01569 RID_has_return(r) = REGION_NO_RETURN;
01570 RID_num_exits(r) = 1;
01571 RID_is_glue_code(r) = FALSE;
01572 RID* parent = BB_rid( bb );
01573 RID_parent(r) = parent;
01574 RID_cginfo(r) = NULL;
01575 if( parent != NULL )
01576 RID_Add_kid( r, parent );
01577
01578 BB_rid( bb ) = r;
01579 }
01580
01581
01582
01583 void KEY_SCH::Gen_PKE( CG_LOOP& cl )
01584 {
01585
01586 Create_Region( kernel );
01587
01588 Set_BB_scheduled( kernel );
01589 Set_BB_reg_alloc( kernel );
01590
01591 Reorder_Kernel( sort_by_scycle );
01592
01593 rename_invariants();
01594 tn_renaming( Kmin + sc - 1 );
01595
01596 if( trace ){
01597 CG_LOOP_Backpatch_Trace( prolog, NULL );
01598 CG_LOOP_Backpatch_Trace( epilog, NULL );
01599 Print_OPS_No_SrcLines( &kernel->ops );
01600 fprintf( TFile, "\n" );
01601 }
01602
01603
01604 OP* br_op = BB_branch_op( kernel );
01605 const bool br_op_is_last = BB_last_op(kernel) == br_op;
01606 BB_Remove_Op( kernel, br_op );
01607
01608 VECTOR kernel_vec = VECTOR_Init( Kmin * BB_length(kernel), mem_pool );
01609 const int length = BB_length(kernel) * ( sc - 1 );
01610 VECTOR prolog_vec = VECTOR_Init( length, mem_pool );
01611 VECTOR epilog_vec = VECTOR_Init( length, mem_pool );
01612
01613
01614
01615 OP* op;
01616 FOR_ALL_BB_OPs( kernel, op ){
01617 Set_OP_unrolling( op, 0 );
01618 }
01619
01620
01621 const int unrollings = Kmin + sc - 1;
01622
01623 for( int unrolling = 0; unrolling < unrollings; unrolling++ ){
01624 OP* mom = NULL;
01625
01626 FOR_ALL_BB_OPs( kernel, mom ){
01627 OP* new_op = Dup_OP( mom );
01628 CG_LOOP_Init_Op( new_op );
01629 Copy_WN_For_Memory_OP( new_op, mom );
01630
01631 Set_OP_unrolling( new_op, unrolling );
01632 Set_OP_orig_idx( new_op, OP_map_idx(mom) );
01633
01634 for( int opnd = 0; opnd < OP_opnds(mom); opnd++ ){
01635 TN* new_tn = rename_tn( OP_opnd(mom,opnd), unrolling, OP_omega(mom,opnd) );
01636
01637 if( TN_is_register(new_tn) && !TN_has_register(new_tn) ){
01638 Is_True( false, ("") );
01639 Assign_Register( new_tn );
01640 }
01641
01642 Set_OP_opnd( new_op, opnd, new_tn );
01643 Set_OP_omega( new_op, opnd, 0 );
01644 }
01645
01646 for( int res = 0; res < OP_results(mom); res++ ){
01647 TN* new_res = rename_tn( OP_result(mom,res), unrolling, 0 );
01648 if( TN_is_register(new_res) && !TN_has_register(new_res) ){
01649 Is_True( false, ("") );
01650 Assign_Register( new_res );
01651 }
01652
01653 Set_OP_result( new_op, res, new_res );
01654 }
01655
01656 const int scycle = OP_info_cycle( mom ) + unrolling * mii;
01657 OP_scycle( new_op ) = scycle;
01658
01659 if( OP_info_addr_op( mom ) != NULL )
01660 Adjust_ofst( OP_info_addr_op( mom ), new_op );
01661
01662
01663
01664 if( scycle < ( sc - 1 ) * mii ){
01665 VECTOR_Add_Element( prolog_vec, new_op );
01666 Set_OP_unroll_bb( new_op, prolog );
01667
01668 } else if( scycle < ( sc - 1 + Kmin ) * mii ){
01669 VECTOR_Add_Element( kernel_vec, new_op );
01670 Set_OP_unroll_bb( new_op, kernel );
01671 OP_scycle( new_op ) = scycle - ( sc - 1 ) * mii;
01672 #if 0
01673
01674 if( OP_results( new_op ) == 1 ){
01675 TN* body_tn = OP_result( new_op, 0 );
01676
01677 if( REGISTER_CLASS_can_store( TN_register_class( body_tn ) ) &&
01678 CG_LOOP_Backpatch_Find_Non_Body_TN( prolog, body_tn, 0 ) == NULL ){
01679 TN* non_body_tn = Build_TN_Like( body_tn );
01680 Set_TN_register_and_class( non_body_tn,
01681 TN_register_and_class( body_tn ) );
01682 Set_TN_is_dedicated( non_body_tn );
01683 CG_LOOP_Backpatch_Add( prolog, non_body_tn, body_tn, 0 );
01684 }
01685 }
01686 #endif
01687 } else {
01688 VECTOR_Add_Element( epilog_vec, new_op );
01689 Set_OP_unroll_bb( new_op, epilog );
01690 OP_scycle( new_op ) = scycle - ( sc - 1 + Kmin ) * mii;
01691 }
01692 }
01693 }
01694
01695 for( int opnd = 0; opnd < OP_opnds(br_op); opnd++ ){
01696 TN* new_tn = rename_tn( OP_opnd(br_op,opnd), unrollings-1, OP_omega(br_op,opnd) );
01697 Set_OP_opnd( br_op, opnd, new_tn );
01698 Set_OP_omega( br_op, opnd, 0 );
01699 }
01700
01701 OP_scycle( br_op ) = Kmin * mii - 1;
01702
01703 TN_MAP_Delete( tn_renames );
01704
01705
01706 glue_prolog = prolog;
01707 for( CG_LOOP_BACKPATCH* bp = CG_LOOP_Backpatch_First(prolog, NULL);
01708 bp != NULL;
01709 bp = CG_LOOP_Backpatch_Next(bp) ){
01710 TN* non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01711 TN* body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01712
01713 Add_Glue( body_tn, non_body_tn, glue_prolog, APPEND );
01714 }
01715
01716
01717 glue_epilog = epilog;
01718 for( CG_LOOP_BACKPATCH* bp = CG_LOOP_Backpatch_First(epilog, NULL);
01719 bp != NULL;
01720 bp = CG_LOOP_Backpatch_Next(bp) ){
01721 TN* body_tn = CG_LOOP_BACKPATCH_body_tn(bp);
01722 TN* non_body_tn = CG_LOOP_BACKPATCH_non_body_tn(bp);
01723
01724 Add_Glue( non_body_tn, body_tn, glue_epilog, PREPEND );
01725 }
01726
01727 extend_prolog();
01728 prolog = CG_LOOP_prolog;
01729
01730 extend_epilog( cl.Loop() );
01731 epilog = CG_LOOP_epilog;
01732
01733 #if 0
01734 Create_Region( prolog );
01735 Set_BB_reg_alloc( prolog );
01736 Create_Region( epilog );
01737 Set_BB_reg_alloc( epilog );
01738 #endif
01739
01740
01741
01742 VECTOR_Sort( prolog_vec, &sort_swp_kernel );
01743 VECTOR_Sort( kernel_vec, &sort_swp_kernel );
01744 VECTOR_Sort( epilog_vec, &sort_swp_kernel );
01745
01746 BB_Remove_All( kernel );
01747 for( int i = 0; i < VECTOR_count( kernel_vec ); i++ ){
01748 op = (OP*)VECTOR_element( kernel_vec, i );
01749 BB_Append_Op( kernel, op );
01750 }
01751
01752
01753 OP_info_reset();
01754
01755 if( br_op_is_last )
01756 BB_Append_Op( kernel, br_op );
01757 else
01758 BB_Insert_Op_Before( kernel, BB_last_op(kernel), br_op );
01759
01760 for( int i = 0; i < VECTOR_count( prolog_vec ); i++ ){
01761 op = (OP*)VECTOR_element( prolog_vec, i );
01762 BB_Append_Op( prolog, op );
01763 }
01764
01765 for( int i = VECTOR_count( epilog_vec ) - 1; i >= 0; i-- ){
01766 op = (OP*)VECTOR_element( epilog_vec, i );
01767 BB_Prepend_Op( epilog, op );
01768 }
01769
01770 if( trace ){
01771 Print_OPS_No_SrcLines( &prolog->ops );
01772 fprintf( TFile, "\n" );
01773 Print_OPS_No_SrcLines( &kernel->ops );
01774 fprintf( TFile, "\n" );
01775 Print_OPS_No_SrcLines( &epilog->ops );
01776 fprintf( TFile, "\n" );
01777 }
01778
01779
01780 NOTE_SWP_HEAD* note = TYPE_MEM_POOL_ALLOC( NOTE_SWP_HEAD, &MEM_pu_nz_pool );
01781 note->const_trip = true;
01782 note->ntimes = sc - 1;
01783 NOTE_Add_To_BB( prolog, prolog_note_handler, (NOTE_INFO*)note );
01784
01785 cl.Recompute_Liveness();
01786
01787
01788 Gen_Kernel_Info();
01789
01790
01791 const ANNOTATION* annot = ANNOT_Get( BB_annotations(kernel), ANNOT_LOOPINFO );
01792 LOOPINFO* info = ANNOT_loopinfo(annot);
01793 const TN* trip_count = LOOPINFO_trip_count_tn(info);
01794
01795 if( TN_is_constant(trip_count) ){
01796 int new_trip_count_val = ( TN_value(trip_count) - sc + 1 ) / Kmin;
01797 LOOPINFO_trip_count_tn(info) =
01798 Gen_Literal_TN(new_trip_count_val, TN_size(trip_count));
01799 }
01800 }
01801
01802
01803
01804
01805 void KEY_SCH::Loop_Peeling( const int ntimes, TN* trip_count_tn ) const
01806 {
01807 if( ntimes < 2 )
01808 return;
01809
01810 const INT32 trip_size = TN_size( trip_count_tn );
01811 const float exit_prob = 1.0 / ntimes;
01812 const BOOL freqs = FREQ_Frequencies_Computed();
01813 OPS ops = OPS_EMPTY;
01814
01815 BB* loop_entry = Gen_BB_Like( prolog );
01816 Link_Pred_Succ_with_Prob( prolog, loop_entry, exit_prob );
01817
01818 if( freqs || BB_freq_fb_based(prolog) )
01819 BB_freq( loop_entry ) += BB_freq(prolog) * exit_prob;
01820 if( freqs )
01821 Change_Succ_Prob( prolog, BB_next(prolog), 1.0 - exit_prob );
01822
01823
01824
01825 if( is_power_of_two(ntimes) )
01826 Exp_OP2( trip_size == 4 ? OPC_U4BAND : OPC_U8BAND,
01827 trip_count_tn,
01828 trip_count_tn,
01829 Gen_Literal_TN(ntimes-1, trip_size),
01830 &ops );
01831 else
01832 Exp_OP2( trip_size == 4 ? OPC_U4MOD : OPC_U8MOD,
01833 trip_count_tn,
01834 trip_count_tn,
01835 Gen_Literal_TN(ntimes, trip_size),
01836 &ops );
01837
01838 Exp_OP3v( OPC_FALSEBR,
01839 NULL,
01840 Gen_Label_TN( Gen_Label_For_BB(loop_entry), 0 ),
01841 trip_count_tn,
01842 Zero_TN,
01843 V_BR_I8EQ,
01844 &ops );
01845
01846 BB_Append_Ops( prolog, &ops );
01847
01848 OP* op = NULL;
01849 BB* new_body = Gen_BB_Like( kernel );
01850
01851 OPS_Remove_All( &ops );
01852
01853 FOR_ALL_BB_OPs( kernel, op ){
01854 if( !OP_br( op ) ){
01855 OP* new_op = Dup_OP(op);
01856 CG_LOOP_Init_Op( new_op );
01857 Copy_WN_For_Memory_OP( new_op, op );
01858 BB_Append_Op( new_body, new_op );
01859 }
01860 }
01861
01862 Exp_OP2( trip_size == 4 ? OPC_I4ADD : OPC_I8ADD,
01863 trip_count_tn,
01864 trip_count_tn,
01865 Gen_Literal_TN(-1, trip_size),
01866 &ops );
01867
01868 Exp_OP3v( OPC_TRUEBR,
01869 NULL,
01870 Gen_Label_TN( Gen_Label_For_BB(new_body), 0 ),
01871 trip_count_tn,
01872 Zero_TN,
01873 V_BR_I8NE,
01874 &ops );
01875
01876 BB_Append_Ops( new_body, &ops);
01877
01878 INT64 trip_est = ( TN_is_constant(trip_count_tn) ? TN_value(trip_count_tn)
01879 : MIN((1 + ntimes) / 2, 2) );
01880
01881 Link_Pred_Succ_with_Prob( new_body, new_body, 1.0 - exit_prob );
01882
01883
01884
01885 float body_freq = 0.0;
01886
01887 if( freqs || BB_freq_fb_based(kernel) ){
01888
01889 body_freq = BB_freq(CG_LOOP_prolog) * (trip_est - 1);
01890 if (freqs && trip_est > 0)
01891 BBLIST_prob(BB_succs(new_body)) = (trip_est - 1.0) / trip_est;
01892 }
01893
01894 append_to_prolog( new_body );
01895 append_to_prolog( loop_entry );
01896
01897 if( freqs || BB_freq_fb_based(new_body) )
01898 BB_freq(new_body) = body_freq;
01899 }
01900
01901
01902 void KEY_SCH::Peeling_For_Unknown_Trip( CG_LOOP& cl, TN* trip_count_tn )
01903 {
01904 const int trip_size = TN_size(trip_count_tn);
01905 OPS ops = OPS_EMPTY;
01906 const BOOL freqs = FREQ_Frequencies_Computed();
01907 const ANNOTATION* annot = ANNOT_Get( BB_annotations(kernel), ANNOT_LOOPINFO );
01908 LOOPINFO* info = ANNOT_loopinfo(annot);
01909 const WN* wn = LOOPINFO_wn(info);
01910
01911
01912 extend_prolog();
01913 extend_epilog( cl.Loop() );
01914 cl.Recompute_Liveness();
01915
01916
01917
01918 Exp_OP3v( OPC_FALSEBR,
01919 NULL,
01920 Gen_Label_TN( Gen_Label_For_BB(CG_LOOP_prolog), 0 ),
01921 trip_count_tn,
01922 Gen_Literal_TN( (Kmin+sc-1), trip_size ),
01923 V_BR_I8GE,
01924 &ops );
01925
01926 BB_Append_Ops( prolog, &ops );
01927
01928 OP* op = NULL;
01929 BB* new_kernel = Gen_BB_Like( kernel );
01930
01931 FOR_ALL_BB_OPs( kernel, op ){
01932 OP* new_op = Dup_OP( op );
01933 if( OP_br( new_op ) ){
01934 Set_OP_opnd( new_op,
01935 Branch_Target_Operand(new_op),
01936 Gen_Label_TN( Gen_Label_For_BB(new_kernel), 0 ) );
01937 }
01938 CG_LOOP_Init_Op( new_op );
01939 Copy_WN_For_Memory_OP( new_op, op );
01940 BB_Append_Op( new_kernel, new_op );
01941 }
01942
01943 if( freqs || BB_freq_fb_based(kernel) )
01944 BB_freq( new_kernel ) = BB_freq( prolog );
01945
01946 Chain_BBs( prolog, new_kernel );
01947 Link_Pred_Succ_with_Prob( prolog, new_kernel, 0.1F );
01948 Change_Succ_Prob( prolog, CG_LOOP_prolog, 0.9F );
01949 Link_Pred_Succ_with_Prob( new_kernel, new_kernel,
01950 (WN_loop_trip_est(wn) - 1.0) / WN_loop_trip_est(wn) );
01951
01952 BB* goto_bb = Gen_BB_Like( epilog );
01953 Chain_BBs( new_kernel, goto_bb );
01954 Add_Goto( goto_bb, epilog );
01955 Link_Pred_Succ_with_Prob( new_kernel, goto_bb, 1.0 / WN_loop_trip_est(wn) );
01956 Chain_BBs( goto_bb, CG_LOOP_prolog );
01957
01958
01959
01960 prolog = CG_LOOP_prolog;
01961 epilog = CG_LOOP_epilog;
01962
01963
01964
01965 TN* new_trip_count = Build_TN_Like( trip_count_tn );
01966
01967 OPS_Remove_All( &ops );
01968 Exp_OP2( trip_size == 4 ? OPC_I4ADD : OPC_I8ADD,
01969 new_trip_count,
01970 trip_count_tn,
01971 Gen_Literal_TN( -(sc-1), trip_size ),
01972 &ops );
01973
01974 BB_Append_Ops( prolog, &ops );
01975
01976 Loop_Peeling( Kmin, new_trip_count );
01977
01978 if( trace )
01979 CG_LOOP_Trace_Loop( cl.Loop(), "**** After Loop Pre-conditioning ****" );
01980
01981 #if 0
01982
01983
01984 TN* remainder_trip_count = Build_TN_Like(trip_count_tn);
01985
01986 OPS_Remove_All( &ops );
01987
01988
01989 if( is_power_of_two( Kmin ) ){
01990 Exp_OP2( trip_size == 4 ? OPC_I4ASHR : OPC_I8ASHR,
01991 remainder_trip_count,
01992 trip_count_tn,
01993 Gen_Literal_TN( log2(Kmin), trip_size ),
01994 &ops );
01995 } else {
01996 Exp_OP2( trip_size == 4 ? OPC_U4DIV : OPC_U8DIV,
01997 remainder_trip_count,
01998 trip_count_tn,
01999 Gen_Literal_TN( Kmin, trip_size ),
02000 &ops );
02001 }
02002
02003 BB_Append_Ops( prolog, &ops );
02004 Unroll_Do_Loop_guard( cl.Loop(), info, remainder_trip_count );
02005 #endif
02006
02007 prolog = CG_LOOP_prolog;
02008 epilog = CG_LOOP_epilog;
02009 }
02010
02011
02012 void KEY_SCH::Peeling_For_Known_Trip( CG_LOOP& cl, TN* trip_count_tn )
02013 {
02014 INT64 trips = TN_value( trip_count_tn ) - ( sc - 1 );
02015 Is_True( trips >= ( Kmin + sc - 1 ), ("") );
02016
02017 if( ( trips % Kmin ) == 0 )
02018 return;
02019
02020 if( ( trips % Kmin ) == 1 ){
02021 OP* op = NULL;
02022 BB* new_bb = Gen_BB_Like( prolog );
02023
02024 FOR_ALL_BB_OPs( kernel, op ){
02025 if( !OP_br( op ) ){
02026 OP* new_op = Dup_OP(op);
02027 Set_OP_orig_idx( new_op, OP_map_idx(op) );
02028 Set_OP_unroll_bb( new_op, new_bb );
02029 CG_LOOP_Init_Op( new_op );
02030 Copy_WN_For_Memory_OP( new_op, op );
02031 BB_Append_Op( new_bb, new_op );
02032 }
02033 }
02034
02035 if( BB_freq_fb_based( prolog ) )
02036 Set_BB_freq_fb_based(new_bb);
02037
02038 append_to_prolog( new_bb );
02039 prolog = CG_LOOP_prolog;
02040
02041 return;
02042 }
02043
02044 trips = TN_value( trip_count_tn ) - ( sc - 1 );
02045 Set_TN_value( trip_count_tn, trips );
02046
02047
02048 Unroll_Make_Remainder_Loop( cl, Kmin );
02049
02050 trips += ( sc - 1 );
02051 Set_TN_value( trip_count_tn, trips );
02052 }
02053
02054
02055
02056
02057 void KEY_SCH::Loop_Preconditioning( CG_LOOP& cl )
02058 {
02059 const ANNOTATION* annot = ANNOT_Get( BB_annotations(kernel), ANNOT_LOOPINFO );
02060 LOOPINFO* info = ANNOT_loopinfo(annot);
02061 TN* trip_count_tn = LOOPINFO_trip_count_tn(info);
02062
02063
02064 OP* org_br_op = BB_branch_op( kernel );
02065 const bool br_op_is_last = BB_last_op(kernel) == org_br_op;
02066 OP* br_op = Dup_OP( org_br_op );
02067
02068 CG_LOOP_Init_Op( br_op );
02069 Copy_WN_For_Memory_OP( br_op, org_br_op );
02070 Set_BB_unrollings( kernel, 0 );
02071
02072 if( TN_is_constant(trip_count_tn) ){
02073 Peeling_For_Known_Trip( cl, trip_count_tn );
02074
02075 } else {
02076 Peeling_For_Unknown_Trip( cl, trip_count_tn );
02077 }
02078
02079 #if 0
02080
02081
02082 NOTE_SWP_HEAD* note = TYPE_MEM_POOL_ALLOC( NOTE_SWP_HEAD, &MEM_pu_nz_pool );
02083 if( TN_is_constant(trip_count_tn) ){
02084 note->const_trip = true;
02085 note->ntimes = TN_value(trip_count_tn) % ntimes;
02086 } else {
02087 note->const_trip = false;
02088 note->ntimes = ntimes;
02089 }
02090 NOTE_Add_To_BB( prolog, preconditioning_head_note_handler, (NOTE_INFO*)note );
02091 #endif
02092
02093
02094 Is_True( kernel == cl.Loop_header(), ("") );
02095 prolog = CG_LOOP_prolog;
02096 epilog = CG_LOOP_epilog;
02097
02098 if( BB_branch_op( kernel ) == NULL ){
02099 if( br_op_is_last )
02100 BB_Append_Op( kernel, br_op );
02101 else
02102 BB_Insert_Op_Before( kernel, BB_last_op(kernel), br_op );
02103
02104
02105 OP_info_move( org_br_op, br_op );
02106 }
02107 }
02108
02109
02110
02111
02112
02113
02114
02115
02116 void KEY_SCH::Compute_Kmin()
02117 {
02118 OP* op = NULL;
02119
02120 Kmin = 1;
02121
02122 FOR_ALL_BB_OPs( kernel, op ){
02123 int start = OP_info_cycle( op );
02124 int end = start - 1;
02125
02126 for( ARC_LIST* arcs = OP_succs(op); arcs != NULL; arcs = ARC_LIST_rest(arcs) ){
02127 ARC* arc = ARC_LIST_first(arcs);
02128
02129 if( ARC_kind(arc) == CG_DEP_REGIN ){
02130 OP* succ_op = ARC_succ(arc);
02131 int c = OP_info_cycle( succ_op );
02132 if( c > end ){
02133 end = c;
02134 OP_info_last_use( op ) = succ_op;
02135 }
02136 }
02137 }
02138
02139
02140 if( end > start ){
02141 int copies = 1 + ( end - start - 1 ) / mii;
02142 Kmin = MAX( Kmin, copies );
02143 }
02144 }
02145 }
02146
02147
02148 void KEY_SCH::Loop_Unrolling( CG_LOOP& cl, int ntimes )
02149 {
02150 Is_True( ntimes > 1, ("") );
02151
02152 Unroll_Do_Loop( cl, ntimes );
02153
02154 cl.Recompute_Liveness();
02155 cl.EBO_Before_Unrolling();
02156 Fix_Backpatches( cl, trace );
02157
02158 kernel = cl.Loop_header();
02159 prolog = CG_LOOP_prolog;
02160 epilog = CG_LOOP_epilog;
02161
02162 unrolls *= ntimes;
02163 }
02164
02165
02166 void KEY_SCH::Delete_Backpatches()
02167 {
02168 for( CG_LOOP_BACKPATCH* bp = CG_LOOP_Backpatch_First(prolog, NULL);
02169 bp != NULL;
02170 bp = CG_LOOP_Backpatch_Next(bp) ){
02171 CG_LOOP_Backpatch_Delete( prolog, bp );
02172 }
02173
02174 for( CG_LOOP_BACKPATCH* bp = CG_LOOP_Backpatch_First(epilog, NULL);
02175 bp != NULL;
02176 bp = CG_LOOP_Backpatch_Next(bp) ){
02177 CG_LOOP_Backpatch_Delete( epilog, bp );
02178 }
02179 }
02180
02181
02182 KEY_SCH::KEY_SCH( CG_LOOP& cl, BB* _prolog, BB* _epilog, bool trace )
02183 : prolog( _prolog ), epilog( _epilog ), trace( trace ), perform_swp( true )
02184 {
02185 OP* op = NULL;
02186
02187 int max_omega = 0;
02188 CXX_MEM_POOL sas_local_pool( "sas pool", FALSE );
02189 mem_pool = sas_local_pool();
02190
02191 strcpy( func_name, "swp" );
02192 unrolls = 1;
02193
02194
02195
02196
02197 kernel = cl.Loop_header();
02198
02199 FOR_ALL_BB_OPs( kernel, op ){
02200
02201
02202 if( OP_fdiv( op ) || OP_sqrt( op) ){
02203 Gen_Kernel_Fail_Info( SKIP_FDIV_SQRT );
02204 return;
02205 }
02206
02207 Is_True( OP_results( op ) < 2, ("") );
02208 for( int opnd = 0; opnd < OP_opnds(op); opnd++ ){
02209 TN* tn = OP_opnd( op, opnd );
02210 max_omega = MAX( max_omega, OP_omega( op, opnd ) );
02211 }
02212 }
02213
02214 #if 0
02215 if( max_omega > 1 &&
02216 max_omega <= SWP_Options.Max_Unroll_Times ){
02217 Loop_Unrolling( cl, max_omega );
02218 }
02219 #endif
02220
02221 CG_LOOP_Remove_Notations( cl.Loop(), prolog, epilog );
02222 Delete_Backpatches();
02223 cl.Recompute_Liveness();
02224
02225 if( trace ){
02226 CG_LOOP_Trace_Loop( cl.Loop(), "**** Before KEY_SWP ****" );
02227 }
02228
02229
02230 CYCLIC_DEP_GRAPH cyclic_graph( kernel, mem_pool );
02231 if( trace )
02232 CG_DEP_Trace_Graph( kernel );
02233
02234 success = true;
02235 Schedule_Kernel();
02236
02237 if( !success )
02238 return;
02239
02240 Compute_Kmin();
02241
02242 if( trace )
02243 fprintf( TFile, "KEY_SWP: Kmin = %d sc = %d\n", Kmin, sc );
02244
02245
02246 register_allocation_init();
02247 if( !success )
02248 return;
02249
02250 prolog = CG_LOOP_prolog;
02251 epilog = CG_LOOP_epilog;
02252 glue_prolog = glue_epilog = NULL;
02253
02254 Loop_Preconditioning( cl );
02255
02256
02257 Gen_PKE( cl );
02258
02259 Is_True( success, ("") );
02260
02261
02262 Delete_Backpatches();
02263
02264
02265
02266 if( trace ){
02267 CG_LOOP_Trace_Loop( cl.Loop(), "**** After KEY_SWP ****" );
02268 }
02269 }