00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifdef USE_PCH
00029 #include "be_com_pch.h"
00030 #endif
00031 #pragma hdrstop
00032
00033 #include <stdint.h>
00034 #include "defs.h"
00035 #include "config.h"
00036 #include "opt_config.h"
00037 #include "errors.h"
00038 #include "erglob.h"
00039 #include "tracing.h"
00040 #include "glob.h"
00041 #include "timing.h"
00042 #include "wn.h"
00043 #include "wn_util.h"
00044 #include "wn_lower.h"
00045
// Counter incremented by WN_UNROLL_loop in TARG_NVISA builds; its value is
// passed to Query_Skiplist there and printed by the DevWarn in
// WN_UNROLL::Unroll.
00046 static INT loop_count = 0;
00047
00048 class WN_UNROLL {
00049 private:
00050 WN *_orig_wn;
00051 WN *_indx_var;
00052 WN *_init_stmt;
00053 WN *_start;
00054 WN *_end_cond;
00055 WN *_end;
00056
00057
00058
00059 WN *_trips;
00060
00061 WN *_incr_stmt;
00062 TYPE_ID _rtype;
00063 WN *_loop_body;
00064 WN *_loop_info;
00065 INT _map_id;
00066
00067 INT _step_amt;
00068 INT _abs_step_amt;
00069 INT _node_count;
00070 INT _if_count;
00071 INT _istore_count;
00072
00073 public:
00074 WN *Loop_info(void) const { return _loop_info; }
00075 WN *End(void) const { return _end; }
00076 WN *Trips(void) const { return _trips; }
00077 INT Step_amt(void) const { return _step_amt; }
00078 INT Node_count(void) const { return _node_count; }
00079 INT If_count(void) const { return _if_count; }
00080 INT Istore_count(void) const { return _istore_count; }
00081
00082 WN_UNROLL(WN *doloop): _orig_wn(doloop),
00083 _indx_var(WN_kid0(doloop)),
00084 _init_stmt(WN_kid1(doloop)),
00085 _end_cond(WN_kid2(doloop)),
00086 _incr_stmt(WN_kid(doloop, 3)),
00087 _loop_body(WN_kid(doloop, 4)),
00088 _map_id(WN_map_id(doloop)),
00089 _node_count(0),
00090 _if_count(0),
00091 _istore_count(0)
00092 { _start = WN_kid0(_init_stmt);
00093
00094 _rtype = WN_rtype(WN_kid0(_incr_stmt));
00095
00096
00097 if (WN_operator(WN_kid1(_end_cond)) == OPR_LDID &&
00098 WN_st_idx(WN_kid1(_end_cond)) == WN_st_idx(_indx_var) &&
00099 WN_offset(WN_kid1(_end_cond)) == WN_offset(_indx_var)) {
00100 WN *temp = WN_kid0(_end_cond);
00101 WN_kid0(_end_cond) = WN_kid1(_end_cond);
00102 WN_kid1(_end_cond) = temp;
00103 switch (WN_operator(_end_cond)) {
00104 case OPR_LT: WN_set_operator(_end_cond, OPR_GT); break;
00105 case OPR_LE: WN_set_operator(_end_cond, OPR_GE); break;
00106 case OPR_GT: WN_set_operator(_end_cond, OPR_LT); break;
00107 case OPR_GE: WN_set_operator(_end_cond, OPR_LE); break;
00108 default: _end = NULL; return;
00109 }
00110 }
00111
00112 if (WN_operator(WN_kid0(_end_cond)) == OPR_LDID &&
00113 WN_st_idx(WN_kid0(_end_cond)) == WN_st_idx(_indx_var) &&
00114 WN_offset(WN_kid0(_end_cond)) == WN_offset(_indx_var))
00115 _end = WN_COPY_Tree_With_Map(WN_kid1(_end_cond));
00116 else _end = NULL;
00117
00118 WN *incr_expr = WN_kid0(_incr_stmt);
00119 if (WN_operator(incr_expr) != OPR_ADD)
00120 _step_amt = 0;
00121 else if (WN_operator(WN_kid1(incr_expr)) != OPR_INTCONST)
00122 _step_amt = 0;
00123 else _step_amt = WN_const_val(WN_kid1(incr_expr));
00124
00125 if (! (_step_amt > 0 && (WN_operator(_end_cond) == OPR_LT ||
00126 WN_operator(_end_cond) == OPR_LE) ||
00127 _step_amt < 0 && (WN_operator(_end_cond) == OPR_GT ||
00128 WN_operator(_end_cond) == OPR_GE)))
00129 _end = NULL;
00130
00131
00132 if (_end)
00133 if (WN_operator(_end_cond) == OPR_LE ||
00134 WN_operator(_end_cond) == OPR_GE)
00135 _end = WN_Add(_rtype, _end, WN_Intconst(_rtype, _step_amt));
00136
00137 _abs_step_amt = _step_amt >= 0 ? _step_amt : -_step_amt;
00138
00139 if (_end) {
00140 if (_step_amt > 0)
00141 _trips = WN_Sub(_rtype, _end, WN_COPY_Tree_With_Map(_start));
00142 else _trips = WN_Sub(_rtype, WN_COPY_Tree_With_Map(_start), _end);
00143
00144
00145 if (! (WN_operator(_end_cond) == OPR_LE ||
00146 WN_operator(_end_cond) == OPR_GE))
00147 {
00148 WN *tmp = WN_Binary(OPR_MOD, _rtype, WN_COPY_Tree_With_Map(_trips),
00149 WN_Intconst(_rtype, _abs_step_amt));
00150 tmp = WN_Select(_rtype, WN_EQ(_rtype, tmp, WN_Intconst(_rtype, 0)),
00151 WN_Intconst(_rtype, 0),
00152 WN_Intconst(_rtype, 1));
00153
00154 _trips = WN_Div(_rtype, _trips, WN_Intconst(_rtype, _abs_step_amt));
00155 _trips = WN_Add(_rtype, _trips, tmp);
00156 }
00157 else {
00158
00159 _trips = WN_Div(_rtype, _trips, WN_Intconst(_rtype, _abs_step_amt));
00160 }
00161
00162
00163 }
00164
00165 if (WN_kid_count(doloop) == 6)
00166 _loop_info = WN_kid(doloop, 5);
00167 else _loop_info = NULL;
00168 }
00169 ~WN_UNROLL(void) {}
00170
00171 void Analyze_body_expr(WN *tree);
00172 void Analyze_body_stmt(WN *tree);
00173 WN * Replicate_expr(WN *expr, INT rep_cnt);
00174 WN * Replicate_stmt(WN *stmt, INT rep_cnt);
00175 void Unroll(INT unroll_times);
00176 };
00177
00178
00179
00180
00181 void
00182 WN_UNROLL::Analyze_body_expr(WN *tree)
00183 {
00184 OPERATOR opr = WN_operator(tree);
00185 INT i;
00186
00187 _node_count++;
00188
00189 switch (opr) {
00190
00191 case OPR_LDID:
00192 case OPR_LDBITS:
00193 case OPR_INTCONST:
00194 case OPR_CONST:
00195 case OPR_LDA:
00196 case OPR_LDA_LABEL:
00197 return;
00198
00199
00200 case OPR_ILOAD:
00201 case OPR_ILDBITS:
00202 case OPR_SQRT: case OPR_RSQRT: case OPR_RECIP:
00203 case OPR_PAREN:
00204 case OPR_REALPART: case OPR_IMAGPART:
00205 case OPR_HIGHPART: case OPR_LOWPART:
00206 case OPR_ALLOCA:
00207 case OPR_LNOT:
00208 case OPR_EXTRACT_BITS:
00209 case OPR_BNOT:
00210 case OPR_PARM:
00211 case OPR_TAS:
00212 case OPR_RND: case OPR_TRUNC: case OPR_CEIL: case OPR_FLOOR:
00213 #ifdef TARG_X8664
00214 case OPR_REPLICATE:
00215 case OPR_REDUCE_ADD: case OPR_REDUCE_MPY:
00216 case OPR_REDUCE_MAX: case OPR_REDUCE_MIN:
00217 case OPR_SHUFFLE:
00218 case OPR_ATOMIC_RSQRT:
00219 #endif // TARG_X8664
00220 case OPR_NEG:
00221 case OPR_ABS:
00222 case OPR_MINPART: case OPR_MAXPART:
00223 case OPR_CVTL:
00224 case OPR_CVT:
00225 Analyze_body_expr(WN_kid0(tree));
00226 return;
00227
00228
00229 case OPR_MLOAD:
00230 case OPR_ILOADX:
00231 case OPR_MPY: case OPR_HIGHMPY:
00232 case OPR_DIV:
00233 case OPR_MOD: case OPR_REM:
00234 case OPR_DIVREM:
00235 case OPR_ADD: case OPR_SUB:
00236 case OPR_MAX: case OPR_MIN:
00237 case OPR_MINMAX:
00238 case OPR_BAND: case OPR_BIOR: case OPR_BNOR: case OPR_BXOR:
00239 case OPR_ASHR: case OPR_LSHR:
00240 case OPR_SHL:
00241 case OPR_RROTATE:
00242 case OPR_EQ: case OPR_NE:
00243 case OPR_GE: case OPR_GT: case OPR_LE: case OPR_LT:
00244 case OPR_LAND: case OPR_LIOR:
00245 case OPR_COMPLEX:
00246 case OPR_COMPOSE_BITS:
00247 Analyze_body_expr(WN_kid0(tree));
00248 Analyze_body_expr(WN_kid1(tree));
00249 return;
00250
00251
00252 case OPR_SELECT:
00253 Analyze_body_expr(WN_kid0(tree));
00254 Analyze_body_expr(WN_kid1(tree));
00255 Analyze_body_expr(WN_kid2(tree));
00256 return;
00257
00258
00259 case OPR_INTRINSIC_OP:
00260 for (i = 0; i < WN_kid_count(tree); i++)
00261 Analyze_body_expr(WN_kid(tree,i));
00262 return;
00263
00264 default: Is_True(FALSE,("unexpected operator %s", OPERATOR_name(opr)));
00265 }
00266
00267 return;
00268 }
00269
00270
00271
00272
00273 void
00274 WN_UNROLL::Analyze_body_stmt(WN *tree)
00275 {
00276 OPERATOR opr = WN_operator(tree);
00277
00278 switch (opr) {
00279 case OPR_COMMENT:
00280 case OPR_PRAGMA:
00281 case OPR_XPRAGMA:
00282 return;
00283
00284 case OPR_PREFETCH:
00285 case OPR_ASSERT:
00286 _node_count++;
00287
00288
00289 case OPR_EVAL:
00290 Analyze_body_expr(WN_kid0(tree));
00291 return;
00292
00293 case OPR_MSTORE:
00294 case OPR_ISTOREX:
00295 Analyze_body_expr(WN_kid2(tree));
00296
00297
00298 case OPR_ISTORE:
00299 case OPR_ISTBITS:
00300 _istore_count++;
00301 Analyze_body_expr(WN_kid1(tree));
00302
00303
00304 case OPR_STBITS:
00305 case OPR_STID:
00306 _node_count++;
00307 Analyze_body_expr(WN_kid0(tree));
00308 return;
00309
00310 case OPR_BLOCK: {
00311 WN *stmt;
00312 for (stmt = WN_first(tree); stmt; stmt = WN_next(stmt))
00313 Analyze_body_stmt(stmt);
00314 return;
00315 }
00316
00317 case OPR_IF:
00318 _node_count++;
00319 _if_count++;
00320 Analyze_body_expr(WN_kid(tree, 0));
00321 Analyze_body_stmt(WN_kid(tree, 1));
00322 Analyze_body_stmt(WN_kid(tree, 2));
00323 return;
00324
00325 default:
00326 Is_True(FALSE,("WN_UNROLL::Analyze_body_stmt: unexpected operator"));
00327 }
00328 return;
00329 }
00330
00331
00332
00333
00334
00335 WN *
00336 WN_UNROLL::Replicate_expr(WN *expr, INT rep_cnt)
00337 {
00338 OPERATOR opr = WN_operator(expr);
00339 INT i;
00340
00341 WN *new_expr = WN_CopyNode(expr);
00342 #if 0
00343 WN_COPY_All_Maps(new_expr, expr);
00344 #else
00345 WN_set_map_id(new_expr, (WN_MAP_ID) (-1));
00346 #endif
00347
00348
00349 switch (opr) {
00350
00351 case OPR_LDID:
00352 if (rep_cnt != 0 && WN_st_idx(expr) == WN_st_idx(_indx_var) &&
00353 WN_offset(expr) == WN_offset(_indx_var)) {
00354 new_expr = WN_Add(_rtype, new_expr, WN_Intconst(_rtype, rep_cnt));
00355 }
00356 break;
00357
00358 case OPR_LDBITS:
00359 case OPR_INTCONST:
00360 case OPR_CONST:
00361 case OPR_LDA:
00362 case OPR_LDA_LABEL:
00363 break;
00364
00365
00366 case OPR_ILOAD:
00367 case OPR_ILDBITS:
00368 case OPR_SQRT: case OPR_RSQRT: case OPR_RECIP:
00369 case OPR_PAREN:
00370 case OPR_REALPART: case OPR_IMAGPART:
00371 case OPR_HIGHPART: case OPR_LOWPART:
00372 case OPR_ALLOCA:
00373 case OPR_LNOT:
00374 case OPR_EXTRACT_BITS:
00375 case OPR_BNOT:
00376 case OPR_PARM:
00377 case OPR_TAS:
00378 case OPR_RND: case OPR_TRUNC: case OPR_CEIL: case OPR_FLOOR:
00379 #ifdef TARG_X8664
00380 case OPR_REPLICATE:
00381 case OPR_REDUCE_ADD: case OPR_REDUCE_MPY:
00382 case OPR_REDUCE_MAX: case OPR_REDUCE_MIN:
00383 case OPR_SHUFFLE:
00384 case OPR_ATOMIC_RSQRT:
00385 #endif // TARG_X8664
00386 case OPR_NEG:
00387 case OPR_ABS:
00388 case OPR_MINPART: case OPR_MAXPART:
00389 case OPR_CVTL:
00390 case OPR_CVT:
00391 WN_kid0(new_expr) = Replicate_expr(WN_kid0(expr), rep_cnt);
00392 break;
00393
00394
00395 case OPR_MLOAD:
00396 case OPR_MPY: case OPR_HIGHMPY:
00397 case OPR_DIV:
00398 case OPR_MOD: case OPR_REM:
00399 case OPR_DIVREM:
00400 case OPR_ADD: case OPR_SUB:
00401 case OPR_MAX: case OPR_MIN:
00402 case OPR_MINMAX:
00403 case OPR_BAND: case OPR_BIOR: case OPR_BNOR: case OPR_BXOR:
00404 case OPR_ASHR: case OPR_LSHR:
00405 case OPR_SHL:
00406 case OPR_RROTATE:
00407 case OPR_EQ: case OPR_NE:
00408 case OPR_GE: case OPR_GT: case OPR_LE: case OPR_LT:
00409 case OPR_LAND: case OPR_LIOR:
00410 case OPR_COMPLEX:
00411 case OPR_COMPOSE_BITS:
00412 WN_kid0(new_expr) = Replicate_expr(WN_kid0(expr), rep_cnt);
00413 WN_kid1(new_expr) = Replicate_expr(WN_kid1(expr), rep_cnt);
00414 break;
00415
00416
00417 case OPR_SELECT:
00418 WN_kid0(new_expr) = Replicate_expr(WN_kid0(expr), rep_cnt);
00419 WN_kid1(new_expr) = Replicate_expr(WN_kid1(expr), rep_cnt);
00420 WN_kid2(new_expr) = Replicate_expr(WN_kid2(expr), rep_cnt);
00421 break;
00422
00423
00424 case OPR_INTRINSIC_OP:
00425 for (i = 0; i < WN_kid_count(expr); i++)
00426 WN_kid(new_expr, i) = Replicate_expr(WN_kid(expr, i), rep_cnt);
00427 break;
00428
00429 default: Is_True(FALSE,("unexpected operator"));
00430 }
00431
00432 return new_expr;
00433 }
00434
00435
00436
00437
00438
00439 WN *
00440 WN_UNROLL::Replicate_stmt(WN *stmt, INT rep_cnt)
00441 {
00442 OPERATOR opr = WN_operator(stmt);
00443
00444 if (opr == OPR_BLOCK) {
00445 WN *new_block = WN_CreateBlock();
00446 WN *s, *ns;
00447 for (s = WN_first(stmt); s; s= WN_next(s)) {
00448 ns = Replicate_stmt(s, rep_cnt);
00449 WN_INSERT_BlockLast(new_block, ns);
00450 }
00451 return new_block;
00452 }
00453
00454 WN *new_stmt = WN_CopyNode(stmt);
00455 #if 0
00456 WN_COPY_All_Maps(new_stmt, stmt);
00457 #else
00458 WN_set_map_id(new_stmt, (WN_MAP_ID) (-1));
00459 #endif
00460
00461 switch (opr) {
00462 case OPR_COMMENT:
00463 case OPR_PRAGMA:
00464 break;
00465
00466 case OPR_XPRAGMA:
00467 case OPR_PREFETCH:
00468 case OPR_ASSERT:
00469 case OPR_EVAL:
00470 WN_kid0(new_stmt) = Replicate_expr(WN_kid0(stmt), rep_cnt);
00471 break;
00472
00473 case OPR_MSTORE:
00474 WN_kid2(new_stmt) = Replicate_expr(WN_kid2(stmt), rep_cnt);
00475
00476
00477 case OPR_ISTORE:
00478 case OPR_ISTBITS:
00479 WN_kid1(new_stmt) = Replicate_expr(WN_kid1(stmt), rep_cnt);
00480
00481
00482 case OPR_STBITS:
00483 case OPR_STID:
00484 WN_kid0(new_stmt) = Replicate_expr(WN_kid0(stmt), rep_cnt);
00485 break;
00486
00487 case OPR_IF:
00488 WN_kid0(new_stmt) = Replicate_expr(WN_kid0(stmt), rep_cnt);
00489 WN_kid1(new_stmt) = Replicate_stmt(WN_kid1(stmt), rep_cnt);
00490 WN_kid2(new_stmt) = Replicate_stmt(WN_kid2(stmt), rep_cnt);
00491 break;
00492
00493 default:
00494 Is_True(FALSE,("WN_UNROLL::Replicatestmt: unexpected operator"));
00495 }
00496 return new_stmt;
00497 }
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518 void
00519 WN_UNROLL::Unroll(INT unroll_times)
00520 {
00521 INT i;
00522 WN *stmt, *new_stmt;
00523
00524 #if defined(TARG_NVISA)
00525 DevWarn("wn_unroll loop%d %d times", loop_count, unroll_times);
00526 #endif
00527
00528
00529 WN_set_operator(_orig_wn, OPR_BLOCK);
00530 WN_set_rtype(_orig_wn, MTYPE_V);
00531 WN_set_desc(_orig_wn, MTYPE_V);
00532 WN_set_kid_count(_orig_wn, 0);
00533 WN_first(_orig_wn) = WN_last(_orig_wn) = NULL;
00534 WN_set_map_id(_orig_wn, (WN_MAP_ID) (-1));
00535
00536
00537 WN *unrolled_init_stmt = WN_COPY_Tree_With_Map(_init_stmt);
00538
00539 WN *unrolled_trips;
00540 BOOL const_trips = 0;
00541 if (WN_operator(_trips) == OPR_INTCONST)
00542 const_trips = WN_const_val(_trips);
00543 WN *unrolled_end_cond;
00544 if (const_trips)
00545 unrolled_trips = WN_Intconst(_rtype, (const_trips / unroll_times ) *
00546 (unroll_times * _abs_step_amt));
00547 else {
00548 unrolled_trips = WN_Div(_rtype, _trips, WN_Intconst(_rtype, unroll_times));
00549 unrolled_trips = WN_Mpy(_rtype, unrolled_trips, WN_Intconst(_rtype, unroll_times*_abs_step_amt));
00550 }
00551 unrolled_end_cond = WN_Relational(_step_amt > 0 ? OPR_LT : OPR_GT, _rtype,
00552 WN_CopyNode(WN_kid0(_end_cond)),
00553 WN_Binary(_step_amt > 0 ? OPR_ADD : OPR_SUB, _rtype,
00554 WN_COPY_Tree_With_Map(_start), unrolled_trips));
00555
00556 WN *unrolled_incr_stmt = WN_CopyNode(_incr_stmt);
00557 WN_COPY_All_Maps(unrolled_incr_stmt, _incr_stmt);
00558 WN_set_map_id(unrolled_incr_stmt, (WN_MAP_ID) (-1));
00559 WN_kid0(unrolled_incr_stmt) = Replicate_expr(WN_kid0(_incr_stmt),
00560 (unroll_times-1) * _step_amt);
00561
00562 WN *unrolled_body = WN_CreateBlock();
00563 for (i = 0; i < unroll_times; i++) {
00564 for (stmt = WN_first(_loop_body); stmt; stmt = WN_next(stmt)) {
00565 if (i != 0 && WN_operator(stmt) == OPR_PREFETCH)
00566 continue;
00567 new_stmt = Replicate_stmt(stmt, i*_step_amt);
00568 WN_INSERT_BlockLast(unrolled_body, new_stmt);
00569 }
00570 }
00571
00572 if (_loop_info) {
00573 WN_loop_trip_est(_loop_info) /= unroll_times;
00574 WN_Reset_Loop_Nz_Trip(_loop_info);
00575 if (WN_kid1(_loop_info))
00576 WN_kid1(_loop_info) = WN_Div(_rtype, WN_kid1(_loop_info),
00577 WN_Intconst(_rtype, unroll_times));
00578
00579 if (const_trips && (const_trips % unroll_times == 0)
00580 && WN_kid1(_loop_info) != NULL
00581 && WN_operator(WN_kid1(_loop_info)) == OPR_INTCONST
00582 && WN_const_val(WN_kid1(_loop_info)) == 1)
00583 {
00584
00585
00586 DevWarn("only 1 iteration of loop, so remove");
00587 WN_INSERT_BlockLast(_orig_wn, unrolled_init_stmt);
00588 WN_INSERT_BlockLast(_orig_wn, unrolled_body);
00589 WN_INSERT_BlockLast(_orig_wn, unrolled_incr_stmt);
00590 return;
00591 }
00592 }
00593 WN *unrolled_do_loop = WN_CreateDO(WN_CopyNode(_indx_var), unrolled_init_stmt,
00594 unrolled_end_cond, unrolled_incr_stmt,
00595 unrolled_body, _loop_info);
00596 WN_set_map_id(unrolled_do_loop, (WN_MAP_ID) (-1));
00597
00598 WN_INSERT_BlockLast(_orig_wn, unrolled_do_loop);
00599
00600 if (const_trips && (const_trips % unroll_times == 0))
00601 return;
00602
00603
00604 if (const_trips)
00605 WN_kid0(_init_stmt) = WN_Binary(_step_amt > 0 ? OPR_ADD : OPR_SUB, _rtype,
00606 WN_COPY_Tree(_start),
00607 WN_Intconst(_rtype, (const_trips / unroll_times) *
00608 (unroll_times * _abs_step_amt)));
00609 else WN_kid0(_init_stmt) = WN_CopyNode(WN_kid0(_end_cond));
00610 WN *loop_info = WN_CreateLoopInfo(WN_CopyNode(WN_kid0(_end_cond)),
00611 NULL, unroll_times-1,
00612 _loop_info ? WN_loop_depth(_loop_info) : 0,
00613 9);
00614 WN_INSERT_BlockLast(_orig_wn,
00615 WN_CreateDO(_indx_var, _init_stmt, _end_cond, _incr_stmt,
00616 _loop_body, loop_info));
00617 }
00618
00619
00620
00621
00622
00623
00624
00625 static void
00626 WN_UNROLL_loop(WN *doloop)
00627 {
00628
00629
00630
00631 UINT pragma_unroll_times = 0;
00632 WN *stmt = WN_prev(doloop);
00633 while (stmt) {
00634 if (WN_operator(stmt) == OPR_PRAGMA) {
00635 if (WN_pragma(stmt) == WN_PRAGMA_UNROLL) {
00636 pragma_unroll_times = WN_pragma_arg1(stmt);
00637 break;
00638 }
00639 }
00640 else if ( ! OPERATOR_is_store(WN_operator(stmt)))
00641 break;
00642 stmt = WN_prev(stmt);
00643 }
00644
00645 WN_UNROLL wn_unroll(doloop);
00646 if (wn_unroll.Step_amt() == 0)
00647 return;
00648 if (wn_unroll.End() == NULL)
00649 return;
00650 if (wn_unroll.Loop_info()) {
00651 if (WN_Loop_Unimportant_Misc(wn_unroll.Loop_info()))
00652 return;
00653 }
00654 UINT unroll_times = 0;
00655 UINT max_unroll_size;
00656 USRCPOS srcpos;
00657 USRCPOS_srcpos(srcpos) = WN_Get_Linenum(doloop);
00658
00659 #ifdef TARG_NVISA
00660 ++loop_count;
00661 if ( Query_Skiplist ( WOPT_Unroll_Skip_List, loop_count ) )
00662 return;
00663
00664
00665
00666
00667
00668 if (wn_unroll.Loop_info()) {
00669 if (WN_loop_trip_est(wn_unroll.Loop_info())
00670 && WN_operator(wn_unroll.Trips()) == OPR_INTCONST)
00671 {
00672 unroll_times = WN_const_val(wn_unroll.Trips());
00673 }
00674
00675 if (unroll_times == 0) {
00676 if (pragma_unroll_times == UINT32_MAX) {
00677 DevWarn("pragma unroll but no trip count, so ignore");
00678 return;
00679 }
00680 unroll_times = pragma_unroll_times;
00681 }
00682 if (unroll_times == 0)
00683 return;
00684 if (pragma_unroll_times && unroll_times > pragma_unroll_times) {
00685 DevWarn("pragma says to unroll less than full unrolling");
00686 unroll_times = pragma_unroll_times;
00687 }
00688 wn_unroll.Analyze_body_stmt(WN_kid(doloop, 4));
00689 if (wn_unroll.Node_count() == 0)
00690 return;
00691
00692
00693
00694
00695 DevWarn("unrolled size would be %d * %d", wn_unroll.Node_count(), unroll_times);
00696
00697 max_unroll_size = OPT_unroll_times * OPT_unroll_size;
00698 max_unroll_size = MAX(pragma_unroll_times, max_unroll_size);
00699
00700
00701 if (pragma_unroll_times == 0 && (unroll_times * wn_unroll.Node_count()) > max_unroll_size)
00702 return;
00703 #if 0
00704 if (unroll_times > OPT_unroll_times)
00705 return;
00706 if (wn_unroll.Node_count() > OPT_unroll_size)
00707 return;
00708 #endif
00709 }
00710 else
00711 return;
00712 #else // TARG_NVISA
00713 if (wn_unroll.Loop_info()) {
00714 if (WN_loop_trip_est(wn_unroll.Loop_info()) <= 16)
00715 return;
00716 }
00717 if (WN_operator(wn_unroll.Trips()) == OPR_INTCONST)
00718 if (WN_const_val(wn_unroll.Trips()) <= 16)
00719 return;
00720 wn_unroll.Analyze_body_stmt(WN_kid(doloop, 4));
00721 if (WOPT_Enable_WN_Unroll < 2 && wn_unroll.If_count() == 0)
00722 return;
00723 if (wn_unroll.Istore_count() == 0) {
00724 if (wn_unroll.Node_count() < 40)
00725 unroll_times = 8;
00726 else if (wn_unroll.Node_count() < 80)
00727 unroll_times = 4;
00728 else {
00729 if (WOPT_Enable_Verbose)
00730 fprintf(stderr, "WN_UNROLL: loop at %s:%d not unrolled because node count is %d\n",
00731 Cur_PU_Name, USRCPOS_linenum(srcpos), wn_unroll.Node_count());
00732 return;
00733 }
00734 }
00735 else {
00736 if (wn_unroll.Node_count() < 20)
00737 unroll_times = 8;
00738 else if (wn_unroll.Node_count() < 40)
00739 unroll_times = 4;
00740 else {
00741 if (WOPT_Enable_Verbose)
00742 fprintf(stderr, "WN_UNROLL: loop at %s:%d not unrolled because node count is %d\n",
00743 Cur_PU_Name, USRCPOS_linenum(srcpos), wn_unroll.Node_count());
00744 return;
00745 }
00746 }
00747 #endif // TARG_NVISA
00748 wn_unroll.Unroll(unroll_times);
00749 if (WOPT_Enable_Verbose)
00750 fprintf(stderr, "WN_UNROLL has unrolled loop at %s:%d %d times\n",
00751 Cur_PU_Name, USRCPOS_linenum(srcpos), unroll_times);
00752
00753 }
00754
00755
00756
00757
00758
00759
00760
00761 static BOOL
00762 WN_UNROLL_suitable(WN *tree)
00763 {
00764 OPERATOR opr = WN_operator(tree);
00765 INT i;
00766 BOOL suitable;
00767
00768 if (OPERATOR_is_store(opr) && MTYPE_is_vector(WN_desc(tree)))
00769 return FALSE;
00770
00771 switch (opr) {
00772 case OPR_REGION:
00773 case OPR_REGION_EXIT:
00774 case OPR_GOTO:
00775 case OPR_GOTO_OUTER_BLOCK:
00776 case OPR_RETURN:
00777 case OPR_TRAP :
00778 case OPR_FORWARD_BARRIER:
00779 case OPR_BACKWARD_BARRIER:
00780 case OPR_ALTENTRY:
00781 case OPR_LABEL:
00782 case OPR_DEALLOCA:
00783 case OPR_AGOTO:
00784 case OPR_TRUEBR:
00785 case OPR_FALSEBR:
00786 case OPR_RETURN_VAL:
00787 case OPR_COMPGOTO:
00788 case OPR_XGOTO:
00789 case OPR_CALL:
00790 case OPR_ICALL:
00791 case OPR_INTRINSIC_CALL:
00792 case OPR_PICCALL:
00793 case OPR_ASM_STMT:
00794 return FALSE;
00795
00796 case OPR_COMMENT:
00797 case OPR_PRAGMA:
00798 case OPR_PREFETCH:
00799 case OPR_EVAL:
00800 case OPR_ASSERT:
00801 case OPR_XPRAGMA:
00802 case OPR_ISTORE:
00803 case OPR_ISTOREX:
00804 case OPR_STID:
00805 case OPR_ISTBITS:
00806 case OPR_STBITS:
00807 case OPR_MSTORE:
00808 return TRUE;
00809
00810 case OPR_BLOCK: {
00811 WN *stmt;
00812 suitable = TRUE;
00813 stmt = WN_first(tree);
00814 while (stmt != NULL) {
00815 if (! WN_UNROLL_suitable(stmt))
00816 suitable = FALSE;
00817 if (WN_operator(stmt) == OPR_BLOCK) {
00818
00819 WN *unrolled_loop_block = stmt;
00820 stmt = WN_next(stmt);
00821 WN_EXTRACT_FromBlock(tree, unrolled_loop_block);
00822 WN_INSERT_BlockBefore(tree, stmt, unrolled_loop_block);
00823 }
00824 else stmt = WN_next(stmt);
00825 }
00826 return suitable;
00827 }
00828
00829 case OPR_DO_WHILE:
00830 case OPR_WHILE_DO:
00831 WN_UNROLL_suitable(WN_kid(tree, 1));
00832 return FALSE;
00833
00834 case OPR_IF:
00835 suitable = WN_UNROLL_suitable(WN_kid(tree, 1));
00836 if (! WN_UNROLL_suitable(WN_kid(tree, 2)))
00837 suitable = FALSE;
00838 return suitable;
00839
00840 case OPR_DO_LOOP:
00841 if (WN_UNROLL_suitable(WN_kid(tree, 4)))
00842 WN_UNROLL_loop(tree);
00843 return FALSE;
00844
00845 default: Is_True(FALSE,("WN_UNROLL_suitable: unexpected operator"));
00846 }
00847
00848 return FALSE;
00849 }
00850
00851
00852
00853
00854
00855 void
00856 WN_unroll(WN *tree)
00857 {
00858 if (WOPT_Enable_WN_Unroll == 0)
00859 return;
00860
00861 Start_Timer(T_Lower_CU);
00862 Set_Error_Phase("WN_unroll");
00863
00864 if (WN_operator(tree) == OPR_FUNC_ENTRY)
00865 WN_UNROLL_suitable(WN_func_body(tree));
00866 else if (WN_operator(tree) == OPR_REGION)
00867 WN_UNROLL_suitable(WN_region_body(tree));
00868 else if (OPERATOR_is_stmt(WN_operator(tree)) || OPERATOR_is_scf(WN_operator(tree)))
00869 WN_UNROLL_suitable(tree);
00870 else Is_True(FALSE, ("unexpected WHIRL operator"));
00871
00872 Stop_Timer(T_Lower_CU);
00873
00874 WN_Lower_Checkdump("After wn_unroll", tree, 0);
00875
00876 WN_verifier(tree);
00877
00878 return;
00879 }