00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063 #define __STDC_LIMIT_MACROS
00064 #include <stdint.h>
00065 #ifdef USE_PCH
00066 #include "lno_pch.h"
00067 #endif // USE_PCH
00068 #pragma hdrstop
00069
00070 #ifdef _KEEP_RCS_ID
00071
00072 static char *rcs_id = "$Source: be/lno/SCCS/s.doacross.cxx $";
00073 #endif
00074
00075 #include <sys/types.h>
00076 #include <stdlib.h>
00077 #include <limits.h>
00078 #include <math.h>
00079 #ifdef KEY // to get DBL_MAX
00080 #include <float.h>
00081 #endif
00082 #include "pu_info.h"
00083 #include "defs.h"
00084 #include "glob.h"
00085 #include "wn.h"
00086 #include "wn_map.h"
00087 #include "cxx_memory.h"
00088 #include "lnopt_main.h"
00089 #include "lwn_util.h"
00090 #include "lnoutils.h"
00091 #include "lego_util.h"
00092 #include "tile.h"
00093 #include "mat.h"
00094 #include "snl.h"
00095 #include "snl_trans.h"
00096 #include "region_util.h"
00097 #include "opt_du.h"
00098 #include "mtypes.h"
00099 #include "config_targ.h"
00100 #include "config_cache.h"
00101 #include "scalar_expand.h"
00102 #include "strtab.h"
00103 #include "small_trips.h"
00104 #include "irbdata.h"
00105 #include "data_layout.h"
00106 #include "ff_utils.h"
00107 #include "doacross.h"
00108 #include "dvector.h"
00109 #include "ara_loop.h"
00110 #include "wn_pragmas.h"
00111 #include "prompf.h"
00112 #include "anl_driver.h"
00113
00114 #pragma weak New_Construct_Id
00115
00116
00117 extern BOOL Is_Privatizable_With_Context(WN* loop, WN* wn, BOOL defnitely);
00118
00119 typedef STACK<WN*> STACK_OF_WN;
00120
00121 static ST* Sync_Array_St;
00122 static ST* Sync_Length_St;
00123 static ST* Sync_Offset_St;
00124 static ST* Max_Numthread_St;
00125 static WN* Sync_Array_Alias_Host;
00126 static WN* Sync_Length_Alias_Host;
00127 static WN* Sync_Offset_Alias_Host;
00128 static TY_IDX Sync_Array_Ptr_Ty;
00129 static BOOL Sync_Structure_Created;
00130 static BOOL Doacross_Inited;
00131 static STACK_OF_WN* sync_offset_stid_stack;
00132 static STACK_OF_WN* sync_offset_ldid_stack;
00133 static STACK_OF_WN* sync_length_stid_stack;
00134 static STACK_OF_WN* sync_length_ldid_stack;
00135
00136 static double Single_Sync_Cycle = 200.0;
00137 static WN* Func_Root=NULL;
00138 static MEM_POOL DOACROSS_default_pool;
00139
00140 #define MAX_INT64 0x7fffffffffffffff
00141 #define MIN_INT64 0x8000000000000000
00142
00143
00144 static void Create_Sync_Structure ();
00145 static WN* Create_Initialize_Loop (WN* processor_loop,
00146 ARRAY_DIRECTED_GRAPH16* dg,
00147 DU_MANAGER* du,
00148 BOOL Is_Pdo_Region);
00149
00150 void Update_Sync_Offset_Stid_DU(WN* sync_offset_stid) {
00151 for (INT i=0; i<sync_offset_ldid_stack->Elements(); i++)
00152 Du_Mgr->Add_Def_Use(sync_offset_stid,sync_offset_ldid_stack->Top_nth(i));
00153 sync_offset_stid_stack->Push(sync_offset_stid);
00154 }
00155
00156 void Update_Sync_Offset_Ldid_DU(WN* sync_offset_ldid) {
00157 for (INT i=0; i<sync_offset_stid_stack->Elements(); i++)
00158 Du_Mgr->Add_Def_Use(sync_offset_stid_stack->Top_nth(i),sync_offset_ldid);
00159 sync_offset_ldid_stack->Push(sync_offset_ldid);
00160 }
00161
00162 void Update_Sync_Length_Stid_DU(WN* sync_length_stid) {
00163 for (INT i=0; i<sync_length_ldid_stack->Elements(); i++)
00164 Du_Mgr->Add_Def_Use(sync_length_stid,sync_length_ldid_stack->Top_nth(i));
00165 sync_length_stid_stack->Push(sync_length_stid);
00166 }
00167
00168 void Update_Sync_Length_Ldid_DU(WN* sync_length_ldid) {
00169 for (INT i=0; i<sync_length_stid_stack->Elements(); i++)
00170 Du_Mgr->Add_Def_Use(sync_length_stid_stack->Top_nth(i),sync_length_ldid);
00171 sync_length_ldid_stack->Push(sync_length_ldid);
00172 }
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189 extern WN* Parallelize_Doacross_Loop(
00190 WN* processor_loop,
00191 WN* processor_tile_loop,
00192 INT32 Doacross_Tile_Size,
00193 INT32 sync_distances[2],
00194 ARRAY_DIRECTED_GRAPH16* dg,
00195 DU_MANAGER* du)
00196 {
00197
00198 Doacross_Init(NULL);
00199
00200 Create_Sync_Structure ();
00201
00202
00203
00204 TYPE_ID index_type=MTYPE_I4;
00205 TYPE_ID sync_array_type=MTYPE_I8;
00206 INT cache_line_size=128;
00207 if (Mhd.L[1].Valid())
00208 cache_line_size=Mhd.L[1].Line_Size;
00209 INT element_per_cache_line=cache_line_size/MTYPE_byte_size(sync_array_type);
00210
00211
00212 INT sync_tile_distances[2];
00213 sync_tile_distances[0]= sync_distances[0]/Doacross_Tile_Size;
00214 if (sync_distances[0]==NULL_DIST)
00215 sync_tile_distances[0]=NULL_DIST;
00216 sync_tile_distances[1]= sync_distances[1]/Doacross_Tile_Size;
00217 if (sync_distances[1]==NULL_DIST)
00218 sync_tile_distances[1]=NULL_DIST;
00219
00220 WN* doacross_region=Get_MP_Region(processor_loop);
00221 BOOL Is_Pdo_Region=Get_Do_Loop_Info(processor_loop)->Mp_Info->Is_Pdo();
00222 if (Is_Pdo_Region) {
00223 doacross_region=LWN_Get_Parent(LWN_Get_Parent(doacross_region));
00224 }
00225
00226
00227 WN* pragmas=WN_region_pragmas(doacross_region);
00228 WN* next_wn=WN_first(pragmas);
00229 while (next_wn) {
00230 if (WN_opcode(next_wn)==OPC_PRAGMA)
00231 if ((WN_PRAGMA_ID)WN_pragma(next_wn)==WN_PRAGMA_SYNC_DOACROSS) {
00232 LWN_Delete_From_Block(LWN_Get_Parent(next_wn),next_wn);
00233 break;
00234 }
00235 next_wn=WN_next(next_wn);
00236 }
00237
00238 MEM_POOL_Push(&LNO_local_pool);
00239
00240
00241 WN* loop1=WN_first(WN_do_body(processor_tile_loop));
00242 INT permutation[2]; permutation[0]=1; permutation[1]=0;
00243 SNL_Permute_Loops(processor_tile_loop, loop1, permutation, 2, TRUE, FALSE);
00244 WN* outer_doacross= loop1;
00245 if (Doacross_Tile_Size!= NULL_DIST && Doacross_Tile_Size>1) {
00246 outer_doacross= Tile_Loop(loop1, Doacross_Tile_Size, 0,
00247 SNL_INV_DOACROSS_TILE,
00248 NULL, &LNO_default_pool);
00249 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00250 INT old_id = WN_MAP32_Get(Prompf_Id_Map, loop1);
00251 INT new_id = New_Construct_Id();
00252 WN_MAP32_Set(Prompf_Id_Map, outer_doacross, old_id);
00253 WN_MAP32_Set(Prompf_Id_Map, loop1, new_id);
00254 Prompf_Info->Doacross_Inner_Tile(old_id, new_id);
00255 }
00256 }
00257
00258
00259 WN* init_loop=Create_Initialize_Loop (processor_loop,dg,du,Is_Pdo_Region);
00260 if (Prompf_Info != NULL && Prompf_Info->Is_Enabled()) {
00261 INT old_id = WN_MAP32_Get(Prompf_Id_Map, processor_loop);
00262 INT new_id = New_Construct_Id();
00263 WN_MAP32_Set(Prompf_Id_Map, init_loop, new_id);
00264 Prompf_Info->Doacross_Sync(old_id, new_id);
00265 }
00266
00267 OPCODE op_stid = OPCODE_make_op(OPR_STID, MTYPE_V, index_type);
00268 OPCODE op_ldid = OPCODE_make_op(OPR_LDID, index_type, index_type);
00269 OPCODE op_add = OPCODE_make_op(OPR_ADD, index_type, MTYPE_V);
00270 OPCODE op_sub = OPCODE_make_op(OPR_SUB, index_type, MTYPE_V);
00271 OPCODE op_mpy = OPCODE_make_op(OPR_MPY, index_type, MTYPE_V);
00272 OPCODE op_gt = OPCODE_make_op(OPR_GT, Boolean_type, index_type);
00273 OPCODE op_lt = OPCODE_make_op(OPR_LT, Boolean_type, index_type);
00274
00275 OPCODE op_long_stid = OPCODE_make_op(OPR_STID, MTYPE_V, sync_array_type);
00276 OPCODE op_long_ldid = OPCODE_make_op(OPR_LDID, sync_array_type,
00277 sync_array_type);
00278 OPCODE op_long_add = OPCODE_make_op(OPR_ADD, sync_array_type, MTYPE_V);
00279 OPCODE op_long_sub = OPCODE_make_op(OPR_SUB, sync_array_type, MTYPE_V);
00280 OPCODE op_long_mpy = OPCODE_make_op(OPR_MPY, sync_array_type, MTYPE_V);
00281 OPCODE op_long_gt = OPCODE_make_op(OPR_GT, Boolean_type, sync_array_type);
00282 ST* index_type_preg_st = MTYPE_To_PREG(index_type);
00283 ST* sync_array_type_preg_st = MTYPE_To_PREG(sync_array_type);
00284
00285 BOOL Reversed=FALSE;
00286 DO_LOOP_INFO* dli=Get_Do_Loop_Info(processor_tile_loop);
00287 if (dli->Is_Backward && dli->Auto_Parallelized) {
00288 Reversed=TRUE;
00289 }
00290
00291
00292 #ifdef _NEW_SYMTAB
00293 WN_OFFSET preg_num = Create_Preg(index_type, "my_pid");
00294 #else
00295 WN_OFFSET preg_num = Create_Preg(index_type, "my_pid", NULL);
00296 #endif
00297 WN* pid_ldid=LWN_CreateLdid(op_ldid, WN_start(processor_loop));
00298 du->Add_Def_Use(WN_start(processor_loop), pid_ldid);
00299 du->Add_Def_Use(WN_step(processor_loop), pid_ldid);
00300 du->Ud_Get_Def(pid_ldid)->Set_loop_stmt(processor_loop);
00301 WN* my_pid_stid = LWN_CreateStid(op_stid, preg_num,
00302 index_type_preg_st, Be_Type_Tbl(index_type), pid_ldid);
00303 LWN_Insert_Block_Before(
00304 LWN_Get_Parent(outer_doacross), outer_doacross, my_pid_stid);
00305
00306
00307 #ifdef _NEW_SYMTAB
00308 preg_num = Create_Preg(sync_array_type, "my_step");
00309 #else
00310 preg_num = Create_Preg(sync_array_type, "my_step", NULL);
00311 #endif
00312 WN* my_step_stid = LWN_CreateStid(op_long_stid, preg_num,
00313 sync_array_type_preg_st, Be_Type_Tbl(sync_array_type),
00314 LWN_Make_Icon(sync_array_type, 1));
00315 LWN_Insert_Block_Before(
00316 LWN_Get_Parent(outer_doacross), outer_doacross, my_step_stid);
00317
00318
00319 WN* my_step_ldid = LWN_CreateLdid(op_long_ldid, my_step_stid);
00320 WN* my_step_inc = LWN_CreateExp2(
00321 op_long_add,
00322 my_step_ldid,
00323 LWN_Make_Icon(sync_array_type, 1));
00324 WN* my_step_inc_stid =
00325 LWN_CreateStid(op_long_stid, my_step_stid, my_step_inc);
00326 LWN_Insert_Block_Before(WN_do_body(outer_doacross), NULL, my_step_inc_stid);
00327 du->Add_Def_Use(my_step_inc_stid, my_step_ldid);
00328 du->Add_Def_Use(my_step_stid, my_step_ldid);
00329
00330
00331
00332 WN* my_step_ldid1 = LWN_Copy_Tree(my_step_ldid);
00333 LWN_Copy_Def_Use(my_step_ldid,my_step_ldid1,du);
00334 WN* sync_istore=LWN_Copy_Tree(WN_first(WN_do_body(init_loop)));
00335 LWN_Delete_Tree(WN_array_index(WN_kid1(sync_istore),0));
00336 WN* index=LWN_CreateLdid(op_ldid,my_pid_stid);
00337 du->Add_Def_Use(my_pid_stid,index);
00338 du->Ud_Get_Def(index)->Set_loop_stmt(NULL);
00339 WN* sync_offset_stid=WN_next(init_loop);
00340 WN* sync_offset_ldid=LWN_CreateLdid(op_ldid,sync_offset_stid);
00341 Update_Sync_Offset_Ldid_DU(sync_offset_ldid);
00342 WN_array_index(WN_kid1(sync_istore),0)=
00343 LWN_CreateExp2(
00344 op_add,
00345 LWN_CreateExp2(op_mpy,
00346 index,
00347 LWN_Make_Icon(index_type,
00348 element_per_cache_line)),
00349 sync_offset_ldid);
00350 WN_kid0(sync_istore)= my_step_ldid1;
00351 LWN_Parentize(sync_istore);
00352
00353
00354 WN* fb = WN_CreateBarrier(TRUE, 0);
00355 WN* hw_sync=WN_Create_Intrinsic(OPC_VINTRINSIC_CALL,
00356 INTRN_SYNCHRONIZE,0,NULL);
00357 WN* bb = WN_CreateBarrier(FALSE, 0);
00358 LWN_Insert_Block_Before(WN_do_body(outer_doacross),my_step_inc_stid,fb);
00359 LWN_Insert_Block_Before(WN_do_body(outer_doacross),my_step_inc_stid,hw_sync);
00360 LWN_Insert_Block_Before(WN_do_body(outer_doacross),my_step_inc_stid,bb);
00361 LWN_Insert_Block_Before(
00362 WN_do_body(outer_doacross), my_step_inc_stid, sync_istore);
00363 WN *loop = Enclosing_Do_Loop(fb);
00364 if (loop && Do_Loop_Is_Good(loop)) {
00365 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00366 v = Array_Dependence_Graph->Add_Vertex(bb);
00367 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00368 }
00369
00370
00371 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
00372 &LNO_local_pool);
00373 Build_Doloop_Stack(LWN_Get_Parent(sync_istore), loop_stack);
00374 LNO_Build_Access(sync_istore, loop_stack, &LNO_default_pool);
00375
00376
00377 WN* sync_init_istore=LWN_Copy_Tree(WN_first(WN_do_body(init_loop)));
00378 LWN_Delete_Tree(WN_array_index(WN_kid1(sync_init_istore),0));
00379 index=LWN_CreateLdid(op_ldid,my_pid_stid);
00380 du->Add_Def_Use(my_pid_stid,index);
00381 du->Ud_Get_Def(index)->Set_loop_stmt(NULL);
00382 sync_offset_ldid=LWN_CreateLdid(op_ldid,sync_offset_stid);
00383 Update_Sync_Offset_Ldid_DU(sync_offset_ldid);
00384 WN_array_index(WN_kid1(sync_init_istore),0)=
00385 LWN_CreateExp2(
00386 op_add,
00387 LWN_CreateExp2(op_mpy,
00388 index,
00389 LWN_Make_Icon(index_type,
00390 element_per_cache_line)),
00391 LWN_Make_Icon(index_type, 1));
00392 WN_array_index(WN_kid1(sync_init_istore),0)=
00393 LWN_CreateExp2(op_sub,
00394 WN_array_index(WN_kid1(sync_init_istore),0),
00395 sync_offset_ldid);
00396 LWN_Parentize(sync_init_istore);
00397 fb = WN_CreateBarrier(TRUE, 0);
00398 hw_sync=WN_Create_Intrinsic(OPC_VINTRINSIC_CALL,
00399 INTRN_SYNCHRONIZE,0,NULL);
00400 bb = WN_CreateBarrier(FALSE, 0);
00401 LWN_Insert_Block_After(
00402 WN_do_body(processor_loop), outer_doacross, sync_init_istore);
00403 LWN_Insert_Block_After(WN_do_body(processor_loop),outer_doacross,bb);
00404 LWN_Insert_Block_After(WN_do_body(processor_loop),outer_doacross,hw_sync);
00405 LWN_Insert_Block_After(WN_do_body(processor_loop),outer_doacross,fb);
00406 loop = Enclosing_Do_Loop(fb);
00407 if (loop && Do_Loop_Is_Good(loop)) {
00408 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00409 v = Array_Dependence_Graph->Add_Vertex(bb);
00410 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00411 }
00412
00413 Build_Doloop_Stack(LWN_Get_Parent(sync_init_istore), loop_stack);
00414 LNO_Build_Access(sync_init_istore, loop_stack, &LNO_default_pool);
00415
00416 if (sync_tile_distances[0]!=NULL_DIST || sync_tile_distances[1]!=NULL_DIST) {
00417
00418
00419 WN* left;
00420 WN* right;
00421 WN* upper_guard=NULL;
00422 WN* upper_sync_ldid=NULL;
00423 WN* step_upper_update_stid=NULL;
00424 WN* lower_guard=NULL;
00425 WN* lower_sync_ldid=NULL;
00426 WN* step_lower_update_stid=NULL;
00427 if (sync_tile_distances[1]!= NULL_DIST) {
00428
00429
00430 #ifdef _NEW_SYMTAB
00431 preg_num = Create_Preg(index_type, "upper_pid");
00432 #else
00433 preg_num = Create_Preg(index_type, "upper_pid", NULL);
00434 #endif
00435 WN* pid_ldid=LWN_CreateLdid(op_ldid, WN_start(processor_loop));
00436 du->Add_Def_Use(WN_start(processor_loop), pid_ldid);
00437 du->Add_Def_Use(WN_step(processor_loop), pid_ldid);
00438 du->Ud_Get_Def(pid_ldid)->Set_loop_stmt(processor_loop);
00439 if (!Reversed)
00440 pid_ldid=LWN_CreateExp2(op_sub,pid_ldid,LWN_Make_Icon(index_type,1));
00441 else
00442 pid_ldid=LWN_CreateExp2(op_add,pid_ldid,LWN_Make_Icon(index_type,1));
00443 WN* upper_pid_stid = LWN_CreateStid(op_stid, preg_num,
00444 index_type_preg_st,
00445 Be_Type_Tbl(index_type), pid_ldid);
00446 LWN_Insert_Block_Before(
00447 LWN_Get_Parent(outer_doacross), outer_doacross, upper_pid_stid);
00448
00449
00450 #ifdef _NEW_SYMTAB
00451 preg_num = Create_Preg(sync_array_type, "step_upper");
00452 #else
00453 preg_num = Create_Preg(sync_array_type, "step_upper", NULL);
00454 #endif
00455 WN* step_upper_stid = LWN_CreateStid(op_long_stid, preg_num,
00456 sync_array_type_preg_st,
00457 Be_Type_Tbl(sync_array_type),
00458 LWN_Make_Icon(sync_array_type, 0));
00459 LWN_Insert_Block_Before(
00460 LWN_Get_Parent(outer_doacross), outer_doacross, step_upper_stid);
00461
00462
00463 left = LWN_Copy_Tree(my_step_ldid);
00464 LWN_Copy_Def_Use(my_step_ldid,left,du);
00465 WN* step_upper_ldid = LWN_CreateLdid(op_long_ldid,step_upper_stid);
00466 du->Add_Def_Use(step_upper_stid,step_upper_ldid);
00467 right = LWN_CreateExp2(
00468 op_long_add,
00469 step_upper_ldid,
00470 LWN_Make_Icon(sync_array_type,sync_tile_distances[1]));
00471 upper_guard=
00472 LWN_CreateExp2(op_long_gt, left,right);
00473
00474
00475 WN* ldid=LWN_CreateLdid(op_ldid,my_pid_stid);
00476 du->Add_Def_Use(my_pid_stid,ldid);
00477 left=ldid;
00478 WN* upper_boundary;
00479 if (!Reversed)
00480 upper_boundary=LWN_CreateExp2(op_gt,left,LWN_Make_Icon(index_type,0));
00481 else {
00482 right=LWN_Copy_Tree(WN_kid1(WN_end(processor_loop)));
00483 LWN_Copy_Def_Use(WN_kid1(WN_end(processor_loop)),right,du);
00484 upper_boundary=LWN_CreateExp2(op_lt, left, right);
00485 }
00486 upper_guard=LWN_CreateExp2(OPC_I4LAND, upper_guard, upper_boundary);
00487
00488
00489 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
00490 WN* wn_array = WN_Create(op_array, 3);
00491 WN_element_size(wn_array) = MTYPE_byte_size(sync_array_type);
00492 OPCODE ldaop = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
00493 WN_array_base(wn_array) = WN_CreateLda(ldaop,
00494 0,
00495 Sync_Array_Ptr_Ty,
00496 Sync_Array_St
00497 );
00498
00499 ldid=LWN_CreateLdid(op_ldid, upper_pid_stid);
00500 du->Add_Def_Use(upper_pid_stid,ldid);
00501 du->Ud_Get_Def(ldid)->Set_loop_stmt(NULL);
00502 WN_array_index(wn_array,0) =
00503 LWN_CreateExp2(
00504 op_mpy, ldid,
00505 LWN_Make_Icon(index_type,
00506 element_per_cache_line));
00507 sync_offset_ldid=LWN_CreateLdid(op_ldid,sync_offset_stid);
00508 Update_Sync_Offset_Ldid_DU(sync_offset_ldid);
00509 WN_array_index(wn_array,0)=
00510 LWN_CreateExp2(op_add,
00511 WN_array_index(wn_array,0),
00512 sync_offset_ldid);
00513 WN_array_dim(wn_array,0) =
00514 LWN_Make_Icon(index_type,1024*element_per_cache_line);
00515 OPCODE loadop =
00516 OPCODE_make_op(OPR_ILOAD, sync_array_type, sync_array_type);
00517 TY_IDX wty = Be_Type_Tbl(sync_array_type);
00518 TY_IDX pty = Sync_Array_Ptr_Ty;
00519 WN* load = LWN_CreateIload(loadop, 0, wty, pty, wn_array);
00520
00521 Copy_alias_info(Alias_Mgr, Sync_Array_Alias_Host, load);
00522
00523 step_upper_update_stid=
00524 LWN_CreateStid(op_long_stid, step_upper_stid, load);
00525 LWN_Parentize(step_upper_update_stid);
00526
00527 du->Add_Def_Use(step_upper_update_stid,step_upper_ldid);
00528
00529 }
00530
00531 if (sync_tile_distances[0]!= NULL_DIST) {
00532
00533
00534 #ifdef _NEW_SYMTAB
00535 preg_num = Create_Preg(index_type, "lower_pid");
00536 #else
00537 preg_num = Create_Preg(index_type, "lower_pid", NULL);
00538 #endif
00539 WN* pid_ldid=LWN_CreateLdid(op_ldid, WN_start(processor_loop));
00540 du->Add_Def_Use(WN_start(processor_loop), pid_ldid);
00541 du->Add_Def_Use(WN_step(processor_loop), pid_ldid);
00542 du->Ud_Get_Def(pid_ldid)->Set_loop_stmt(processor_loop);
00543 if (!Reversed)
00544 pid_ldid=LWN_CreateExp2(op_add,pid_ldid,LWN_Make_Icon(index_type,1));
00545 else
00546 pid_ldid=LWN_CreateExp2(op_sub,pid_ldid,LWN_Make_Icon(index_type,1));
00547 WN* lower_pid_stid = LWN_CreateStid(op_stid, preg_num,
00548 index_type_preg_st,
00549 Be_Type_Tbl(index_type), pid_ldid);
00550 LWN_Insert_Block_Before(
00551 LWN_Get_Parent(outer_doacross), outer_doacross, lower_pid_stid);
00552
00553
00554 #ifdef _NEW_SYMTAB
00555 preg_num = Create_Preg(sync_array_type, "step_lower");
00556 #else
00557 preg_num = Create_Preg(sync_array_type, "step_lower", NULL);
00558 #endif
00559 WN* step_lower_stid = LWN_CreateStid(op_long_stid, preg_num,
00560 sync_array_type_preg_st,
00561 Be_Type_Tbl(sync_array_type),
00562 LWN_Make_Icon(sync_array_type, 0));
00563 LWN_Insert_Block_Before(
00564 LWN_Get_Parent(outer_doacross), outer_doacross, step_lower_stid);
00565
00566
00567 left = LWN_Copy_Tree(my_step_ldid);
00568 LWN_Copy_Def_Use(my_step_ldid,left,du);
00569 WN* step_lower_ldid=LWN_CreateLdid(op_long_ldid,step_lower_stid);
00570 right = LWN_CreateExp2(
00571 op_long_add,
00572 step_lower_ldid,
00573 LWN_Make_Icon(sync_array_type,sync_tile_distances[0]));
00574 du->Add_Def_Use(step_lower_stid,step_lower_ldid);
00575 lower_guard= LWN_CreateExp2(op_long_gt, left,right);
00576
00577
00578 WN* ldid=LWN_CreateLdid(op_ldid, my_pid_stid);
00579 du->Add_Def_Use(my_pid_stid,ldid);
00580
00581 left=ldid;
00582 WN* upper_boundary;
00583 if (!Reversed) {
00584 right=LWN_Copy_Tree(WN_kid1(WN_end(processor_loop)));
00585 LWN_Copy_Def_Use(WN_kid1(WN_end(processor_loop)),right,du);
00586 upper_boundary=LWN_CreateExp2(op_lt, left, right);
00587 } else
00588 upper_boundary=LWN_CreateExp2(op_gt,left,LWN_Make_Icon(index_type,0));
00589 lower_guard=LWN_CreateExp2(OPC_I4LAND, lower_guard, upper_boundary);
00590
00591
00592 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
00593 WN* wn_array = WN_Create(op_array, 3);
00594 WN_element_size(wn_array) = MTYPE_byte_size(sync_array_type);
00595 OPCODE ldaop = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
00596 WN_array_base(wn_array) = WN_CreateLda(ldaop,
00597 0,
00598 Sync_Array_Ptr_Ty,
00599 Sync_Array_St
00600 );
00601
00602 ldid=LWN_CreateLdid(op_ldid, lower_pid_stid);
00603 du->Add_Def_Use(lower_pid_stid,ldid);
00604 du->Ud_Get_Def(ldid)->Set_loop_stmt(NULL);
00605 WN_array_index(wn_array,0) =
00606 LWN_CreateExp2(
00607 op_mpy, ldid,
00608 LWN_Make_Icon(index_type,element_per_cache_line));
00609 sync_offset_ldid=LWN_CreateLdid(op_ldid,sync_offset_stid);
00610 Update_Sync_Offset_Ldid_DU(sync_offset_ldid);
00611 WN_array_index(wn_array,0)=
00612 LWN_CreateExp2(op_add,
00613 WN_array_index(wn_array,0),
00614 sync_offset_ldid);
00615 WN_array_dim(wn_array,0) =
00616 LWN_Make_Icon(index_type,1024*element_per_cache_line);
00617 OPCODE loadop =
00618 OPCODE_make_op(OPR_ILOAD, sync_array_type, sync_array_type);
00619 TY_IDX wty = Be_Type_Tbl(sync_array_type);
00620 TY_IDX pty = Sync_Array_Ptr_Ty;
00621 WN* load = LWN_CreateIload(loadop, 0, wty, pty, wn_array);
00622
00623 Copy_alias_info(Alias_Mgr, Sync_Array_Alias_Host, load);
00624
00625 step_lower_update_stid=
00626 LWN_CreateStid(op_stid, step_lower_stid, load);
00627 du->Add_Def_Use(step_lower_update_stid,step_lower_ldid);
00628 LWN_Parentize(step_lower_update_stid);
00629
00630 }
00631
00632 WN* wn_upper_do_while=NULL;
00633 if (upper_guard) {
00634
00635 WN* do_while_guard=LWN_Copy_Tree(WN_kid1(upper_guard));
00636 LWN_Copy_Def_Use(WN_kid1(upper_guard),do_while_guard,du);
00637 wn_upper_do_while=LWN_CreateDoWhile(do_while_guard, WN_CreateBlock());
00638 fb = WN_CreateBarrier(TRUE, 0);
00639 bb = WN_CreateBarrier(FALSE, 0);
00640 LWN_Insert_Block_Before(WN_while_body(wn_upper_do_while),NULL,bb);
00641 if (step_upper_update_stid)
00642 LWN_Insert_Block_Before(
00643 WN_while_body(wn_upper_do_while),NULL,step_upper_update_stid);
00644 LWN_Insert_Block_Before(WN_while_body(wn_upper_do_while),NULL,fb);
00645 WN *loop = Enclosing_Do_Loop(fb);
00646 if (loop && Do_Loop_Is_Good(loop)) {
00647 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00648 v = Array_Dependence_Graph->Add_Vertex(bb);
00649 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00650 }
00651 Build_Doloop_Stack(LWN_Get_Parent(wn_upper_do_while), loop_stack);
00652 LNO_Build_Access(wn_upper_do_while, loop_stack, &LNO_default_pool);
00653
00654
00655 WN* wn_if=LWN_CreateIf(upper_guard, WN_CreateBlock(), WN_CreateBlock());
00656 LWN_Insert_Block_Before(WN_then(wn_if),NULL,wn_upper_do_while);
00657 fb = WN_CreateBarrier(TRUE, 0);
00658 hw_sync=WN_Create_Intrinsic(OPC_VINTRINSIC_CALL,
00659 INTRN_SYNCHRONIZE,0,NULL);
00660 bb = WN_CreateBarrier(FALSE, 0);
00661 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,bb);
00662 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,hw_sync);
00663 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,fb);
00664 LWN_Insert_Block_After(WN_do_body(outer_doacross), NULL, wn_if);
00665 loop = Enclosing_Do_Loop(fb);
00666 if (loop && Do_Loop_Is_Good(loop)) {
00667 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00668 v = Array_Dependence_Graph->Add_Vertex(bb);
00669 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00670 }
00671 IF_INFO *if_info =
00672 CXX_NEW(IF_INFO(&LNO_default_pool,FALSE,FALSE),&LNO_default_pool);
00673 WN_MAP_Set(LNO_Info_Map,wn_if,(void *)if_info);
00674 LNO_Build_If_Access(wn_if, loop_stack);
00675
00676 #if 0
00677 LWN_Delete_DU(wn_if);
00678 LWN_Delete_LNO_dep_graph(wn_if);
00679 LWN_Delete_Tree(wn_if);
00680 #endif
00681 }
00682
00683 WN* wn_lower_do_while=NULL;
00684 if (lower_guard) {
00685
00686 WN* do_while_guard=LWN_Copy_Tree(WN_kid1(lower_guard));
00687 LWN_Copy_Def_Use(WN_kid1(lower_guard),do_while_guard,du);
00688 wn_lower_do_while=LWN_CreateDoWhile(do_while_guard, WN_CreateBlock());
00689 fb = WN_CreateBarrier(TRUE, 0);
00690 bb = WN_CreateBarrier(FALSE, 0);
00691 LWN_Insert_Block_Before(WN_while_body(wn_lower_do_while),NULL,bb);
00692 if (step_lower_update_stid)
00693 LWN_Insert_Block_Before(
00694 WN_while_body(wn_lower_do_while),NULL,step_lower_update_stid);
00695 LWN_Insert_Block_Before(WN_while_body(wn_lower_do_while),NULL,fb);
00696 WN *loop = Enclosing_Do_Loop(fb);
00697 if (loop && Do_Loop_Is_Good(loop)) {
00698 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00699 v = Array_Dependence_Graph->Add_Vertex(bb);
00700 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00701 }
00702 Build_Doloop_Stack(LWN_Get_Parent(wn_lower_do_while), loop_stack);
00703 LNO_Build_Access(wn_lower_do_while, loop_stack, &LNO_default_pool);
00704
00705
00706 WN* wn_if=LWN_CreateIf(lower_guard, WN_CreateBlock(), WN_CreateBlock());
00707 LWN_Insert_Block_Before(WN_then(wn_if),NULL,wn_lower_do_while);
00708 fb = WN_CreateBarrier(TRUE, 0);
00709 hw_sync=WN_Create_Intrinsic(OPC_VINTRINSIC_CALL,
00710 INTRN_SYNCHRONIZE,0,NULL);
00711 bb = WN_CreateBarrier(FALSE, 0);
00712 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,bb);
00713 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,hw_sync);
00714 LWN_Insert_Block_After(WN_do_body(outer_doacross),NULL,fb);
00715 LWN_Insert_Block_After(WN_do_body(outer_doacross), NULL, wn_if);
00716 loop = Enclosing_Do_Loop(fb);
00717 if (loop && Do_Loop_Is_Good(loop)) {
00718 VINDEX16 v = Array_Dependence_Graph->Add_Vertex(fb);
00719 v = Array_Dependence_Graph->Add_Vertex(bb);
00720 if (!v) LNO_Erase_Dg_From_Here_In(bb,Array_Dependence_Graph);
00721 }
00722 IF_INFO *if_info =
00723 CXX_NEW(IF_INFO(&LNO_default_pool,FALSE,FALSE),&LNO_default_pool);
00724 WN_MAP_Set(LNO_Info_Map,wn_if,(void *)if_info);
00725 LNO_Build_If_Access(wn_if, loop_stack);
00726
00727 #if 0
00728 LWN_Delete_DU(wn_if);
00729 LWN_Delete_LNO_dep_graph(wn_if);
00730 LWN_Delete_Tree(wn_if);
00731 #endif
00732 }
00733
00734 }
00735
00736
00737 STACK<WN*>* outer_loop_stack = CXX_NEW(STACK<WN*>(&LNO_local_pool),
00738 &LNO_local_pool);
00739 WN* outer_good_loop=LWN_Get_Parent(processor_loop);
00740 while (outer_good_loop) {
00741 if (WN_opcode(outer_good_loop)==OPC_DO_LOOP &&
00742 Do_Loop_Is_Good(outer_good_loop)) {
00743 outer_loop_stack->Push(outer_good_loop);
00744 }
00745 outer_good_loop=LWN_Get_Parent(outer_good_loop);
00746 }
00747
00748
00749
00750 while (!outer_loop_stack->Is_Empty()) {
00751 outer_good_loop=outer_loop_stack->Pop();
00752 Remove_Unity_Trip_Loop_Dep_Update(outer_good_loop, dg, TRUE);
00753 Get_Do_Loop_Info(outer_good_loop)->Has_Bad_Mem=TRUE;
00754 LNO_Erase_Vertices_In_Loop(outer_good_loop,dg);
00755 }
00756
00757
00758 Remove_Unity_Trip_Loop_Dep_Update(processor_loop, dg, TRUE);
00759 Get_Do_Loop_Info(processor_loop)->Has_Bad_Mem=TRUE;
00760 Get_Do_Loop_Info(processor_loop)->Has_Calls=TRUE;
00761 Get_Do_Loop_Info(processor_loop)->Has_Unsummarized_Calls=TRUE;
00762 Remove_Unity_Trip_Loop_Dep_Update(outer_doacross, dg, TRUE);
00763 Get_Do_Loop_Info(outer_doacross)->Has_Bad_Mem=TRUE;
00764 Get_Do_Loop_Info(outer_doacross)->Has_Calls=TRUE;
00765 Get_Do_Loop_Info(outer_doacross)->Has_Unsummarized_Calls=TRUE;
00766
00767 MEM_POOL_Pop(&LNO_local_pool);
00768
00769 return processor_loop;
00770
00771 }
00772
00773
00774
00775
00776
00777
00778 static WN* Get_Runtime_Max_Numthreads_Ldid () {
00779
00780 OPCODE ldid_op = OPCODE_make_op(OPR_LDID, MTYPE_I4, MTYPE_I4);
00781 WN* ldid_wn = WN_CreateLdid (ldid_op, 0, Max_Numthread_St,
00782 Be_Type_Tbl(MTYPE_I4));
00783 Create_global_alias (Alias_Mgr, Max_Numthread_St, ldid_wn, NULL);
00784 Du_Mgr->Add_Def_Use (Func_Root, ldid_wn);
00785 return ldid_wn;
00786 }
00787
00788
00789
00790
00791
00792
00793
00794 static void Create_Sync_Structure () {
00795
00796 if (Sync_Structure_Created)
00797 return;
00798
00799 Sync_Structure_Created=TRUE;
00800
00801 BOOL is_global=TRUE;
00802 BOOL is_local=FALSE;
00803
00804 #ifdef _NEW_SYMTAB
00805 TY_IDX array_ty_idx;
00806 TY& array_ty = New_TY(array_ty_idx);
00807 TY_Init (array_ty,
00808 1024*128,
00809 KIND_ARRAY,
00810 MTYPE_UNKNOWN,
00811 Save_Str("array_I8"));
00812 ARB_HANDLE arb = New_ARB();
00813 ARB_Init(arb,0,1024*16-1,
00814 1);
00815 Set_ARB_first_dimen (arb);
00816 Set_ARB_last_dimen (arb);
00817 Set_TY_align_exp(array_ty_idx,8);
00818 Set_TY_etype(array_ty,Be_Type_Tbl(MTYPE_I1));
00819 Set_TY_arb(array_ty,arb);
00820 Sync_Array_Ptr_Ty=Make_Pointer_Type(array_ty_idx);
00821 Set_TY_ptr_as_array(Sync_Array_Ptr_Ty);
00822
00823 #else
00824 TY_IDX array_ty = New_TY(is_global);
00825 TY_IDX array_ty_idx = array_ty;
00826 TY_kind(array_ty) = KIND_ARRAY;
00827 TY_btype(array_ty) = MTYPE_M;
00828
00829 ARI *ari = New_ARI (1, is_global);
00830 ARI_etype(ari) = Be_Type_Tbl(MTYPE_I8);
00831 ARI_const_zofst(ari) = TRUE;
00832 ARI_zofst_val(ari) = 0;
00833 ARB_const_lbnd(ARI_bnd(ari,0)) = TRUE;
00834 ARB_lbnd_val(ARI_bnd(ari,0)) = 0;
00835 ARB_const_ubnd(ARI_bnd(ari,0)) = TRUE;
00836 ARB_ubnd_val(ARI_bnd(ari,0)) = 1024*16-1;
00837
00838 ARB_const_stride(ARI_bnd(ari,0)) = TRUE;
00839 ARB_stride_val(ARI_bnd(ari,0)) = 1;
00840 TY_size(array_ty) = 1024*128;
00841 TY_align(array_ty) = 8;
00842 TY_name(array_ty) = Save_Str ("array_I8");
00843 TY_arinfo(array_ty) = ari;
00844 Enter_TY (array_ty);
00845 Sync_Array_Ptr_Ty=Make_Pointer_Type(array_ty);
00846 Set_TY_ptr_as_array(Sync_Array_Ptr_Ty);
00847 #endif
00848
00849 ST* st = New_ST(is_global ? GLOBAL_SYMTAB : CURRENT_SYMTAB);
00850 ST_Init (st,
00851 Save_Str("__sync_length"),
00852 CLASS_VAR,
00853 SCLASS_EXTERN,
00854 EXPORT_PREEMPTIBLE,
00855 Be_Type_Tbl(MTYPE_I4));
00856 Clear_ST_addr_not_saved(st);
00857 Sync_Length_St=st;
00858
00859 st = New_ST(is_global ? GLOBAL_SYMTAB : CURRENT_SYMTAB);
00860 ST_Init (st,
00861 Save_Str("__sync_offset"),
00862 CLASS_VAR,
00863 SCLASS_EXTERN,
00864 EXPORT_PREEMPTIBLE,
00865 Be_Type_Tbl(MTYPE_I4));
00866 Clear_ST_addr_not_saved(st);
00867 Sync_Offset_St=st;
00868
00869 st = New_ST(is_global ? GLOBAL_SYMTAB : CURRENT_SYMTAB);
00870 ST_Init (st,
00871 Save_Str("__sync_array"),
00872 CLASS_VAR,
00873 SCLASS_EXTERN,
00874 EXPORT_PREEMPTIBLE,
00875 array_ty_idx);
00876 Clear_ST_addr_not_saved(st);
00877 Sync_Array_St=st;
00878
00879 TY_IDX vi4_ty = Copy_TY(Be_Type_Tbl(MTYPE_I4));
00880 Set_TY_is_volatile(vi4_ty);
00881
00882 st = New_ST(is_global ? GLOBAL_SYMTAB : CURRENT_SYMTAB);
00883 ST_Init (st,
00884 #ifndef KEY
00885 Save_Str("__mp_max_numthreads"),
00886 #else
00887 Save_Str("__ompc_max_numthreads"),
00888 #endif
00889 CLASS_VAR,
00890 SCLASS_EXTERN,
00891 EXPORT_PREEMPTIBLE,
00892 vi4_ty);
00893 Set_ST_not_gprel(st);
00894 Max_Numthread_St=st;
00895
00896 }
00897
00898
00899
00900
00901
00902
00903 static WN* Create_Initialize_Loop (WN* processor_loop,
00904 ARRAY_DIRECTED_GRAPH16* dg,
00905 DU_MANAGER* du,
00906 BOOL Is_Pdo_Region) {
00907
00908 MEM_POOL_Push(&LNO_local_pool);
00909
00910 TYPE_ID sync_array_type=MTYPE_I8;
00911 TYPE_ID index_type=MTYPE_I4;
00912 INT cache_line_size=128;
00913 if (Mhd.L[1].Valid())
00914 cache_line_size=Mhd.L[1].Line_Size;
00915 INT element_per_cache_line=cache_line_size/MTYPE_byte_size(sync_array_type);
00916 OPCODE op_stid = OPCODE_make_op(OPR_STID, MTYPE_V, index_type);
00917 OPCODE op_ldid = OPCODE_make_op(OPR_LDID, index_type, index_type);
00918 OPCODE op_add = OPCODE_make_op(OPR_ADD, index_type, MTYPE_V);
00919 OPCODE op_sub = OPCODE_make_op(OPR_SUB, index_type, MTYPE_V);
00920 OPCODE op_mpy = OPCODE_make_op(OPR_MPY, index_type, MTYPE_V);
00921 OPCODE op_max = OPCODE_make_op(OPR_MAX, index_type, MTYPE_V);
00922 OPCODE op_min = OPCODE_make_op(OPR_MAX, index_type, MTYPE_V);
00923 OPCODE op_array = OPCODE_make_op(OPR_ARRAY, Pointer_type, MTYPE_V);
00924 OPCODE ldaop = OPCODE_make_op(OPR_LDA, Pointer_type, MTYPE_V);
00925 OPCODE istoreop = OPCODE_make_op(OPR_ISTORE, MTYPE_V, sync_array_type);
00926 ST* sync_array_type_preg_st = MTYPE_To_PREG(sync_array_type);
00927 ST* index_type_preg_st = MTYPE_To_PREG(index_type);
00928 WN* doacross_region=Get_MP_Region(processor_loop);
00929 WN* num_thread_ldid=NULL;
00930 if (Is_Pdo_Region) {
00931 doacross_region=LWN_Get_Parent(LWN_Get_Parent(doacross_region));
00932 num_thread_ldid=Get_Runtime_Max_Numthreads_Ldid();
00933 } else {
00934 WN* num_thread_stid=WN_prev(doacross_region);
00935 if (num_thread_stid==NULL) {
00936
00937 num_thread_stid=WN_prev(LWN_Get_Parent(LWN_Get_Parent(doacross_region)));
00938 }
00939
00940 num_thread_ldid=LWN_CreateLdid(op_ldid,num_thread_stid);
00941 du->Add_Def_Use(num_thread_stid,num_thread_ldid);
00942 }
00943 WN* sync_array_length_ldid = WN_CreateLdid(op_ldid,
00944 0,
00945 Sync_Length_St,
00946 ST_type(Sync_Length_St)
00947 );
00948 WN* sync_array_offset_ldid = WN_CreateLdid(op_ldid,
00949 0,
00950 Sync_Offset_St,
00951 ST_type(Sync_Offset_St)
00952 );
00953
00954
00955 if (Sync_Length_Alias_Host==NULL) {
00956 Create_global_alias(Alias_Mgr,Sync_Length_St,sync_array_length_ldid,NULL);
00957 Create_global_alias(Alias_Mgr,Sync_Offset_St,sync_array_offset_ldid,NULL);
00958 Sync_Length_Alias_Host=sync_array_length_ldid;
00959 Sync_Offset_Alias_Host=sync_array_offset_ldid;
00960 } else {
00961 Copy_alias_info(Alias_Mgr, Sync_Length_Alias_Host, sync_array_length_ldid);
00962 Copy_alias_info(Alias_Mgr, Sync_Offset_Alias_Host, sync_array_offset_ldid);
00963 }
00964
00965
00966 #ifdef _NEW_SYMTAB
00967 WN_OFFSET preg_num = Create_Preg(index_type, "sync_init");
00968 #else
00969 WN_OFFSET preg_num = Create_Preg(index_type, "sync_init", NULL);
00970 #endif
00971
00972 WN* loop_start = LWN_CreateStid(
00973 op_stid,
00974 preg_num,
00975 index_type_preg_st,
00976 Be_Type_Tbl(index_type),
00977 LWN_CreateExp2(
00978 op_mpy,
00979 sync_array_length_ldid,
00980 LWN_Make_Icon(index_type,
00981 element_per_cache_line)));
00982
00983 WN* ldid=LWN_CreateLdid(op_ldid,loop_start);
00984 WN* loop_end = LWN_CreateExp2(OPCODE_make_op(OPR_LT,Boolean_type,index_type),
00985 ldid,
00986 LWN_CreateExp2(
00987 op_mpy,
00988 num_thread_ldid,
00989 LWN_Make_Icon(index_type,
00990 element_per_cache_line)));
00991 WN* ldid1=LWN_CreateLdid(op_ldid,loop_start);
00992 WN* loop_step = LWN_CreateStid(op_stid,loop_start,
00993 LWN_CreateExp2(op_add,
00994 ldid1,
00995 LWN_Make_Icon(index_type,1)));
00996
00997 WN* loop_index = WN_CreateIdname(preg_num, index_type_preg_st);
00998
00999 du->Add_Def_Use(loop_start,ldid);
01000 du->Add_Def_Use(loop_step,ldid);
01001 du->Add_Def_Use(loop_start,ldid1);
01002 du->Add_Def_Use(loop_step,ldid1);
01003
01004 WN* init_loop=LWN_CreateDO( loop_index,
01005 loop_start,
01006 loop_end,
01007 loop_step,
01008 WN_CreateBlock());
01009
01010 du->Ud_Get_Def(ldid)->Set_loop_stmt(init_loop);
01011 du->Ud_Get_Def(ldid1)->Set_loop_stmt(init_loop);
01012
01013
01014 WN* wn_array = WN_Create(op_array, 3);
01015 WN_element_size(wn_array) = MTYPE_byte_size(sync_array_type);
01016 WN_array_base(wn_array) = WN_CreateLda(ldaop,
01017 0,
01018 Sync_Array_Ptr_Ty,
01019 Sync_Array_St
01020 );
01021
01022 WN* ldid4=LWN_CreateLdid(op_ldid, loop_start);
01023 LWN_Copy_Def_Use(ldid,ldid4,du);
01024 WN_array_index(wn_array,0) = ldid4;
01025 WN_array_dim(wn_array,0) = LWN_Make_Icon(index_type,1024*16);
01026 TY_IDX pty = Sync_Array_Ptr_Ty;
01027 WN* store=LWN_CreateIstore(
01028 istoreop, 0, pty, LWN_Make_Icon(sync_array_type,0),wn_array);
01029
01030 if (Sync_Array_Alias_Host==NULL) {
01031 Create_lda_array_alias(Alias_Mgr, WN_array_base(wn_array), store);
01032 Sync_Array_Alias_Host = store;
01033 } else {
01034 Copy_alias_info(Alias_Mgr, Sync_Array_Alias_Host, store);
01035 }
01036
01037 LWN_Insert_Block_After(WN_do_body(init_loop),NULL,store);
01038
01039 LWN_Parentize(init_loop);
01040
01041
01042 LWN_Insert_Block_Before(
01043 LWN_Get_Parent(doacross_region),
01044 doacross_region,
01045 init_loop);
01046
01047
01048 DO_LOOP_INFO* dli = (DO_LOOP_INFO *)
01049 CXX_NEW(DO_LOOP_INFO(&LNO_default_pool,NULL,NULL,NULL,
01050 FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE) ,&LNO_default_pool);
01051 dli->Depth = Get_Do_Loop_Info(processor_loop)->Depth;
01052 WN_MAP_Set(LNO_Info_Map,init_loop,(void *)dli);
01053
01054 DOLOOP_STACK *loop_stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01055 &LNO_local_pool);
01056
01057 Build_Doloop_Stack(LWN_Get_Parent(init_loop), loop_stack);
01058
01059 LNO_Build_Access(init_loop, loop_stack, &LNO_default_pool);
01060 LNO_Build_Do_Access(init_loop, loop_stack);
01061
01062 if (!dg->Build_Region(init_loop,init_loop,loop_stack, TRUE)) {
01063 DevWarn("Array dependence graph overflowed");
01064 LNO_Erase_Dg_From_Here_In(LWN_Get_Parent(init_loop), dg);
01065 }
01066
01067
01068
01069 WN* sync_offset_update_stid=
01070 LWN_CreateStid(op_stid, sync_array_offset_ldid,
01071 LWN_CreateExp2( op_sub,
01072 LWN_Make_Icon(index_type,1),
01073 sync_array_offset_ldid));
01074 Update_Sync_Offset_Stid_DU(sync_offset_update_stid);
01075 Update_Sync_Offset_Ldid_DU(sync_array_offset_ldid);
01076
01077 LWN_Insert_Block_After(
01078 LWN_Get_Parent(init_loop),
01079 init_loop,
01080 sync_offset_update_stid);
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093 WN* ldid3=LWN_Copy_Tree(num_thread_ldid);
01094 LWN_Copy_Def_Use(num_thread_ldid,ldid3,du);
01095 WN* sync_length_update_stid=
01096 LWN_CreateStid(op_stid, sync_array_length_ldid, ldid3);
01097
01098 Update_Sync_Length_Stid_DU(sync_length_update_stid);
01099 Update_Sync_Length_Ldid_DU(sync_array_length_ldid);
01100
01101
01102 LWN_Insert_Block_After(
01103 LWN_Get_Parent(init_loop),
01104 sync_offset_update_stid,
01105 sync_length_update_stid);
01106
01107 MEM_POOL_Pop(&LNO_local_pool);
01108 return init_loop;
01109 }
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121 extern void Doacross_Init(
01122 WN* func_nd)
01123 {
01124
01125 if (func_nd!=NULL) {
01126
01127 Func_Root=func_nd;
01128 return;
01129 }
01130
01131 if (Doacross_Inited)
01132
01133 return;
01134
01135 Sync_Array_Alias_Host=NULL;
01136 Sync_Length_Alias_Host=NULL;
01137 Sync_Offset_Alias_Host=NULL;
01138 Sync_Structure_Created=FALSE;
01139 MEM_POOL_Initialize(&DOACROSS_default_pool,"DOACROSS_default_pool",FALSE);
01140 MEM_POOL_Push(&DOACROSS_default_pool);
01141
01142 sync_offset_stid_stack=CXX_NEW(STACK_OF_WN(&DOACROSS_default_pool),
01143 &DOACROSS_default_pool);
01144 sync_offset_ldid_stack=CXX_NEW(STACK_OF_WN(&DOACROSS_default_pool),
01145 &DOACROSS_default_pool);
01146 sync_length_stid_stack=CXX_NEW(STACK_OF_WN(&DOACROSS_default_pool),
01147 &DOACROSS_default_pool);
01148 sync_length_ldid_stack=CXX_NEW(STACK_OF_WN(&DOACROSS_default_pool),
01149 &DOACROSS_default_pool);
01150
01151
01152
01153 WN* wn=Func_Root;
01154 while (wn) {
01155 OPCODE opc=WN_opcode(wn);
01156 if (opc==OPC_FUNC_ENTRY || opc==OPC_ALTENTRY) {
01157 sync_offset_stid_stack->Push(wn);
01158 sync_length_stid_stack->Push(wn);
01159 } else if (opc==OPC_RETURN
01160 #ifdef KEY
01161 || opc==OPC_GOTO_OUTER_BLOCK
01162 #endif
01163 ) {
01164 sync_offset_ldid_stack->Push(wn);
01165 sync_length_ldid_stack->Push(wn);
01166 }
01167 wn=LWN_Get_Next_Stmt_Node(wn);
01168 }
01169 Doacross_Inited=TRUE;
01170 }
01171
01172 extern void Doacross_Finish()
01173 {
01174 Func_Root=NULL;
01175
01176 if (!Doacross_Inited)
01177 return;
01178
01179 MEM_POOL_Pop(&DOACROSS_default_pool);
01180 MEM_POOL_Delete(&DOACROSS_default_pool);
01181 Doacross_Inited=FALSE;
01182 }
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193 static BOOL Dep_Carried_By_Outer_Loop(
01194 INT outer_depth,
01195 INT parallel_depth,
01196 INT dep_id,
01197 SNL_DEP_MATRIX* doacross_dep_info)
01198 {
01199
01200 INT carried_by= -1;
01201 for (INT i=0; i<parallel_depth-outer_depth; i++) {
01202
01203 SNL_DEP dep=(*doacross_dep_info)(dep_id,i);
01204 INT distance=dep.Distance;
01205 if (distance==0)
01206 continue;
01207 if (dep.Unbounded_Min() || dep.Unbounded_Max()) {
01208
01209 if (dep.Unbounded_Min() && dep.Unbounded_Max())
01210 continue;
01211 else if (dep.Unbounded_Max() && distance>0) {
01212
01213 carried_by=i;
01214 break;
01215 } else
01216 continue;
01217 } else {
01218
01219 if (distance>0) {
01220
01221 carried_by=i;
01222 break;
01223 } else if (distance<0)
01224 Is_True(0, ("strange dep. vector"));
01225 }
01226 }
01227 if (carried_by != -1) {
01228 return TRUE;
01229 }
01230 return FALSE;
01231 }
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245 static void Compute_Sync_Distances_From_Dep(
01246 SNL_DEP dep1,
01247 SNL_DEP dep2,
01248 INT sync_distances[])
01249 {
01250 INT upper=0;
01251 INT lower=1;
01252
01253 INT d1=dep1.Distance;
01254 if (dep1.Unbounded_Min() || d1<0) {
01255 sync_distances[upper]=0;
01256 sync_distances[lower]=0;
01257 return;
01258 }
01259
01260 INT d2=dep2.Distance;
01261 if (!dep2.Unbounded_Min()) {
01262 if (d2<0)
01263 if (-d2>d1)
01264 sync_distances[upper]=0;
01265 else
01266 sync_distances[upper]=d1/(-d2);
01267 else
01268 sync_distances[upper]= NULL_DIST;
01269 } else
01270 sync_distances[upper]=0;
01271
01272 if (!dep2.Unbounded_Max()) {
01273 if (d2>0)
01274 if (d2>d1)
01275 sync_distances[lower]=0;
01276 else
01277 sync_distances[lower]=d1/d2;
01278 else
01279 sync_distances[lower]= NULL_DIST;
01280 } else
01281 sync_distances[lower]=0;
01282 }
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299 extern void Compute_Sync_Distances(
01300 WN* wn_outer,
01301 INT nloops,
01302 INT permutation[],
01303 INT parallel_depth,
01304 SNL_DEP_MATRIX** sdm_inv,
01305 BOOL retained[],
01306 INT sync_distances[])
01307 {
01308
01309 MEM_POOL_Push(&LNO_local_pool);
01310
01311 sync_distances[0] = NULL_DIST;
01312 sync_distances[1] = NULL_DIST;
01313 INT outer_depth=Do_Loop_Depth(wn_outer);
01314
01315 for (INT section=parallel_depth-outer_depth+1; section<nloops; section++) {
01316
01317 if (!retained[section])
01318 continue;
01319
01320
01321 SNL_DEP_MATRIX* doacross_dep_info=
01322 CXX_NEW(SNL_DEP_MATRIX(sdm_inv[section],&LNO_local_pool),
01323 &LNO_local_pool);
01324 doacross_dep_info->Apply(permutation);
01325
01326
01327
01328 INT num_deps=doacross_dep_info->Ndep();
01329 for (INT d=0; d<num_deps; d++) {
01330
01331 if (Dep_Carried_By_Outer_Loop(
01332 outer_depth,parallel_depth,d,doacross_dep_info))
01333 continue;
01334
01335 SNL_DEP d1=(*doacross_dep_info)(d,parallel_depth+1-outer_depth);
01336 SNL_DEP d2=(*doacross_dep_info)(d,parallel_depth-outer_depth);
01337
01338 if (d1.Moreless == SNL_DEP::SNL_DEP_EXACT && d1.Distance==0 &&
01339 d2.Moreless == SNL_DEP::SNL_DEP_EXACT && d2.Distance==0)
01340 continue;
01341
01342 INT sync_distances_tmp[2];
01343 Compute_Sync_Distances_From_Dep(d1,d2, sync_distances_tmp);
01344
01345 if (sync_distances[0] == NULL_DIST)
01346 sync_distances[0] = sync_distances_tmp[0];
01347 else if (sync_distances_tmp[0] != NULL_DIST)
01348 if (sync_distances[0] == 0 || sync_distances_tmp[0] == 0)
01349 sync_distances[0] = 0;
01350 else
01351 sync_distances[0] = Min(sync_distances[0],sync_distances_tmp[0]);
01352
01353 if (sync_distances[1] == NULL_DIST)
01354 sync_distances[1] = sync_distances_tmp[1];
01355 else if (sync_distances_tmp[1] != NULL_DIST)
01356 if (sync_distances[1] == 0 || sync_distances_tmp[1] == 0)
01357 sync_distances[1] = 0;
01358 else
01359 sync_distances[1] = Min(sync_distances[1],sync_distances_tmp[1]);
01360
01361 }
01362 }
01363
01364 MEM_POOL_Pop(&LNO_local_pool);
01365 }
01366
01367
01368 extern WN* Get_Only_Loop_Inside(const WN* wn, BOOL regions_ok);
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380 extern INT Get_Doacross_Tile_Size(
01381 INT sync_distances[],
01382 WN* wn_outer,
01383 INT permutation[],
01384 INT nloops,
01385 INT parallel_depth,
01386 INT num_procs,
01387 double work_estimate)
01388 {
01389
01390 INT tile_size;
01391 INT outer_depth=Do_Loop_Depth(wn_outer);
01392
01393 if (sync_distances[0]==0 && sync_distances[1]==0)
01394 return 0;
01395
01396 INT s1=sync_distances[0];
01397 INT s2=sync_distances[1];
01398 INT num_syncs=0;
01399 if (s1!=NULL_DIST)
01400 num_syncs++;
01401 if (s2!=NULL_DIST)
01402 num_syncs++;
01403 double C=Single_Sync_Cycle*(double)num_syncs;
01404
01405 MEM_POOL_Push(&LNO_local_pool);
01406
01407 WN** loops=CXX_NEW_ARRAY(WN*,nloops,&LNO_local_pool);
01408 WN* wn=wn_outer;
01409 INT i;
01410 for (i=0; i<nloops; i++) {
01411 loops[i]=wn;
01412 wn=Get_Only_Loop_Inside(wn,TRUE);
01413 }
01414 INT* iter_count_after_permutation=CXX_NEW_ARRAY(INT,nloops,&LNO_local_pool);
01415 for (i=0; i<nloops; i++) {
01416 iter_count_after_permutation[i]=
01417 Get_Do_Loop_Info(loops[permutation[i]])->Est_Num_Iterations;
01418 }
01419
01420 double M=(double)iter_count_after_permutation[parallel_depth+1-outer_depth];
01421 double N=(double)iter_count_after_permutation[parallel_depth-outer_depth];
01422 double P=(double)num_procs;
01423 double t=work_estimate;
01424 for (i=nloops-1; i>parallel_depth+1-outer_depth; i--)
01425 t = t * iter_count_after_permutation[i];
01426 double T=t*N/P;
01427
01428
01429
01430 double tmp=sqrt((C*M)/(T*(P-1.0)));
01431 INT best_skewed_block_size=MAX(1,(INT)(tmp+.5));
01432 INT legal_block_size_limit;
01433 INT not_skewed_block_size;
01434
01435
01436
01437
01438 if (s1==NULL_DIST && s2==NULL_DIST) {
01439 not_skewed_block_size=INT_MAX;
01440 legal_block_size_limit=INT_MAX;
01441 } else if (s1==NULL_DIST) {
01442 not_skewed_block_size=s2;
01443 legal_block_size_limit=INT_MAX;
01444 } else if (s2==NULL_DIST) {
01445 not_skewed_block_size=s1;
01446 legal_block_size_limit=INT_MAX;
01447 } else {
01448 not_skewed_block_size=MIN(s1,s2);
01449 legal_block_size_limit=MAX(s1,s2);
01450 }
01451
01452 extern INT Parallel_Debug_Level;
01453 INT parallel_debug_level = Get_Trace(TP_LNOPT2, TT_LNO_PARALLEL_DEBUG)
01454 ? Parallel_Debug_Level : 0;
01455
01456
01457 if (LNO_Preferred_doacross_tile_size!=0 &&
01458 LNO_Preferred_doacross_tile_size<=legal_block_size_limit)
01459 tile_size=LNO_Preferred_doacross_tile_size;
01460 else if (not_skewed_block_size==0)
01461 if (best_skewed_block_size>legal_block_size_limit)
01462 tile_size=1;
01463 else
01464 tile_size=best_skewed_block_size;
01465 else if (best_skewed_block_size<=legal_block_size_limit) {
01466 double use_best = M * C;
01467 double use_no_skew = (double)not_skewed_block_size * C * (P-1.0) +
01468 2.0 * sqrt(T*(P-1.0)*M*C);
01469 if (use_best>use_no_skew)
01470 tile_size=best_skewed_block_size;
01471 else
01472 tile_size=not_skewed_block_size;
01473 } else
01474 tile_size=not_skewed_block_size;
01475
01476 if (parallel_debug_level >= 2) {
01477 printf(" C=%13.2f, M=%13.2f, N=%13.2f, T=%13.2f\n", C, M, N, T);
01478 printf(" P=%13.2f, ", P);
01479 if (s1==NULL_DIST) printf("s1=NULL_DIST, "); else printf("s1=%d, ", s1);
01480 if (s2==NULL_DIST) printf("s2=NULL_DIST\n"); else printf("s2=%d\n", s2);
01481 if (not_skewed_block_size==INT_MAX)
01482 printf(" not_skewed_block_size=inf\n");
01483 else
01484 printf(" not_skewed_block_size=%d\n", not_skewed_block_size);
01485 printf(" best_skewed_block_size=%d\n", best_skewed_block_size);
01486 printf(" preferred_doacross_tile_size=%d\n",
01487 LNO_Preferred_doacross_tile_size);
01488 if (legal_block_size_limit==INT_MAX)
01489 printf(" legal_block_size_limit=inf\n");
01490 else
01491 printf(" legal_block_size_limit=%d\n", legal_block_size_limit);
01492 printf(" doacross_tile_size=%d\n", tile_size);
01493 }
01494
01495 MEM_POOL_Pop(&LNO_local_pool);
01496
01497 return tile_size;
01498
01499 }
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517 extern double Compute_Doacross_Delay_Cycle(
01518 WN* wn_outer,
01519 INT permutation[],
01520 INT parallel_depth,
01521 INT num_proc,
01522 INT doacross_tile_size,
01523 INT sync_distances[],
01524 double machine_cycles)
01525 {
01526
01527 INT outer_depth=Do_Loop_Depth(wn_outer);
01528 if (doacross_tile_size==INT_MAX)
01529 return (double)0.0;
01530
01531 if (doacross_tile_size==0)
01532 return (double)DBL_MAX;
01533
01534 INT orig_depth=permutation[parallel_depth+1-outer_depth];
01535
01536 WN* loop=wn_outer;
01537 for (INT i=0; i<orig_depth; i++) {
01538 loop=Get_Only_Loop_Inside(loop, TRUE);
01539 }
01540
01541 INT64 num_iter = Get_Do_Loop_Info(loop)->Est_Num_Iterations;
01542
01543 BOOL need_skew = TRUE;
01544 if (sync_distances[0]>=doacross_tile_size &&
01545 sync_distances[1]>=doacross_tile_size)
01546 need_skew=FALSE;
01547
01548 double delay_cycles;
01549
01550 if (need_skew) {
01551 double cycle_per_tile=
01552 (double)machine_cycles*(double)doacross_tile_size/
01553 (double)(num_iter)+Single_Sync_Cycle;
01554
01555 delay_cycles= cycle_per_tile * (double)(num_proc-1);
01556 } else
01557 delay_cycles= 0.0;
01558
01559 return delay_cycles;
01560 }
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574 extern double Compute_Doacross_Sync_Cycle(
01575 WN* wn_outer,
01576 INT permutation[],
01577 INT parallel_depth,
01578 INT doacross_tile_size,
01579 INT sync_distances[])
01580 {
01581 INT outer_depth=Do_Loop_Depth(wn_outer);
01582
01583 if (doacross_tile_size==INT_MAX)
01584 return (double)0.0;
01585
01586 if (doacross_tile_size==0)
01587 return (double)DBL_MAX;
01588
01589 INT orig_depth=permutation[parallel_depth+1-outer_depth];
01590
01591 WN* loop=wn_outer;
01592 for (INT i=0; i<orig_depth; i++) {
01593 loop=Get_Only_Loop_Inside(loop, TRUE);
01594 }
01595
01596 INT num_syncs=0;
01597 if (sync_distances[0]!=NULL_DIST)
01598 num_syncs++;
01599 if (sync_distances[1]!=NULL_DIST)
01600 num_syncs++;
01601
01602 INT64 num_iter = Get_Do_Loop_Info(loop)->Est_Num_Iterations;
01603
01604 INT num_tiles = num_iter / doacross_tile_size;
01605 if (num_iter % doacross_tile_size !=0)
01606 num_tiles++;
01607 double sync_cycles= Single_Sync_Cycle * (double)(num_syncs * num_tiles);
01608
01609 return sync_cycles;
01610 }
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622 extern BOOL Depv_Carried_By_Outer_Loop(
01623 DEPV* depv,
01624 INT level)
01625 {
01626 for (INT i=0; i<level; i++) {
01627 DEP dep=DEPV_Dep(depv,i);
01628 DIRECTION dir=DEP_Direction(dep);
01629 if (dir==DIR_POS)
01630 return TRUE;
01631 }
01632 return FALSE;
01633 }
01634
01635
01636
01637
01638
01639
01640
01641
01642
01643
01644
01645
01646
01647
01648
01649 extern BOOL Dep_Preserved(
01650 DEPV* depv,
01651 INT doacross_dim,
01652 INT sync_distances[2])
01653 {
01654
01655 if (Depv_Carried_By_Outer_Loop(depv, doacross_dim))
01656 return TRUE;
01657
01658 DEP dep1=DEPV_Dep(depv,doacross_dim+1);
01659 DEP dep2=DEPV_Dep(depv,doacross_dim);
01660
01661 INT v0=sync_distances[0];
01662 INT v1=sync_distances[1];
01663 INT d1;
01664 INT d2;
01665
01666 if (DEP_IsDistance(dep1))
01667 d1=DEP_Distance(dep1);
01668 else
01669 d1=DEP_DistanceBound(dep1);
01670
01671 if (d1<0)
01672 return FALSE;
01673
01674 if (DEP_IsDistance(dep2))
01675 d2=DEP_Distance(dep2);
01676 else {
01677 DIRECTION dir=DEP_Direction(dep2);
01678 switch (dir) {
01679 case DIR_EQ: return TRUE;
01680 case DIR_POS:
01681 case DIR_POSEQ: if (v1==0) return TRUE; else return FALSE;
01682 case DIR_NEG:
01683 case DIR_NEGEQ: if (v0==0) return TRUE; else return FALSE;
01684 case DIR_POSNEG:
01685 case DIR_STAR: return FALSE;
01686 }
01687 }
01688
01689 if (d2==0)
01690 return TRUE;
01691 else if (d2<0) {
01692 if (v0==NULL_DIST)
01693 return FALSE;
01694 else if ( -d2 * v0 <= d1 )
01695 return TRUE;
01696 else
01697 return FALSE;
01698 } else {
01699 if (v1==NULL_DIST)
01700 return FALSE;
01701 else if ( d2 * v1 <= d1 )
01702 return TRUE;
01703 else
01704 return FALSE;
01705 }
01706 }
01707
01708 extern BOOL Check_Doacross_Sync_Coverage(
01709 WN* doacross_loop,
01710 INT sync_distances[2])
01711 {
01712
01713 MEM_POOL_Push(&LNO_local_pool);
01714
01715 ARRAY_DIRECTED_GRAPH16* adg=Array_Dependence_Graph;
01716
01717 BOOL ok=TRUE;
01718 DO_LOOP_INFO *dli=Get_Do_Loop_Info(doacross_loop);
01719
01720 INT doacross_depth=Do_Loop_Depth(doacross_loop);
01721
01722
01723 REF_LIST_STACK *writes = CXX_NEW(REF_LIST_STACK(&LNO_local_pool),
01724 &LNO_local_pool);
01725 REF_LIST_STACK *reads = CXX_NEW(REF_LIST_STACK(&LNO_local_pool),
01726 &LNO_local_pool);
01727 SCALAR_STACK *scalar_writes = CXX_NEW(SCALAR_STACK(&LNO_local_pool),
01728 &LNO_local_pool);
01729 SCALAR_STACK *scalar_reads = CXX_NEW(SCALAR_STACK(&LNO_local_pool),
01730 &LNO_local_pool);
01731 SCALAR_REF_STACK *params =
01732 CXX_NEW(SCALAR_REF_STACK(&LNO_local_pool), &LNO_local_pool);
01733 DOLOOP_STACK *stack=CXX_NEW(DOLOOP_STACK(&LNO_local_pool),
01734 &LNO_local_pool);
01735 Build_Doloop_Stack(doacross_loop, stack);
01736 Init_Ref_Stmt_Counter();
01737 INT32 status=New_Gather_References(doacross_loop,writes,reads,stack,
01738 scalar_writes,scalar_reads,params,&LNO_local_pool,
01739 Gather_Array_Refs);
01740 if (status == -1)
01741 return FALSE;
01742
01743 REF_LIST_STACK *ref_list_stack[2];
01744 ref_list_stack[0]=reads;
01745 ref_list_stack[1]=writes;
01746
01747
01748 for (INT ii=0; ii<2; ii++) {
01749
01750 for (INT i=0;i<ref_list_stack[ii]->Elements(); i++) {
01751
01752 REFERENCE_ITER iter(ref_list_stack[ii]->Bottom_nth(i));
01753 for (REFERENCE_NODE *n=iter.First(); !iter.Is_Empty(); n=iter.Next()) {
01754
01755 WN* ref=n->Wn;
01756 if (Is_Privatizable_With_Context(doacross_loop,ref,TRUE))
01757 continue;
01758
01759 VINDEX16 array_v=adg->Get_Vertex(ref);
01760 if (array_v==0) {
01761 DevWarn("Found array ref without vertex\n");
01762 ok=FALSE;
01763 continue;
01764 } else {
01765 EINDEX16 in_edge=adg->Get_In_Edge(array_v);
01766 while (in_edge) {
01767 WN* source_wn=adg->Get_Wn(adg->Get_Source(in_edge));
01768 if (Wn_Is_Inside(source_wn,doacross_loop) &&
01769 (red_manager == NULL ||
01770 red_manager->Which_Reduction(source_wn) !=
01771 red_manager->Which_Reduction(ref))) {
01772
01773 DEPV_ARRAY* depv_array=adg->Depv_Array(in_edge);
01774 if (depv_array->Num_Dim()>=doacross_depth) {
01775 for (INT i=0; i<depv_array->Num_Vec(); i++)
01776 if (!Dep_Preserved(depv_array->Depv(i),
01777 doacross_depth-depv_array->Num_Unused_Dim(),
01778 sync_distances)) {
01779 DevWarn("Array dep not preserved by doacross sync\n");
01780 ok=FALSE;
01781 }
01782 }
01783 }
01784 in_edge = adg->Get_Next_In_Edge(in_edge);
01785 }
01786 EINDEX16 out_edge=adg->Get_Out_Edge(array_v);
01787 while (out_edge) {
01788 WN* sink_wn=adg->Get_Wn(adg->Get_Sink(out_edge));
01789 if (Wn_Is_Inside(sink_wn,doacross_loop) &&
01790 (red_manager == NULL ||
01791 red_manager->Which_Reduction(sink_wn) !=
01792 red_manager->Which_Reduction(ref))) {
01793
01794 DEPV_ARRAY* depv_array=adg->Depv_Array(out_edge);
01795 if (depv_array->Num_Dim()>=doacross_depth) {
01796 for (INT i=0; i<depv_array->Num_Vec(); i++)
01797 if (!Dep_Preserved(depv_array->Depv(i),
01798 doacross_depth-depv_array->Num_Unused_Dim(),
01799 sync_distances)) {
01800 DevWarn("Array dep not preserved by doacross sync\n");
01801 ok=FALSE;
01802 }
01803 }
01804 }
01805 out_edge = adg->Get_Next_Out_Edge(out_edge);
01806 }
01807 }
01808 }
01809 }
01810 }
01811
01812 MEM_POOL_Pop(&LNO_local_pool);
01813 return ok;
01814 }
01815