Skip to content

Commit b805f0d

Browse files
committed
Teams: Correct mapping of team members.
1 parent 1b212c0 commit b805f0d

File tree

1 file changed

+39
-36
lines changed

1 file changed

+39
-36
lines changed

src/runtime-libraries/mpi/mpi_caf.c

+39-36
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ error_stop_str(const char *string, size_t len, bool quiet)
172172

173173
/* Global variables. */
174174
static int caf_this_image;
175+
static int mpi_this_image;
175176
static int caf_num_images = 0;
176177
static int caf_is_finalized = 0;
177178
static MPI_Win global_dynamic_win;
@@ -901,10 +902,10 @@ PREFIX(init)(int *argc, char ***argv)
901902

902903
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
903904
chk_err(ierr);
904-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
905+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
905906
chk_err(ierr);
906907

907-
++caf_this_image;
908+
caf_this_image = mpi_this_image + 1;
908909
caf_is_finalized = 0;
909910

910911
/* BEGIN SYNC IMAGE preparation
@@ -1010,22 +1011,22 @@ finalize_internal(int status_code)
10101011
chk_err(ierr);
10111012
#endif
10121013
/* For future security, enclose setting img_status in a lock. */
1013-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *stat_tok);
1014+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *stat_tok);
10141015
if (status_code == 0)
10151016
{
10161017
img_status = STAT_STOPPED_IMAGE;
10171018
#ifdef WITH_FAILED_IMAGES
1018-
image_stati[caf_this_image - 1] = STAT_STOPPED_IMAGE;
1019+
image_stati[mpi_this_image] = STAT_STOPPED_IMAGE;
10191020
#endif
10201021
}
10211022
else
10221023
{
10231024
img_status = status_code;
10241025
#ifdef WITH_FAILED_IMAGES
1025-
image_stati[caf_this_image - 1] = status_code;
1026+
image_stati[mpi_this_image] = status_code;
10261027
#endif
10271028
}
1028-
CAF_Win_unlock(caf_this_image - 1, *stat_tok);
1029+
CAF_Win_unlock(mpi_this_image, *stat_tok);
10291030

10301031
/* Announce to all other images that this one has changed its execution
10311032
* status. */
@@ -1371,11 +1372,11 @@ void PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
13711372
if (l_var)
13721373
{
13731374
init_array = (int *)calloc(size, sizeof(int));
1374-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1375-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1375+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1376+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size,
13761377
MPI_INT, *p);
13771378
chk_err(ierr);
1378-
CAF_Win_unlock(caf_this_image - 1, *p);
1379+
CAF_Win_unlock(mpi_this_image, *p);
13791380
free(init_array);
13801381
}
13811382

@@ -1472,11 +1473,11 @@ void *PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
14721473
if (l_var)
14731474
{
14741475
init_array = (int *)calloc(size, sizeof(int));
1475-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1476-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1477-
MPI_INT, *p);
1476+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1477+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size, MPI_INT,
1478+
*p);
14781479
chk_err(ierr);
1479-
CAF_Win_unlock(caf_this_image - 1, *p);
1480+
CAF_Win_unlock(mpi_this_image, *p);
14801481
free(init_array);
14811482
}
14821483

@@ -3579,16 +3580,23 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
35793580
bool free_pad_str = false, free_t_buff = false;
35803581
const bool dest_char_array_is_longer
35813582
= dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
3582-
int remote_image = image_index - 1;
3583+
int remote_image = image_index - 1, this_image = mpi_this_image;
3584+
35833585
if (!same_image)
35843586
{
35853587
MPI_Group current_team_group, win_group;
3588+
int trans_ranks[2];
35863589
ierr = MPI_Comm_group(CAF_COMM_WORLD, &current_team_group);
35873590
chk_err(ierr);
35883591
ierr = MPI_Win_get_group(*p, &win_group);
35893592
chk_err(ierr);
3590-
ierr = MPI_Group_translate_ranks(
3591-
current_team_group, 1, (int[]){remote_image}, win_group, &remote_image);
3593+
ierr = MPI_Group_translate_ranks(current_team_group, 2,
3594+
(int[]){remote_image, this_image},
3595+
win_group, trans_ranks);
3596+
dprint("rank translation: remote: %d -> %d, this: %d -> %d.\n",
3597+
remote_image, trans_ranks[0], this_image, trans_ranks[1]);
3598+
remote_image = trans_ranks[0];
3599+
this_image = trans_ranks[1];
35923600
chk_err(ierr);
35933601
ierr = MPI_Group_free(&current_team_group);
35943602
chk_err(ierr);
@@ -3618,8 +3626,8 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
36183626
if (size == 0)
36193627
return;
36203628

3621-
dprint("src_vector = %p, image_index = %d, offset = %zd.\n", src_vector,
3622-
image_index, offset);
3629+
dprint("src_vector = %p, image_index = %d (remote = %d), offset = %zd.\n",
3630+
src_vector, image_index, remote_image, offset);
36233631
check_image_health(image_index, stat);
36243632

36253633
/* For char arrays: create the padding array when dst is longer than src. */
@@ -7995,8 +8003,7 @@ PREFIX(atomic_define)(caf_token_t token, size_t offset, int image_index,
79958003
{
79968004
MPI_Win *p = TOKEN(token);
79978005
MPI_Datatype dt;
7998-
int ierr = 0,
7999-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8006+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80008007

80018008
selectType(kind, &dt);
80028009

@@ -8027,8 +8034,7 @@ PREFIX(atomic_ref)(caf_token_t token, size_t offset, int image_index,
80278034
{
80288035
MPI_Win *p = TOKEN(token);
80298036
MPI_Datatype dt;
8030-
int ierr = 0,
8031-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8037+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80328038

80338039
selectType(kind, &dt);
80348040

@@ -8059,8 +8065,7 @@ PREFIX(atomic_cas)(caf_token_t token, size_t offset, int image_index, void *old,
80598065
{
80608066
MPI_Win *p = TOKEN(token);
80618067
MPI_Datatype dt;
8062-
int ierr = 0,
8063-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8068+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80648069

80658070
selectType(kind, &dt);
80668071

@@ -8091,7 +8096,7 @@ PREFIX(atomic_op)(int op, caf_token_t token, size_t offset, int image_index,
80918096
int ierr = 0;
80928097
MPI_Datatype dt;
80938098
MPI_Win *p = TOKEN(token);
8094-
int image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8099+
int image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80958100

80968101
#if MPI_VERSION >= 3
80978102
old = malloc(kind);
@@ -8146,7 +8151,7 @@ PREFIX(event_post)(caf_token_t token, size_t index, int image_index, int *stat,
81468151
int value = 1, ierr = 0, flag;
81478152
MPI_Win *p = TOKEN(token);
81488153
const char msg[] = "Error on event post";
8149-
int image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8154+
int image = (image_index == 0) ? mpi_this_image : image_index - 1;
81508155

81518156
if (stat != NULL)
81528157
*stat = 0;
@@ -8184,7 +8189,7 @@ void
81848189
PREFIX(event_wait)(caf_token_t token, size_t index, int until_count, int *stat,
81858190
char *errmsg, charlen_t errmsg_len)
81868191
{
8187-
int ierr = 0, count = 0, i, image = caf_this_image - 1;
8192+
int ierr = 0, count = 0, i, image = mpi_this_image;
81888193
int *var = NULL, flag, old = 0, newval = 0;
81898194
const int spin_loop_max = 20000;
81908195
MPI_Win *p = TOKEN(token);
@@ -8250,8 +8255,7 @@ PREFIX(event_query)(caf_token_t token, size_t index, int image_index,
82508255
int *count, int *stat)
82518256
{
82528257
MPI_Win *p = TOKEN(token);
8253-
int ierr = 0,
8254-
image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8258+
int ierr = 0, image = (image_index == 0) ? mpi_this_image : image_index - 1;
82558259

82568260
if (stat != NULL)
82578261
*stat = 0;
@@ -8590,13 +8594,12 @@ PREFIX(form_team)(int team_id, caf_team_t *team,
85908594
int index __attribute__((unused)))
85918595
{
85928596
struct caf_teams_list *tmp;
8593-
void *tmp_team;
85948597
MPI_Comm *newcomm;
8595-
MPI_Comm *current_comm = &CAF_COMM_WORLD;
8598+
MPI_Comm current_comm = CAF_COMM_WORLD;
85968599
int ierr;
85978600

85988601
newcomm = (MPI_Comm *)calloc(1, sizeof(MPI_Comm));
8599-
ierr = MPI_Comm_split(*current_comm, team_id, caf_this_image, newcomm);
8602+
ierr = MPI_Comm_split(current_comm, team_id, mpi_this_image, newcomm);
86008603
chk_err(ierr);
86018604

86028605
tmp = calloc(1, sizeof(struct caf_teams_list));
@@ -8646,9 +8649,9 @@ PREFIX(change_team)(caf_team_t *team, int coselector __attribute__((unused)))
86468649
tmp_team = tmp_used->team_list_elem->team;
86478650
tmp_comm = (MPI_Comm *)tmp_team;
86488651
CAF_COMM_WORLD = *tmp_comm;
8649-
int ierr = MPI_Comm_rank(*tmp_comm, &caf_this_image);
8652+
int ierr = MPI_Comm_rank(*tmp_comm, &mpi_this_image);
86508653
chk_err(ierr);
8651-
caf_this_image++;
8654+
caf_this_image = mpi_this_image + 1;
86528655
ierr = MPI_Comm_size(*tmp_comm, &caf_num_images);
86538656
chk_err(ierr);
86548657
ierr = MPI_Barrier(*tmp_comm);
@@ -8699,9 +8702,9 @@ PREFIX(end_team)(caf_team_t *team __attribute__((unused)))
86998702
tmp_comm = (MPI_Comm *)tmp_team;
87008703
CAF_COMM_WORLD = *tmp_comm;
87018704
/* CAF_COMM_WORLD = (MPI_Comm)*tmp_used->team_list_elem->team; */
8702-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
8705+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
87038706
chk_err(ierr);
8704-
caf_this_image++;
8707+
caf_this_image = mpi_this_image + 1;
87058708
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
87068709
chk_err(ierr);
87078710
}

0 commit comments

Comments
 (0)