[mpich-commits] [mpich] MPICH primary repository branch, master, updated. v3.2-174-g410add1
Service Account
noreply at mpich.org
Fri Feb 5 16:24:12 CST 2016
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "MPICH primary repository".
The branch, master, has been updated
via 410add1042cbd467288c83ffec437c549c93cea3 (commit)
via aec9468d5d52d8e780522ce78b4692104c4d8e60 (commit)
via 7f4206c121b5bb251de90da07161fa414046ffc0 (commit)
from 14c326b77674d7215fc32d809a21048ebfcded6f (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://git.mpich.org/mpich.git/commitdiff/410add1042cbd467288c83ffec437c549c93cea3
commit 410add1042cbd467288c83ffec437c549c93cea3
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date: Thu Feb 4 09:51:18 2016 -0600
test/mpi/spawn: remove xfail for spawn-rootargs
Closes #2282
Signed-off-by: Pavan Balaji <balaji at anl.gov>
diff --git a/test/mpi/spawn/testlist.in b/test/mpi/spawn/testlist.in
index e497105..586322d 100644
--- a/test/mpi/spawn/testlist.in
+++ b/test/mpi/spawn/testlist.in
@@ -26,4 +26,4 @@ disconnect3 3
concurrent_spawns 1
pgroup_connect_test 4
pgroup_intercomm_test 4
-spawn-rootargs 10 xfail=ticket2282
+spawn-rootargs 10
http://git.mpich.org/mpich.git/commitdiff/aec9468d5d52d8e780522ce78b4692104c4d8e60
commit aec9468d5d52d8e780522ce78b4692104c4d8e60
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date: Tue Feb 2 18:14:52 2016 -0600
test/mpi/spawn: fixup spawn-rootargs test
MPI_Comm_spawn should launch new instances of "spawn-rootargs" for this
test to succeed. There is no "spawn-nullargs" binary.
Signed-off-by: Pavan Balaji <balaji at anl.gov>
diff --git a/test/mpi/spawn/spawn-rootargs.c b/test/mpi/spawn/spawn-rootargs.c
index a6a135a..1657d3e 100644
--- a/test/mpi/spawn/spawn-rootargs.c
+++ b/test/mpi/spawn/spawn-rootargs.c
@@ -26,7 +26,7 @@ int main(int argc, char *argv[])
if (master == MPI_COMM_NULL) {
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
- MPI_Comm_spawn("./spawn-nullargs", args, /*MPI_ARGV_NULL, */
+ MPI_Comm_spawn("./spawn-rootargs", args, /*MPI_ARGV_NULL, */
5, MPI_INFO_NULL, 0, MPI_COMM_SELF, &worker, MPI_ERRCODES_IGNORE);
MPI_Barrier(worker);
MPI_Comm_disconnect(&worker);
http://git.mpich.org/mpich.git/commitdiff/7f4206c121b5bb251de90da07161fa414046ffc0
commit 7f4206c121b5bb251de90da07161fa414046ffc0
Author: Ken Raffenetti <raffenet at mcs.anl.gov>
Date: Tue Feb 2 18:08:58 2016 -0600
pm/hydra: fix pmi mcmd interleaving problem
Incomplete PMI-1 mcmds suffer from an interleaving problem when there
are multiple commands written to the PMI proxy server from different procs
at the same time. This commit forces the server to block until the full mcmd
is read and processed. Refs #2282
Algo:
- If we do not have a full command, read more from the fd.
- If pmi_storage has multiple commands, process all of them, even if
that means reading more from the fd.
- else break;
Signed-off-by: Pavan Balaji <balaji at anl.gov>
diff --git a/src/pm/hydra/pm/pmiserv/pmip_cb.c b/src/pm/hydra/pm/pmiserv/pmip_cb.c
index 1866f29..316c01c 100644
--- a/src/pm/hydra/pm/pmiserv/pmip_cb.c
+++ b/src/pm/hydra/pm/pmiserv/pmip_cb.c
@@ -214,15 +214,6 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
HYD_pmcd_init_header(&hdr);
- /* PMI-1 does not tell us how much to read. We read how much ever
- * we can, parse out full PMI commands from it, and process
- * them. When we don't have a full PMI command, we store the
- * rest. */
- status =
- HYDU_sock_read(fd, pmi_storage + pmi_storage_len, HYD_TMPBUF_SIZE - pmi_storage_len,
- &linelen, &closed, HYDU_SOCK_COMM_NONE);
- HYDU_ERR_POP(status, "unable to read PMI command\n");
-
/* Try to find the PMI FD */
for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) {
if (HYD_pmcd_pmip.downstream.pmi_fd[i] == fd) {
@@ -231,6 +222,17 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
}
}
+ read_cmd:
+ /* PMI-1 does not tell us how much to read. We read how much ever
+ * we can, parse out full PMI commands from it, and process
+ * them. When we don't have a full PMI command, we go back and
+ * read from the same FD until we do. PMI clients (1 and 2) always
+ * send full commands, then wait for response. */
+ status =
+ HYDU_sock_read(fd, pmi_storage + pmi_storage_len, HYD_TMPBUF_SIZE - pmi_storage_len,
+ &linelen, &closed, HYDU_SOCK_COMM_NONE);
+ HYDU_ERR_POP(status, "unable to read PMI command\n");
+
if (closed) {
/* If a PMI application terminates, we clean up the remaining
* processes. For a correct PMI application, we should never
@@ -282,6 +284,14 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
pmi_storage[pmi_storage_len] = 0;
}
+ check_cmd:
+ status = check_pmi_cmd(&buf, &hdr.pmi_version, &repeat);
+ HYDU_ERR_POP(status, "error checking the PMI command\n");
+
+ if (buf == NULL)
+ /* read more to get a full command. */
+ goto read_cmd;
+
/* We were able to read the PMI command correctly. If we were able
* to identify what PMI FD this is, activate it. If we were not
* able to identify the PMI FD, we will activate it when we get
@@ -289,62 +299,57 @@ static HYD_status pmi_cb(int fd, HYD_event_t events, void *userp)
if (pid != -1 && !HYD_pmcd_pmip.downstream.pmi_fd_active[pid])
HYD_pmcd_pmip.downstream.pmi_fd_active[pid] = 1;
- do {
- status = check_pmi_cmd(&buf, &hdr.pmi_version, &repeat);
- HYDU_ERR_POP(status, "error checking the PMI command\n");
-
- if (buf == NULL)
- break;
-
- if (hdr.pmi_version == 1)
- HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v1;
- else
- HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v2;
+ if (hdr.pmi_version == 1)
+ HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v1;
+ else
+ HYD_pmcd_pmip_pmi_handle = HYD_pmcd_pmip_pmi_v2;
- HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status);
- for(i = 0;i < MAX_PMI_ARGS; i++)
- args[i]= NULL;
+ HYDU_MALLOC(args, char **, MAX_PMI_ARGS * sizeof(char *), status);
+ for(i = 0;i < MAX_PMI_ARGS; i++)
+ args[i]= NULL;
- status = HYD_pmcd_pmi_parse_pmi_cmd(buf, hdr.pmi_version, &pmi_cmd, args);
- HYDU_ERR_POP(status, "unable to parse PMI command\n");
+ status = HYD_pmcd_pmi_parse_pmi_cmd(buf, hdr.pmi_version, &pmi_cmd, args);
+ HYDU_ERR_POP(status, "unable to parse PMI command\n");
- if (HYD_pmcd_pmip.user_global.debug) {
- HYDU_dump(stdout, "got pmi command (from %d): %s\n", fd, pmi_cmd);
- HYDU_print_strlist(args);
- }
+ if (HYD_pmcd_pmip.user_global.debug) {
+ HYDU_dump(stdout, "got pmi command (from %d): %s\n", fd, pmi_cmd);
+ HYDU_print_strlist(args);
+ }
- h = HYD_pmcd_pmip_pmi_handle;
- while (h->handler) {
- if (!strcmp(pmi_cmd, h->cmd)) {
- status = h->handler(fd, args);
- HYDU_ERR_POP(status, "PMI handler returned error\n");
- goto fn_exit;
- }
- h++;
+ h = HYD_pmcd_pmip_pmi_handle;
+ while (h->handler) {
+ if (!strcmp(pmi_cmd, h->cmd)) {
+ status = h->handler(fd, args);
+ HYDU_ERR_POP(status, "PMI handler returned error\n");
+ goto fn_exit;
}
+ h++;
+ }
- if (HYD_pmcd_pmip.user_global.debug) {
- HYDU_dump(stdout, "we don't understand this command %s; forwarding upstream\n",
- pmi_cmd);
- }
+ if (HYD_pmcd_pmip.user_global.debug) {
+ HYDU_dump(stdout, "we don't understand this command %s; forwarding upstream\n",
+ pmi_cmd);
+ }
- /* We don't understand the command; forward it upstream */
- hdr.cmd = PMI_CMD;
- hdr.pid = fd;
- hdr.buflen = strlen(buf);
- status =
- HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
- HYDU_SOCK_COMM_MSGWAIT);
- HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
- HYDU_ASSERT(!closed, status);
+ /* We don't understand the command; forward it upstream */
+ hdr.cmd = PMI_CMD;
+ hdr.pid = fd;
+ hdr.buflen = strlen(buf);
+ status =
+ HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed,
+ HYDU_SOCK_COMM_MSGWAIT);
+ HYDU_ERR_POP(status, "unable to send PMI header upstream\n");
+ HYDU_ASSERT(!closed, status);
- status =
- HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed,
- HYDU_SOCK_COMM_MSGWAIT);
- HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
- HYDU_ASSERT(!closed, status);
+ status =
+ HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed,
+ HYDU_SOCK_COMM_MSGWAIT);
+ HYDU_ERR_POP(status, "unable to send PMI command upstream\n");
+ HYDU_ASSERT(!closed, status);
- } while (repeat);
+ if (repeat)
+ /* there are more commands to process. */
+ goto check_cmd;
fn_exit:
if (pmi_cmd)
-----------------------------------------------------------------------
Summary of changes:
src/pm/hydra/pm/pmiserv/pmip_cb.c | 119 +++++++++++++++++++------------------
test/mpi/spawn/spawn-rootargs.c | 2 +-
test/mpi/spawn/testlist.in | 2 +-
3 files changed, 64 insertions(+), 59 deletions(-)
hooks/post-receive
--
MPICH primary repository
More information about the commits
mailing list