[mpich-discuss] can't MPI_Abort a program due to deadlock at pthread_mutex_lock

Jim Dinan dinan at mcs.anl.gov
Wed Jan 2 10:49:06 CST 2013


Jeff,

If you really need this to work, you can also disable pthread 
cancellation when the thread is in MPI calls:

         pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_val);
         MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD,
                   MPI_STATUS_IGNORE);
         pthread_setcancelstate(cancel_val, &junk);

With that change, the program you sent works correctly.

I think that adding pthread_setcancelstate() to the MPICH thread safety
macros might be all that we have to do to make the library cancel-safe, but
I'm not a pthreads expert.  It could have a nonzero performance cost, so it
would likely need to be a compile-time option.

  ~Jim.



On 1/2/13 10:25 AM, Jim Dinan wrote:
> Jeff,
>
> Pthread_cancel is unsafe in any context where the cancelled thread can
> hold a mutex needed by other threads.  This looks to me like a bad
> programming practice (e.g. you will also likely leak memory, since you
> can't free MPI objects allocated by the thread from inside of a
> cancellation handler), not an MPICH-specific problem; there are safer
> ways to cancel/cleanup threads.
>
> Do you have a good case for why such functionality is needed?
>
>   ~Jim.
>
> On 1/2/13 9:49 AM, Jeff Hammond wrote:
>> I wrote a program that calls MPI from a pthread.  MPI_THREAD_MULTIPLE
>> is used.  I cancel and then join the pthread back into the main
>> program.  After I do this, any calls to MPI, including MPI_Abort (see
>> first gdb+source below) and MPI_Barrier (see second gdb+source below)
>> deadlock on pthread_mutex_lock.
>>
>> I can understand that canceling the thread while it is holding the MPI
>> pthread mutex is a bad idea, but I'm not sure this is an incorrect
>> program.  Is pthread_cancel using signals?  I recall that MPICH
>> functions are not guaranteed to be signal-safe.
>>
>> Is it possible to implement MPI_Abort such that acquisition of the MPI
>> mutex is not required in order to allow deadlocked programs to
>> terminate?  It seems rather unfortunate that MPI_Abort does not
>> perform the desired action here.  Obviously, I can use abort(), but on
>> some implementations, this will leave the system resources in a dirty
>> state.
>>
>> I know a bunch of ways to work around this, but it would be helpful to
>> understand exactly what the problem is.  I wonder if there is a way to
>> modify MPICH such that the mutex is released when a thread holding it
>> is cancelled.
>>
>> Is it possible for MPICH to provide an MPIX call or otherwise document
>> an internal, MPICH-specific function to clear the MPI mutex?  I could
>> resolve this problem if I could merely call MPIX_Mutex_force_unlock()
>> and let the program terminate as desired.
>>
>> Thanks,
>>
>> Jeff
>>
>> $ make
>> mpicc -g -O0 -Wall -std=gnu99 -c bug.c -o bug.o
>> mpicc -g -O0 -Wall -std=gnu99 safemalloc.o bug.o -lm -o bug.x
>> rm bug.o
>>
>> ======== stuck in abort/finalize ========
>>
>> $ gdb ./bug.x
>> GNU gdb 6.3.50-20050815 (Apple version gdb-1752) (Sat Jan 28 03:02:46
>> UTC 2012)
>> Copyright 2004 Free Software Foundation, Inc.
>> GDB is free software, covered by the GNU General Public License, and
>> you are
>> welcome to change it and/or distribute copies of it under certain
>> conditions.
>> Type "show copying" to see the conditions.
>> There is absolutely no warranty for GDB.  Type "show warranty" for
>> details.
>> This GDB was configured as "x86_64-apple-darwin"...Reading symbols for
>> shared libraries ...... done
>>
>> warning: Could not find object file
>> "/Users/jhammond/eclipse/OSPRI/trunk/tests/devices/mpi-pt/bug.o" - no
>> debug information available for "bug.c".
>>
>>
>> (gdb) run
>> Starting program:
>> /Users/jhammond/eclipse/OSPRI/trunk/tests/devices/mpi-pt/bug.x
>> Reading symbols for shared libraries +++++........................ done
>> test done except for pthread shutdown
>> 0: Progress
>> all done
>>
>> ^C
>> Program received signal SIGINT, Interrupt.
>> 0x00007fff912fdbf2 in __psynch_mutexwait ()
>> (gdb) bt
>> #0  0x00007fff912fdbf2 in __psynch_mutexwait ()
>> #1  0x00007fff8c80a1a1 in pthread_mutex_lock ()
>> #2  0x000000010004912b in
>> MPIU_Thread_CS_enter_lockname_recursive_impl_ ()
>> #3  0x0000000100048d64 in MPI_Abort ()
>> #4  0x0000000100000cc0 in main ()
>> (gdb) quit
>> The program is running.  Exit anyway? (y or n) y
>>
>> $ cat bug.c
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <unistd.h>
>> #include <string.h>
>> #include <assert.h>
>> #include <limits.h>
>> #include <pthread.h>
>>
>> #include <mpi.h>
>>
>> #define DEBUG
>>
>> pthread_t Progress_thread;
>>
>> static void * Progress_function(void * dummy)
>> {
>>      int rank;
>>      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>>
>>     while (1)
>>     {
>> #ifdef DEBUG
>>          printf("%d: Progress \n", rank);
>> #endif
>>          MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD,
>> MPI_STATUS_IGNORE);
>>         //usleep(500);
>>     }
>>
>>     return NULL;
>> }
>>
>> int main(int argc, char * argv[])
>> {
>>      int rc;
>>      int rank, size;
>>      int provided;
>>
>>      MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
>>      if (provided!=MPI_THREAD_MULTIPLE) MPI_Abort(MPI_COMM_WORLD, 1);
>>
>>      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>>      MPI_Comm_size(MPI_COMM_WORLD, &size);
>>
>>      rc = pthread_create(&Progress_thread, NULL, &Progress_function,
>> NULL);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      MPI_Barrier(MPI_COMM_WORLD);
>>
>>      printf("test done except for pthread shutdown \n");
>>      fflush(stdout);
>>
>>      MPI_Barrier(MPI_COMM_WORLD);
>>
>>      rc = pthread_cancel(Progress_thread);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      void * rv;
>>      rc = pthread_join(Progress_thread, &rv);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      printf("all done \n");
>>      fflush(stdout);
>>
>>      MPI_Abort(MPI_COMM_SELF, 2);
>>      MPI_Finalize();
>>
>>      return 0;
>> }
>>
>> ======== stuck in barrier ========
>>
>> $ gdb ./bug.x
>> GNU gdb 6.3.50-20050815 (Apple version gdb-1752) (Sat Jan 28 03:02:46
>> UTC 2012)
>> Copyright 2004 Free Software Foundation, Inc.
>> GDB is free software, covered by the GNU General Public License, and
>> you are
>> welcome to change it and/or distribute copies of it under certain
>> conditions.
>> Type "show copying" to see the conditions.
>> There is absolutely no warranty for GDB.  Type "show warranty" for
>> details.
>> This GDB was configured as "x86_64-apple-darwin"...Reading symbols for
>> shared libraries ...... done
>>
>> warning: Could not find object file
>> "/Users/jhammond/eclipse/OSPRI/trunk/tests/devices/mpi-pt/bug.o" - no
>> debug information available for "bug.c".
>>
>>
>> (gdb) run
>> Starting program:
>> /Users/jhammond/eclipse/OSPRI/trunk/tests/devices/mpi-pt/bug.x
>> Reading symbols for shared libraries +++++........................ done
>> before pthread shutdown
>> 0: Progress
>> after pthread shutdown
>> ^C
>> Program received signal SIGINT, Interrupt.
>> 0x00007fff912fdbf2 in __psynch_mutexwait ()
>> (gdb) bt
>> #0  0x00007fff912fdbf2 in __psynch_mutexwait ()
>> #1  0x00007fff8c80a1a1 in pthread_mutex_lock ()
>> #2  0x000000010000f7eb in
>> MPIU_Thread_CS_enter_lockname_recursive_impl_ ()
>> #3  0x000000010000f4f7 in MPI_Barrier ()
>> #4  0x0000000100000c4d in main ()
>> (gdb) quit
>> The program is running.  Exit anyway? (y or n) y
>> Jeffs-MacBook-Pro:mpi-pt jhammond$ cat bug.c
>> #include <stdio.h>
>> #include <stdlib.h>
>> #include <unistd.h>
>> #include <string.h>
>> #include <assert.h>
>> #include <limits.h>
>> #include <pthread.h>
>>
>> #include <mpi.h>
>>
>> #define DEBUG
>>
>> pthread_t Progress_thread;
>>
>> static void * Progress_function(void * dummy)
>> {
>>      int rank;
>>      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>>
>>     while (1)
>>     {
>> #ifdef DEBUG
>>          printf("%d: Progress \n", rank);
>> #endif
>>          MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD,
>> MPI_STATUS_IGNORE);
>>         //usleep(500);
>>     }
>>
>>     return NULL;
>> }
>>
>> int main(int argc, char * argv[])
>> {
>>      int rc;
>>      int rank, size;
>>      int provided;
>>
>>      MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
>>      if (provided!=MPI_THREAD_MULTIPLE) MPI_Abort(MPI_COMM_WORLD, 1);
>>
>>      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>>      MPI_Comm_size(MPI_COMM_WORLD, &size);
>>
>>      rc = pthread_create(&Progress_thread, NULL, &Progress_function,
>> NULL);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      MPI_Barrier(MPI_COMM_WORLD);
>>
>>      printf("before pthread shutdown \n");
>>      fflush(stdout);
>>
>>      MPI_Barrier(MPI_COMM_WORLD);
>>
>>      rc = pthread_cancel(Progress_thread);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      void * rv;
>>      rc = pthread_join(Progress_thread, &rv);
>>      if (rc!=0) MPI_Abort(MPI_COMM_WORLD, rc);
>>
>>      printf("after pthread shutdown \n");
>>      fflush(stdout);
>>
>>      MPI_Barrier(MPI_COMM_WORLD);
>>
>>      printf("before MPI Abort/Finalize \n");
>>      fflush(stdout);
>>
>>      MPI_Abort(MPI_COMM_SELF, 2);
>>      MPI_Finalize();
>>
>>      printf("after MPI Abort/Finalize \n");
>>      fflush(stdout);
>>
>>      return 0;
>> }
>>
> _______________________________________________
> discuss mailing list     discuss at mpich.org
> To manage subscription options or unsubscribe:
> https://lists.mpich.org/mailman/listinfo/discuss
-------------- next part --------------
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <pthread.h>

#include <mpi.h>

/* When defined, Progress_function prints a trace line on every loop pass. */
#define DEBUG

/* Handle of the progress thread; main creates, cancels and joins it. */
pthread_t Progress_thread;

/*
 * Progress thread body: loops forever, probing MPI_COMM_WORLD for a message
 * from any source with any tag, until the thread is cancelled.
 *
 * Cancellation is disabled around each MPI_Probe call so that a
 * pthread_cancel() request cannot be acted on while the thread is inside the
 * MPI library, and the previous state is restored afterwards.  This must be
 * done with pthread_setcancelstate(): PTHREAD_CANCEL_ENABLE and
 * PTHREAD_CANCEL_DISABLE are cancelability *state* values, whereas
 * pthread_setcanceltype() expects PTHREAD_CANCEL_DEFERRED or
 * PTHREAD_CANCEL_ASYNCHRONOUS.
 *
 * NOTE(review): a cancel request that arrives while MPI_Probe is blocked
 * stays pending until MPI_Probe returns.  If no message ever arrives the
 * thread will not terminate and a joiner will wait indefinitely -- confirm
 * that callers guarantee traffic, or switch to a polling probe.
 *
 * dummy:   unused thread argument.
 * Returns: does not return normally; the trailing return only satisfies the
 *          pthread start-routine signature.
 */
static void * Progress_function(void * dummy)
{
    int rank, cancel_val, junk;

    (void) dummy;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Start from a known state: cancellation requests are honoured. */
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &junk);

    while (1)
    {
#ifdef DEBUG
        printf("%d: Progress \n", rank);
#endif

        /* Hold off cancellation for the duration of the MPI call. */
        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_val);
        MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        pthread_setcancelstate(cancel_val, &junk);

        /* Act now on any request that was held pending while inside MPI,
         * rather than relying on a later call being a cancellation point. */
        pthread_testcancel();
    }

    return NULL;
}

/*
 * Reproducer driver: initialises MPI with MPI_THREAD_MULTIPLE, starts the
 * progress thread, synchronises twice on MPI_COMM_WORLD, then cancels and
 * joins the thread before calling MPI_Abort on MPI_COMM_SELF.
 *
 * Any non-zero pthread return code is passed to MPI_Abort on MPI_COMM_WORLD.
 */
int main(int argc, char * argv[])
{
    int thread_level;
    int my_rank, world_size;
    int status;
    void * thread_result;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &thread_level);
    if (thread_level != MPI_THREAD_MULTIPLE)
    {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    /* Launch the progress thread with default attributes. */
    status = pthread_create(&Progress_thread, NULL, &Progress_function, NULL);
    if (status != 0)
    {
        MPI_Abort(MPI_COMM_WORLD, status);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    printf("test done except for pthread shutdown \n");
    fflush(stdout);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Shut the progress thread down: request cancellation, then join. */
    status = pthread_cancel(Progress_thread);
    if (status != 0)
    {
        MPI_Abort(MPI_COMM_WORLD, status);
    }

    status = pthread_join(Progress_thread, &thread_result);
    if (status != 0)
    {
        MPI_Abort(MPI_COMM_WORLD, status);
    }

    printf("all done \n");
    fflush(stdout);

    /* The abort on MPI_COMM_SELF is the call under test in this reproducer. */
    MPI_Abort(MPI_COMM_SELF, 2);
    MPI_Finalize();

    return 0;
}


More information about the discuss mailing list