[mpich-commits] r10732 - in mpich2/trunk/src/mpl: . include src

gropp at mcs.anl.gov gropp at mcs.anl.gov
Sun Dec 9 13:23:05 CST 2012


Author: gropp
Date: 2012-12-09 13:23:05 -0600 (Sun, 09 Dec 2012)
New Revision: 10732

Modified:
   mpich2/trunk/src/mpl/configure.ac
   mpich2/trunk/src/mpl/include/mpltrmem.h
   mpich2/trunk/src/mpl/src/mpltrmem.c
Log:
Enhanced trmem (tracing memory) code in preparation for changing to size_t from int for sizes

Modified: mpich2/trunk/src/mpl/configure.ac
===================================================================
--- mpich2/trunk/src/mpl/configure.ac	2012-12-09 17:13:56 UTC (rev 10731)
+++ mpich2/trunk/src/mpl/configure.ac	2012-12-09 19:23:05 UTC (rev 10732)
@@ -60,7 +60,7 @@
 fi
 
 dnl Check if the necessary headers are available
-AC_CHECK_HEADERS(stdio.h stdlib.h string.h stdarg.h ctype.h)
+AC_CHECK_HEADERS(stdio.h stdlib.h string.h stdarg.h ctype.h search.h)
 
 # A C99 compliant compiler should have inttypes.h for fixed-size int types
 AC_CHECK_HEADERS(inttypes.h stdint.h)

Modified: mpich2/trunk/src/mpl/include/mpltrmem.h
===================================================================
--- mpich2/trunk/src/mpl/include/mpltrmem.h	2012-12-09 17:13:56 UTC (rev 10731)
+++ mpich2/trunk/src/mpl/include/mpltrmem.h	2012-12-09 19:23:05 UTC (rev 10732)
@@ -15,6 +15,7 @@
 void *MPL_trmalloc(unsigned int, int, const char[]);
 void MPL_trfree(void *, int, const char[]);
 int MPL_trvalid(const char[]);
+int MPL_trvalid2(const char[],int,const char[]);
 void MPL_trspace(int *, int *);
 void MPL_trid(int);
 void MPL_trlevel(int);

Modified: mpich2/trunk/src/mpl/src/mpltrmem.c
===================================================================
--- mpich2/trunk/src/mpl/src/mpltrmem.c	2012-12-09 17:13:56 UTC (rev 10731)
+++ mpich2/trunk/src/mpl/src/mpltrmem.c	2012-12-09 19:23:05 UTC (rev 10732)
@@ -84,7 +84,7 @@
 #define ALREADY_FREED  0x0f0e0d9c
 
 typedef struct TRSPACE {
-    unsigned long size;
+    size_t size;
     int id;
     int lineno;
     int freed_lineno;
@@ -106,9 +106,13 @@
  * This package maintains some state about itself.  These globals hold
  * this information.
  */
+#define TRHEAD_PRESENTINAL ((TRSPACE *)0xbacdef01)
+#define TRHEAD_POSTSENTINAL ((TRSPACE *)0x10fedcba)
 static int world_rank = -1;
-static volatile long allocated = 0, frags = 0;
-static TRSPACE *volatile TRhead = 0;
+static volatile size_t allocated = 0;
+static volatile long   frags = 0;
+static TRSPACE *volatile TRhead[3] = 
+    { TRHEAD_PRESENTINAL, 0, TRHEAD_POSTSENTINAL };
 static volatile int TRid = 0;
 static volatile int TRidSet = 0;
 static volatile int TRlevel = 0;
@@ -120,10 +124,10 @@
 #define TR_FREE   0x2
 
 /* Used to keep track of allocations */
-static volatile long TRMaxMem = 0;
-static volatile long TRMaxMemId = 0;
+static volatile size_t TRMaxMem = 0;
+static volatile int    TRMaxMemId = 0;
 /* Used to limit allocation */
-static volatile long TRMaxMemAllow = 0;
+static volatile size_t TRMaxMemAllow = 0;
 
 /*
  * Printing of addresses.
@@ -178,6 +182,11 @@
         TRDefaultByte = 0;
         TRFreedByte = 0;
     }
+    s = getenv("MPL_TRMEM_TRACELEVEL");
+    if (s && *s) {
+        int l = atoi(s);
+        TRlevel = l;
+    }
 
 }
 
@@ -198,21 +207,19 @@
     TRSPACE *head;
     char *new = NULL;
     unsigned long *nend;
-    unsigned int nsize;
+    size_t nsize;
     int l;
 
     if (TRdebugLevel > 0) {
-        char buf[256];
-        MPL_snprintf(buf, 256,
-                     "Invalid MALLOC arena detected at line %d in %s\n", lineno, fname);
-        if (MPL_trvalid(buf))
+        if (MPL_trvalid2( "Invalid MALLOC arena detected at line %d in %s\n", 
+                          lineno, fname))
             goto fn_exit;
     }
 
     nsize = a;
     if (nsize & TR_ALIGN_MASK)
         nsize += (TR_ALIGN_BYTES - (nsize & TR_ALIGN_MASK));
-    if ((allocated + (long) nsize > TRMaxMemAllow) && TRMaxMemAllow) {
+    if ((allocated + nsize > TRMaxMemAllow) && TRMaxMemAllow) {
         /* Return a null when memory would be exhausted */
         /* This is only called when additional debugging is enabled,
          * so the fact that this does not go through the regular error
@@ -221,7 +228,7 @@
         goto fn_exit;
     }
 
-    new = malloc((unsigned) (nsize + sizeof(TrSPACE) + sizeof(unsigned long)));
+    new = (char *)malloc(nsize + sizeof(TrSPACE) + sizeof(unsigned long));
     if (!new)
         goto fn_exit;
 
@@ -229,13 +236,17 @@
     head = (TRSPACE *) new;
     new += sizeof(TrSPACE);
 
-    if (TRhead) {
-        MPL_VG_MAKE_MEM_DEFINED(&TRhead->prev, sizeof(TRhead->prev));
-        TRhead->prev = head;
-        MPL_VG_MAKE_MEM_NOACCESS(&TRhead->prev, sizeof(TRhead->prev));
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        goto fn_exit;
     }
-    head->next = TRhead;
-    TRhead = head;
+    if (TRhead[1]) {
+        MPL_VG_MAKE_MEM_DEFINED(&TRhead[1]->prev, sizeof(TRhead[1]->prev));
+        TRhead[1]->prev = head;
+        MPL_VG_MAKE_MEM_NOACCESS(&TRhead[1]->prev, sizeof(TRhead[1]->prev));
+    }
+    head->next = TRhead[1];
+    TRhead[1] = head;
     head->prev = 0;
     head->size = nsize;
     head->id = TRid;
@@ -258,8 +269,8 @@
     if (TRlevel & TR_MALLOC) {
         /* Note that %08p (what we'd like to use) isn't accepted by
          * all compilers */
-        MPL_error_printf("[%d] Allocating %d bytes at %8p in %s:%d\n",
-                         world_rank, a, new, fname, lineno);
+        MPL_error_printf("[%d] Allocating %ld(%ld) bytes at %8p in %s[%d]\n",
+                         world_rank, (long)a, (long)nsize, new, fname, lineno);
     }
 
     /* Without these macros valgrind actually catches far fewer errors when
@@ -284,55 +295,54 @@
 void MPL_trfree(void *a_ptr, int line, const char file[])
 {
     TRSPACE *head;
-    char *ahead;
-    char *a = (char *) a_ptr;
     unsigned long *nend;
-    int l, nset;
+    size_t nset;
+    int l;
     char hexstring[MAX_ADDRESS_CHARS];
 
 /* Don't try to handle empty blocks */
-    if (!a)
+    if (!a_ptr)
         return;
 
     if (TRdebugLevel > 0) {
-        if (MPL_trvalid("Invalid MALLOC arena detected by FREE"))
+        if (MPL_trvalid2("Invalid MALLOC arena detected by FREE at line %d in %s\n", line, file ))
             return;
     }
 
-    ahead = a;
-    a = a - sizeof(TrSPACE);
-    head = (TRSPACE *) a;
+    head = (TRSPACE *) ( ((char *)a_ptr) - sizeof(TrSPACE) );
 
     /* We need to mark the memory as defined before performing our own error
      * checks or valgrind will flag the trfree function as erroneous.  The real
      * free() at the end of this function will mark the whole block as NOACCESS
      * again.  See the corresponding block in the trmalloc function for more
      * info. */
-    MPL_VG_MAKE_MEM_DEFINED(a, sizeof(TrSPACE));
+    MPL_VG_MAKE_MEM_DEFINED(head, sizeof(TrSPACE));
 
     if (head->cookie != COOKIE_VALUE) {
         /* Damaged header */
         /* Note that %08p (what we'd like to use) isn't accepted by
          * all compilers */
-        MPL_error_printf("[%d] Block at address %8p is corrupted; cannot free;\n"
+        addrToHex( a_ptr, hexstring );
+        MPL_error_printf("[%d] Block at address %s is corrupted; cannot free;\n"
                          "may be block not allocated with MPL_trmalloc or MALLOC\n"
-                         "called in %s at line %d\n", world_rank, a, file, line);
+                         "called in %s at line %d\n", world_rank, hexstring, file, line);
         return;
     }
-    nend = (unsigned long *) (ahead + head->size);
+    nend = (unsigned long *) ((char *)a_ptr + head->size);
 /* Check that nend is properly aligned */
     if ((sizeof(long) == 4 && ((long) nend & 0x3) != 0) ||
         (sizeof(long) == 8 && ((long) nend & 0x7) != 0)) {
+        addrToHex( a_ptr, hexstring );
         MPL_error_printf
-            ("[%d] Block at address %lx is corrupted (invalid address or header)\n"
-             "called in %s at line %d\n", world_rank, (long) a + sizeof(TrSPACE), file, line);
+            ("[%d] Block at address %s is corrupted (invalid address or header)\n"
+             "called in %s at line %d\n", world_rank, hexstring, file, line);
         return;
     }
 
     MPL_VG_MAKE_MEM_DEFINED(nend, sizeof(*nend));
     if (*nend != COOKIE_VALUE) {
         if (*nend == ALREADY_FREED) {
-            addrToHex((char *) a + sizeof(TrSPACE), hexstring);
+            addrToHex(a_ptr, hexstring);
             if (TRidSet) {
                 MPL_error_printf
                     ("[%d] Block [id=%d(%lu)] at address %s was already freed\n", world_rank,
@@ -352,7 +362,7 @@
         }
         else {
             /* Damaged tail */
-            addrToHex(a, hexstring);
+            addrToHex(a_ptr, hexstring);
             if (TRidSet) {
                 MPL_error_printf
                     ("[%d] Block [id=%d(%lu)] at address %s is corrupted (probably write past end)\n",
@@ -364,8 +374,10 @@
                      world_rank, hexstring);
             }
             head->fname[TR_FNAME_LEN - 1] = 0;  /* Just in case */
-            MPL_error_printf("[%d] Block allocated in %s[%d]\n",
+            MPL_error_printf("[%d] Block being freed allocated in %s[%d]\n",
                              world_rank, head->fname, head->lineno);
+            MPL_error_printf("[%d] Block cookie should be %lx but was %lx\n",
+                             world_rank, (long)COOKIE_VALUE, *nend );
         }
     }
 /* Mark the location freed */
@@ -383,7 +395,7 @@
         MPL_VG_MAKE_MEM_NOACCESS(&head->prev->next, sizeof(head->prev->next));
     }
     else {
-        TRhead = head->next;
+        TRhead[1] = head->next;
     }
 
     if (head->next) {
@@ -393,8 +405,8 @@
     }
 
     if (TRlevel & TR_FREE) {
-        addrToHex((char *) a + sizeof(TrSPACE), hexstring);
-        MPL_error_printf("[%d] Freeing %lu bytes at %s in %s:%d\n",
+        addrToHex(a_ptr, hexstring);
+        MPL_error_printf("[%d] Freeing %lu bytes at %s in %s[%d]\n",
                          world_rank, head->size, hexstring, file, line);
     }
 
@@ -403,16 +415,21 @@
      * help catch access to already freed data
      */
     /* FIXME why do we skip the first few ints? [goodell@] */
-    nset = head->size - 2 * sizeof(int);
-    if (nset > 0) {
+    /* Answer lost in time.  Probably because in some case, the
+       first few bytes provided useful information in tracking down
+       a problem. */
+    if (head->size > 2*sizeof(int)) {
+        /* Now that nset is size_t, it might be defined as unsigned,
+           so we can't compare nset - 2*sizeof(int) against zero */
+        nset = head->size - 2 * sizeof(int);
         /* If an upper layer (like the handle allocation code) ever used the
          * MPL_VG_MAKE_MEM_NOACCESS macro on part/all of the data we gave
          * them then our memset will elicit "invalid write" errors from
          * valgrind.  Mark it as accessible but undefined here to prevent this. */
-        MPL_VG_MAKE_MEM_UNDEFINED(ahead + 2 * sizeof(int), nset);
-        memset(ahead + 2 * sizeof(int), TRFreedByte, nset);
+        MPL_VG_MAKE_MEM_UNDEFINED((char *)a_ptr + 2 * sizeof(int), nset);
+        memset((char *)a_ptr + 2 * sizeof(int), TRFreedByte, nset);
     }
-    free(a);
+    free(head);
 }
 
 /*+C
@@ -442,24 +459,38 @@
 
    No output is generated if there are no problems detected.
 +*/
-int MPL_trvalid(const char str[])
+int MPL_trvalid( const char str[] )
 {
+    return MPL_trvalid2( str, -1, (const char *)0 );
+}
+
+int MPL_trvalid2(const char str[], int line, const char file[] )
+{
     TRSPACE *head;
     TRSPACE *next;
-    char *a;
+    char    *a;
     unsigned long *nend;
     int errs = 0;
     char hexstring[MAX_ADDRESS_CHARS];
 
-    head = TRhead;
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        errs++;
+        goto fn_exit;
+    }
+    head = TRhead[1];
     while (head) {
         /* mark defined before accessing head contents */
         MPL_VG_MAKE_MEM_DEFINED(head, sizeof(*head));
         if (head->cookie != COOKIE_VALUE) {
-            if (!errs)
-                MPL_error_printf("%s\n", str);
+            if (!errs) {
+                if (line > 0) 
+                    MPL_error_printf(str, line, file);
+                else 
+                    MPL_error_printf( "%s\n", str );
+            }
             errs++;
-            addrToHex(head, hexstring);
+            addrToHex(head+1, hexstring);
             MPL_error_printf
                 ("[%d] Block at address %s is corrupted (invalid cookie in head)\n",
                  world_rank, hexstring);
@@ -469,16 +500,24 @@
              * SEGV or BUS  */
             goto fn_exit;
         }
-        /* FIXME why is this +1 here? */
-        a = (char *) (((TrSPACE *) head) + 1);
+        /* Get the address of the first byte of the memory, which begins
+           just after the end of the header.  We must use the full header 
+           (TrSPACE) rather than the struct with the data (TRSPACE) because
+           the full header is padded to ensure correct byte alignment with
+           the data */
+        a    = (char *)( (TrSPACE *)head + 1 );
         nend = (unsigned long *) (a + head->size);
 
         /* mark defined before accessing nend contents */
         MPL_VG_MAKE_MEM_DEFINED(nend, sizeof(*nend));
 
         if (nend[0] != COOKIE_VALUE) {
-            if (!errs)
-                MPL_error_printf("%s\n", str);
+            if (!errs) {
+                if (line > 0) 
+                    MPL_error_printf(str, line, file);
+                else 
+                    MPL_error_printf( "%s\n", str );
+            }
             errs++;
             head->fname[TR_FNAME_LEN - 1] = 0;  /* Just in case */
             addrToHex(a, hexstring);
@@ -494,6 +533,8 @@
             }
             MPL_error_printf("[%d] Block allocated in %s[%d]\n",
                              world_rank, head->fname, head->lineno);
+            MPL_error_printf("[%d] Block cookie should be %lx but was %lx\n",
+                             world_rank, (long)COOKIE_VALUE, *nend );
         }
 
         /* set both regions back to NOACCESS */
@@ -538,7 +579,11 @@
 
     if (fp == 0)
         fp = stderr;
-    head = TRhead;
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        return;
+    }
+    head = TRhead[1];
     while (head) {
         MPL_VG_MAKE_MEM_DEFINED(head, sizeof(*head));
         if (head->id >= minid) {
@@ -585,7 +630,7 @@
 }
 
 static volatile FILE *TRFP = 0;
- /*ARGSUSED*/ static void PrintSum(TRINFO ** a, VISIT order, int level)
+static void PrintSum(TRINFO ** a, VISIT order, int level)
 {
     if (order == postorder || order == leaf)
         fprintf(TRFP, "[%d]%s[%d] has %d\n", (*a)->id, (*a)->fname, (*a)->lineno, (*a)->size);
@@ -611,7 +656,11 @@
     if (fp == 0)
         fp = stderr;
     root = 0;
-    head = TRhead;
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        return;
+    }
+    head = TRhead[1];
     key = nspace;
     while (head) {
         if (head->id >= minid) {
@@ -646,8 +695,8 @@
 {
     if (fp == 0)
         fp = stderr;
-    fprintf(fp, "# [%d] The maximum space allocated was %ld bytes [%ld]\n",
-            world_rank, TRMaxMem, TRMaxMemId);
+    fprintf(fp, "# [%d] The maximum space allocated was %lu bytes [%d]\n",
+            world_rank, (unsigned long)TRMaxMem, TRMaxMemId);
 }
 #endif
 
@@ -732,19 +781,17 @@
 void *MPL_trrealloc(void *p, int size, int lineno, const char fname[])
 {
     void *pnew;
-    char *pa;
-    int nsize;
+    size_t nsize;
     TRSPACE *head = 0;
     char hexstring[MAX_ADDRESS_CHARS];
 
 /* We should really use the size of the old block... */
     if (p) {
-        pa = (char *) p;
-        head = (TRSPACE *) (pa - sizeof(TrSPACE));
+        head = (TRSPACE *) ((char *)p - sizeof(TrSPACE));
         MPL_VG_MAKE_MEM_DEFINED(head, sizeof(*head));   /* mark defined before accessing contents */
         if (head->cookie != COOKIE_VALUE) {
             /* Damaged header */
-            addrToHex(pa, hexstring);
+            addrToHex(p, hexstring);
             MPL_error_printf("[%d] Block at address %s is corrupted; cannot realloc;\n"
                              "may be block not allocated with MPL_trmalloc or MALLOC\n",
                              world_rank, hexstring);
@@ -766,13 +813,14 @@
 
     if (p && pnew) {
         nsize = size;
-        if (head->size < (unsigned long) nsize)
-            nsize = (int) (head->size);
+        if (head->size < nsize)
+            nsize = head->size;
         memcpy(pnew, p, nsize);
         MPL_trfree(p, lineno, fname);
     }
 
     /* Re-mark the head as NOACCESS before returning. */
+    /* FIXME: Note head is no longer valid after MPL_trfree above */
     if (head) {
         MPL_VG_MAKE_MEM_NOACCESS(head, sizeof(*head));
     }
@@ -796,9 +844,9 @@
 void *MPL_trstrdup(const char *str, int lineno, const char fname[])
 {
     void *p;
-    unsigned len = (unsigned) strlen(str) + 1;
+    size_t len = strlen(str) + 1;
 
-    p = MPL_trmalloc(len, lineno, (char *) fname);
+    p = MPL_trmalloc((unsigned)len, lineno, (char *) fname);
     if (p) {
         memcpy(p, str, len);
     }
@@ -891,13 +939,17 @@
     TRSPACE *head;
     int cnt;
 
-    head = TRhead;
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        return;
+    }
+    head = TRhead[1];
     cnt = 0;
     while (head) {
         cnt++;
         head = head->next;
     }
-    TRhead = MPL_trIsort(TRhead, cnt);
+    TRhead[1] = MPL_trIsort(TRhead[1], cnt);
 }
 
 /* Takes sorted input and dumps as an aggregate */
@@ -909,8 +961,13 @@
     if (fp == 0)
         fp = stderr;
 
+    if (TRhead[0] != TRHEAD_PRESENTINAL || TRhead[2] != TRHEAD_POSTSENTINAL) {
+        MPL_error_printf("TRhead corrupted - likely memory overwrite.\n");
+        return;
+    }
+
     MPL_trSortBlocks();
-    head = TRhead;
+    head = TRhead[1];
     cur = 0;
     while (head) {
         cur = head->next;



More information about the commits mailing list