[padb-devel] [padb] r306 committed - Tidy up the minfo.c code considerably to make it easier to read and...

padb at googlecode.com padb at googlecode.com
Thu Oct 29 22:52:58 GMT 2009


Revision: 306
Author: apittman
Date: Thu Oct 29 15:52:20 2009
Log: Tidy up the minfo.c code considerably to make it easier to read and
hopefully extend in future.  Use a struct for the dll callbacks and
give the struct entries meaningful names to help the readability
of the code.  Add a few helper functions to ease the control flow.
Also update the copyright information from 2004 to 2009

http://code.google.com/p/padb/source/detail?r=306

Modified:
  /trunk/src/minfo.c

=======================================
--- /trunk/src/minfo.c	Fri Oct 23 10:43:31 2009
+++ /trunk/src/minfo.c	Thu Oct 29 15:52:20 2009
@@ -1,5 +1,6 @@
  /*
   * Copyright (c) 2003,2004 Quadrics Ltd
+ * Copyright (c) 2009, Ashley Pittman.
   */

  #ident "elfN.c,v 1.14 2005-11-03 11:23:04 ashley Exp"
@@ -12,6 +13,24 @@
  #include <unistd.h>
  #include "mpi_interface.h"

+struct dll_entry_points {
+    char *(*dll_error_string)           (int);
+    void  (*setup_basic_callbacks)      (mqs_basic_callbacks *);
+    int   (*setup_image)                (mqs_image *, mqs_image_callbacks *);
+    int   (*image_has_queues)           (mqs_image *, char **);
+    int   (*setup_process)              (mqs_process *, mqs_process_callbacks *);
+    int   (*process_has_queues)         (mqs_process *, char **);
+    void  (*update_communicator_list)   (mqs_process *);
+    int   (*setup_communicator_iterator)(mqs_process *);
+    int   (*get_communicator)           (mqs_process *, mqs_communicator *);
+    int   (*next_communicator)          (mqs_process *);
+    int   (*get_global_rank)            (mqs_process *);
+    int   (*get_comm_coll_state)        (mqs_process *, int, int *, int *);
+    int   (*get_comm_group)             (mqs_process *, int *);
+    int   (*setup_operation_iterator)   (mqs_process *, int);
+    int   (*next_operation)             (mqs_process *, mqs_pending_operation *);
+};
+
  struct image {
      mqs_image_info *blob;
  };
@@ -27,7 +46,30 @@
      int    size;
  };

-char *(*es)(int errorcode);
+struct dll_entry_points dll_ep = {};
+
+char *collective_names[] = { "Barrier",
+			     "Bcast",
+			     "Allgather",
+			     "Allgatherv",
+			     "Allreduce",
+			     "Alltoall",
+			     "Alltoallv",
+			     "Reduce_Scatter",
+			     "Reduce",
+			     "Gather",
+			     "Gatherv",
+			     "Scan",
+			     "Scatter",
+			     "Scatterv" };
+
+char *op_types[] = { "pending_send",
+		     "pending_receive",
+		     "unexpected_message" };
+
+char *op_status[] = { "pending",
+		      "matched",
+		      "complete" };

  void show_string (char *desc, char *str)
  {
@@ -45,7 +87,7 @@
  void show_dll_error_code (int res)
  {
      char *msg;
-    msg = es(res);
+    msg = dll_ep.dll_error_string(res);
      show_string("dllerror",msg);
  }

@@ -160,7 +202,7 @@
  	    *addr = (mqs_taddr_t)base;
  	return mqs_ok;
      }
-    return -1;
+    return mqs_no_information;
  }

  int find_symbol (mqs_image *image, char *name, mqs_taddr_t *addr)
@@ -172,7 +214,7 @@
  	}
  	return mqs_ok;
      }
-    return 100;
+    return mqs_no_information;
  }

  int req_to_int (char *req,int *res)
@@ -259,7 +301,7 @@

      i = ask(req,ans);
      if ( i != 0 )
-	return -1;
+	return mqs_no_information;

      for ( i = 0 ; i < size ; i++ ) {
  	char *e;
@@ -284,14 +326,14 @@
      int res;
      // printf("Trying to read data for %d from %p\n",size,(void *)addr);
      if ( ! addr ) {
-	return 100;
+	return mqs_no_information;
      }
      do {
  	if ( offset > size )
  	    offset = size;
  	res = _find_data(proc,addr,offset,local);
  	if ( res != mqs_ok )
-	    return 100;
+	    return mqs_no_information;

  	addr  += offset;
  	local += offset;
@@ -329,11 +371,10 @@
      return 0;
  }

-int msgid = 0;
-
-int show_comm (struct process *p,mqs_communicator *comm)
-{
-    static int c = 0;
+int msg_id = 0;
+
+int show_comm (struct process *p, mqs_communicator *comm, int c)
+{
      if ( comm->local_rank >= 0 )
  	printf("out: c:%d rank:%d\n",
  	       c,
@@ -352,15 +393,46 @@
  	   c,
  	   comm->unique_id);

-
-    msgid=0;
      return c++; /* This is not a political statement although if it was I'd stand by it */
  }

-void show_op (mqs_pending_operation *op, int type)
-{
-    static char *types[] = { "pending_send", "pending_receive", "unexpected_message" };
-    static char *status[] = { "pending", "matched", "complete" };
+void show_comm_members (mqs_process *target_process, mqs_communicator *comm, int comm_id)
+{
+    int *ranks = malloc(comm->size*sizeof(int));
+    int r = dll_ep.get_comm_group(target_process,ranks);
+    if ( r == mqs_ok ) {
+	int i;
+	for ( i = 0 ; i < comm->size ; i++ ) {
+	    printf("out: c:%d rt:%d\n",
+		   comm_id,
+		   ranks[i]);
+	}
+    }
+    free(ranks);
+}
+
+void show_comm_coll_state (mqs_process *target_process, mqs_communicator *comm, int comm_id)
+{
+    int i;
+    for ( i = 0 ; i < 14 ; i++ ) {
+	int seq = -1;
+	int active = -1;
+	int r = dll_ep.get_comm_coll_state(target_process,i,&seq,&active);
+	if ( r == mqs_ok ) {
+	    if ( seq != 0 )
+		printf("comm%d: Collective '%s': call count %d, %sactive\n",
+		       comm_id,
+		       collective_names[i],
+		       seq,
+		       active ? "" : "not ");
+	} else if ( r != mqs_no_information ) {
+	    show_dll_error_code(r);
+	}
+    }
+}
+
+void show_op (mqs_pending_operation *op, int msgid, int type)
+{
      int i;
      int all = 0;

@@ -368,7 +440,7 @@
  	all = 1;

      printf("msg%d: Operation %d (%s) status %d (%s)\n",
-	   msgid,type,types[type],op->status,status[op->status]);
+	   msgid,type,op_types[type],op->status,op_status[op->status]);
      printf("msg%d: Rank local %d global %d\n",
  	   msgid,(int)op->desired_local_rank, (int)op->desired_global_rank);
      if ( all )
@@ -396,17 +468,12 @@
  	else
  	    i = 10;
      } while ( i++ < 5 );
-
-    msgid++;
  }

-int (*soi)(mqs_process *process,int type);
-int (*no)(mqs_process *process, mqs_pending_operation *op);
-
-void load_ops (mqs_process *p,int type)
-{
-    mqs_pending_operation op;
-    int res = soi((mqs_process *)p,type);
+void load_ops (mqs_process *target_process,int type)
+{
+
+    int res = dll_ep.setup_operation_iterator(target_process,type);
      if ( res != mqs_ok ) {
  	if ( res != mqs_ok && res != mqs_no_information )
  	    printf("Setup operation iterator failed %d for type %d\n",res,type);
@@ -414,16 +481,64 @@
      }

      do {
-	memset(&op,0,sizeof(mqs_pending_operation));
-	res = no((mqs_process *)p,&op);
+	mqs_pending_operation op = {};
+	res = dll_ep.next_operation(target_process,&op);
  	if ( res == mqs_ok ) {
-	    show_op(&op,type);
+	    show_op(&op,msg_id,type);
+	    msg_id++;
  	} else if ( res != mqs_end_of_list ) {
  	    printf("Res from mqs_pending_operation is %d type %d\n",res,type);
  	}

      } while ( res == mqs_ok );
  }
+
+void load_all_ops (mqs_process *target_process)
+{
+    msg_id = 0;
+    load_ops(target_process,mqs_pending_receives);
+    load_ops(target_process,mqs_unexpected_messages);
+    load_ops(target_process,mqs_pending_sends);
+}
+
+#define DLSYM_LAX(VAR,HANDLE,NAME) VAR.NAME = dlsym(HANDLE,"mqs_" #NAME)
+
+#define DLSYM(VAR,HANDLE,NAME) do {			      \
+	if ( (DLSYM_LAX(VAR,HANDLE,NAME)) == NULL ) {	      \
+	    show_warning("Failed to load symbol mqs_" #NAME); \
+	    return -1;					      \
+	}						      \
+    } while (0)
+
+/* Try and load the dll from a given filename, returns true if successfull.
+ * populates the contents of dll_ep if true.
+ */
+int load_msgq_dll(char *filename)
+{
+    void *dlhandle;
+
+    dlhandle = dlopen(filename,RTLD_NOW);
+    if ( ! dlhandle )
+	return -1;
+
+    DLSYM(dll_ep,dlhandle,setup_basic_callbacks);
+    DLSYM(dll_ep,dlhandle,setup_image);
+    DLSYM(dll_ep,dlhandle,image_has_queues);
+    DLSYM(dll_ep,dlhandle,setup_process);
+    DLSYM(dll_ep,dlhandle,process_has_queues);
+    DLSYM(dll_ep,dlhandle,dll_error_string);
+    DLSYM(dll_ep,dlhandle,update_communicator_list);
+    DLSYM(dll_ep,dlhandle,setup_communicator_iterator);
+    DLSYM(dll_ep,dlhandle,get_communicator);
+    DLSYM(dll_ep,dlhandle,next_communicator);
+    DLSYM(dll_ep,dlhandle,setup_operation_iterator);
+    DLSYM(dll_ep,dlhandle,next_operation);
+    DLSYM(dll_ep,dlhandle,get_comm_group);
+
+    DLSYM_LAX(dll_ep,dlhandle,get_global_rank);
+    DLSYM_LAX(dll_ep,dlhandle,get_comm_coll_state);
+    return 0;
+}

  #define PATH_MAX 1024

@@ -431,62 +546,32 @@
  main ()
  {
      int res;
-    int nres;
      char *dll_name;
-    void *dlhandle;
-    void (*b)(mqs_basic_callbacks *bcb);
-    int (*si)(mqs_image *image,mqs_image_callbacks *icb);
-    int (*ihq)(mqs_image *image, char **msg);
-    int (*sp)(mqs_process *process,mqs_process_callbacks *pcb);
-    int (*phq)(mqs_process *process, char **msg);
-    void (*ucl)(mqs_process *process);
-
-    int (*sci)(mqs_process *process);
-    int (*gc)(mqs_process *process, mqs_communicator *comm);
-    int (*nc)(mqs_process *process);
-    int (*gr)(mqs_process *process);
-    int (*gcs)(mqs_process *, int, int *, int *);
-    int (*gcg)(mqs_process *, int *);
-
-    struct image i;
-    struct process p;
-
+    int comm_id = 0;
+
+    struct image   image;
+    struct process process;
+
+    mqs_image   *target_image   = (mqs_image   *)&image;
+    mqs_process *target_process = (mqs_process *)&process;
+
      dll_name = getenv("MPINFO_DLL");
-    if ( dll_name ) {
-	dlhandle = dlopen(dll_name,RTLD_NOW);
-    } else {
-	char dll[PATH_MAX];
+    if ( ! dll_name ) {
+
  	void *base = find_sym("sym","MPIR_dll_name");
  	if ( ! base ) {
  	    die("Could not find MPIR_dll_name symbol");
  	}
-	fetch_string(NULL,&dll[0],(mqs_taddr_t)base,PATH_MAX);
-	dlhandle = dlopen(dll,RTLD_NOW);
-    }
-
-    if ( ! dlhandle ) {
-	die("Could not open dll");
-    }
-
-    b = dlsym(dlhandle,"mqs_setup_basic_callbacks");
-    if ( ! b ) {
+	dll_name = malloc(PATH_MAX);
+	if ( fetch_string(NULL,dll_name,(mqs_taddr_t)base,PATH_MAX) != 0 ) {
+	    die("Could not read value of MPIR_dll_name");
+	}
+    }
+
+    if ( load_msgq_dll(dll_name) != 0 ) {
  	die("Could not load symbols from dll");
      }
-    si  = dlsym(dlhandle,"mqs_setup_image");
-    ihq = dlsym(dlhandle,"mqs_image_has_queues");
-    sp  = dlsym(dlhandle,"mqs_setup_process");
-    phq = dlsym(dlhandle,"mqs_process_has_queues");
-    es  = dlsym(dlhandle,"mqs_dll_error_string");
-    ucl = dlsym(dlhandle,"mqs_update_communicator_list");
-    sci = dlsym(dlhandle,"mqs_setup_communicator_iterator");
-    gc  = dlsym(dlhandle,"mqs_get_communicator");
-    nc  = dlsym(dlhandle,"mqs_next_communicator");
-    gr  = dlsym(dlhandle,"mqs_get_global_rank");
-    soi = dlsym(dlhandle,"mqs_setup_operation_iterator");
-    no  = dlsym(dlhandle,"mqs_next_operation");
-    gcs = dlsym(dlhandle,"mqs_get_comm_coll_state");
-    gcg = dlsym(dlhandle,"mqs_get_comm_group");
-
+
      bcb.mqs_malloc_fp           = malloc;
      bcb.mqs_free_fp             = free;
      bcb.mqs_dprints_fp          = show_msg;
@@ -496,7 +581,7 @@
      bcb.mqs_put_process_info_fp = process_put;
      bcb.mqs_get_process_info_fp = process_get;

-    b(&bcb);
+    dll_ep.setup_basic_callbacks(&bcb);

      icb.mqs_get_type_sizes_fp = get_type_size;
      icb.mqs_find_function_fp  = find_function;
@@ -505,16 +590,16 @@
      icb.mqs_field_offset_fp   = find_offset;
      icb.mqs_sizeof_fp         = find_sizeof;

-    res = si((mqs_image *)&i,&icb);
+    res = dll_ep.setup_image(target_image,&icb);
      if ( res != mqs_ok ) {
  	die_with_code(res,"setup_image() failed");
      }

      {
-	char *m = NULL;
-	res = ihq((mqs_image *)&i,&m);
-	if ( m ) {
-	    show_string("ihqm",m);
+	char *user_message = NULL;
+	res = dll_ep.image_has_queues(target_image,&user_message);
+	if ( user_message ) {
+	    show_string("ihqm",user_message);
  	}
  	if ( res != mqs_ok ) {
  	    die_with_code(res,"image_has_queues() failed");
@@ -526,96 +611,65 @@
      pcb.mqs_fetch_data_fp      = find_data;
      pcb.mqs_target_to_host_fp  = convert_data;

-    p.rank = -1;
-    p.image = &i;
-
-    res = sp((mqs_process *)&p,&pcb);
+    process.rank  = -1;
+    process.image = &image;
+
+    res = dll_ep.setup_process(target_process,&pcb);
      if ( res != mqs_ok ) {
  	die_with_code(res,"mqs_setup_process() failed");
      }

-    if ( gr ) {
-	p.rank = gr((mqs_process *)&p);
+    if ( dll_ep.get_global_rank ) {
+	process.rank = dll_ep.get_global_rank(target_process);
      } else {
  	/* Load the rank into p */
-	req_to_int("rank", &p.rank);
+	req_to_int("rank", &process.rank);
      }

      {
-	char *m = NULL;
-	res = phq((mqs_process *)&p,&m);
-	if ( m )
-	    show_string("phqm",m);
+	char *user_message = NULL;
+	res = dll_ep.process_has_queues(target_process,&user_message);
+	if ( user_message )
+	    show_string("phqm",user_message);
  	if ( res != mqs_ok ) {
-	    die_with_code(res,"process_has_queue() failed");
+	    die_with_code(res,"process_has_queues() failed");
  	}
      }

-    ucl((mqs_process *)&p);
-
-    res = sci((mqs_process *)&p);
+    dll_ep.update_communicator_list(target_process);
+
+    res = dll_ep.setup_communicator_iterator(target_process);
      if ( res != mqs_ok ) {
  	die_with_code(res,"setup_communicator_iterator() failed");
      }

      do {
-	mqs_communicator comm;
-
-	res = gc((mqs_process *)&p,&comm);
+	mqs_communicator comm = {};
+
+	res = dll_ep.get_communicator(target_process,&comm);
  	if ( res != mqs_ok ) {
  	    die_with_code(res,"get_communicator() failed");
  	}

-	if ( res == mqs_ok ) {
-	    /* Should check for comm.size here, open-mpi puts MPI_COMM_NULL in the list with a size of 0 */
-	    char *names[] = { "Barrier", "Bcast", "Allgather", "Allgatherv", "Allreduce", "Alltoall", "Alltoallv",
-		 	      "Reduce_Scatter", "Reduce", "Gather", "Gatherv", "Scan", "Scatter", "Scatterv" };
-	    int c;
-	    c = show_comm(&p,&comm);
-	    if ( comm.size > 1 ) {
-		if ( gcg ) {
-		    int *ranks = malloc(comm.size*sizeof(int));
-		    int r = gcg((mqs_process *)&p,ranks);
-		    if ( r == mqs_ok ) {
-			int i;
-			for ( i = 0 ; i < comm.size ; i++ ) {
-			    printf("out: c:%d rt:%d\n",c,ranks[i]);
-			}
-		    }
-		    free(ranks);
-		}
-		if ( gcs ) {
-		    int seq;
-		    int active;
-		    int r;
-		    int i = 0;
-		    for ( i = 0 ; i<14 ; i++ ) {
-			seq = -1;
-			active = -1;
-			r = gcs((mqs_process *)&p,i,&seq,&active);
-			if ( r == mqs_ok ) {
-			    if ( seq != 0 )
-				printf("comm%d: Collective '%s': call count %d, %sactive\n",c,names[i],seq,active ? "" : "not ");
-			} else if ( r != mqs_no_information ) {
-			    char *msg;
-			    msg = es(r);
-			    printf("Error: %s\n",msg);
-			}
-		    }
-		}
-
-		load_ops((mqs_process *)&p,mqs_pending_receives);
-		load_ops((mqs_process *)&p,mqs_unexpected_messages);
-		load_ops((mqs_process *)&p,mqs_pending_sends);
-
-	    }
-	    printf("done\n"
-		   );
-
-	    nres = nc((mqs_process *)&p);
+	show_comm(&process,&comm,comm_id);
+
+	if ( comm.size > 1 ) {
+
+	    if ( dll_ep.get_comm_group )
+		show_comm_members(target_process,&comm,comm_id);
+
+	    if ( dll_ep.get_comm_coll_state )
+		show_comm_coll_state(target_process,&comm,comm_id);
+
+	    load_all_ops(target_process);

  	}
-    } while ( res == mqs_ok && nres == mqs_ok );
+	printf("done\n");
+
+	res = dll_ep.next_communicator(target_process);
+	comm_id++;
+
+    } while ( res == mqs_ok );

      show_string("exit","ok");
      return 0;




More information about the padb-devel mailing list