Commit 19570978 authored by Robert Klöfkorn

bcast version of graph collect.


git-svn-id: https://dune.mathematik.uni-freiburg.de/svn/alugrid/trunk@1612 0d966ed9-3843-0410-af09-ebfb50bd7c74
parent 212e2326
@@ -339,9 +339,9 @@ void GitterPll :: MacroGitterPll :: vertexLinkageEstimate (MpAccessLocal & mpAcc
// for small processor numbers use gcollect version
// this method should be faster (log p),
// but is more memory consuming O( p )
if( mpAccess.psize () < ALUGridExternalParameters :: vertexEstimateRankLimit() )
vertexLinkageEstimateGCollect ( mpAccess );
else
//if( mpAccess.psize () < ALUGridExternalParameters :: vertexEstimateRankLimit() )
// vertexLinkageEstimateGCollect ( mpAccess );
//else
// for larger processor numbers use bcast version
// this method is more time consuming (p log p)
// but the memory consumption is only O( 1 )
......
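As an aside (not part of this commit), the tradeoff described in the comment above can be illustrated with a minimal, self-contained MPI sketch; the function names and the assumption of raw byte buffers of roughly equal size are made up for the example:

#include <mpi.h>
#include <algorithm>
#include <vector>

// gather variant: every rank ends up holding all contributions at once,
// i.e. the receive buffer grows with the number of processes ( O( p ) memory )
void collectByGather (MPI_Comm comm, std::vector<char>& mine)
{
  int np; MPI_Comm_size (comm, &np);
  const int len = (int) mine.size();            // assume equal sizes for brevity
  std::vector<char> all ((size_t) np * len);    // O( p ) receive buffer
  MPI_Allgather (&mine[0], len, MPI_BYTE, &all[0], len, MPI_BYTE, comm);
  // ... unpack 'all' rank by rank ...
}

// broadcast variant: p rounds, but only one buffer of the maximal message
// size is alive at any time ( O( 1 ) memory, roughly p log p time )
void collectByBcast (MPI_Comm comm, std::vector<char>& mine)
{
  int me, np; MPI_Comm_rank (comm, &me); MPI_Comm_size (comm, &np);
  int maxLen = (int) mine.size();
  MPI_Allreduce (MPI_IN_PLACE, &maxLen, 1, MPI_INT, MPI_MAX, comm);
  std::vector<char> buf ((size_t) maxLen);
  for (int root = 0; root < np; ++root)
  {
    if (root == me) std::copy (mine.begin(), mine.end(), buf.begin());
    MPI_Bcast (&buf[0], maxLen, MPI_BYTE, root, comm);
    // ... unpack the contribution of 'root' from 'buf' ...
  }
}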
@@ -121,6 +121,7 @@ graphCollect (const MpAccessGlobal & mpa,
try
{
// exchange data
vector < ObjectStream > osv = mpa.gcollect (os) ;
@@ -182,6 +183,171 @@ graphCollect (const MpAccessGlobal & mpa,
return ;
}
template <class idx_t>
void LoadBalancer :: DataBase ::
graphCollectBcast (const MpAccessGlobal & mpa,
insert_iterator < ldb_vertex_map_t > nodes,
insert_iterator < ldb_edge_set_t > edges,
idx_t* vtxdist, const bool serialPartitioner ) const
{
// for parallel partitioner return local vertices and edges
// for the serial partitioner these have to be communicated to all
// processes
// my rank number
const int me = mpa.myrank();
// get number of processes
const int np = mpa.psize () ;
// my data stream
ObjectStream os;
if( ! serialPartitioner )
{
{
ldb_vertex_map_t :: const_iterator iEnd = _vertexSet.end () ;
for (ldb_vertex_map_t :: const_iterator i = _vertexSet.begin () ;
i != iEnd; ++i )
{
{
const GraphVertex& x = (*i).first;
* nodes ++ = pair < const GraphVertex, int > ( x , me ) ;
}
}
}
{
ldb_edge_set_t :: const_iterator eEnd = _edgeSet.end () ;
for (ldb_edge_set_t :: const_iterator e = _edgeSet.begin () ;
e != eEnd; ++e)
{
const GraphEdge& x = (*e) ;
// edges exist twice ( u , v ) and ( v , u )
// with both orientations
* edges ++ = x ;
* edges ++ = - x ;
}
}
// make sure vtxdist exists
assert( vtxdist );
// vtxdist always starts with 0
// so initialize here
vtxdist[ 0 ] = 0 ;
}
{
// write number of vertices
const int vertexSize = _vertexSet.size () ;
os.writeObject ( vertexSize ) ;
if( serialPartitioner )
{
// write vertices
ldb_vertex_map_t :: const_iterator iEnd = _vertexSet.end () ;
for (ldb_vertex_map_t :: const_iterator i = _vertexSet.begin () ; i != iEnd; ++i )
{
os.writeObject ((*i).first) ;
}
// write number of edges
const int edgeSize = _edgeSet.size () ;
os.writeObject ( edgeSize ) ;
// write edges
ldb_edge_set_t :: const_iterator eEnd = _edgeSet.end () ;
for (ldb_edge_set_t :: const_iterator e = _edgeSet.begin () ; e != eEnd; ++e )
{
os.writeObject (*e);
}
}
}
try
{
// get max buffer size (only for the serial partitioner do we need to communicate)
const int maxSize = serialPartitioner ? mpa.gmax( os.size() ) : os.size();
// create bcast buffer
ObjectStream sendrecv ;
sendrecv.reserve( maxSize * sizeof(char) );
for( int rank = 0; rank < np; ++ rank )
{
// reset read/write positions
sendrecv.clear();
// write my data
if( rank == me )
{
// write my stream
sendrecv.writeStream( os );
// clear data
os.reset();
}
// make sure size is still ok
assert( sendrecv.capacity() == maxSize );
// exchange data
mpa.bcast( sendrecv.getBuff(0), maxSize, rank );
// insert data into graph map
{
// reset read position
sendrecv.resetReadPosition();
// adjust write count to max length to avoid eof errors
sendrecv.seekp( maxSize );
int len ;
sendrecv.readObject ( len ) ;
assert (len >= 0) ;
// read graph for serial partitioner
if( serialPartitioner )
{
for (int j = 0 ; j < len ; ++j)
{
GraphVertex x ;
sendrecv.readObject (x) ;
* nodes ++ = pair < const GraphVertex, int > (x, rank) ;
}
sendrecv.readObject (len) ;
assert (len >= 0) ;
for (int j = 0 ; j < len ; ++j)
{
GraphEdge x ;
sendrecv.readObject (x) ;
* edges ++ = x ;
* edges ++ = - x ;
}
}
else
{
// see above vtxdist [ 0 ] = 0
// sum up number of vertices for processor rank
vtxdist[ rank + 1 ] = vtxdist[ rank ] + len ;
}
}
}
}
catch (ObjectStream :: EOFException)
{
cerr << "**FEHLER (FATAL) EOF gelesen in " << __FILE__ << " " << __LINE__ << endl ;
abort () ;
}
catch (ObjectStream :: OutOfMemoryException)
{
cerr << "**FEHLER (FATAL) Out Of Memory in " << __FILE__ << " " << __LINE__ << endl ;
abort () ;
}
return ;
}
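A note on the vtxdist array filled in the non-serial branch above: it follows the usual ParMETIS-style vertex distribution, where vtxdist[ r + 1 ] accumulates the vertex counts of ranks 0 .. r, so rank r owns the global vertex numbers in [ vtxdist[ r ], vtxdist[ r + 1 ] ). A tiny worked example with made-up local counts:

// three ranks reporting len = 4, 2 and 5 local vertices (made-up numbers)
int vtxdist[ 4 ];
vtxdist[ 0 ] = 0;
const int lens[ 3 ] = { 4, 2, 5 };
for (int rank = 0; rank < 3; ++rank)
  vtxdist[ rank + 1 ] = vtxdist[ rank ] + lens[ rank ];
// result: vtxdist = { 0, 4, 6, 11 }
// e.g. rank 1 owns the global vertices 4 and 5, i.e. the range [ vtxdist[ 1 ], vtxdist[ 2 ] )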
template <class real_t, class idx_t>
static void optimizeCoverage (const int nparts,
const int len,
@@ -380,7 +546,7 @@ bool LoadBalancer :: DataBase :: repartition (MpAccessGlobal & mpa,
// collect graph from all processors
// needs an all-to-all (allgather) communication
graphCollect (mpa,
graphCollectBcast (mpa,
insert_iterator < ldb_vertex_map_t > (nodes,nodes.begin ()),
insert_iterator < ldb_edge_set_t > (edges,edges.begin ()),
vtxdist,
@@ -405,7 +571,7 @@ bool LoadBalancer :: DataBase :: repartition (MpAccessGlobal & mpa,
mth = METIS_PartGraphKway ;
// redo the graph collect in the case that the mesh is not distributed
graphCollect (mpa,
graphCollectBcast (mpa,
insert_iterator < ldb_vertex_map_t > (nodes,nodes.begin ()),
insert_iterator < ldb_edge_set_t > (edges,edges.begin ()),
vtxdist,
......
@@ -98,6 +98,11 @@ class LoadBalancer {
void graphCollect (const MpAccessGlobal &,insert_iterator < ldb_vertex_map_t >,
insert_iterator < ldb_edge_set_t >,
idx_t* , const bool ) const ;
template <class idx_t>
void graphCollectBcast (const MpAccessGlobal &,insert_iterator < ldb_vertex_map_t >,
insert_iterator < ldb_edge_set_t >,
idx_t* , const bool ) const ;
public :
static const char * methodToString (method) ;
inline DataBase () ;
......
@@ -39,6 +39,7 @@ class MpAccessGlobal {
virtual pair<double,double> gmin (pair<double,double>) const = 0 ;
virtual pair<double,double> gsum (pair<double,double>) const = 0 ;
virtual void bcast(int*,int, int) const = 0 ;
virtual void bcast(char*,int, int) const = 0 ;
virtual void bcast(double*,int, int) const = 0 ;
virtual int exscan( int ) const = 0;
virtual int scan( int ) const = 0;
......
@@ -397,6 +397,11 @@ void MpAccessMPI :: bcast (int* buff, int length, int root ) const
MPI_Bcast(buff, length, MPI_INT, root, _mpiComm) ;
}
void MpAccessMPI :: bcast (char* buff, int length, int root ) const
{
MPI_Bcast(buff, length, MPI_BYTE, root, _mpiComm) ;
}
void MpAccessMPI :: bcast (double* buff, int length, int root ) const
{
MPI_Bcast(buff, length, MPI_DOUBLE, root, _mpiComm) ;
......
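For context (not part of this commit): MPI_Bcast requires every rank in the communicator to pass the same count, which is why graphCollectBcast pads all streams to the global maximum obtained via gmax before calling the new char overload. MPI_BYTE is the natural datatype here, since the stream content is an opaque byte sequence rather than character data. A minimal usage sketch with made-up names:

#include <mpi.h>
#include <vector>

// broadcast an opaque byte buffer; all ranks must pass the identical length
void broadcastBytes (MPI_Comm comm, std::vector<char>& buf, int root)
{
  // on 'root' the buffer already holds the serialized data,
  // on all other ranks it merely provides the same amount of space
  MPI_Bcast (&buf[0], (int) buf.size(), MPI_BYTE, root, comm);
}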
@@ -104,6 +104,7 @@ public:
pair<double,double> gmin (pair<double,double>) const ;
pair<double,double> gsum (pair<double,double>) const ;
void bcast(int*, int, int ) const;
void bcast(char*, int, int ) const;
void bcast(double*, int, int ) const;
int exscan ( int ) const ;
int scan ( int ) const ;
......
@@ -54,10 +54,16 @@ public :
// reset read position
inline void resetReadPosition() { _rb = 0; }
//! set position of write counter
void seekp( const size_t pos ) { _wb = pos ; }
// returns true if size > 0 and the read position is zero
// i.e. a read of the stream will return some valid data
inline bool validToRead () const { return (_wb > 0) && (_rb == 0); }
// return the allocated buffer size in bytes
inline int capacity() const { return _len; }
// return the number of bytes already written to the stream
inline int size() const { return _wb; }
@@ -220,10 +226,10 @@ public:
_rb = newRb;
}
protected:
inline char * getBuff (const size_t ap) { return (_buf + ap); }
inline const char * getBuff (const size_t ap) const { return (_buf + ap); }
protected:
// reallocate the buffer if necessary
inline void reallocateBuffer(size_t newSize) throw (OutOfMemoryException)
{
......
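For context (not part of this commit): the new seekp is what makes the broadcast receive side work. bcast copies raw bytes into the storage returned by getBuff( 0 ) without advancing the write counter, so on the receiving ranks the stream would still look empty and readObject would hit the end-of-stream check. A condensed sketch of the receive-side call order, using only the members shown in this commit (maxSize, mpa and rank as in graphCollectBcast above):

ObjectStream sendrecv ;
sendrecv.reserve( maxSize * sizeof(char) ) ;         // allocate at least maxSize bytes
sendrecv.clear() ;                                   // reset read and write positions
mpa.bcast( sendrecv.getBuff( 0 ), maxSize, rank ) ;  // raw copy, write counter stays at 0
sendrecv.resetReadPosition() ;                       // start reading at the beginning
sendrecv.seekp( maxSize ) ;                          // mark maxSize bytes as written,
                                                     // otherwise readObject reports EOF
int len ;
sendrecv.readObject( len ) ;                         // now succeeds on every rank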