diff --git a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
index 7afbd2fb8..ea6fd79a7 100644
--- a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
+++ b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.C
@@ -31,8 +31,10 @@ License
 #include "IFstream.H"
 #include "IStringStream.H"
 #include "dictionary.H"
-#include <sys/time.h>
 #include "objectRegistry.H"
+#include "SubList.H"
+#include "labelPair.H"
+#include "masterUncollatedFileOperation.H"
 
 // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
 
@@ -592,6 +594,7 @@ bool Foam::decomposedBlockData::writeBlocks
     autoPtr<OSstream>& osPtr,
     List<std::streamoff>& start,
     const UList<char>& data,
+    const labelUList& recvSizes,
     const UPstream::commsTypes commsType,
     const bool syncReturnState
 )
@@ -604,17 +607,16 @@ bool Foam::decomposedBlockData::writeBlocks
             << " commsType:" << Pstream::commsTypeNames[commsType] << endl;
     }
 
-    bool ok = true;
+    const label nProcs = UPstream::nProcs(comm);
+
 
-    labelList recvSizes(Pstream::nProcs(comm));
-    recvSizes[Pstream::myProcNo(comm)] = data.byteSize();
-    Pstream::gatherList(recvSizes, Pstream::msgType(), comm);
+    bool ok = true;
 
     if (commsType == UPstream::commsTypes::scheduled)
     {
         if (UPstream::master(comm))
         {
-            start.setSize(UPstream::nProcs(comm));
+            start.setSize(nProcs);
 
             OSstream& os = osPtr();
 
@@ -626,7 +628,7 @@ bool Foam::decomposedBlockData::writeBlocks
             }
             // Write slaves
             List<char> elems;
-            for (label proci = 1; proci < UPstream::nProcs(comm); proci++)
+            for (label proci = 1; proci < nProcs; proci++)
             {
                 elems.setSize(recvSizes[proci]);
                 IPstream::read
@@ -661,102 +663,147 @@ bool Foam::decomposedBlockData::writeBlocks
     }
     else
     {
-        if (debug)
+        // Write master data
+        if (UPstream::master(comm))
         {
-            struct timeval tv;
-            gettimeofday(&tv, nullptr);
-            Pout<< "Starting sending at:"
-                << 1.0*tv.tv_sec+tv.tv_usec/1e6 << " s"
-                << Foam::endl;
+            start.setSize(nProcs);
+
+            OSstream& os = osPtr();
+
+            os << nl << "// Processor" << UPstream::masterNo() << nl;
+            start[UPstream::masterNo()] = os.stdStream().tellp();
+            os << data;
         }
 
 
-        label startOfRequests = Pstream::nRequests();
+        // Find out how many processors' data can be received into
+        // masterUncollatedFileOperation::maxMasterFileBufferSize
 
-        if (!UPstream::master(comm))
-        {
-            UOPstream::write
-            (
-                UPstream::commsTypes::nonBlocking,
-                UPstream::masterNo(),
-                data.begin(),
-                data.byteSize(),
-                Pstream::msgType(),
-                comm
-            );
-            Pstream::waitRequests(startOfRequests);
-        }
-        else
+        // Starting slave processor and number of processors
+        labelPair startAndSize(1, nProcs-1);
+
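+        // Loop over the processors in windows: the master determines how
+        // many processors' data fit into the buffer, broadcasts that
+        // window, gathers and writes its contents, then moves on to the
+        // next window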
+        while (startAndSize[1] > 0)
         {
-            List<List<char>> recvBufs(Pstream::nProcs(comm));
-            for (label proci = 1; proci < UPstream::nProcs(comm); proci++)
+            labelPair masterData(startAndSize);
+            if (UPstream::master(comm))
             {
-                recvBufs[proci].setSize(recvSizes[proci]);
-                UIPstream::read
+                label totalSize = 0;
+                label proci = masterData[0];
+                while
                 (
-                    UPstream::commsTypes::nonBlocking,
-                    proci,
-                    recvBufs[proci].begin(),
-                    recvSizes[proci],
-                    Pstream::msgType(),
-                    comm
-                );
+                    proci < nProcs
+                 && (
+                        totalSize+recvSizes[proci]
+                      < fileOperations::masterUncollatedFileOperation::
+                            maxMasterFileBufferSize
+                    )
+                )
+                {
+                    totalSize += recvSizes[proci];
+                    proci++;
+                }
+
+                masterData[1] = proci-masterData[0];
             }
 
-            if (debug)
+
+            // Scatter masterData: every slice has offset 0 and size
+            // sizeof(masterData) so all processors receive the same
+            // window (effectively a broadcast)
+            UPstream::scatter
+            (
+                reinterpret_cast<const char*>(masterData.cdata()),
+                List<int>(nProcs, sizeof(masterData)),
+                List<int>(nProcs, 0),
+                reinterpret_cast<char*>(startAndSize.data()),
+                sizeof(startAndSize),
+                comm
+            );
+
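+            // A zero-sized window indicates that all data has been written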
+            if (startAndSize[1] == 0)
             {
-                struct timeval tv;
-                gettimeofday(&tv, nullptr);
-                Pout<< "Starting master-only writing at:"
-                    << 1.0*tv.tv_sec+tv.tv_usec/1e6 << " s"
-                    << Foam::endl;
+                break;
             }
 
-            start.setSize(UPstream::nProcs(comm));
 
-            OSstream& os = osPtr();
-
-            // Write master data
+            // Master: set up the receive slices and buffer for the window
+            List<int> sliceSizes;
+            List<int> sliceOffsets;
+            List<char> recvData;
+            if (UPstream::master(comm))
             {
-                os << nl << "// Processor" << UPstream::masterNo() << nl;
-                start[UPstream::masterNo()] = os.stdStream().tellp();
-                os << data;
+                sliceSizes.setSize(nProcs, 0);
+                sliceOffsets.setSize(nProcs, 0);
+
+                int totalSize = 0;
+                for
+                (
+                    label proci = startAndSize[0];
+                    proci < startAndSize[0]+startAndSize[1];
+                    proci++
+                )
+                {
+                    sliceSizes[proci] = int(recvSizes[proci]);
+                    sliceOffsets[proci] = totalSize;
+                    totalSize += sliceSizes[proci];
+                }
+                recvData.setSize(totalSize);
             }
 
-            if (debug)
+            int nSend = 0;
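+            // Only processors inside the current window send their data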
+            if
+            (
+               !UPstream::master(comm)
+             && (UPstream::myProcNo(comm) >= startAndSize[0])
+             && (UPstream::myProcNo(comm) < startAndSize[0]+startAndSize[1])
+            )
             {
-                struct timeval tv;
-                gettimeofday(&tv, nullptr);
-                Pout<< "Starting slave writing at:"
-                    << 1.0*tv.tv_sec+tv.tv_usec/1e6 << " s"
-                    << Foam::endl;
+                nSend = data.byteSize();
             }
 
-            // Write slaves
-            for (label proci = 1; proci < UPstream::nProcs(comm); proci++)
+
+            UPstream::gather
+            (
+                data.begin(),
+                nSend,
+
+                recvData.begin(),
+                sliceSizes,
+                sliceOffsets,
+                comm
+            );
+
+            if (UPstream::master(comm))
             {
-                os << nl << nl << "// Processor" << proci << nl;
-                start[proci] = os.stdStream().tellp();
+                OSstream& os = osPtr();
 
-                if (Pstream::finishedRequest(startOfRequests+proci-1))
+                // Write slaves
+                for
+                (
+                    label proci = startAndSize[0];
+                    proci < startAndSize[0]+startAndSize[1];
+                    proci++
+                )
                 {
-                    os << recvBufs[proci];
+                    os << nl << nl << "// Processor" << proci << nl;
+                    start[proci] = os.stdStream().tellp();
+
+                    os <<
+                        SubList<char>
+                        (
+                            recvData,
+                            sliceSizes[proci],
+                            sliceOffsets[proci]
+                        );
                 }
             }
 
-            Pstream::resetRequests(startOfRequests);
+            startAndSize[0] += startAndSize[1];
+        }
 
-            ok = os.good();
+        if (UPstream::master(comm))
+        {
+            ok = osPtr().good();
         }
     }
-    if (debug)
-    {
-        struct timeval tv;
-        gettimeofday(&tv, nullptr);
-        Pout<< "Finished master-only writing at:"
-            << 1.0*tv.tv_sec+tv.tv_usec/1e6 << " s"
-            << Foam::endl;
-    }
 
     if (syncReturnState)
     {
@@ -868,8 +915,34 @@ bool Foam::decomposedBlockData::writeObject
         osPtr.reset(new OFstream(objectPath(), IOstream::BINARY, ver, cmp));
         IOobject::writeHeader(osPtr());
     }
+
+    const label nProcs = Pstream::nProcs(comm_);
+
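+    // Gather the data sizes (in bytes) from all processors onto the
+    // master; the offsets are byte offsets into the recvSizes storage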
+    labelList recvSizes(nProcs);
+    {
+        char* data = reinterpret_cast<char*>(recvSizes.begin());
+
+        List<int> recvOffsets(nProcs);
+        forAll(recvOffsets, proci)
+        {
+            recvOffsets[proci] =
+                reinterpret_cast<char*>(&recvSizes[proci])
+              - data;
+        }
+        label sz = this->byteSize();
+        UPstream::gather
+        (
+            reinterpret_cast<char*>(&sz),
+            sizeof(label),
+            data,
+            List<int>(recvSizes.size(), sizeof(label)),
+            recvOffsets,
+            comm_
+        );
+    }
+
     List<std::streamoff> start;
-    return writeBlocks(comm_, osPtr, start, *this, commsType_);
+    return writeBlocks(comm_, osPtr, start, *this, recvSizes, commsType_);
 }
 
 
diff --git a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
index dc4e3d70f..c151c3abe 100644
--- a/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
+++ b/src/OpenFOAM/db/IOobjects/decomposedBlockData/decomposedBlockData.H
@@ -177,6 +177,7 @@ public:
             autoPtr<OSstream>& osPtr,
             List<std::streamoff>& start,
             const UList<char>&,
+            const labelUList& recvSizes,
             const UPstream::commsTypes,
             const bool syncReturnState = true
         );
diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
index fab2dd7ff..d3c4e2da6 100644
--- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
+++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H
@@ -499,6 +499,47 @@ public:
             labelUList& recvData,
             const label communicator = 0
         );
+
+        //- Exchange data with all processors (in the communicator)
+        //  sendSizes, sendOffsets give (per processor) the slice of
+        //  sendData to send; similarly recvSizes, recvOffsets give the
+        //  slice of recvData to receive
+        static void allToAll
+        (
+            const char* sendData,
+            const UList<int>& sendSizes,
+            const UList<int>& sendOffsets,
+
+            char* recvData,
+            const UList<int>& recvSizes,
+            const UList<int>& recvOffsets,
+
+            const label communicator = 0
+        );
+
+        //- Receive data from all processors on the master
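+        //  recvSizes, recvOffsets are only used on the master processor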
+        static void gather
+        (
+            const char* sendData,
+            int sendSize,
+
+            char* recvData,
+            const UList<int>& recvSizes,
+            const UList<int>& recvOffsets,
+            const label communicator = 0
+        );
+
+        //- Send data to all processors from the master of the communicator
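+        //  sendSizes, sendOffsets are only used on the master processor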
+        static void scatter
+        (
+            const char* sendData,
+            const UList<int>& sendSizes,
+            const UList<int>& sendOffsets,
+
+            char* recvData,
+            int recvSize,
+            const label communicator = 0
+        );
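+
+        //  Example: gathering variable-size data onto the master
+        //  (a sketch; 'data' is assumed to be the local UList<char>
+        //  and 'comm' a valid communicator label):
+        //
+        //      List<int> sizes;     // on master: bytes per processor
+        //      List<int> offsets;   // on master: offset per processor
+        //      List<char> recvData; // on master: sized to sum of sizes
+        //      UPstream::gather
+        //      (
+        //          data.begin(),
+        //          data.byteSize(),
+        //          recvData.begin(),
+        //          sizes,
+        //          offsets,
+        //          comm
+        //      );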
 };
 
 
diff --git a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
index b5c8ab494..4aeb79196 100644
--- a/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
+++ b/src/OpenFOAM/global/fileOperations/collatedFileOperation/OFstreamCollator.C
@@ -92,9 +92,34 @@ bool Foam::OFstreamCollator::writeFile
 
     UList<char> slice(const_cast<char*>(s.data()), label(s.size()));
 
-    // Assuming threaded writing hides any slowness so we might
-    // as well use scheduled communication to send the data to
-    // the master processor in order.
+    // Determine sizes to receive
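+    // (each processor's byte count is gathered onto the master; the
+    // offsets are byte offsets into the recvSizes storage)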
+    labelList recvSizes(Pstream::nProcs(comm));
+    {
+        char* data = reinterpret_cast<char*>(recvSizes.begin());
+
+        List<int> recvOffsets(recvSizes.size());
+        forAll(recvOffsets, proci)
+        {
+            recvOffsets[proci] =
+                reinterpret_cast<char*>(&recvSizes[proci])
+              - data;
+        }
+        label sz = slice.byteSize();
+        UPstream::gather
+        (
+            reinterpret_cast<char*>(&sz),
+            sizeof(sz),
+            data,
+            List<int>(recvSizes.size(), sizeof(label)),
+            recvOffsets,
+            comm
+        );
+    }
+
+    // Assuming threaded writing hides any slowness we could use scheduled
+    // communication to send the data to the master processor in order.
+    // However scheduled transfers can be unstable so the default is
+    // non-blocking.
 
     List<std::streamoff> start;
     decomposedBlockData::writeBlocks
@@ -103,7 +128,8 @@ bool Foam::OFstreamCollator::writeFile
         osPtr,
         start,
         slice,
-        UPstream::commsTypes::scheduled,
+        recvSizes,
+        UPstream::commsTypes::nonBlocking,  // or scheduled; see note above
         false       // do not reduce return state
     );
 
@@ -115,8 +141,17 @@ bool Foam::OFstreamCollator::writeFile
 
     if (debug)
     {
-        Pout<< "OFstreamCollator : Finished writing " << s.size()
-            << " bytes to " << fName
+        Pout<< "OFstreamCollator : Finished writing " << s.size() << " bytes";
+        if (UPstream::master(comm))
+        {
+            label sum = 0;
+            forAll(recvSizes, i)
+            {
+                sum += recvSizes[i];
+            }
+            Pout<< " (overall " << sum << ")";
+        }
+        Pout<< " to " << fName
             << " using comm " << comm << endl;
     }
 
@@ -133,14 +168,11 @@ void* Foam::OFstreamCollator::writeAll(void *threadarg)
     {
         writeData* ptr = nullptr;
 
-        //pthread_mutex_lock(&handler.mutex_);
         lockMutex(handler.mutex_);
-
         if (handler.objects_.size())
         {
             ptr = handler.objects_.pop();
         }
-        //pthread_mutex_unlock(&handler.mutex_);
         unlockMutex(handler.mutex_);
 
         if (!ptr)
@@ -177,10 +209,8 @@ void* Foam::OFstreamCollator::writeAll(void *threadarg)
         Pout<< "OFstreamCollator : Exiting write thread " << endl;
     }
 
-    //pthread_mutex_lock(&handler.mutex_);
     lockMutex(handler.mutex_);
     handler.threadRunning_ = false;
-    //pthread_mutex_unlock(&handler.mutex_);
     unlockMutex(handler.mutex_);
 
     return nullptr;
@@ -192,7 +222,6 @@ void* Foam::OFstreamCollator::writeAll(void *threadarg)
 Foam::OFstreamCollator::OFstreamCollator(const off_t maxBufferSize)
 :
     maxBufferSize_(maxBufferSize),
-    //mutex_(PTHREAD_MUTEX_INITIALIZER),
     mutex_
     (
         maxBufferSize_ > 0
@@ -228,7 +257,6 @@ Foam::OFstreamCollator::~OFstreamCollator()
             Pout<< "~OFstreamCollator : Waiting for write thread" << endl;
         }
 
-        //pthread_join(thread_, nullptr);
         joinThread(thread_);
     }
     if (thread_ != -1)
@@ -265,13 +293,12 @@ bool Foam::OFstreamCollator::write
         {
             // Count files to be written
             off_t totalSize = 0;
-            //pthread_mutex_lock(&mutex_);
+
             lockMutex(mutex_);
             forAllConstIter(FIFOStack<writeData*>, objects_, iter)
             {
                 totalSize += iter()->data_.size();
             }
-            //pthread_mutex_unlock(&mutex_);
             unlockMutex(mutex_);
 
             if
@@ -285,10 +312,13 @@ bool Foam::OFstreamCollator::write
 
             if (debug)
             {
+                lockMutex(mutex_);
                 Pout<< "OFstreamCollator : Waiting for buffer space."
                     << " Currently in use:" << totalSize
                     << " limit:" << maxBufferSize_
+                    << " files:" << objects_.size()
                     << endl;
+                unlockMutex(mutex_);
             }
 
             sleep(5);
@@ -299,16 +329,14 @@ bool Foam::OFstreamCollator::write
             Pout<< "OFstreamCollator : relaying write of " << fName
                 << " to thread " << endl;
         }
-        //pthread_mutex_lock(&mutex_);
+
         lockMutex(mutex_);
         objects_.push
         (
             new writeData(typeName, fName, data, fmt, ver, cmp, append)
         );
-        //pthread_mutex_unlock(&mutex_);
         unlockMutex(mutex_);
 
-        //pthread_mutex_lock(&mutex_);
         lockMutex(mutex_);
         if (!threadRunning_)
         {
@@ -319,7 +347,6 @@ bool Foam::OFstreamCollator::write
             }
             threadRunning_ = true;
         }
-        //pthread_mutex_unlock(&mutex_);
         unlockMutex(mutex_);
 
         return true;
diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C
index 124e22a0c..dd5ac01b3 100644
--- a/src/Pstream/dummy/UPstream.C
+++ b/src/Pstream/dummy/UPstream.C
@@ -92,6 +92,36 @@ void Foam::UPstream::allToAll
 }
 
 
+void Foam::UPstream::gather
+(
+    const char* sendData,
+    int sendSize,
+
+    char* recvData,
+    const UList<int>& recvSizes,
+    const UList<int>& recvOffsets,
+    const label communicator
+)
+{
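+    // Non-parallel: copy the local data directly into the receive buffer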
+    memmove(recvData, sendData, sendSize);
+}
+
+
+void Foam::UPstream::scatter
+(
+    const char* sendData,
+    const UList<int>& sendSizes,
+    const UList<int>& sendOffsets,
+
+    char* recvData,
+    int recvSize,
+    const label communicator
+)
+{
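+    // Non-parallel: copy the master data directly into the receive buffer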
+    memmove(recvData, sendData, recvSize);
+}
+
+
 void Foam::UPstream::allocatePstreamCommunicator
 (
     const label,
diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C
index 317eaf260..b36ca35ca 100644
--- a/src/Pstream/mpi/UPstream.C
+++ b/src/Pstream/mpi/UPstream.C
@@ -2,7 +2,7 @@
   =========                 |
   \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
    \\    /   O peration     |
-    \\  /    A nd           | Copyright (C) 2011-2016 OpenFOAM Foundation
+    \\  /    A nd           | Copyright (C) 2011-2017 OpenFOAM Foundation
      \\/     M anipulation  |
 -------------------------------------------------------------------------------
 License
@@ -362,6 +362,196 @@ void Foam::UPstream::allToAll
 }
 
 
+void Foam::UPstream::allToAll
+(
+    const char* sendData,
+    const UList<int>& sendSizes,
+    const UList<int>& sendOffsets,
+
+    char* recvData,
+    const UList<int>& recvSizes,
+    const UList<int>& recvOffsets,
+
+    const label communicator
+)
+{
+    label np = nProcs(communicator);
+
+    if
+    (
+        sendSizes.size() != np
+     || sendOffsets.size() != np
+     || recvSizes.size() != np
+     || recvOffsets.size() != np
+    )
+    {
+        FatalErrorInFunction
+            << "Size of sendSizes " << sendSizes.size()
+            << ", sendOffsets " << sendOffsets.size()
+            << ", recvSizes " << recvSizes.size()
+            << " or recvOffsets " << recvOffsets.size()
+            << " is not equal to the number of processors in the domain "
+            << np
+            << Foam::abort(FatalError);
+    }
+
+    if (!UPstream::parRun())
+    {
+        if (recvSizes[0] != sendSizes[0])
+        {
+            FatalErrorInFunction
+                << "Bytes to send " << sendSizes[0]
+                << " does not equal bytes to receive " << recvSizes[0]
+                << Foam::abort(FatalError);
+        }
+        memmove(recvData, &sendData[sendOffsets[0]], recvSizes[0]);
+    }
+    else
+    {
+        if
+        (
+            MPI_Alltoallv
+            (
+                // NOTE: const_cast for compatibility with pre-MPI-3
+                // implementations that take non-const arguments
+                const_cast<char*>(sendData),
+                const_cast<int*>(sendSizes.begin()),
+                const_cast<int*>(sendOffsets.begin()),
+                MPI_BYTE,
+                recvData,
+                const_cast<int*>(recvSizes.begin()),
+                const_cast<int*>(recvOffsets.begin()),
+                MPI_BYTE,
+                PstreamGlobals::MPICommunicators_[communicator]
+            )
+        )
+        {
+            FatalErrorInFunction
+                << "MPI_Alltoallv failed for sendSizes " << sendSizes
+                << " recvSizes " << recvSizes
+                << " communicator " << communicator
+                << Foam::abort(FatalError);
+        }
+    }
+}
+
+
+void Foam::UPstream::gather
+(
+    const char* sendData,
+    int sendSize,
+
+    char* recvData,
+    const UList<int>& recvSizes,
+    const UList<int>& recvOffsets,
+    const label communicator
+)
+{
+    label np = nProcs(communicator);
+
+    if
+    (
+        UPstream::master(communicator)
+     && (recvSizes.size() != np || recvOffsets.size() != np)
+    )
+    {
+        FatalErrorInFunction
+            << "Size of recvSizes " << recvSizes.size()
+            << " or recvOffsets " << recvOffsets.size()
+            << " is not equal to the number of processors in the domain "
+            << np
+            << Foam::abort(FatalError);
+    }
+
+    if (!UPstream::parRun())
+    {
+        memmove(recvData, sendData, sendSize);
+    }
+    else
+    {
+        if
+        (
+            MPI_Gatherv
+            (
+                const_cast<char*>(sendData),
+                sendSize,
+                MPI_BYTE,
+                recvData,
+                const_cast<int*>(recvSizes.begin()),
+                const_cast<int*>(recvOffsets.begin()),
+                MPI_BYTE,
+                0,
+                MPI_Comm(PstreamGlobals::MPICommunicators_[communicator])
+            )
+        )
+        {
+            FatalErrorInFunction
+                << "MPI_Gatherv failed for sendSize " << sendSize
+                << " recvSizes " << recvSizes
+                << " communicator " << communicator
+                << Foam::abort(FatalError);
+        }
+    }
+}
+
+
+void Foam::UPstream::scatter
+(
+    const char* sendData,
+    const UList<int>& sendSizes,
+    const UList<int>& sendOffsets,
+
+    char* recvData,
+    int recvSize,
+    const label communicator
+)
+{
+    label np = nProcs(communicator);
+
+    if
+    (
+        UPstream::master(communicator)
+     && (sendSizes.size() != np || sendOffsets.size() != np)
+    )
+    {
+        FatalErrorInFunction
+            << "Size of sendSizes " << sendSizes.size()
+            << " or sendOffsets " << sendOffsets.size()
+            << " is not equal to the number of processors in the domain "
+            << np
+            << Foam::abort(FatalError);
+    }
+
+    if (!UPstream::parRun())
+    {
+        memmove(recvData, sendData, recvSize);
+    }
+    else
+    {
+        if
+        (
+            MPI_Scatterv
+            (
+                const_cast<char*>(sendData),
+                const_cast<int*>(sendSizes.begin()),
+                const_cast<int*>(sendOffsets.begin()),
+                MPI_BYTE,
+                recvData,
+                recvSize,
+                MPI_BYTE,
+                0,
+                MPI_Comm(PstreamGlobals::MPICommunicators_[communicator])
+            )
+        )
+        {
+            FatalErrorInFunction
+                << "MPI_Scatterv failed for sendSizes " << sendSizes
+                << " sendOffsets " << sendOffsets
+                << " communicator " << communicator
+                << Foam::abort(FatalError);
+        }
+    }
+}
+
+
 void Foam::UPstream::allocatePstreamCommunicator
 (
     const label parentIndex,
