1 #ifndef PURIFY_H5READER_H 
    2 #define PURIFY_H5READER_H 
    3 #include "purify/config.h" 
   10 #include <sopt/mpi/communicator.h> 
   13 #include "highfive/H5File.hpp" 
   32 HighFive::FileAccessProps MPIFileAccess() {
 
   33   HighFive::FileAccessProps fap;
 
   34   fap.add(HighFive::MPIOFileAccess{MPI_COMM_WORLD, MPI_INFO_NULL});
 
   35   fap.add(HighFive::MPIOCollectiveMetadata{});
 
   39 HighFive::DataTransferProps MPIDataTransfer() {
 
   40   HighFive::DataTransferProps dtp;
 
   41   dtp.add(HighFive::UseCollectiveIO{});
 
   49   using DatsetMap = std::map<std::string, HighFive::DataSet>;
 
   57         _fap(HighFive::FileAccessProps{}),
 
   58         _dtp(HighFive::DataTransferProps{}),
 
   59         _file(filename, HighFive::File::ReadOnly) {
 
   60     _datalen = _slicepos = _slicelen = _batchpos = 0;
 
   65   H5Handler(
const std::string& filename, 
const sopt::mpi::Communicator& comm)
 
   67         _fap(MPIFileAccess()),
 
   68         _dtp(MPIDataTransfer()),
 
   69         _file(filename, HighFive::File::ReadOnly, _fap) {
 
   70     _datalen = _slicepos = _slicelen = _batchpos = 0;
 
   75   template <
typename T = 
double>
 
   76   std::vector<T> 
read(
const std::string& label)
 const {
 
   77     auto dataset = _file.getDataSet(label);
 
   78     return dataset.read<std::vector<T>>();
 
   83   template <
typename T = 
double>
 
   84   std::vector<T> 
distread(
const std::string& label) {
 
   85     if (!_comm) 
throw std::runtime_error(
"No MPI-collective reading enabled!");
 
   88     data.reserve(_slicelen);
 
   89     _ds[label].select({_slicepos}, {_slicelen}).
read(data, _dtp);
 
   95   template <
typename T = 
double>
 
   96   std::vector<T> 
stochread(
const std::string& label, 
size_t batchsize, 
bool shuffle = 
false) {
 
   97     if (!_comm) 
throw std::runtime_error(
"No MPI-collective reading enabled!");
 
  100     if (shuffle) _shuffle();
 
  103     data.reserve(batchsize);
 
  106     size_t pos = _batchpos;
 
  109       size_t len = std::min(batchsize, _slicepos + _slicelen - pos);
 
  110       _ds[label].select({pos}, {len}).
read(tmp, _dtp);
 
  111       data.insert(data.end(), std::make_move_iterator(std::begin(tmp)),
 
  112                   std::make_move_iterator(std::end(tmp)));
 
  120   void _loadDataSet(
const std::string& label) {
 
  122     if (_ds.find(label) != _ds.end()) 
return;
 
  124     _ds[label] = std::move(_file.getDataSet(label));
 
  125     const auto& dims = _ds[label].getDimensions();
 
  126     size_t len = dims.at(0);
 
  127     if (len == 0) 
throw std::runtime_error(
"Dataset has zero length!");
 
  128     if (len < _comm->size()) 
throw std::runtime_error(
"Not enough data for each MPI rank!");
 
  132       _slicelen = _datalen / _comm->size();
 
  133       _slicepos = _comm->rank() * _slicelen;
 
  134       if (_comm->rank() == _comm->size() - 1) {
 
  135         _slicelen += _datalen % _comm->size();
 
  137     } 
else if (len != _datalen) {
 
  138       throw std::runtime_error(
"Inconsistent dataset length!");
 
  144     std::uniform_int_distribution<size_t> uni(_slicepos, _slicepos + _slicelen - 1);
 
  145     _batchpos = uni(_rng);
 
  148   const sopt::mpi::Communicator* _comm;
 
  150   const HighFive::FileAccessProps _fap;
 
  152   const HighFive::DataTransferProps _dtp;
 
  154   const HighFive::File _file;
 
  160   size_t _datalen, _slicepos, _slicelen, _batchpos;
 
  170   std::vector<t_real> utemp = vis_file.
read<t_real>(
"u");
 
  171   uv_vis.
u = Eigen::Map<Vector<t_real>>(utemp.data(), utemp.size(), 1);
 
  175   std::vector<t_real> vtemp = vis_file.
read<t_real>(
"v");
 
  176   uv_vis.
v = -Eigen::Map<Vector<t_real>>(vtemp.data(), vtemp.size(), 1);
 
  179     std::vector<t_real> wtemp = vis_file.
read<t_real>(
"w");
 
  180     uv_vis.
w = Eigen::Map<Vector<t_real>>(wtemp.data(), wtemp.size(), 1);
 
  182     uv_vis.
w = Vector<t_real>::Zero(utemp.size());
 
  185   std::vector<t_real> retemp = vis_file.
read<t_real>(
"re");
 
  186   std::vector<t_real> imtemp = vis_file.
read<t_real>(
"im");
 
  187   std::vector<t_real> sigma = vis_file.
read<t_real>(
"sigma");
 
  188   assert(retemp.size() == imtemp.size());
 
  190   uv_vis.
vis = Vector<t_complex>::Zero(retemp.size());
 
  191   uv_vis.
weights = Vector<t_complex>::Zero(retemp.size());
 
  192   for (
size_t i = 0; i < retemp.size(); ++i) {
 
  193     uv_vis.
vis(i) = t_complex(retemp[i], imtemp[i]);
 
  194     uv_vis.
weights(i) = 1 / sigma[i];
 
  209   std::vector<t_real> utemp =
 
  211   uv_vis.
u = Eigen::Map<Vector<t_real>>(utemp.data(), utemp.size(), 1);
 
  215   std::vector<t_real> vtemp = file.
stochread<t_real>(
"v", N);
 
  216   uv_vis.
v = -Eigen::Map<Vector<t_real>>(vtemp.data(), vtemp.size(), 1);
 
  219     std::vector<t_real> wtemp = file.
stochread<t_real>(
"w", N);
 
  220     uv_vis.
w = Eigen::Map<Vector<t_real>>(wtemp.data(), wtemp.size(), 1);
 
  222     uv_vis.
w = Vector<t_real>::Zero(utemp.size());
 
  225   std::vector<t_real> retemp = file.
stochread<t_real>(
"re", N);
 
  226   std::vector<t_real> imtemp = file.
stochread<t_real>(
"im", N);
 
  227   std::vector<t_real> sigma = file.
stochread<t_real>(
"sigma", N);
 
  229   uv_vis.
vis = Vector<t_complex>::Zero(retemp.size());
 
  230   uv_vis.
weights = Vector<t_complex>::Zero(retemp.size());
 
  231   for (
size_t i = 0; i < retemp.size(); ++i) {
 
  232     uv_vis.
vis(i) = t_complex(retemp[i], imtemp[i]);
 
  233     uv_vis.
weights(i) = 1 / sigma[i];
 
  245                       const bool w_term, 
const size_t chunksize = 0) {
 
  247   HighFive::File h5file(h5name, HighFive::File::OpenOrCreate | HighFive::File::Truncate);
 
  254   HighFive::DataSetCreateProps props;
 
  255   if (uv_vis.
u.size()) {
 
  257       props.add(HighFive::Chunking(std::vector<hsize_t>{chunksize}));
 
  259       props.add(HighFive::Chunking(std::vector<hsize_t>{
static_cast<hsize_t
>(uv_vis.
u.size())}));
 
  261     props.add(HighFive::Deflate(9));  
 
  264   h5file.createDataSet(
"u", std::vector<t_real>(uv_vis.
u.data(), uv_vis.
u.data() + uv_vis.
u.size()),
 
  266   h5file.createDataSet(
"v", std::vector<t_real>(uv_vis.
v.data(), uv_vis.
v.data() + uv_vis.
v.size()),
 
  269     h5file.createDataSet(
 
  270         "w", std::vector<t_real>(uv_vis.
w.data(), uv_vis.
w.data() + uv_vis.
w.size()), props);
 
  273   std::vector<t_real> redata, imdata, sigma;
 
  274   redata.reserve(uv_vis.
vis.size());
 
  275   imdata.reserve(uv_vis.
vis.size());
 
  276   sigma.reserve(uv_vis.
weights.size());
 
  277   for (
size_t i = 0; i < uv_vis.
vis.size(); ++i) {
 
  278     redata.push_back(uv_vis.
vis(i).real());
 
  279     imdata.push_back(uv_vis.
vis(i).imag());
 
  280     sigma.push_back(1.0 / uv_vis.
weights(i).real());
 
  282   h5file.createDataSet(
"re", std::move(redata), props);
 
  283   h5file.createDataSet(
"im", std::move(imdata), props);
 
  284   h5file.createDataSet(
"sigma", std::move(imdata), props);
 
Purify interface class to handle HDF5 input files.
 
std::vector< T > read(const std::string &label) const
Method to read the entire dataset.
 
std::vector< T > distread(const std::string &label)
 
std::vector< T > stochread(const std::string &label, size_t batchsize, bool shuffle=false)
 
H5Handler(const std::string &filename)
Default constructor (serial behaviour)
 
utilities::vis_params read_visibility(const std::string &vis_name, const bool w_term)
Reads an HDF5 file with u,v visibilities, constructs a vis_params object and returns it.
 
utilities::vis_params stochread_visibility(H5Handler &file, const size_t N, const bool w_term)
Stochastically reads dataset slices from the supplied HDF5-file handler, constructs a vis_params obje...
 
void write_visibility(const utilities::vis_params &uv_vis, const std::string &h5name, const bool w_term, const size_t chunksize=0)
Write an HDF5 file with u,v visibilities from a vis_params object.
 
Vector< t_complex > weights