1 #ifndef PURIFY_H5READER_H
2 #define PURIFY_H5READER_H
3 #include "purify/config.h"
10 #include <sopt/mpi/communicator.h>
13 #include "highfive/H5File.hpp"
32 HighFive::FileAccessProps MPIFileAccess() {
33 HighFive::FileAccessProps fap;
34 fap.add(HighFive::MPIOFileAccess{MPI_COMM_WORLD, MPI_INFO_NULL});
35 fap.add(HighFive::MPIOCollectiveMetadata{});
39 HighFive::DataTransferProps MPIDataTransfer() {
40 HighFive::DataTransferProps dtp;
41 dtp.add(HighFive::UseCollectiveIO{});
49 using DatsetMap = std::map<std::string, HighFive::DataSet>;
57 _fap(HighFive::FileAccessProps{}),
58 _dtp(HighFive::DataTransferProps{}),
59 _file(filename, HighFive::File::ReadOnly) {
60 _datalen = _slicepos = _slicelen = _batchpos = 0;
65 H5Handler(
const std::string& filename,
const sopt::mpi::Communicator& comm)
67 _fap(MPIFileAccess()),
68 _dtp(MPIDataTransfer()),
69 _file(filename, HighFive::File::ReadOnly, _fap) {
70 _datalen = _slicepos = _slicelen = _batchpos = 0;
75 template <
typename T =
double>
76 std::vector<T>
read(
const std::string& label)
const {
77 auto dataset = _file.getDataSet(label);
78 return dataset.read<std::vector<T>>();
83 template <
typename T =
double>
84 std::vector<T>
distread(
const std::string& label) {
85 if (!_comm)
throw std::runtime_error(
"No MPI-collective reading enabled!");
88 data.reserve(_slicelen);
89 _ds[label].select({_slicepos}, {_slicelen}).
read(data, _dtp);
95 template <
typename T =
double>
96 std::vector<T>
stochread(
const std::string& label,
size_t batchsize,
bool shuffle =
false) {
97 if (!_comm)
throw std::runtime_error(
"No MPI-collective reading enabled!");
100 if (shuffle) _shuffle();
103 data.reserve(batchsize);
106 size_t pos = _batchpos;
109 size_t len = std::min(batchsize, _slicepos + _slicelen - pos);
110 _ds[label].select({pos}, {len}).
read(tmp, _dtp);
111 data.insert(data.end(), std::make_move_iterator(std::begin(tmp)),
112 std::make_move_iterator(std::end(tmp)));
120 void _loadDataSet(
const std::string& label) {
122 if (_ds.find(label) != _ds.end())
return;
124 _ds[label] = std::move(_file.getDataSet(label));
125 const auto& dims = _ds[label].getDimensions();
126 size_t len = dims.at(0);
127 if (len == 0)
throw std::runtime_error(
"Dataset has zero length!");
128 if (len < _comm->size())
throw std::runtime_error(
"Not enough data for each MPI rank!");
132 _slicelen = _datalen / _comm->size();
133 _slicepos = _comm->rank() * _slicelen;
134 if (_comm->rank() == _comm->size() - 1) {
135 _slicelen += _datalen % _comm->size();
137 }
else if (len != _datalen) {
138 throw std::runtime_error(
"Inconsistent dataset length!");
144 std::uniform_int_distribution<size_t> uni(_slicepos, _slicepos + _slicelen - 1);
145 _batchpos = uni(_rng);
148 const sopt::mpi::Communicator* _comm;
150 const HighFive::FileAccessProps _fap;
152 const HighFive::DataTransferProps _dtp;
154 const HighFive::File _file;
160 size_t _datalen, _slicepos, _slicelen, _batchpos;
170 std::vector<t_real> utemp = vis_file.
read<t_real>(
"u");
171 uv_vis.
u = Eigen::Map<Vector<t_real>>(utemp.data(), utemp.size(), 1);
175 std::vector<t_real> vtemp = vis_file.
read<t_real>(
"v");
176 uv_vis.
v = -Eigen::Map<Vector<t_real>>(vtemp.data(), vtemp.size(), 1);
179 std::vector<t_real> wtemp = vis_file.
read<t_real>(
"w");
180 uv_vis.
w = Eigen::Map<Vector<t_real>>(wtemp.data(), wtemp.size(), 1);
182 uv_vis.
w = Vector<t_real>::Zero(utemp.size());
185 std::vector<t_real> retemp = vis_file.
read<t_real>(
"re");
186 std::vector<t_real> imtemp = vis_file.
read<t_real>(
"im");
187 std::vector<t_real> sigma = vis_file.
read<t_real>(
"sigma");
188 assert(retemp.size() == imtemp.size());
190 uv_vis.
vis = Vector<t_complex>::Zero(retemp.size());
191 uv_vis.
weights = Vector<t_complex>::Zero(retemp.size());
192 for (
size_t i = 0; i < retemp.size(); ++i) {
193 uv_vis.
vis(i) = t_complex(retemp[i], imtemp[i]);
194 uv_vis.
weights(i) = 1 / sigma[i];
209 std::vector<t_real> utemp =
211 uv_vis.
u = Eigen::Map<Vector<t_real>>(utemp.data(), utemp.size(), 1);
215 std::vector<t_real> vtemp = file.
stochread<t_real>(
"v", N);
216 uv_vis.
v = -Eigen::Map<Vector<t_real>>(vtemp.data(), vtemp.size(), 1);
219 std::vector<t_real> wtemp = file.
stochread<t_real>(
"w", N);
220 uv_vis.
w = Eigen::Map<Vector<t_real>>(wtemp.data(), wtemp.size(), 1);
222 uv_vis.
w = Vector<t_real>::Zero(utemp.size());
225 std::vector<t_real> retemp = file.
stochread<t_real>(
"re", N);
226 std::vector<t_real> imtemp = file.
stochread<t_real>(
"im", N);
227 std::vector<t_real> sigma = file.
stochread<t_real>(
"sigma", N);
229 uv_vis.
vis = Vector<t_complex>::Zero(retemp.size());
230 uv_vis.
weights = Vector<t_complex>::Zero(retemp.size());
231 for (
size_t i = 0; i < retemp.size(); ++i) {
232 uv_vis.
vis(i) = t_complex(retemp[i], imtemp[i]);
233 uv_vis.
weights(i) = 1 / sigma[i];
245 const bool w_term,
const size_t chunksize = 0) {
247 HighFive::File h5file(h5name, HighFive::File::OpenOrCreate | HighFive::File::Truncate);
254 HighFive::DataSetCreateProps props;
255 if (uv_vis.
u.size()) {
257 props.add(HighFive::Chunking(std::vector<hsize_t>{chunksize}));
259 props.add(HighFive::Chunking(std::vector<hsize_t>{
static_cast<hsize_t
>(uv_vis.
u.size())}));
261 props.add(HighFive::Deflate(9));
264 h5file.createDataSet(
"u", std::vector<t_real>(uv_vis.
u.data(), uv_vis.
u.data() + uv_vis.
u.size()),
266 h5file.createDataSet(
"v", std::vector<t_real>(uv_vis.
v.data(), uv_vis.
v.data() + uv_vis.
v.size()),
269 h5file.createDataSet(
270 "w", std::vector<t_real>(uv_vis.
w.data(), uv_vis.
w.data() + uv_vis.
w.size()), props);
273 std::vector<t_real> redata, imdata, sigma;
274 redata.reserve(uv_vis.
vis.size());
275 imdata.reserve(uv_vis.
vis.size());
276 sigma.reserve(uv_vis.
weights.size());
277 for (
size_t i = 0; i < uv_vis.
vis.size(); ++i) {
278 redata.push_back(uv_vis.
vis(i).real());
279 imdata.push_back(uv_vis.
vis(i).imag());
280 sigma.push_back(1.0 / uv_vis.
weights(i).real());
282 h5file.createDataSet(
"re", std::move(redata), props);
283 h5file.createDataSet(
"im", std::move(imdata), props);
284 h5file.createDataSet(
"sigma", std::move(imdata), props);
Purify interface class to handle HDF5 input files.
std::vector< T > read(const std::string &label) const
Method to read the entire dataset.
std::vector< T > distread(const std::string &label)
std::vector< T > stochread(const std::string &label, size_t batchsize, bool shuffle=false)
H5Handler(const std::string &filename)
Default constructor (serial behaviour)
utilities::vis_params read_visibility(const std::string &vis_name, const bool w_term)
Reads an HDF5 file with u,v visibilities, constructs a vis_params object and returns it.
utilities::vis_params stochread_visibility(H5Handler &file, const size_t N, const bool w_term)
Stochastically reads dataset slices from the supplied HDF5-file handler, constructs a vis_params obje...
void write_visibility(const utilities::vis_params &uv_vis, const std::string &h5name, const bool w_term, const size_t chunksize=0)
Write an HDF5 file with u,v visibilities from a vis_params object.
Vector< t_complex > weights