1 #ifndef PURIFY_OPERATORS_GPU_H
2 #define PURIFY_OPERATORS_GPU_H
4 #include "purify/config.h"
10 #include <sopt/chained_operators.h>
11 #include <sopt/linear_transform.h>
12 #ifdef PURIFY_ARRAYFIRE
13 #include <arrayfire.h>
16 #ifdef PURIFY_ARRAYFIRE
21 af::array init_af_G_2d(
const Sparse<t_complex> &G) {
22 Vector<t_complexf> data(G.nonZeros());
23 Vector<t_int> rows(G.rows() + 1);
24 Vector<t_int> cols(G.nonZeros());
25 rows(G.rows()) = G.nonZeros();
27 for (t_int k = 0; k < G.outerSize(); ++k) {
29 for (Sparse<t_complex>::InnerIterator it(G, k); it; ++it) {
30 data(index) =
static_cast<t_complexf>(it.value());
31 cols(index) = it.col();
35 assert(data.size() == G.nonZeros());
36 return af::sparse(G.rows(), G.cols(), G.nonZeros(),
37 reinterpret_cast<const af::cfloat *
>(data.data()), rows.data(), cols.data(),
41 sopt::OperatorFunction<Vector<t_complexf>> host_wrapper_float(
42 const sopt::OperatorFunction<af::array> &gpu_operator,
const t_uint &input_size,
43 const t_uint &output_size,
const t_uint &idx = 0) {
44 return [=](Vector<t_complexf> &output,
const Vector<t_complexf> &input) {
46 const af::array gpu_input =
47 af::array(input_size,
reinterpret_cast<const af::cfloat *
>(input.data()));
49 gpu_operator(gpu_output, gpu_input);
50 const std::shared_ptr<t_complexf> gpu_data =
51 std::shared_ptr<t_complexf>(
reinterpret_cast<t_complexf *
>(gpu_output.host<af::cfloat>()),
53 output = Vector<t_complexf>::Zero(output_size);
54 std::copy(gpu_data.get(), gpu_data.get() + output_size, output.data());
57 sopt::OperatorFunction<Vector<t_complex>> host_wrapper(
58 const sopt::OperatorFunction<af::array> &gpu_operator,
const t_uint &input_size,
59 const t_uint &output_size,
const t_uint &idx = 0) {
60 const sopt::OperatorFunction<Vector<t_complexf>> float_wrapper =
61 host_wrapper_float(gpu_operator, input_size, output_size, idx);
62 return [=](Vector<t_complex> &output,
const Vector<t_complex> &input) {
63 Vector<t_complexf> output_float;
64 float_wrapper(output_float, input.cast<
t_complexf>());
65 output = output_float.cast<t_complex>();
69 std::tuple<sopt::OperatorFunction<af::array>, sopt::OperatorFunction<af::array>>
70 init_af_zero_padding_2d(
const Image<t_complexf> &S,
const t_real &oversample_ratio) {
71 const t_uint imsizex_ = S.cols();
72 const t_uint imsizey_ = S.rows();
73 const t_uint ftsizeu_ = std::floor(imsizex_ * oversample_ratio);
74 const t_uint ftsizev_ = std::floor(imsizey_ * oversample_ratio);
75 const t_uint x_start = std::floor(ftsizeu_ * 0.5 - imsizex_ * 0.5);
76 const t_uint y_start = std::floor(ftsizev_ * 0.5 - imsizey_ * 0.5);
77 const af::array correction =
78 af::array(imsizey_ * imsizex_,
reinterpret_cast<const af::cfloat *
>(S.data()));
79 auto direct = [=](af::array &output,
const af::array &x) {
80 output = af::constant(0., ftsizev_, ftsizeu_, c32);
81 output(af::seq(y_start, y_start + imsizey_ - 1), af::seq(x_start, x_start + imsizex_ - 1)) =
82 af::moddims(correction * x, imsizey_, imsizex_);
83 output = af::flat(output);
85 auto indirect = [=](af::array &output,
const af::array &x) {
86 output = af::moddims(x, ftsizev_, ftsizeu_)(af::seq(y_start, y_start + imsizey_ - 1),
87 af::seq(x_start, x_start + imsizex_ - 1));
88 output = correction * af::flat(output);
90 return std::make_tuple(direct, indirect);
93 template <
class... ARGS>
94 std::tuple<sopt::OperatorFunction<af::array>, sopt::OperatorFunction<af::array>>
95 init_af_gridding_matrix_2d(
const Vector<t_real> &
u, ARGS &&...args) {
96 const Sparse<t_complex> interpolation_matrix =
98 const Sparse<t_complex> adjoint = interpolation_matrix.adjoint();
99 const std::shared_ptr<const af::array> G =
100 std::make_shared<const af::array>(gpu::init_af_G_2d(interpolation_matrix));
101 const std::shared_ptr<const af::array> G_adjoint =
102 std::make_shared<const af::array>(gpu::init_af_G_2d(adjoint));
103 auto direct = [G](af::array &output,
const af::array &input) { output = af::matmul(*G, input); };
104 auto indirect = [G_adjoint](af::array &output,
const af::array &input) {
105 output = af::matmul(*G_adjoint, input);
107 return std::make_tuple(direct, indirect);
111 std::tuple<sopt::OperatorFunction<Vector<t_complex>>, sopt::OperatorFunction<Vector<t_complex>>>
112 init_af_gridding_matrix_2d(
const sopt::mpi::Communicator &comm,
const Vector<t_real> &
u,
113 const Vector<t_real> &
v,
const Vector<t_complex> &weights,
114 const t_uint &imsizey_,
const t_uint &imsizex_,
115 const t_real oversample_ratio,
116 const std::function<t_real(t_real)> kernelu,
117 const std::function<t_real(t_real)> kernelv,
const t_uint Ju = 4,
118 const t_uint Jv = 4) {
122 u,
v, weights, imsizey_, imsizex_, oversample_ratio, kernelu, kernelv, Ju, Jv);
123 const DistributeSparseVector distributor(interpolation_matrix, comm);
125 const Sparse<t_complex> adjoint = interpolation_matrix.adjoint();
126 const std::shared_ptr<const af::array> G =
127 std::make_shared<const af::array>(gpu::init_af_G_2d(interpolation_matrix));
128 const std::shared_ptr<const af::array> G_adjoint =
129 std::make_shared<const af::array>(gpu::init_af_G_2d(adjoint));
130 const sopt::OperatorFunction<af::array> directGgpu =
131 [G](af::array &output,
const af::array &input) { output = af::matmul(*G, input); };
132 const sopt::OperatorFunction<af::array> indirectGgpu = [G_adjoint](af::array &output,
133 const af::array &input) {
134 output = af::matmul(*G_adjoint, input);
136 const sopt::OperatorFunction<Vector<t_complex>> directGhost =
137 gpu::host_wrapper(directGgpu, interpolation_matrix.cols(), interpolation_matrix.rows());
138 const sopt::OperatorFunction<Vector<t_complex>> indirectGhost =
139 gpu::host_wrapper(indirectGgpu, interpolation_matrix.rows(), interpolation_matrix.cols());
140 return std::make_tuple(
141 [=](Vector<t_complex> &output,
const Vector<t_complex> &input) {
142 Vector<t_complex> dist_input;
143 if (comm.is_root()) {
144 assert(input.size() > 0);
145 distributor.scatter(input, dist_input);
147 distributor.scatter(dist_input);
149 directGhost(output, dist_input);
151 [=](Vector<t_complex> &output,
const Vector<t_complex> &input) {
152 Vector<t_complex> dist_output;
153 indirectGhost(dist_output, input);
154 if (not comm.is_root()) {
155 distributor.gather(dist_output);
157 distributor.gather(dist_output, output);
158 assert(output.size() > 0);
164 std::tuple<sopt::OperatorFunction<af::array>, sopt::OperatorFunction<af::array>> init_af_FFT_2d(
165 const t_uint &imsizey_,
const t_uint &imsizex_,
const t_real &oversample_factor_) {
166 auto const ftsizeu_ = std::floor(imsizex_ * oversample_factor_);
167 auto const ftsizev_ = std::floor(imsizey_ * oversample_factor_);
168 auto direct = [=](af::array &output,
const af::array &input) {
169 output = af::fft2(af::moddims(input, ftsizev_, ftsizeu_), ftsizev_, ftsizeu_);
170 output = af::flat(output) / std::sqrt(ftsizeu_ * ftsizev_);
172 auto indirect = [=](af::array &output,
const af::array &input) {
173 output = af::ifft2(af::moddims(input, ftsizev_, ftsizeu_), ftsizev_, ftsizeu_);
174 output = af::flat(output) * std::sqrt(ftsizeu_ * ftsizev_);
176 return std::make_tuple(direct, indirect);
178 std::tuple<sopt::OperatorFunction<af::array>, sopt::OperatorFunction<af::array>>
179 af_base_padding_and_FFT_2d(
const std::function<t_real(t_real)> &ftkernelu,
180 const std::function<t_real(t_real)> &ftkernelv,
const t_uint &imsizey,
181 const t_uint &imsizex,
const t_real oversample_ratio = 2,
182 const t_real w_mean = 0,
const t_real &cellx = 1,
183 const t_real &celly = 1) {
184 sopt::OperatorFunction<af::array> directZ, indirectZ;
185 sopt::OperatorFunction<af::array> directFFT, indirectFFT;
187 oversample_ratio, imsizey, imsizex, ftkernelu, ftkernelv, w_mean, cellx, celly);
189 PURIFY_LOW_LOG(
"Constructing Zero Padding and Correction Operator: ZDB");
192 std::tie(directZ, indirectZ) =
193 purify::gpu::operators::init_af_zero_padding_2d(S.cast<
t_complexf>(), oversample_ratio);
195 std::tie(directFFT, indirectFFT) =
196 purify::gpu::operators::init_af_FFT_2d(imsizey, imsizex, oversample_ratio);
197 const auto direct = sopt::chained_operators<af::array>(directFFT, directZ);
198 const auto indirect = sopt::chained_operators<af::array>(indirectZ, indirectFFT);
199 return std::make_tuple(direct, indirect);
201 std::tuple<sopt::OperatorFunction<Vector<t_complex>>, sopt::OperatorFunction<Vector<t_complex>>>
203 const Vector<t_complex> &weights,
const t_uint &imsizey,
204 const t_uint &imsizex,
const t_real oversample_ratio = 2,
206 const t_uint Jv = 4,
const bool w_term =
false,
const t_real cellx = 1,
207 const t_real &celly = 1,
const t_uint &idx = 0) {
209 std::function<t_real(t_real)> kernelu, kernelv, ftkernelu, ftkernelv;
210 std::tie(kernelu, kernelv, ftkernelu, ftkernelv) =
212 sopt::OperatorFunction<af::array> directFZ, indirectFZ;
213 std::tie(directFZ, indirectFZ) = af_base_padding_and_FFT_2d(
214 ftkernelu, ftkernelv, imsizey, imsizex, oversample_ratio, w.mean(), cellx, celly);
215 PURIFY_MEDIUM_LOG(
"FoV (width, height): {} deg x {} deg", imsizex * cellx / (60. * 60.),
216 imsizey * celly / (60. * 60.));
217 PURIFY_LOW_LOG(
"Constructing Weighting and Gridding Operators: WG");
220 sopt::OperatorFunction<af::array> directG, indirectG;
221 std::tie(directG, indirectG) = purify::gpu::operators::init_af_gridding_matrix_2d(
222 u,
v, weights, imsizey, imsizex, oversample_ratio, kernelu, kernelv, Ju, Jv);
223 auto direct = gpu::host_wrapper(sopt::chained_operators<af::array>(directG, directFZ),
224 imsizey * imsizex,
u.size(), idx);
225 auto indirect = gpu::host_wrapper(sopt::chained_operators<af::array>(indirectFZ, indirectG),
226 u.size(), imsizex * imsizey, idx);
227 return std::make_tuple(direct, indirect);
231 std::tuple<sopt::OperatorFunction<Vector<t_complex>>, sopt::OperatorFunction<Vector<t_complex>>>
232 base_mpi_degrid_operator_2d(
const sopt::mpi::Communicator &comm,
const Vector<t_real> &
u,
233 const Vector<t_real> &
v,
const Vector<t_real> &w,
234 const Vector<t_complex> &weights,
const t_uint &imsizey,
235 const t_uint &imsizex,
const t_real &oversample_ratio = 2,
237 const t_uint Jv = 4,
const bool w_term =
false,
const t_real cellx = 1,
238 const t_real &celly = 1,
const t_uint &idx = 0) {
240 std::function<t_real(t_real)> kernelu, kernelv, ftkernelu, ftkernelv;
241 std::tie(kernelu, kernelv, ftkernelu, ftkernelv) =
243 sopt::OperatorFunction<af::array> directFZ, indirectFZ;
244 std::tie(directFZ, indirectFZ) =
245 af_base_padding_and_FFT_2d(ftkernelu, ftkernelv, imsizey, imsizex, oversample_ratio);
246 PURIFY_MEDIUM_LOG(
"FoV (width, height): {} deg x {} deg", imsizex * cellx / (60. * 60.),
247 imsizey * celly / (60. * 60.));
248 PURIFY_LOW_LOG(
"Constructing Weighting and MPI Gridding Operators: WG");
251 sopt::OperatorFunction<Vector<t_complex>> directG, indirectG;
252 std::tie(directG, indirectG) = operators::init_af_gridding_matrix_2d(
253 comm,
u,
v, weights, imsizey, imsizex, oversample_ratio, kernelu, kernelv, Ju, Jv);
254 sopt::OperatorFunction<Vector<t_complex>> direct =
255 gpu::host_wrapper(directFZ, imsizey * imsizex,
256 std::floor(imsizex * imsizey * oversample_ratio * oversample_ratio), idx);
257 sopt::OperatorFunction<Vector<t_complex>> indirect = gpu::host_wrapper(
258 indirectFZ, std::floor(imsizex * imsizey * oversample_ratio * oversample_ratio),
259 imsizex * imsizey, idx);
260 direct = sopt::chained_operators<Vector<t_complex>>(directG, direct);
261 indirect = sopt::chained_operators<Vector<t_complex>>(indirect, indirectG);
263 return std::make_tuple(direct, indirect);
265 return std::make_tuple(directG, indirectG);
270 namespace measurementoperator {
273 const Vector<t_real> &
u,
const Vector<t_real> &
v,
const Vector<t_real> &w,
274 const Vector<t_complex> &weights,
const t_uint &imsizey,
const t_uint &imsizex,
276 const t_uint Ju = 4,
const t_uint Jv = 4,
const bool w_term =
false,
const t_real cellx = 1,
277 const t_real &celly = 1,
const t_uint &idx = 0) {
281 std::array<t_int, 3> N = {0, 1,
static_cast<t_int
>(imsizey * imsizex)};
282 std::array<t_int, 3> M = {0, 1,
static_cast<t_int
>(
u.size())};
283 sopt::OperatorFunction<Vector<t_complex>> directDegrid, indirectDegrid;
285 u,
v, w, weights, imsizey, imsizex, oversample_ratio,
kernel, Ju, Jv, w_term, idx);
286 auto &direct = directDegrid;
287 auto &indirect = indirectDegrid;
288 return std::make_shared<sopt::LinearTransform<Vector<t_complex>>>(direct, M, indirect, N);
291 const utilities::vis_params &uv_vis_input,
const t_uint &imsizey,
const t_uint &imsizex,
292 const t_real &cell_x,
const t_real &cell_y,
const t_real &oversample_ratio = 2,
294 const bool w_term =
false,
const t_uint &idx = 0) {
298 uv_vis.u, uv_vis.v, uv_vis.w, uv_vis.weights, imsizey, imsizex, oversample_ratio,
kernel, Ju,
299 Jv, w_term, cell_x, cell_y, idx);
303 std::shared_ptr<sopt::LinearTransform<Vector<t_complex>>> init_degrid_operator_2d_mpi(
304 const sopt::mpi::Communicator &comm,
const Vector<t_real> &
u,
const Vector<t_real> &
v,
305 const Vector<t_real> &w,
const Vector<t_complex> &weights,
const t_uint &imsizey,
306 const t_uint &imsizex,
const t_real &oversample_ratio = 2,
308 const bool w_term =
false,
const t_real cellx = 1,
const t_real &celly = 1,
309 const t_uint &idx = 0) {
314 std::array<t_int, 3> N = {0, 1,
static_cast<t_int
>(imsizey * imsizex)};
315 std::array<t_int, 3> M = {0, 1,
static_cast<t_int
>(
u.size())};
316 sopt::OperatorFunction<Vector<t_complex>> directDegrid, indirectDegrid;
317 std::tie(directDegrid, indirectDegrid) = purify::gpu::operators::base_mpi_degrid_operator_2d(
318 comm,
u,
v, w, weights, imsizey, imsizex, oversample_ratio,
kernel, Ju, Jv, w_term, idx);
319 auto Broadcast = purify::operators::init_broadcaster<Vector<t_complex>>(comm);
320 auto direct = directDegrid;
321 auto indirect = sopt::chained_operators<Vector<t_complex>>(Broadcast, indirectDegrid);
322 return std::make_shared<sopt::LinearTransform<Vector<t_complex>>>(direct, M, indirect, N);
324 std::shared_ptr<sopt::LinearTransform<Vector<t_complex>>> init_degrid_operator_2d_mpi(
325 const sopt::mpi::Communicator &comm,
const utilities::vis_params &uv_vis_input,
326 const t_uint &imsizey,
const t_uint &imsizex,
const t_real &cell_x,
const t_real &cell_y,
328 const t_uint Ju = 4,
const t_uint Jv = 4,
const bool w_term =
false,
const t_uint &idx = 0) {
331 return gpu::measurementoperator::init_degrid_operator_2d_mpi(
332 comm, uv_vis.u, uv_vis.v, uv_vis.w, uv_vis.weights, imsizey, imsizex, oversample_ratio,
333 kernel, Ju, Jv, w_term, cell_x, cell_y, idx);
336 const sopt::mpi::Communicator &comm,
const Vector<t_real> &
u,
const Vector<t_real> &
v,
337 const Vector<t_real> &w,
const Vector<t_complex> &weights,
const t_uint &imsizey,
338 const t_uint &imsizex,
const t_real &oversample_ratio = 2,
340 const bool w_term =
false,
const t_real cellx = 1,
const t_real &celly = 1,
341 const t_uint &idx = 0) {
346 std::array<t_int, 3> N = {0, 1,
static_cast<t_int
>(imsizey * imsizex)};
347 std::array<t_int, 3> M = {0, 1,
static_cast<t_int
>(
u.size())};
348 sopt::OperatorFunction<Vector<t_complex>> directDegrid, indirectDegrid;
350 u,
v, w, weights, imsizey, imsizex, oversample_ratio,
kernel, Ju, Jv, w_term, cellx, celly,
352 const auto allsumall = purify::operators::init_all_sum_all<Vector<t_complex>>(comm);
353 auto direct = directDegrid;
354 auto indirect = sopt::chained_operators<Vector<t_complex>>(allsumall, indirectDegrid);
355 return std::make_shared<sopt::LinearTransform<Vector<t_complex>>>(direct, M, indirect, N);
358 const sopt::mpi::Communicator &comm,
const utilities::vis_params &uv_vis_input,
359 const t_uint &imsizey,
const t_uint &imsizex,
const t_real &cell_x,
const t_real &cell_y,
361 const t_uint Ju = 4,
const t_uint Jv = 4,
const bool w_term =
false,
const t_uint &idx = 0) {
365 comm, uv_vis.u, uv_vis.v, uv_vis.w, uv_vis.weights, imsizey, imsizex, oversample_ratio,
366 kernel, Ju, Jv, w_term, idx);
#define PURIFY_LOW_LOG(...)
Low priority message.
#define PURIFY_MEDIUM_LOG(...)
Medium priority message.
const std::vector< t_real > u
data for u coordinate
const std::vector< t_real > v
data for v coordinate
Image< t_complex > init_correction2d(const t_real &oversample_ratio, const t_uint &imsizey_, const t_uint &imsizex_, const std::function< t_real(t_real)> ftkernelu, const std::function< t_real(t_real)> ftkernelv, const t_real &w_mean, const t_real &cellx, const t_real &celly)
Sparse< t_complex > init_gridding_matrix_2d(const Vector< t_real > &u, const Vector< t_real > &v, const Vector< t_complex > &weights, const t_uint &imsizey_, const t_uint &imsizex_, const t_real &oversample_ratio, const std::function< t_real(t_real)> kernelu, const std::function< t_real(t_real)> kernelv, const t_uint Ju, const t_uint Jv)
Construct gridding matrix.
std::shared_ptr< sopt::LinearTransform< T > > init_degrid_operator_2d(const Vector< t_real > &u, const Vector< t_real > &v, const Vector< t_real > &w, const Vector< t_complex > &weights, const t_uint &imsizey, const t_uint &imsizex, const t_real &oversample_ratio=2, const kernels::kernel kernel=kernels::kernel::kb, const t_uint Ju=4, const t_uint Jv=4, const bool w_stacking=false, const t_real &cellx=1, const t_real &celly=1)
Returns linear transform that is the standard degridding operator.
std::tuple< sopt::OperatorFunction< T >, sopt::OperatorFunction< T > > base_degrid_operator_2d(const Vector< t_real > &u, const Vector< t_real > &v, const Vector< t_real > &w, const Vector< t_complex > &weights, const t_uint &imsizey, const t_uint &imsizex, const t_real &oversample_ratio=2, const kernels::kernel kernel=kernels::kernel::kb, const t_uint Ju=4, const t_uint Jv=4, const fftw_plan &ft_plan=fftw_plan::measure, const bool w_stacking=false, const t_real &cellx=1, const t_real &celly=1, const bool on_the_fly=true)
utilities::vis_params convert_to_pixels(const utilities::vis_params &uv_vis, const t_real cell_x, const t_real cell_y, const t_real imsizex, const t_real imsizey, const t_real oversample_ratio)
Converts u and v coordaintes to units of pixels.
std::complex< float > t_complexf
std::tuple< std::function< t_real(t_real)>, std::function< t_real(t_real)>, std::function< t_real(t_real)>, std::function< t_real(t_real)> > create_kernels(const kernels::kernel kernel_name_, const t_uint Ju_, const t_uint Jv_, const t_real imsizey_, const t_real imsizex_, const t_real oversample_ratio)
Sparse< typename T0::Scalar > compress_outer(T0 const &matrix)
Indices of non empty outer indices.