#ifndef PURIFY_FLY_OPERATORS_H
#define PURIFY_FLY_OPERATORS_H
#include "purify/config.h"

#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <set>
#include <tuple>
#include <vector>
#include "purify/kernels.h"
#include "purify/logging.h"
#include "purify/types.h"
#include "purify/utilities.h"
#ifdef PURIFY_OPENMP
#include <omp.h>
#endif
#ifdef PURIFY_MPI
#include "purify/AllToAllSparseVector.h"
#include "purify/DistributeSparseVector.h"
#include <sopt/mpi/communicator.h>
#endif
namespace purify {
namespace operators {

//! on the fly application of the degridding operator using presampling
template <class T>
std::tuple<sopt::OperatorFunction<T>, sopt::OperatorFunction<T>> init_on_the_fly_gridding_matrix_2d(
    const Vector<t_real> &u, const Vector<t_real> &v, const Vector<t_complex> &weights,
    const t_uint &imsizey_, const t_uint &imsizex_, const t_real &oversample_ratio,
    const std::function<t_real(t_real)> &kernelu, const t_uint Ju, const t_int total_samples) {
  const t_uint ftsizev_ = std::floor(imsizey_ * oversample_ratio);
  const t_uint ftsizeu_ = std::floor(imsizex_ * oversample_ratio);
  const t_uint rows = u.size();
  const t_uint cols = ftsizeu_ * ftsizev_;
  if (u.size() != v.size())
    throw std::runtime_error(
        "Size of u and v vectors are not the same for creating gridding matrix.");
  const std::shared_ptr<Vector<t_real>> u_ptr = std::make_shared<Vector<t_real>>(u);
  const std::shared_ptr<Vector<t_real>> v_ptr = std::make_shared<Vector<t_real>>(v);
  const std::shared_ptr<Vector<t_complex>> weights_ptr =
      std::make_shared<Vector<t_complex>>(weights);
  const t_complex I(0, 1);
  const t_int ju_max = std::min(Ju, ftsizeu_);
  const t_int jv_max = std::min(Ju, ftsizev_);
  const std::vector<t_real> samples = kernels::kernel_samples(
      total_samples, [&](const t_real x) { return kernelu(x * ju_max * 0.5); });
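  // `samples` is a lookup table of kernelu evaluated at total_samples points over half the
  // kernel support [0, ju_max / 2] (only the absolute distance to a grid point is ever looked
  // up, so a symmetric kernel is assumed). The operators below replace every kernel evaluation
  // with a table lookup at i_0 = floor(2 * |dist| * total_samples / ju_max), which is what
  // makes them cheap enough to apply "on the fly" without storing a sparse gridding matrix.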
  std::set<t_int> nonZeros_set;
  for (t_int m = 0; m < rows; ++m) {
    const t_real u_val = (*u_ptr)(m);
    const t_real v_val = (*v_ptr)(m);
    const t_real k_u = std::floor(u_val - ju_max * 0.5);
    const t_real k_v = std::floor(v_val - jv_max * 0.5);
    for (t_int jv = 1; jv < jv_max + 1; ++jv) {
      const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
      const t_int c_0 = static_cast<t_int>(
          std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
      for (t_int ju = 1; ju < ju_max + 1; ++ju) {
        const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
        const t_int i_0 = static_cast<t_int>(
            std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
        const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
        nonZeros_set.insert(index);
      }
    }
  }
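  // The set of touched grid cells is now flattened into nonZeros_vec, and `mapping` sends each
  // occupied (linear) grid index to its position in a dense compressed buffer; the gridding
  // operator below only ever stores and traverses cells hit by at least one visibility kernel.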
  std::vector<t_int> nonZeros_vec(nonZeros_set.begin(), nonZeros_set.end());
  std::sort(nonZeros_vec.data(), nonZeros_vec.data() + nonZeros_vec.size());
  SparseVector<t_int> mapping(cols);
  mapping.reserve(nonZeros_vec.size());
  for (t_int index = 0; index < nonZeros_vec.size(); index++)
    mapping.coeffRef(nonZeros_vec[index]) = index;
  PURIFY_LOW_LOG("Non Zero grid locations: {} ", mapping.nonZeros());
  const auto degrid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                       ftsizeu_, ftsizev_](T &output, const T &input) {
    output = T::Zero(u_ptr->size());
    assert(input.size() == ftsizeu_ * ftsizev_);
#ifdef PURIFY_OPENMP
#pragma omp parallel for
#endif
    for (t_int m = 0; m < rows; ++m) {
      t_complex result = 0;
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
          const t_real sign = kernelu_val * kernelv_val;
          result += input(index) * sign;
        }
      }
      output(m) = result;
    }
    output.array() *= (*weights_ptr).array();
  };
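  // The adjoint (gridding) operator accumulates into one compressed segment per OpenMP thread
  // (offset by `shift`) to avoid atomics, reduces the segments, then scatters the result back
  // to the full grid through nonZeros_vec. The factor (1. - 2 * (p % 2)) = (-1)^p applied in
  // each direction is the usual checkerboard sign that realizes an fftshift of the grid
  // without reordering memory.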
  const auto grid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                     ftsizeu_, ftsizev_, mapping, nonZeros_vec](T &output, const T &input) {
    const t_int N = ftsizeu_ * ftsizev_;
#ifdef PURIFY_OPENMP
    t_int const max_threads = omp_get_max_threads();
#else
    t_int const max_threads = 1;
#endif
    // one compressed accumulation buffer per thread
    T output_compressed = T::Zero(nonZeros_vec.size() * max_threads);
    output = T::Zero(N);
    assert(output.size() == N);
#ifdef PURIFY_OPENMP
#pragma omp parallel for
#endif
    for (t_int m = 0; m < rows; ++m) {
#ifdef PURIFY_OPENMP
      const t_int shift = omp_get_thread_num() * nonZeros_vec.size();
#else
      const t_int shift = 0;
#endif
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      const t_complex vis = input(m) * std::conj((*weights_ptr)(m));
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
          const t_complex result = kernelu_val * kernelv_val * vis;
          output_compressed(mapping.coeff(index) + shift) += result;
        }
      }
    }
    // reduce the per-thread buffers into segment 0
    for (t_int m = 1; m < max_threads; m++) {
      const t_int loop_shift = m * nonZeros_vec.size();
      output_compressed.segment(0, nonZeros_vec.size()) +=
          output_compressed.segment(loop_shift, nonZeros_vec.size());
    }
    // scatter the compressed buffer back to the full grid
    for (t_int index = 0; index < nonZeros_vec.size(); index++)
      output(nonZeros_vec[index]) += output_compressed(index);
  };
  return std::make_tuple(degrid, grid);
}
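// A minimal usage sketch (illustrative only: the kernel, buffers and parameter values below
// are hypothetical, not taken from this file):
//   const t_uint imsize = 256, J = 4;
//   const t_int nsamples = 1000;
//   auto kernel = [](const t_real x) { return std::exp(-x * x); };  // stand-in kernel
//   auto ops = init_on_the_fly_gridding_matrix_2d<Vector<t_complex>>(
//       u, v, weights, imsize, imsize, 2., kernel, J, nsamples);
//   Vector<t_complex> vis, grid_out;
//   std::get<0>(ops)(vis, padded_grid);  // degrid: oversampled FFT grid -> visibilities
//   std::get<1>(ops)(grid_out, vis);     // grid (adjoint): visibilities -> grid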
#ifdef PURIFY_MPI
//! on the fly application of the degridding operator using presampling, with the grid
//! distributed over MPI
template <class T>
std::tuple<sopt::OperatorFunction<T>, sopt::OperatorFunction<T>> init_on_the_fly_gridding_matrix_2d(
    const sopt::mpi::Communicator &comm, const Vector<t_real> &u, const Vector<t_real> &v,
    const Vector<t_complex> &weights, const t_uint &imsizey_, const t_uint &imsizex_,
    const t_real &oversample_ratio, const std::function<t_real(t_real)> &kernelu,
    const t_uint Ju, const t_int total_samples) {
  const t_uint ftsizev_ = std::floor(imsizey_ * oversample_ratio);
  const t_uint ftsizeu_ = std::floor(imsizex_ * oversample_ratio);
  const t_uint rows = u.size();
  const t_uint cols = ftsizeu_ * ftsizev_;
  if (u.size() != v.size())
    throw std::runtime_error(
        "Size of u and v vectors are not the same for creating gridding matrix.");
  const std::shared_ptr<Vector<t_real>> u_ptr = std::make_shared<Vector<t_real>>(u);
  const std::shared_ptr<Vector<t_real>> v_ptr = std::make_shared<Vector<t_real>>(v);
  const std::shared_ptr<Vector<t_complex>> weights_ptr =
      std::make_shared<Vector<t_complex>>(weights);
  const t_complex I(0, 1);
  const t_int ju_max = std::min(Ju, ftsizeu_);
  const t_int jv_max = std::min(Ju, ftsizev_);
  const std::vector<t_real> samples = kernels::kernel_samples(
      total_samples, [&](const t_real x) { return kernelu(x * ju_max * 0.5); });
  std::set<t_int> nonZeros_set;
  for (t_int m = 0; m < rows; ++m) {
    const t_real u_val = (*u_ptr)(m);
    const t_real v_val = (*v_ptr)(m);
    const t_real k_u = std::floor(u_val - ju_max * 0.5);
    const t_real k_v = std::floor(v_val - jv_max * 0.5);
    for (t_int jv = 1; jv < jv_max + 1; ++jv) {
      const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
      const t_int c_0 = static_cast<t_int>(
          std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
      for (t_int ju = 1; ju < ju_max + 1; ++ju) {
        const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
        const t_int i_0 = static_cast<t_int>(
            std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
        const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
        nonZeros_set.insert(index);
      }
    }
  }
  const std::shared_ptr<DistributeSparseVector> distributor =
      std::make_shared<DistributeSparseVector>(nonZeros_set, cols, comm);
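  // DistributeSparseVector scatters to each MPI rank only the grid cells its local
  // visibilities touch, and gathers the adjoint contributions back onto the root rank,
  // which holds the full oversampled grid.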
  std::vector<t_int> nonZeros_vec(nonZeros_set.begin(), nonZeros_set.end());
  std::sort(nonZeros_vec.data(), nonZeros_vec.data() + nonZeros_vec.size());
  SparseVector<t_int> mapping(cols);
  mapping.reserve(nonZeros_vec.size());
  for (t_int index = 0; index < nonZeros_vec.size(); index++)
    mapping.coeffRef(nonZeros_vec[index]) = index;
  PURIFY_LOW_LOG("Non Zero grid locations: {} ", mapping.nonZeros());
  const t_int nonZeros_size = mapping.nonZeros();
  const auto degrid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                       ftsizeu_, ftsizev_, distributor, mapping, nonZeros_size,
                       comm](T &output, const T &input) {
    T input_buff;
    if (comm.is_root()) {
      assert(input.size() == ftsizeu_ * ftsizev_);
      distributor->scatter(input, input_buff);
    } else {
      distributor->scatter(input_buff);
    }
    assert(input_buff.size() == mapping.nonZeros());
    output = T::Zero(u_ptr->size());
#pragma omp parallel for
    for (t_int m = 0; m < rows; ++m) {
      t_complex result = 0;
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
          const t_real sign = kernelu_val * kernelv_val;
          result += input_buff(mapping.coeff(index)) * sign;
        }
      }
      output(m) = result;
    }
    output.array() *= (*weights_ptr).array();
  };
  const auto grid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                     ftsizeu_, ftsizev_, mapping, nonZeros_size, distributor,
                     comm](T &output, const T &input) {
    const t_int N = ftsizeu_ * ftsizev_;
#ifdef PURIFY_OPENMP
    t_int const max_threads = omp_get_max_threads();
#else
    t_int const max_threads = 1;
#endif
    T output_compressed = T::Zero(nonZeros_size * max_threads);
#pragma omp parallel for
    for (t_int m = 0; m < rows; ++m) {
#ifdef PURIFY_OPENMP
      const t_int shift = omp_get_thread_num() * nonZeros_size;
#else
      const t_int shift = 0;
#endif
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      const t_complex vis = input(m) * std::conj((*weights_ptr)(m));
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const t_int index = utilities::sub2ind(p, q, ftsizev_, ftsizeu_);
          const t_complex result = kernelu_val * kernelv_val * vis;
          output_compressed(mapping.coeff(index) + shift) += result;
        }
      }
    }
    // reduce the per-thread buffers, then gather onto the root grid
    T output_sum = T::Zero(nonZeros_size);
    for (t_int m = 0; m < max_threads; m++) {
      const t_int loop_shift = m * nonZeros_size;
      output_sum += output_compressed.segment(loop_shift, nonZeros_size);
    }
    if (not comm.is_root()) {
      distributor->gather(output_sum);
    } else {
      distributor->gather(output_sum, output);
      assert(output.size() == N);
    }
  };
  return std::make_tuple(degrid, grid);
}
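// In the all-to-all variant below, each rank owns one image-sized section of a stack of
// number_of_images grids (its section starting at comm.rank() * ftsizeu_ * ftsizev_), and
// every rank exchanges occupied cells with every other, so no single root rank has to hold
// the whole stack.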
//! on the fly application of the degridding operator using presampling, for a stack of images
//! distributed all to all with MPI
template <class T>
std::tuple<sopt::OperatorFunction<T>, sopt::OperatorFunction<T>>
init_on_the_fly_gridding_matrix_2d_all_to_all(
    const sopt::mpi::Communicator &comm, const t_uint number_of_images,
    const std::vector<t_int> &image_index, const Vector<t_real> &u, const Vector<t_real> &v,
    const Vector<t_complex> &weights, const t_uint &imsizey_, const t_uint &imsizex_,
    const t_real &oversample_ratio, const std::function<t_real(t_real)> &kernelu,
    const t_uint Ju, const t_int total_samples) {
  if (std::any_of(image_index.begin(), image_index.end(), [&number_of_images](int index) {
        return index < 0 or index > (number_of_images - 1);
      }))
    throw std::runtime_error("Image index is out of bounds");
  const t_uint ftsizev_ = std::floor(imsizey_ * oversample_ratio);
  const t_uint ftsizeu_ = std::floor(imsizex_ * oversample_ratio);
  const t_uint rows = u.size();
  if (u.size() != v.size())
    throw std::runtime_error(
        "Size of u and v vectors are not the same for creating gridding matrix.");
  const std::shared_ptr<Vector<t_real>> u_ptr = std::make_shared<Vector<t_real>>(u);
  const std::shared_ptr<Vector<t_real>> v_ptr = std::make_shared<Vector<t_real>>(v);
  const std::shared_ptr<Vector<t_complex>> weights_ptr =
      std::make_shared<Vector<t_complex>>(weights);
  const std::shared_ptr<std::vector<t_int>> image_index_ptr =
      std::make_shared<std::vector<t_int>>(image_index);
  const t_complex I(0, 1);
  const t_int ju_max = std::min(Ju, ftsizeu_);
  const t_int jv_max = std::min(Ju, ftsizev_);
  const std::vector<t_real> samples = kernels::kernel_samples(
      total_samples, [&](const t_real x) { return kernelu(x * ju_max * 0.5); });
  std::set<std::int64_t> nonZeros_set;
  for (t_int m = 0; m < rows; ++m) {
    const t_real u_val = (*u_ptr)(m);
    const t_real v_val = (*v_ptr)(m);
    const t_real k_u = std::floor(u_val - ju_max * 0.5);
    const t_real k_v = std::floor(v_val - jv_max * 0.5);
    for (t_int jv = 1; jv < jv_max + 1; ++jv) {
      const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
      const t_int c_0 = static_cast<t_int>(
          std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
      for (t_int ju = 1; ju < ju_max + 1; ++ju) {
        const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
        const t_int i_0 = static_cast<t_int>(
            std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
        const std::int64_t index =
            static_cast<std::int64_t>(utilities::sub2ind(p, q, ftsizev_, ftsizeu_)) +
            static_cast<std::int64_t>((*image_index_ptr)[m]) *
                static_cast<std::int64_t>(ftsizev_ * ftsizeu_);
        nonZeros_set.insert(index);
      }
    }
  }
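  // Grid indices are promoted to 64 bits and offset by image_index[m] * (ftsizev_ * ftsizeu_):
  // visibility m grids onto the image_index[m]-th plane of the stacked cube, and the stacked
  // index can overflow a 32-bit t_int, hence std::int64_t throughout.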
  const AllToAllSparseVector<std::int64_t> distributor(
      nonZeros_set, ftsizeu_ * ftsizev_,
      static_cast<std::int64_t>(comm.rank()) * static_cast<std::int64_t>(ftsizeu_ * ftsizev_),
      comm);
  std::vector<std::int64_t> nonZeros_vec(nonZeros_set.begin(), nonZeros_set.end());
  std::sort(nonZeros_vec.data(), nonZeros_vec.data() + nonZeros_vec.size());
  SparseVector<t_int, std::int64_t> mapping(static_cast<std::int64_t>(ftsizev_ * ftsizeu_) *
                                            static_cast<std::int64_t>(number_of_images));
  mapping.reserve(nonZeros_vec.size());
  for (t_int index = 0; index < nonZeros_vec.size(); index++)
    mapping.coeffRef(nonZeros_vec[index]) = index;
  PURIFY_LOW_LOG("Non Zero grid locations: {} ", mapping.nonZeros());
  const auto degrid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                       ftsizeu_, ftsizev_, distributor, mapping, image_index_ptr,
                       comm](T &output, const T &input) {
    assert(input.size() == ftsizeu_ * ftsizev_);
    T input_buff;
    distributor.recv_grid(input, input_buff);
    output = T::Zero(u_ptr->size());
#pragma omp parallel for
    for (t_int m = 0; m < rows; ++m) {
      t_complex result = 0;
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const std::int64_t index =
              static_cast<std::int64_t>(utilities::sub2ind(p, q, ftsizev_, ftsizeu_)) +
              static_cast<std::int64_t>((*image_index_ptr)[m]) *
                  static_cast<std::int64_t>(ftsizev_ * ftsizeu_);
          const t_real sign = kernelu_val * kernelv_val;
          result += input_buff(mapping.coeff(index)) * sign;
        }
      }
      output(m) = result;
    }
    output.array() *= (*weights_ptr).array();
  };
  const t_int nonZeros_size = mapping.nonZeros();
  const auto grid = [rows, ju_max, jv_max, I, u_ptr, v_ptr, weights_ptr, samples, total_samples,
                     ftsizeu_, ftsizev_, mapping, nonZeros_size, distributor, image_index_ptr,
                     comm](T &output, const T &input) {
    const t_int N = ftsizeu_ * ftsizev_;
    output = T::Zero(N);
#ifdef PURIFY_OPENMP
    t_int const max_threads = omp_get_max_threads();
#else
    t_int const max_threads = 1;
#endif
    T output_compressed = T::Zero(nonZeros_size * max_threads);
    assert(output.size() == N);
#pragma omp parallel for
    for (t_int m = 0; m < rows; ++m) {
#ifdef PURIFY_OPENMP
      const t_int shift = omp_get_thread_num() * nonZeros_size;
#else
      const t_int shift = 0;
#endif
      const t_real u_val = (*u_ptr)(m);
      const t_real v_val = (*v_ptr)(m);
      const t_real k_u = std::floor(u_val - ju_max * 0.5);
      const t_real k_v = std::floor(v_val - jv_max * 0.5);
      const t_complex vis = input(m) * std::conj((*weights_ptr)(m));
      for (t_int jv = 1; jv < jv_max + 1; ++jv) {
        const t_int p = static_cast<t_int>(utilities::mod(k_v + jv, ftsizev_));
        const t_int c_0 = static_cast<t_int>(
            std::floor(2 * std::abs(v_val - (k_v + jv)) * total_samples / jv_max));
        assert(c_0 < total_samples);
        const t_real kernelv_val = samples[c_0] * (1. - (2 * (p % 2)));
        for (t_int ju = 1; ju < ju_max + 1; ++ju) {
          const t_int q = static_cast<t_int>(utilities::mod(k_u + ju, ftsizeu_));
          const t_int i_0 = static_cast<t_int>(
              std::floor(2 * std::abs(u_val - (k_u + ju)) * total_samples / ju_max));
          assert(i_0 < total_samples);
          const t_real kernelu_val = samples[i_0] * (1. - (2 * (q % 2)));
          const std::int64_t index =
              static_cast<std::int64_t>(utilities::sub2ind(p, q, ftsizev_, ftsizeu_)) +
              static_cast<std::int64_t>((*image_index_ptr)[m]) *
                  static_cast<std::int64_t>(ftsizev_ * ftsizeu_);
          const t_complex result = kernelu_val * kernelv_val * vis;
          output_compressed(mapping.coeff(index) + shift) += result;
        }
      }
    }
    // reduce per-thread buffers into segment 0, then exchange with the owning ranks
    for (t_int m = 1; m < max_threads; m++) {
      const t_int loop_shift = m * nonZeros_size;
      output_compressed.segment(0, nonZeros_size) +=
          output_compressed.segment(loop_shift, nonZeros_size);
    }
    distributor.send_grid(output_compressed.segment(0, nonZeros_size), output);
  };
  return std::make_tuple(degrid, grid);
}
#endif
}  // namespace operators
}  // namespace purify
#endif