PURIFY
Next-generation radio interferometric imaging
algorithms_mpi.cc
Go to the documentation of this file.
1 #include "purify/types.h"
2 #include <array>
3 #include <random>
4 #include "benchmarks/utilities.h"
7 #include "purify/directories.h"
8 #include "purify/distribute.h"
9 #include "purify/logging.h"
11 #include "purify/mpi_utilities.h"
12 #include "purify/operators.h"
13 #include "purify/utilities.h"
15 #include <sopt/imaging_padmm.h>
16 #include <sopt/mpi/communicator.h>
17 #include <sopt/mpi/session.h>
18 #include <sopt/relative_variation.h>
19 #include <sopt/utilities.h>
20 #include <sopt/wavelets.h>
21 #include <sopt/wavelets/sara.h>
22 
23 using namespace purify;
24 
25 class AlgoFixtureMPI : public ::benchmark::Fixture {
26  public:
27  void SetUp(const ::benchmark::State &state) {
28  // Reading image from file and update related quantities
29  bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey);
30 
31  // Generating random uv(w) coverage
32  bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon,
33  newImage, m_image, m_world);
34 
35  bool newKernel = m_kernel != state.range(2);
36 
37  m_kernel = state.range(2);
38  // Create the measurement operator for both distributed algorithms
39  const t_real FoV = 1; // deg
40  const t_real cellsize = FoV / m_imsizex * 60. * 60.;
41  const bool w_term = false;
42  if (state.range(4) == 1) {
43  PURIFY_INFO("Using distributed image MPI algorithm");
44  m_measurements_distribute_image = factory::measurement_operator_factory<Vector<t_complex>>(
46  m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel,
47  m_kernel, w_term);
48  }
49 
50  if (state.range(4) == 2) {
51  PURIFY_INFO("Using distributed grid MPI algorithm");
52  m_measurements_distribute_grid = factory::measurement_operator_factory<Vector<t_complex>>(
54  m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term);
55  }
56 
57  m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file
58  }
59 
60  void TearDown(const ::benchmark::State &state) {}
61 
62  sopt::mpi::Communicator m_world;
63 
64  std::vector<std::tuple<std::string, t_uint>> const m_sara{
65  std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u),
66  std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u),
67  std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)};
68 
69  Image<t_complex> m_image;
70  t_uint m_imsizex;
71  t_uint m_imsizey;
72 
74  t_real m_epsilon;
75  t_real m_sigma;
76  t_uint m_kernel;
77 
78  std::shared_ptr<sopt::LinearTransform<Vector<t_complex>> const> m_measurements_distribute_image;
79  std::shared_ptr<sopt::LinearTransform<Vector<t_complex>> const> m_measurements_distribute_grid;
80  std::shared_ptr<sopt::algorithm::ImagingProximalADMM<t_complex>> m_padmm;
81  std::shared_ptr<sopt::algorithm::ImagingForwardBackward<t_complex>> m_fb;
82 };
83 
84 BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeImage)(benchmark::State &state) {
85  // Create the algorithm - has to be done there to reset the internal state.
86  // If done in the fixture repeats would start at the solution and converge immediately.
87  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
88  factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex);
89 
90  m_padmm = factory::padmm_factory<sopt::algorithm::ImagingProximalADMM<t_complex>>(
91  factory::algo_distribution::mpi_distributed, m_measurements_distribute_image, wavelets,
92  m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true,
93  false, 1e-3, 1e-2, 50);
94 
95  // Benchmark the application of the algorithm
96  while (state.KeepRunning()) {
97  auto start = std::chrono::high_resolution_clock::now();
98  auto result = (*m_padmm)();
99  auto end = std::chrono::high_resolution_clock::now();
100  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
101  state.SetIterationTime(b_utilities::duration(start, end, m_world));
102  }
103 }
104 
105 BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeGrid)(benchmark::State &state) {
106  // Create the algorithm - has to be done there to reset the internal state.
107  // If done in the fixture repeats would start at the solution and converge immediately.
108  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
109  factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex);
110 
111  m_padmm = factory::padmm_factory<sopt::algorithm::ImagingProximalADMM<t_complex>>(
112  factory::algo_distribution::mpi_distributed, m_measurements_distribute_grid, wavelets,
113  m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true,
114  false, 1e-3, 1e-2, 50);
115 
116  // Benchmark the application of the algorithm
117  while (state.KeepRunning()) {
118  auto start = std::chrono::high_resolution_clock::now();
119  auto result = (*m_padmm)();
120  auto end = std::chrono::high_resolution_clock::now();
121  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
122  state.SetIterationTime(b_utilities::duration(start, end, m_world));
123  }
124 }
125 
126 BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeImage)(benchmark::State &state) {
127  // Create the algorithm - has to be done there to reset the internal state.
128  // If done in the fixture repeats would start at the solution and converge immediately.
129  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
130  factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex);
131 
132  t_real const beta = m_sigma * m_sigma;
133  t_real const gamma = 0.0001;
134 
135  m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
136  factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data,
137  m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false,
138  1e-3, 1e-2, 50);
139 
140  // Benchmark the application of the algorithm
141  while (state.KeepRunning()) {
142  auto start = std::chrono::high_resolution_clock::now();
143  auto result = (*m_fb)();
144  auto end = std::chrono::high_resolution_clock::now();
145  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
146  state.SetIterationTime(b_utilities::duration(start, end, m_world));
147  }
148 }
149 
150 BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeGrid)(benchmark::State &state) {
151  // Create the algorithm - has to be done there to reset the internal state.
152  // If done in the fixture repeats would start at the solution and converge immediately.
153  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
154  factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex);
155 
156  t_real const beta = m_sigma * m_sigma;
157  t_real const gamma = 0.0001;
158 
159  m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
160  factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data,
161  m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false,
162  1e-3, 1e-2, 50);
163 
164  // Benchmark the application of the algorithm
165  while (state.KeepRunning()) {
166  auto start = std::chrono::high_resolution_clock::now();
167  auto result = (*m_fb)();
168  auto end = std::chrono::high_resolution_clock::now();
169  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
170  state.SetIterationTime(b_utilities::duration(start, end, m_world));
171  }
172 }
173 
174 #ifdef PURIFY_ONNXRT
175 BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &state) {
176  // Create the algorithm - has to be done there to reset the internal state.
177  // If done in the fixture repeats would start at the solution and converge immediately.
178 
179  // TODO: Wavelets are constructed but not used in the factory method
180  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
181  factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex);
182 
183  t_real const beta = m_sigma * m_sigma;
184  t_real const gamma = 0.0001;
185 
186  std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx";
187 
188  m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
189  factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data,
190  m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false,
191  1e-3, 1e-2, 50, tf_model_path, nondiff_func_type::Denoiser);
192 
193  // Benchmark the application of the algorithm
194  while (state.KeepRunning()) {
195  auto start = std::chrono::high_resolution_clock::now();
196  auto result = (*m_fb)();
197  auto end = std::chrono::high_resolution_clock::now();
198  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
199  state.SetIterationTime(b_utilities::duration(start, end, m_world));
200  }
201 }
202 
203 BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeGrid)(benchmark::State &state) {
204  // Create the algorithm - has to be done there to reset the internal state.
205  // If done in the fixture repeats would start at the solution and converge immediately.
206 
207  // TODO: Wavelets are constructed but not used in the factory method
208  auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
209  factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex);
210 
211  t_real const beta = m_sigma * m_sigma;
212  t_real const gamma = 0.0001;
213 
214  std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx";
215 
216  m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
217  factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data,
218  m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3), true, true, false,
219  1e-3, 1e-2, 50, tf_model_path, nondiff_func_type::Denoiser);
220 
221  // Benchmark the application of the algorithm
222  while (state.KeepRunning()) {
223  auto start = std::chrono::high_resolution_clock::now();
224  auto result = (*m_fb)();
225  auto end = std::chrono::high_resolution_clock::now();
226  std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl;
227  state.SetIterationTime(b_utilities::duration(start, end, m_world));
228  }
229 }
230 
231 BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage)
232  //->Apply(b_utilities::Arguments)
233  ->Args({128, 10000, 4, 10, 1})
234  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 1})
235  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 1})
236  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 1})
237  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 1})
238  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 1})
239  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 1})
240  ->UseManualTime()
241  ->MinTime(60.0)
242  ->MinWarmUpTime(10.0)
243  ->Repetitions(3) //->ReportAggregatesOnly(true)
244  ->Unit(benchmark::kMillisecond);
245 
246 BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeGrid)
247  //->Apply(b_utilities::Arguments)
248  ->Args({128, 10000, 4, 10, 1})
249  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 2})
250  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 2})
251  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 2})
252  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 2})
253  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 2})
254  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 2})
255  ->UseManualTime()
256  ->MinTime(9.0)
257  ->MinWarmUpTime(1.0)
258  ->Repetitions(3) //->ReportAggregatesOnly(true)
259  ->Unit(benchmark::kMillisecond);
260 
261 #endif
262 
263 BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage)
264  //->Apply(b_utilities::Arguments)
265  ->Args({128, 10000, 4, 10, 1})
266  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 1})
267  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 1})
268  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 1})
269  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 1})
270  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 1})
271  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 1})
272  ->UseManualTime()
273  ->MinTime(60.0)
274  ->MinWarmUpTime(10.0)
275  ->Repetitions(3) //->ReportAggregatesOnly(true)
276  ->Unit(benchmark::kMillisecond);
277 
278 BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid)
279  //->Apply(b_utilities::Arguments)
280  ->Args({128, 10000, 4, 10, 2})
281  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 2})
282  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 2})
283  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 2})
284  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 2})
285  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 2})
286  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 2})
287  ->UseManualTime()
288  ->MinTime(60.0)
289  ->MinWarmUpTime(10.0)
290  ->Repetitions(3) //->ReportAggregatesOnly(true)
291  ->Unit(benchmark::kMillisecond);
292 
293 BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage)
294  //->Apply(b_utilities::Arguments)
295  ->Args({128, 10000, 4, 10, 1})
296  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 1})
297  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 1})
298  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 1})
299  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 1})
300  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 1})
301  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 1})
302  ->UseManualTime()
303  ->MinTime(120.0)
304  ->MinWarmUpTime(10.0)
305  ->Repetitions(3) //->ReportAggregatesOnly(true)
306  ->Unit(benchmark::kMillisecond);
307 
308 BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid)
309  //->Apply(b_utilities::Arguments)
310  ->Args({128, 10000, 4, 10, 2})
311  ->Args({1024, static_cast<t_int>(1e6), 4, 10, 2})
312  ->Args({1024, static_cast<t_int>(1e7), 4, 10, 2})
313  ->Args({2048, static_cast<t_int>(1e6), 4, 10, 2})
314  ->Args({2048, static_cast<t_int>(1e7), 4, 10, 2})
315  ->Args({4096, static_cast<t_int>(1e6), 4, 10, 2})
316  ->Args({4096, static_cast<t_int>(1e7), 4, 10, 2})
317  ->UseManualTime()
318  ->MinTime(120.0)
319  ->MinWarmUpTime(10.0)
320  ->Repetitions(3) //->ReportAggregatesOnly(true)
321  ->Unit(benchmark::kMillisecond);
Args({128, 10000, 4, 10, 1}) -> Args({1024, static_cast< t_int >(1e6), 4, 10, 1}) ->Args({1024, static_cast< t_int >(1e7), 4, 10, 1}) ->Args({2048, static_cast< t_int >(1e6), 4, 10, 1}) ->Args({2048, static_cast< t_int >(1e7), 4, 10, 1}) ->Args({4096, static_cast< t_int >(1e6), 4, 10, 1}) ->Args({4096, static_cast< t_int >(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(60.0) ->MinWarmUpTime(10.0) ->Repetitions(3) ->Unit(benchmark::kMillisecond)
BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeImage)(benchmark
std::shared_ptr< sopt::LinearTransform< Vector< t_complex > > const > m_measurements_distribute_image
std::shared_ptr< sopt::algorithm::ImagingForwardBackward< t_complex > > m_fb
std::shared_ptr< sopt::algorithm::ImagingProximalADMM< t_complex > > m_padmm
sopt::mpi::Communicator m_world
Image< t_complex > m_image
void SetUp(const ::benchmark::State &state)
utilities::vis_params m_uv_data
void TearDown(const ::benchmark::State &state)
std::shared_ptr< sopt::LinearTransform< Vector< t_complex > > const > m_measurements_distribute_grid
#define PURIFY_INFO(...)
Definition: logging.h:195
bool updateMeasurements(t_uint newSize, utilities::vis_params &data)
Definition: utilities.cc:54
bool updateImage(t_uint newSize, Image< t_complex > &image, t_uint &sizex, t_uint &sizey)
Definition: utilities.cc:32
double duration(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end)
Definition: utilities.cc:26
std::string models_directory()
Holds TF models.