AA-ALERT RFIm
RFIm.hpp
Go to the documentation of this file.
1 // Copyright 2019 Netherlands eScience Center and Netherlands Institute for Radio Astronomy (ASTRON)
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
19 #include <OpenCLTypes.hpp>
20 #include <Kernel.hpp>
21 #include <InitializeOpenCL.hpp>
22 #include <Observation.hpp>
23 #include <utils.hpp>
24 #include <Statistics.hpp>
25 #include <Timer.hpp>
26 #include <typeinfo>
27 
28 #include <string>
29 #include <vector>
30 #include <iostream>
31 #include <iomanip>
32 #include <map>
33 
34 #pragma once
35 
36 namespace RFIm
37 {
38 
42 class RFImConfig : public isa::OpenCL::KernelConf
43 {
44 public:
45  RFImConfig();
46  ~RFImConfig();
50  bool getSubbandDedispersion() const;
54  bool getConditionalReplacement() const;
58  void setSubbandDedispersion(const bool subband);
62  void setConditionalReplacement(const bool replacement);
66  std::string print() const;
67 
68 private:
69  bool subbandDedispersion;
70  bool conditionalReplacement;
71 };
72 
77 {
80 };
81 
86 {
89 };
90 
95 {
98 };
99 
100 using RFImConfigurations = std::map<std::string, std::map<unsigned int, std::map<float, RFImConfig *> *> *>;
101 
108 void readRFImConfig(RFImConfigurations & configurations, const std::string & filename);
109 
113 void readSigmaSteps(const std::string &inputFilename, std::vector<float> &steps);
114 
129 template<typename DataType>
130 std::uint64_t timeDomainSigmaCut(const bool subbandDedispersion, const DataOrdering & ordering, const ReplacementStrategy & replacement, const AstroData::Observation & observation, std::vector<DataType> & time_series, const float sigmaCut, const unsigned int padding);
131 
145 template<typename DataType>
146 std::string * getTimeDomainSigmaCutOpenCL(const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const float sigmaCut, const unsigned int padding);
147 
160 template<typename DataType>
161 std::string * getTimeDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig & config, const std::string & dataTypeName, const AstroData::Observation & observation, const float sigmaCut, const unsigned int padding);
162 
179 template<typename DataType>
180 void testTimeDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, isa::OpenCL::OpenCLRunTime & openCLRunTime, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding);
181 
197 template<typename DataType>
198 void tuneTimeDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters & parameters, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding);
199 
215 template<typename DataType>
216 std::uint64_t frequencyDomainSigmaCut(const bool subbandDedispersion, const DataOrdering & ordering, const ReplacementStrategy & replacement, const AstroData::Observation & observation, std::vector<DataType> & time_series, const unsigned int nrBins, const float sigmaCut, const unsigned int padding);
217 
232 template<typename DataType>
233 std::string * getFrequencyDomainSigmaCutOpenCL(const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding);
234 
248 template<typename DataType>
249 std::string * getFrequencyDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig & config, const std::string & dataTypeName, const AstroData::Observation & observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding);
250 
268 template<typename DataType>
269 void testFrequencyDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, isa::OpenCL::OpenCLRunTime & openCLRunTime, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding);
270 
287 template<typename DataType>
288 void tuneFrequencyDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters & parameters, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding);
289 
290 } // RFIm
291 
293 {
294  return subbandDedispersion;
295 }
296 
298 {
299  return conditionalReplacement;
300 }
301 
302 inline void RFIm::RFImConfig::setSubbandDedispersion(const bool subband)
303 {
304  subbandDedispersion = subband;
305 }
306 
307 inline void RFIm::RFImConfig::setConditionalReplacement(const bool replacement)
308 {
309  conditionalReplacement = replacement;
310 }
311 
312 inline std::string RFIm::RFImConfig::print() const
313 {
314  return std::to_string(subbandDedispersion) + " " + std::to_string(conditionalReplacement) + " " + isa::OpenCL::KernelConf::print();
315 }
316 
317 template<typename DataType>
318 std::uint64_t RFIm::timeDomainSigmaCut(const bool subbandDedispersion, const DataOrdering & ordering, const ReplacementStrategy & replacement, const AstroData::Observation & observation, std::vector<DataType> & time_series, const float sigmaCut, const unsigned int padding)
319 {
320  std::uint64_t replacedSamples = 0;
321  for ( unsigned int beam = 0; beam < observation.getNrBeams(); beam++ )
322  {
323  if ( ordering == FrequencyTime )
324  {
325  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
326  {
327  isa::utils::Statistics<DataType> statistics;
328  for ( unsigned int sample_id = 0; sample_id < observation.getNrSamplesPerDispersedBatch(subbandDedispersion); sample_id++ )
329  {
330  statistics.addElement(time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id));
331  }
332  for ( unsigned int sample_id = 0; sample_id < observation.getNrSamplesPerDispersedBatch(subbandDedispersion); sample_id++ )
333  {
334  DataType sample_value = time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id);
335  if ( sample_value > (statistics.getMean() + (sigmaCut * statistics.getStandardDeviation())) )
336  {
337  replacedSamples++;
338  if ( replacement == ReplaceWithMean )
339  {
340  time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id) = static_cast<DataType>(statistics.getMean());
341  }
342  }
343  }
344  }
345  }
346  }
347  return replacedSamples;
348 }
349 
350 template<typename DataType>
351 std::string * RFIm::getTimeDomainSigmaCutOpenCL(const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const float sigmaCut, const unsigned int padding)
352 {
353  if ( (ordering == FrequencyTime) && (replacement == ReplaceWithMean) )
354  {
355  return getTimeDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean<DataType>(config, dataTypeName, observation, sigmaCut, padding);
356  }
357  return new std::string();
358 }
359 
360 template<typename DataType>
361 std::string * RFIm::getTimeDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig & config, const std::string & dataTypeName, const AstroData::Observation & observation, const float sigmaCut, const unsigned int padding)
362 {
363  std::string *code = new std::string();
364  // Kernel template
365  *code = "__kernel void timeDomainSigmaCut(__global " + dataTypeName + " * const restrict time_series) {\n"
366  + config.getIntType() + " threshold = 0;\n"
367  "float delta = 0.0f;\n"
368  "float mean = 0.0f;\n"
369  "float sigma_cut = 0.0f;\n"
370  + dataTypeName + " sample_value;\n"
371  "__local float reductionCOU[" + std::to_string(config.getNrThreadsD0()) + "];\n"
372  "__local float reductionMEA[" + std::to_string(config.getNrThreadsD0()) + "];\n"
373  "__local float reductionVAR[" + std::to_string(config.getNrThreadsD0()) + "];\n"
374  "<%LOCAL_VARIABLES%>"
375  "\n"
376  "// Compute mean and standard deviation\n"
377  "for ( " + config.getIntType() + " sample_id = get_local_id(0) + " + std::to_string(config.getNrThreadsD0() * config.getNrItemsD0()) + "; sample_id < " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) + "; sample_id += " + std::to_string(config.getNrThreadsD0() * config.getNrItemsD0()) + " ) "
378  "{\n"
379  "<%LOCAL_COMPUTE%>"
380  "}\n"
381  "// Local reduction (if necessary)\n"
382  "<%LOCAL_REDUCE%>"
383  "reductionCOU[get_local_id(0)] = counter_0;\n"
384  "reductionMEA[get_local_id(0)] = mean_0;\n"
385  "reductionVAR[get_local_id(0)] = variance_0;\n"
386  "barrier(CLK_LOCAL_MEM_FENCE);\n"
387  "// Global reduction\n"
388  "threshold = " + std::to_string(config.getNrThreadsD0() / 2) + ";\n"
389  "for (" + config.getIntType() + " sample_id = get_local_id(0); threshold > 0; threshold /= 2 ) {\n"
390  "if ( (sample_id < threshold)) {\n"
391  "delta = reductionMEA[sample_id + threshold] - mean_0;\n"
392  "counter_0 += reductionCOU[sample_id + threshold];\n"
393  "mean_0 = ((reductionCOU[sample_id] * mean_0) + (reductionCOU[sample_id + threshold] * reductionMEA[sample_id + threshold])) / counter_0;\n"
394  "variance_0 += reductionVAR[sample_id + threshold] + ((delta * delta) * ((reductionCOU[sample_id] * reductionCOU[sample_id + threshold]) / counter_0));\n"
395  "reductionCOU[sample_id] = counter_0;\n"
396  "reductionMEA[sample_id] = mean_0;\n"
397  "reductionVAR[sample_id] = variance_0;\n"
398  "}\n"
399  "barrier(CLK_LOCAL_MEM_FENCE);\n"
400  "}\n"
401  "mean = reductionMEA[0];\n"
402  "sigma_cut = (" + std::to_string(sigmaCut) + " * native_sqrt(reductionVAR[0] * " + std::to_string(1.0f/(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion()) - 1)) + "f));\n"
403  "// Replace samples over the sigma cut with mean\n"
404  "for (" + config.getIntType() + " sample_id = get_local_id(0); sample_id < " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) + "; sample_id += " + std::to_string(config.getNrThreadsD0() * config.getNrItemsD0()) + " ) "
405  "{\n"
406  "<%REPLACE%>"
407  "}\n"
408  "}\n";
409  // Declaration of per thread variables
410  std::string localVariablesTemplate = "float counter_<%ITEM_NUMBER%> = 1.0f;\n"
411  "float variance_<%ITEM_NUMBER%> = 0.0f;\n"
412  "float mean_<%ITEM_NUMBER%> = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + get_local_id(0) + <%ITEM_OFFSET%>];\n";
413  // Local compute
414  // Version without boundary checks
415  std::string localComputeNoCheckTemplate = "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + get_local_id(0) + <%ITEM_OFFSET%>];\n"
416  "counter_<%ITEM_NUMBER%> += 1.0f;\n"
417  "delta = sample_value - mean_<%ITEM_NUMBER%>;\n"
418  "mean_<%ITEM_NUMBER%> += delta / counter_<%ITEM_NUMBER%>;\n"
419  "variance_<%ITEM_NUMBER%> += delta * (sample_value - mean_<%ITEM_NUMBER%>);\n";
420  // Version with boundary checks
421  std::string localComputeCheckTemplate = "if ( sample_id + <%ITEM_OFFSET%> < " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) + " ) {\n"
422  "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + get_local_id(0) + <%ITEM_OFFSET%>];\n"
423  "counter_<%ITEM_NUMBER%> += 1.0f;\n"
424  "delta = sample_value - mean_<%ITEM_NUMBER%>;\n"
425  "mean_<%ITEM_NUMBER%> += delta / counter_<%ITEM_NUMBER%>;\n"
426  "variance_<%ITEM_NUMBER%> += delta * (sample_value - mean_<%ITEM_NUMBER%>);\n"
427  "}\n";
428  // In-thread reduction
429  std::string localReduceTemplate = "delta = mean_<%ITEM_NUMBER%> - mean_0;\n"
430  "counter_0 += counter_<%ITEM_NUMBER%>;\n"
431  "mean_0 = (((counter_0 - counter_<%ITEM_NUMBER%>) * mean_0) + (counter_<%ITEM_NUMBER%> * mean_<%ITEM_NUMBER%>)) / counter_0;\n"
432  "variance_0 += variance_<%ITEM_NUMBER%> + ((delta * delta) * (((counter_0 - counter_<%ITEM_NUMBER%>) * counter_<%ITEM_NUMBER%>) / counter_0));\n";
433  std::string replaceConditionTemplate = "if ( time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + sample_id + <%ITEM_OFFSET%>] > (mean + sigma_cut) ) {\n";
434  if ( typeid(DataType) == typeid(float) )
435  {
436  replaceConditionTemplate += "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + sample_id + <%ITEM_OFFSET%>] = mean;\n";
437  }
438  else
439  {
440  replaceConditionTemplate += "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + sample_id + <%ITEM_OFFSET%>] = convert_" + dataTypeName + "(mean);\n";
441  }
442  replaceConditionTemplate += "}\n";
443  std::string replaceTemplate = "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + sample_id + <%ITEM_OFFSET%>];\n"
444  "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_global_id(1) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + sample_id + <%ITEM_OFFSET%>] = (sample_value * (convert_" + dataTypeName + "(sample_value < (mean + sigma_cut)))) + (mean * (convert_" + dataTypeName + "(sample_value > mean + sigma_cut)));\n";
445  // End of kernel template
446  // Code generation
447  std::string localVariables;
448  std::string localCompute;
449  std::string localReduce;
450  std::string replace;
451  for ( unsigned int item = 0; item < config.getNrItemsD0(); item++ )
452  {
453  std::string * temp;
454  std::string itemString = std::to_string(item);
455  std::string itemOffsetString = std::to_string(item * config.getNrThreadsD0());
456  temp = isa::utils::replace(&localVariablesTemplate, "<%ITEM_NUMBER%>", itemString);
457  if ( item == 0 )
458  {
459  temp = isa::utils::replace(temp, " + <%ITEM_OFFSET%>", std::string(), true);
460  }
461  else
462  {
463  temp = isa::utils::replace(temp, "<%ITEM_OFFSET%>", itemOffsetString, true);
464  }
465  localVariables.append(*temp);
466  delete temp;
467  if ( (observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion()) % (config.getNrThreadsD0() * config.getNrItemsD0())) == 0 )
468  {
469  temp = isa::utils::replace(&localComputeNoCheckTemplate, "<%ITEM_NUMBER%>", itemString);
470  }
471  else
472  {
473  temp = isa::utils::replace(&localComputeCheckTemplate, "<%ITEM_NUMBER%>", itemString);
474  }
475  if ( item == 0 )
476  {
477  temp = isa::utils::replace(temp, " + <%ITEM_OFFSET%>", std::string(), true);
478  }
479  else
480  {
481  temp = isa::utils::replace(temp, "<%ITEM_OFFSET%>", itemOffsetString, true);
482  }
483  localCompute.append(*temp);
484  delete temp;
485  if ( item > 0 )
486  {
487  temp = isa::utils::replace(&localReduceTemplate, "<%ITEM_NUMBER%>", itemString);
488  localReduce.append(*temp);
489  delete temp;
490  }
491  if ( config.getConditionalReplacement() )
492  {
493  temp = isa::utils::replace(&replaceConditionTemplate, "<%ITEM_NUMBER%>", itemString);
494  }
495  else
496  {
497  temp = isa::utils::replace(&replaceTemplate, "<%ITEM_NUMBER%>", itemString);
498  }
499  if ( item == 0 )
500  {
501  temp = isa::utils::replace(temp, " + <%ITEM_OFFSET%>", std::string(), true);
502  }
503  else
504  {
505  temp = isa::utils::replace(temp, "<%ITEM_OFFSET%>", itemOffsetString, true);
506  }
507  replace.append(*temp);
508  }
509  code = isa::utils::replace(code, "<%LOCAL_VARIABLES%>", localVariables, true);
510  code = isa::utils::replace(code, "<%LOCAL_COMPUTE%>", localCompute, true);
511  code = isa::utils::replace(code, "<%LOCAL_REDUCE%>", localReduce, true);
512  code = isa::utils::replace(code, "<%REPLACE%>", replace, true);
513  return code;
514 }
515 
516 template<typename DataType>
517 void RFIm::testTimeDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, isa::OpenCL::OpenCLRunTime & openCLRunTime, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding)
518 {
519  std::uint64_t wrongSamples = 0;
520  std::uint64_t replacedSamples = 0;
521  std::vector<DataType> test_time_series, control_time_series;
522  test_time_series = time_series;
523  control_time_series = time_series;
524  cl::Buffer device_time_series;
525  cl::Kernel * kernel = nullptr;
526  // Generate OpenCL code
527  std::string * code = getTimeDomainSigmaCutOpenCL<DataType>(config, ordering, replacement, dataTypeName, observation, sigmaCut, padding);
528  if ( printCode )
529  {
530  std::cout << std::endl;
531  std::cout << *code << std::endl;
532  std::cout << std::endl;
533  delete code;
534  return;
535  }
536  // Execute control code
537  replacedSamples = timeDomainSigmaCut(config.getSubbandDedispersion(), ordering, replacement, observation, control_time_series, sigmaCut, padding);
538  // Execute OpenCL code
539  try
540  {
541  device_time_series = cl::Buffer(*(openCLRunTime.context), CL_MEM_READ_WRITE, test_time_series.size() * sizeof(DataType), 0, 0);
542  }
543  catch ( const cl::Error & err )
544  {
545  std::cerr << "OpenCL device memory allocation error: " << std::to_string(err.err()) << "." << std::endl;
546  throw err;
547  }
548  try
549  {
550  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, test_time_series.size() * sizeof(DataType), reinterpret_cast<void *>(test_time_series.data()));
551  }
552  catch( const cl::Error & err )
553  {
554  std::cerr << "OpenCL transfer H2D error: " << std::to_string(err.err()) << "." << std::endl;
555  }
556  try
557  {
558  kernel = isa::OpenCL::compile("timeDomainSigmaCut", *code, "-cl-mad-enable -Werror", *(openCLRunTime.context), openCLRunTime.devices->at(clDeviceID));
559  }
560  catch ( const isa::OpenCL::OpenCLError & err )
561  {
562  std::cerr << err.what() << std::endl;
563  delete code;
564  }
565  delete code;
566  try
567  {
568  cl::NDRange global, local;
569  global = cl::NDRange(config.getNrThreadsD0(), observation.getNrChannels(), observation.getNrBeams());
570  local = cl::NDRange(config.getNrThreadsD0(), 1, 1);
571  kernel->setArg(0, device_time_series);
572  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local);
573  openCLRunTime.queues->at(clDeviceID)[0].enqueueReadBuffer(device_time_series, CL_TRUE, 0, test_time_series.size() * sizeof(DataType), reinterpret_cast<void *>(test_time_series.data()));
574  }
575  catch ( const cl::Error & err )
576  {
577  std::cerr << "OpenCL kernel execution error: " << std::to_string(err.err()) << "." << std::endl;
578  delete kernel;
579  }
580  delete kernel;
581  // Compare results
582  for ( unsigned int beam = 0; beam < observation.getNrBeams(); beam++ )
583  {
584  if ( ordering == FrequencyTime )
585  {
586  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
587  {
588  for ( unsigned int sample_id = 0; sample_id < observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion()); sample_id++ )
589  {
590  if ( !isa::utils::same<double>(test_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id), control_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id), 1.1) )
591  {
592  wrongSamples++;
593  }
594  if ( printResults )
595  {
596  std::cout << static_cast<double>(test_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id)) << " " << static_cast<double>(control_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id)) << " ; ";
597  }
598  }
599  if ( printResults )
600  {
601  std::cout << std::endl;
602  }
603  }
604  }
605  }
606  // Print test results
607  if ( wrongSamples > 0 )
608  {
609  std::cout << std::fixed;
610  std::cout << "Wrong samples: " << wrongSamples << " (" << (wrongSamples * 100.0) / (static_cast<std::uint64_t>(observation.getNrBeams()) * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) << "%)." << std::endl;
611  }
612  else
613  {
614  std::cout << "TEST PASSED." << std::endl;
615  }
616 }
617 
618 template<typename DataType>
619 void RFIm::tuneTimeDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters & parameters, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding)
620 {
621  bool initializeDevice = true;
622  isa::utils::Timer timer;
623  double bestTime = std::numeric_limits<double>::max();
624  RFImConfig bestConfig;
625  std::vector<RFImConfig> configurations;
626  isa::OpenCL::OpenCLRunTime openCLRunTime;
627  cl::Event clEvent;
628  cl::Buffer device_time_series;
629  // Generate valid configurations
630  for ( unsigned int threads = parameters.getMinThreads(); threads <= parameters.getMaxThreads(); threads *= 2 )
631  {
632  for ( unsigned int items = 1; (items * 3) + 8 <= parameters.getMaxItems(); items++ )
633  {
634  if ( (threads * items) > observation.getNrSamplesPerDispersedBatch(subbandDedispersion) )
635  {
636  break;
637  }
638  RFImConfig baseConfig, tempConfig;
639  baseConfig.setSubbandDedispersion(subbandDedispersion);
640  baseConfig.setNrThreadsD0(threads);
641  baseConfig.setNrItemsD0(items);
642  // conditional = 0, int = 0
643  tempConfig = baseConfig;
644  tempConfig.setConditionalReplacement(false);
645  tempConfig.setIntType(0);
646  configurations.push_back(tempConfig);
647  // conditional = 0, int = 1
648  tempConfig = baseConfig;
649  tempConfig.setConditionalReplacement(false);
650  tempConfig.setIntType(1);
651  configurations.push_back(tempConfig);
652  // conditional = 1, int = 0
653  tempConfig = baseConfig;
654  tempConfig.setConditionalReplacement(true);
655  tempConfig.setIntType(0);
656  configurations.push_back(tempConfig);
657  // conditional = 1, int = 1
658  tempConfig = baseConfig;
659  tempConfig.setConditionalReplacement(true);
660  tempConfig.setIntType(1);
661  configurations.push_back(tempConfig);
662  }
663  }
664  // Test performance of each configuration
665  std::cout << std::fixed;
666  if ( !parameters.getBestMode() )
667  {
668  std::cout << std::endl << "# nrBeams nrChannels nrSamplesPerDispersedBatch sigma *configuration* time stdDeviation COV" << std::endl << std::endl;
669  }
670  for ( auto configuration = configurations.begin(); configuration != configurations.end(); ++configuration )
671  {
672  cl::Kernel * kernel = nullptr;
673  // Generate kernel
674  std::string * code = getTimeDomainSigmaCutOpenCL<DataType>(*configuration, ordering, replacement, dataTypeName, observation, sigmaCut, padding);
675  if ( initializeDevice )
676  {
677  isa::OpenCL::initializeOpenCL(clPlatformID, 1, openCLRunTime);
678  try
679  {
680  device_time_series = cl::Buffer(*(openCLRunTime.context), CL_MEM_READ_WRITE, time_series.size() * sizeof(DataType), 0, 0);
681  }
682  catch ( const cl::Error & err )
683  {
684  std::cerr << "OpenCL device memory allocation error: " << std::to_string(err.err()) << "." << std::endl;
685  throw err;
686  }
687  initializeDevice = false;
688  }
689  try
690  {
691  kernel = isa::OpenCL::compile("timeDomainSigmaCut", *code, "-cl-mad-enable -Werror", *(openCLRunTime.context), openCLRunTime.devices->at(clDeviceID));
692  }
693  catch ( const isa::OpenCL::OpenCLError & err )
694  {
695  std::cerr << err.what() << std::endl;
696  delete code;
697  }
698  delete code;
699  try
700  {
701  cl::NDRange global, local;
702  global = cl::NDRange((*configuration).getNrThreadsD0(), observation.getNrChannels(), observation.getNrBeams());
703  local = cl::NDRange((*configuration).getNrThreadsD0(), 1, 1);
704  kernel->setArg(0, device_time_series);
705  // Warm-up run
706  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, time_series.size() * sizeof(DataType), reinterpret_cast<const void *>(time_series.data()), nullptr, &clEvent);
707  clEvent.wait();
708  openCLRunTime.queues->at(clDeviceID)[0].finish();
709  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local, nullptr, &clEvent);
710  clEvent.wait();
711  // Tuning runs
712  for ( unsigned int iteration = 0; iteration < parameters.getNrIterations(); iteration++ )
713  {
714  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, time_series.size() * sizeof(DataType), reinterpret_cast<const void *>(time_series.data()), nullptr, &clEvent);
715  clEvent.wait();
716  openCLRunTime.queues->at(clDeviceID)[0].finish();
717  timer.start();
718  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local, nullptr, &clEvent);
719  clEvent.wait();
720  timer.stop();
721  }
722  }
723  catch ( const cl::Error & err )
724  {
725  std::cerr << "OpenCL kernel error during tuning: " << std::to_string(err.err()) << "." << std::endl;
726  delete kernel;
727  if (err.err() == -4 || err.err() == -61)
728  {
729  throw err;
730  }
731  initializeDevice = true;
732  break;
733  }
734  delete kernel;
735  if ( timer.getAverageTime() < bestTime )
736  {
737  bestTime = timer.getAverageTime();
738  bestConfig = *configuration;
739  }
740  if ( !parameters.getBestMode() )
741  {
742  std::cout << observation.getNrBeams() << " " << observation.getNrChannels() << " ";
743  std::cout << observation.getNrSamplesPerDispersedBatch(subbandDedispersion) << " ";
744  std::cout.precision(2);
745  std::cout << sigmaCut << " ";
746  std::cout << (*configuration).print() << " ";
747  std::cout.precision(6);
748  std::cout << timer.getAverageTime() << " " << timer.getStandardDeviation() << " " << timer.getCoefficientOfVariation();
749  std::cout << std::endl;
750  }
751  timer.reset();
752  }
753  if ( parameters.getBestMode() )
754  {
755  std::cout.precision(2);
756  std::cout << observation.getNrSamplesPerDispersedBatch(subbandDedispersion) << " " << sigmaCut << " ";
757  std::cout << bestConfig.print() << std::endl;
758  }
759  else
760  {
761  std::cout << std::endl;
762  }
763 }
764 
765 template<typename DataType>
766 std::uint64_t RFIm::frequencyDomainSigmaCut(const bool subbandDedispersion, const DataOrdering & ordering, const ReplacementStrategy & replacement, const AstroData::Observation & observation, std::vector<DataType> & time_series, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
767 {
768  std::uint64_t replacedSamples = 0;
769  unsigned int nrChannelsPerBin = static_cast<unsigned int>(std::ceil(observation.getNrChannels() / nrBins));
770  for ( unsigned int beam = 0; beam < observation.getNrBeams(); beam++ )
771  {
772  if ( ordering == FrequencyTime )
773  {
774  for ( unsigned int sample_id = 0; sample_id < observation.getNrSamplesPerDispersedBatch(subbandDedispersion); sample_id++ )
775  {
776  isa::utils::Statistics<DataType> statistics_corrected;
777  isa::utils::Statistics<DataType> * local_statistics = new isa::utils::Statistics<DataType> [nrBins];
778 
779  // Compute the statistics for a bin
780  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
781  {
782  local_statistics[channel / nrChannelsPerBin].addElement(time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id));
783  }
784 
785  // Compute the global statistics subtracting to each channel the mean of the bin it is part of
786  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
787  {
788  statistics_corrected.addElement(time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id) - local_statistics[channel / nrChannelsPerBin].getMean());
789  }
790 
791  // Flag and replace
792  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
793  {
794  DataType sample_value = time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id) - local_statistics[channel / nrChannelsPerBin].getMean();
795  if ( sample_value > statistics_corrected.getMean() + (sigmaCut * statistics_corrected.getStandardDeviation()) )
796  {
797  replacedSamples++;
798  if ( replacement == ReplaceWithMean )
799  {
800  time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(subbandDedispersion, padding / sizeof(DataType))) + sample_id) = static_cast<DataType>(local_statistics[channel / nrChannelsPerBin].getMean());
801  }
802  }
803  }
804  delete [] local_statistics;
805  }
806  }
807  }
808  return replacedSamples;
809 }
810 
811 template<typename DataType>
812 std::string * RFIm::getFrequencyDomainSigmaCutOpenCL(const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
813 {
814  if ( (ordering == FrequencyTime) && (replacement == ReplaceWithMean) )
815  {
816  return getFrequencyDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean<DataType>(config, dataTypeName, observation, nrBins, sigmaCut, padding);
817  }
818  return new std::string();
819 }
820 
821 template<typename DataType>
822 std::string * RFIm::getFrequencyDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig & config, const std::string & dataTypeName, const AstroData::Observation & observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
823 {
824  unsigned int binSize = observation.getNrChannels() / nrBins;
825  std::string *code = new std::string();
826  // Kernel template
827  *code = "__kernel void frequencyDomainSigmaCut(__global " + dataTypeName + " * const restrict time_series) {\n"
828  "float delta = 0.0f;\n"
829  "float sigma_cut = 0.0f;\n"
830  + dataTypeName + " sample_value;\n"
831  "<%LOCAL_VARIABLES%>"
832  "<%BIN_VARIABLES%>"
833  "\n"
834  "// Stop unnecessary threads\n"
835  "if ( ((get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + get_local_id(0)) >= " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) + " )\n"
836  "{\n"
837  "return;\n"
838  "}\n"
839  "// Compute mean for each bin\n"
840  "<%COMPUTE_MEAN_BIN%>"
841  "// Compute mean and standard deviation corrected for local bin\n"
842  "<%LOCAL_COMPUTE%>"
843  "// Local reduction (if necessary)\n"
844  "<%LOCAL_REDUCE%>"
845  "sigma_cut = (" + std::to_string(sigmaCut) + " * native_sqrt(variance_0 * " + std::to_string(1.0f/(observation.getNrChannels() - 1)) + "f));\n"
846  "// Replace samples over the sigma cut with mean\n"
847  "<%REPLACE%>"
848  "}\n";
849  // Declaration of per thread variables
850  std::string localVariablesTemplate = "float counter_<%ITEM_NUMBER%> = 1.0f;\n"
851  "float variance_<%ITEM_NUMBER%> = 0.0f;\n"
852  "float mean_<%ITEM_NUMBER%> = 0.0f;\n";
853  std::string binVariablesTemplate = "float mean_bin_<%BIN_NUMBER%> = 0.0f;\n";
854  // Compute mean for a single bin
855  std::string binComputeMeanTemplate = "mean_bin_<%BIN_NUMBER%> = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((<%BIN_NUMBER%> * " + std::to_string(binSize) + ") * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + get_local_id(0)];\n"
856  "for ( " + config.getIntType() + " channel_id = (<%BIN_NUMBER%> * " + std::to_string(binSize) + ") + 1; channel_id < (<%BIN_NUMBER%> + 1) * " + std::to_string(binSize) + "; channel_id++ )\n"
857  "{\n"
858  "mean_bin_<%BIN_NUMBER%> += time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (channel_id * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)];\n"
859  "}\n"
860  "mean_bin_<%BIN_NUMBER%> /= " + std::to_string(binSize) + ";\n";
861  // Local compute
862  // Version without boundary checks
863  std::string localComputeNoCheckTemplate = "for ( " + config.getIntType() + " channel_id = ((<%BIN_NUMBER%> * " + std::to_string(binSize) + ") + <%ITEM_NUMBER%>); channel_id < (<%BIN_NUMBER%> + 1) * " + std::to_string(binSize) + "; channel_id += " + std::to_string(config.getNrItemsD1()) + " )\n"
864  "{\n"
865  "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] - mean_bin_<%BIN_NUMBER%>;\n"
866  "counter_<%ITEM_NUMBER%> += 1.0f;\n"
867  "delta = sample_value - mean_<%ITEM_NUMBER%>;\n"
868  "mean_<%ITEM_NUMBER%> += delta / counter_<%ITEM_NUMBER%>;\n"
869  "variance_<%ITEM_NUMBER%> += delta * (sample_value - mean_<%ITEM_NUMBER%>);\n"
870  "}\n";
871  // Version with boundary checks
872  std::string localComputeCheckTemplate = "for ( " + config.getIntType() + " channel_id = ((<%BIN_NUMBER%> * " + std::to_string(binSize) + ") + <%ITEM_NUMBER%>); channel_id < (<%BIN_NUMBER%> + 1) * " + std::to_string(binSize) + "; channel_id += " + std::to_string(config.getNrItemsD1()) + " )\n"
873  "{\n"
874  "if ( channel_id + <%ITEM_NUMBER%> < " + std::to_string(binSize) + " ) {\n"
875  "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] - mean_bin_<%BIN_NUMBER%>;\n"
876  "counter_<%ITEM_NUMBER%> += 1.0f;\n"
877  "delta = sample_value - mean_<%ITEM_NUMBER%>;\n"
878  "mean_<%ITEM_NUMBER%> += delta / counter_<%ITEM_NUMBER%>;\n"
879  "variance_<%ITEM_NUMBER%> += delta * (sample_value - mean_<%ITEM_NUMBER%>);\n"
880  "}\n"
881  "}\n";
882  // In-thread reduction
883  std::string localReduceTemplate = "delta = mean_<%ITEM_NUMBER%> - mean_0;\n"
884  "counter_0 += counter_<%ITEM_NUMBER%>;\n"
885  "mean_0 = (((counter_0 - counter_<%ITEM_NUMBER%>) * mean_0) + (counter_<%ITEM_NUMBER%> * mean_<%ITEM_NUMBER%>)) / counter_0;\n"
886  "variance_0 += variance_<%ITEM_NUMBER%> + ((delta * delta) * (((counter_0 - counter_<%ITEM_NUMBER%>) * counter_<%ITEM_NUMBER%>) / counter_0));\n";
887  // Replace with boundary checks
888  std::string replaceConditionTemplate = "for ( " + config.getIntType() + " channel_id = ((<%BIN_NUMBER%> * " + std::to_string(binSize) + ") + <%ITEM_NUMBER%>); channel_id < (<%BIN_NUMBER%> + 1) * " + std::to_string(binSize) + "; channel_id += " + std::to_string(config.getNrItemsD1()) + " )\n"
889  "{\n"
890  "if ( (time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] - mean_bin_<%BIN_NUMBER%>) > (mean_0 + sigma_cut) ) {\n";
891  if ( typeid(DataType) == typeid(float) )
892  {
893  replaceConditionTemplate += "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] = mean_bin_<%BIN_NUMBER%>;\n";
894  }
895  else
896  {
897  replaceConditionTemplate += "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] = convert_" + dataTypeName + "(mean_bin_<%BIN_NUMBER%>);\n";
898  }
899  replaceConditionTemplate += "}\n"
900  "}\n";
901  // Replace without boundary checks
902  std::string replaceTemplate = "for ( " + config.getIntType() + " channel_id = ((<%BIN_NUMBER%> * " + std::to_string(binSize) + ") + <%ITEM_NUMBER%>); channel_id < (<%BIN_NUMBER%> + 1) * " + std::to_string(binSize) + "; channel_id += " + std::to_string(config.getNrItemsD1()) + " )\n"
903  "{\n"
904  "sample_value = time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)];\n"
905  "time_series[(get_global_id(2) * " + std::to_string(observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + ((channel_id + <%ITEM_NUMBER%>) * " + std::to_string(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + ") + (get_group_id(0) * " + std::to_string(config.getNrThreadsD0()) + ") + (channel_id + <%ITEM_NUMBER%>)] = ((sample_value) * (convert_" + dataTypeName + "( (sample_value - mean_bin_<%BIN_NUMBER%>) <= (mean_0 + sigma_cut) ))) + (mean_bin_<%BIN_NUMBER%> * (convert_" + dataTypeName + "( (sample_value - mean_bin_<%BIN_NUMBER%>) > (mean_0 + sigma_cut) )));\n"
906  "}\n";
907  // End of kernel template
908  // Code generation
909  std::string localVariables;
910  std::string binVariables;
911  std::string binComputeMean;
912  std::string localCompute;
913  std::string localReduce;
914  std::string replace;
915  for ( unsigned int item = 0; item < config.getNrItemsD1(); item++ )
916  {
917  std::string * temp;
918  std::string itemString = std::to_string(item);
919  temp = isa::utils::replace(&localVariablesTemplate, "<%ITEM_NUMBER%>", itemString);
920  localVariables.append(*temp);
921  delete temp;
922  if ( item > 0 )
923  {
924  temp = isa::utils::replace(&localReduceTemplate, "<%ITEM_NUMBER%>", itemString);
925  localReduce.append(*temp);
926  delete temp;
927  }
928  }
929  for ( unsigned int bin = 0; bin < nrBins; bin++ )
930  {
931  std::string * temp;
932  std::string binString = std::to_string(bin);
933  temp = isa::utils::replace(&binVariablesTemplate, "<%BIN_NUMBER%>", binString);
934  binVariables.append(*temp);
935  delete temp;
936  for ( unsigned int item = 0; item < config.getNrItemsD1(); item++ )
937  {
938  std::string itemString = std::to_string(item);
939 
940  temp = isa::utils::replace(&binComputeMeanTemplate, "<%ITEM_NUMBER%>", itemString);
941  if ( item == 0 )
942  {
943  temp = isa::utils::replace(temp, " + 0", std::string(), true);
944  }
945  temp = isa::utils::replace(temp, "<%BIN_NUMBER%>", binString, true);
946  binComputeMean.append(*temp);
947  delete temp;
948 
949  if ( (binSize % config.getNrItemsD1()) == 0 )
950  {
951  temp = isa::utils::replace(&localComputeNoCheckTemplate, "<%ITEM_NUMBER%>", itemString);
952  }
953  else
954  {
955  temp = isa::utils::replace(&localComputeCheckTemplate, "<%ITEM_NUMBER%>", itemString);
956  }
957  if ( item == 0 )
958  {
959  temp = isa::utils::replace(temp, " + 0", std::string(), true);
960  }
961  temp = isa::utils::replace(temp, "<%BIN_NUMBER%>", binString, true);
962  localCompute.append(*temp);
963  delete temp;
964  if ( config.getConditionalReplacement() )
965  {
966  temp = isa::utils::replace(&replaceConditionTemplate, "<%ITEM_NUMBER%>", itemString);
967  }
968  else
969  {
970  temp = isa::utils::replace(&replaceTemplate, "<%ITEM_NUMBER%>", itemString);
971  }
972  if ( item == 0 )
973  {
974  temp = isa::utils::replace(temp, " + 0", std::string(), true);
975  }
976  temp = isa::utils::replace(temp, "<%BIN_NUMBER%>", binString, true);
977  replace.append(*temp);
978  }
979  }
980  code = isa::utils::replace(code, "<%LOCAL_VARIABLES%>", localVariables, true);
981  code = isa::utils::replace(code, "<%BIN_VARIABLES%>", binVariables, true);
982  code = isa::utils::replace(code, "<%COMPUTE_MEAN_BIN%>", binComputeMean, true);
983  code = isa::utils::replace(code, "<%LOCAL_COMPUTE%>", localCompute, true);
984  code = isa::utils::replace(code, "<%LOCAL_REDUCE%>", localReduce, true);
985  code = isa::utils::replace(code, "<%REPLACE%>", replace, true);
986  return code;
987 }
988 
989 template<typename DataType>
990 void RFIm::testFrequencyDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig & config, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, isa::OpenCL::OpenCLRunTime & openCLRunTime, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
991 {
992  std::uint64_t wrongSamples = 0;
993  std::uint64_t replacedSamples = 0;
994  std::vector<DataType> test_time_series, control_time_series;
995  test_time_series = time_series;
996  control_time_series = time_series;
997  cl::Buffer device_time_series;
998  cl::Kernel * kernel = nullptr;
999  // Generate OpenCL code
1000  std::string * code = getFrequencyDomainSigmaCutOpenCL<DataType>(config, ordering, replacement, dataTypeName, observation, nrBins, sigmaCut, padding);
1001  if ( printCode )
1002  {
1003  std::cout << std::endl;
1004  std::cout << *code << std::endl;
1005  std::cout << std::endl;
1006  delete code;
1007  return;
1008  }
1009  // Execute control code
1010  replacedSamples = frequencyDomainSigmaCut(config.getSubbandDedispersion(), ordering, replacement, observation, control_time_series, nrBins, sigmaCut, padding);
1011  // Execute OpenCL code
1012  try
1013  {
1014  device_time_series = cl::Buffer(*(openCLRunTime.context), CL_MEM_READ_WRITE, test_time_series.size() * sizeof(DataType), 0, 0);
1015  }
1016  catch ( const cl::Error & err )
1017  {
1018  std::cerr << "OpenCL device memory allocation error: " << std::to_string(err.err()) << "." << std::endl;
1019  throw err;
1020  }
1021  try
1022  {
1023  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, test_time_series.size() * sizeof(DataType), reinterpret_cast<void *>(test_time_series.data()));
1024  }
1025  catch( const cl::Error & err )
1026  {
1027  std::cerr << "OpenCL transfer H2D error: " << std::to_string(err.err()) << "." << std::endl;
1028  }
1029  try
1030  {
1031  kernel = isa::OpenCL::compile("frequencyDomainSigmaCut", *code, "-cl-mad-enable -Werror", *(openCLRunTime.context), openCLRunTime.devices->at(clDeviceID));
1032  }
1033  catch ( const isa::OpenCL::OpenCLError & err )
1034  {
1035  std::cerr << err.what() << std::endl;
1036  delete code;
1037  }
1038  delete code;
1039  try
1040  {
1041  cl::NDRange global, local;
1042  global = cl::NDRange(isa::utils::pad(observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion()), config.getNrThreadsD0()), 1, observation.getNrBeams());
1043  local = cl::NDRange(config.getNrThreadsD0(), 1, 1);
1044  kernel->setArg(0, device_time_series);
1045  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local);
1046  openCLRunTime.queues->at(clDeviceID)[0].enqueueReadBuffer(device_time_series, CL_TRUE, 0, test_time_series.size() * sizeof(DataType), reinterpret_cast<void *>(test_time_series.data()));
1047  }
1048  catch ( const cl::Error & err )
1049  {
1050  std::cerr << "OpenCL kernel execution error: " << std::to_string(err.err()) << "." << std::endl;
1051  delete kernel;
1052  }
1053  delete kernel;
1054  // Compare results
1055  for ( unsigned int beam = 0; beam < observation.getNrBeams(); beam++ )
1056  {
1057  if ( ordering == FrequencyTime )
1058  {
1059  for ( unsigned int channel = 0; channel < observation.getNrChannels(); channel++ )
1060  {
1061  for ( unsigned int sample_id = 0; sample_id < observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion()); sample_id++ )
1062  {
1063  if ( !isa::utils::same<double>(test_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id), control_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id), 1.1) )
1064  {
1065  wrongSamples++;
1066  }
1067  if ( printResults )
1068  {
1069  std::cout << static_cast<double>(test_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id)) << " " << static_cast<double>(control_time_series.at((beam * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + (channel * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion(), padding / sizeof(DataType))) + sample_id)) << " ; ";
1070  }
1071  }
1072  if ( printResults )
1073  {
1074  std::cout << std::endl;
1075  }
1076  }
1077  }
1078  }
1079  // Print test results
1080  if ( wrongSamples > 0 )
1081  {
1082  std::cout << std::fixed;
1083  std::cout << "Wrong samples: " << wrongSamples << " (" << (wrongSamples * 100.0) / (static_cast<std::uint64_t>(observation.getNrBeams()) * observation.getNrChannels() * observation.getNrSamplesPerDispersedBatch(config.getSubbandDedispersion())) << "%)." << std::endl;
1084  }
1085  else
1086  {
1087  std::cout << "TEST PASSED." << std::endl;
1088  }
1089 }
1090 
1091 template<typename DataType>
1092 void RFIm::tuneFrequencyDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters & parameters, const DataOrdering & ordering, const ReplacementStrategy & replacement, const std::string & dataTypeName, const AstroData::Observation & observation, const std::vector<DataType> & time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
1093 {
1094  bool initializeDevice = true;
1095  isa::utils::Timer timer;
1096  double bestTime = std::numeric_limits<double>::max();
1097  RFImConfig bestConfig;
1098  std::vector<RFImConfig> configurations;
1099  isa::OpenCL::OpenCLRunTime openCLRunTime;
1100  cl::Event clEvent;
1101  cl::Buffer device_time_series;
1102  // Generate valid configurations
1103  for ( unsigned int threads = parameters.getMinThreads(); threads <= parameters.getMaxThreads(); threads *= 2 )
1104  {
1105  for ( unsigned int items = 1; (items * 3) + 3 + nrBins <= parameters.getMaxItems(); items++ )
1106  {
1107  if ( threads > observation.getNrSamplesPerDispersedBatch(subbandDedispersion) )
1108  {
1109  break;
1110  }
1111  if ( items > (observation.getNrChannels() / nrBins) )
1112  {
1113  break;
1114  }
1115  RFImConfig baseConfig, tempConfig;
1116  baseConfig.setSubbandDedispersion(subbandDedispersion);
1117  baseConfig.setNrThreadsD0(threads);
1118  baseConfig.setNrItemsD1(items);
1119  // conditional = 0, int = 0
1120  tempConfig = baseConfig;
1121  tempConfig.setConditionalReplacement(false);
1122  tempConfig.setIntType(0);
1123  configurations.push_back(tempConfig);
1124  // conditional = 0, int = 1
1125  tempConfig = baseConfig;
1126  tempConfig.setConditionalReplacement(false);
1127  tempConfig.setIntType(1);
1128  configurations.push_back(tempConfig);
1129  // conditional = 1, int = 0
1130  tempConfig = baseConfig;
1131  tempConfig.setConditionalReplacement(true);
1132  tempConfig.setIntType(0);
1133  configurations.push_back(tempConfig);
1134  // conditional = 1, int = 1
1135  tempConfig = baseConfig;
1136  tempConfig.setConditionalReplacement(true);
1137  tempConfig.setIntType(1);
1138  configurations.push_back(tempConfig);
1139  }
1140  }
1141  // Test performance of each configuration
1142  std::cout << std::fixed;
1143  if ( !parameters.getBestMode() )
1144  {
1145  std::cout << std::endl << "# nrBeams nrChannels nrSamplesPerDispersedBatch sigma *configuration* time stdDeviation COV" << std::endl << std::endl;
1146  }
1147  for ( auto configuration = configurations.begin(); configuration != configurations.end(); ++configuration )
1148  {
1149  cl::Kernel * kernel = nullptr;
1150  // Generate kernel
1151  std::string * code = getFrequencyDomainSigmaCutOpenCL<DataType>(*configuration, ordering, replacement, dataTypeName, observation, nrBins, sigmaCut, padding);
1152  if ( initializeDevice )
1153  {
1154  isa::OpenCL::initializeOpenCL(clPlatformID, 1, openCLRunTime);
1155  try
1156  {
1157  device_time_series = cl::Buffer(*(openCLRunTime.context), CL_MEM_READ_WRITE, time_series.size() * sizeof(DataType), 0, 0);
1158  }
1159  catch ( const cl::Error & err )
1160  {
1161  std::cerr << "OpenCL device memory allocation error: " << std::to_string(err.err()) << "." << std::endl;
1162  throw err;
1163  }
1164  initializeDevice = false;
1165  }
1166  try
1167  {
1168  kernel = isa::OpenCL::compile("frequencyDomainSigmaCut", *code, "-cl-mad-enable -Werror", *(openCLRunTime.context), openCLRunTime.devices->at(clDeviceID));
1169  }
1170  catch ( const isa::OpenCL::OpenCLError & err )
1171  {
1172  std::cerr << err.what() << std::endl;
1173  delete code;
1174  }
1175  delete code;
1176  try
1177  {
1178  cl::NDRange global, local;
1179  global = cl::NDRange(isa::utils::pad(observation.getNrSamplesPerDispersedBatch(subbandDedispersion), (*configuration).getNrThreadsD0()), 1, observation.getNrBeams());
1180  local = cl::NDRange((*configuration).getNrThreadsD0(), 1, 1);
1181  kernel->setArg(0, device_time_series);
1182  // Warm-up run
1183  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, time_series.size() * sizeof(DataType), reinterpret_cast<const void *>(time_series.data()), nullptr, &clEvent);
1184  clEvent.wait();
1185  openCLRunTime.queues->at(clDeviceID)[0].finish();
1186  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local, nullptr, &clEvent);
1187  clEvent.wait();
1188  // Tuning runs
1189  for ( unsigned int iteration = 0; iteration < parameters.getNrIterations(); iteration++ )
1190  {
1191  openCLRunTime.queues->at(clDeviceID)[0].enqueueWriteBuffer(device_time_series, CL_FALSE, 0, time_series.size() * sizeof(DataType), reinterpret_cast<const void *>(time_series.data()), nullptr, &clEvent);
1192  clEvent.wait();
1193  openCLRunTime.queues->at(clDeviceID)[0].finish();
1194  timer.start();
1195  openCLRunTime.queues->at(clDeviceID)[0].enqueueNDRangeKernel(*kernel, cl::NullRange, global, local, nullptr, &clEvent);
1196  clEvent.wait();
1197  timer.stop();
1198  }
1199  }
1200  catch ( const cl::Error & err )
1201  {
1202  std::cerr << "OpenCL kernel error during tuning: " << std::to_string(err.err()) << "." << std::endl;
1203  delete kernel;
1204  if (err.err() == -4 || err.err() == -61)
1205  {
1206  throw err;
1207  }
1208  initializeDevice = true;
1209  break;
1210  }
1211  delete kernel;
1212  if ( timer.getAverageTime() < bestTime )
1213  {
1214  bestTime = timer.getAverageTime();
1215  bestConfig = *configuration;
1216  }
1217  if ( !parameters.getBestMode() )
1218  {
1219  std::cout << observation.getNrBeams() << " " << observation.getNrChannels() << " ";
1220  std::cout << observation.getNrSamplesPerDispersedBatch(subbandDedispersion) << " ";
1221  std::cout.precision(2);
1222  std::cout << sigmaCut << " ";
1223  std::cout << (*configuration).print() << " ";
1224  std::cout.precision(6);
1225  std::cout << timer.getAverageTime() << " " << timer.getStandardDeviation() << " " << timer.getCoefficientOfVariation();
1226  std::cout << std::endl;
1227  }
1228  timer.reset();
1229  }
1230  if ( parameters.getBestMode() )
1231  {
1232  std::cout.precision(2);
1233  std::cout << observation.getNrSamplesPerDispersedBatch(subbandDedispersion) << " " << sigmaCut << " ";
1234  std::cout << bestConfig.print() << std::endl;
1235  }
1236  else
1237  {
1238  std::cout << std::endl;
1239  }
1240 }
Definition: RFIm.hpp:97
std::string * getFrequencyDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig &config, const std::string &dataTypeName, const AstroData::Observation &observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
Generates the OpenCL code for the frequency domain sigma cut. This function generates specialized cod...
Definition: RFIm.hpp:822
void testTimeDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig &config, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const std::vector< DataType > &time_series, isa::OpenCL::OpenCLRunTime &openCLRunTime, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding)
Test the OpenCL kernel by comparing results with C++ implementation.
Definition: RFIm.hpp:517
void tuneTimeDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters &parameters, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const std::vector< DataType > &time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const float sigmaCut, const unsigned int padding)
Tune the OpenCL kernel to find best performing configuration for a certain scenario.
Definition: RFIm.hpp:619
RFImKernel
The kernel type.
Definition: RFIm.hpp:76
std::string * getFrequencyDomainSigmaCutOpenCL(const RFImConfig &config, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
Generates the OpenCL code for the frequency domain sigma cut.
Definition: RFIm.hpp:812
Definition: RFIm.hpp:78
DataOrdering
Ordering of input/output data.
Definition: RFIm.hpp:85
std::map< std::string, std::map< unsigned int, std::map< float, RFImConfig * > * > * > RFImConfigurations
Definition: RFIm.hpp:100
bool getSubbandDedispersion() const
Return true if in subbanding mode, false otherwise.
Definition: RFIm.hpp:292
Definition: RFIm.hpp:87
Definition: RFIm.hpp:96
std::string * getTimeDomainSigmaCutOpenCL_FrequencyTime_ReplaceWithMean(const RFImConfig &config, const std::string &dataTypeName, const AstroData::Observation &observation, const float sigmaCut, const unsigned int padding)
Generates the OpenCL code for the time domain sigma cut. This function generates specialized code for...
Definition: RFIm.hpp:361
void readSigmaSteps(const std::string &inputFilename, std::vector< float > &steps)
Read the.
Definition: RFIm.cpp:108
void setSubbandDedispersion(const bool subband)
Set the subbanding mode.
Definition: RFIm.hpp:302
void tuneFrequencyDomainSigmaCut(const bool subbandDedispersion, const isa::OpenCL::TuningParameters &parameters, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const std::vector< DataType > &time_series, const unsigned int clPlatformID, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
Tune the OpenCL kernel to find best performing configuration for a certain scenario.
Definition: RFIm.hpp:1092
Definition: RFIm.hpp:36
RFImConfig()
Definition: RFIm.cpp:20
std::string print() const
Print the configuration.
Definition: RFIm.hpp:312
void testFrequencyDomainSigmaCut(const bool printCode, const bool printResults, const RFImConfig &config, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const std::vector< DataType > &time_series, isa::OpenCL::OpenCLRunTime &openCLRunTime, const unsigned int clDeviceID, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
Test the OpenCL kernel by comparing results with C++ implementation.
Definition: RFIm.hpp:990
bool getConditionalReplacement() const
Return true if using a condition for replacement, false otherwise.
Definition: RFIm.hpp:297
void setConditionalReplacement(const bool replacement)
Set the conditional replacement mode.
Definition: RFIm.hpp:307
void readRFImConfig(RFImConfigurations &configurations, const std::string &filename)
Read one RFImConfig from a configuration file.
Definition: RFIm.cpp:24
ReplacementStrategy
Strategy for flagged data replacement.
Definition: RFIm.hpp:94
Definition: RFIm.hpp:88
~RFImConfig()
Definition: RFIm.cpp:22
RFI specific kernel configuration.
Definition: RFIm.hpp:42
Definition: RFIm.hpp:79
std::string * getTimeDomainSigmaCutOpenCL(const RFImConfig &config, const DataOrdering &ordering, const ReplacementStrategy &replacement, const std::string &dataTypeName, const AstroData::Observation &observation, const float sigmaCut, const unsigned int padding)
Generates the OpenCL code for the time domain sigma cut.
Definition: RFIm.hpp:351
std::uint64_t timeDomainSigmaCut(const bool subbandDedispersion, const DataOrdering &ordering, const ReplacementStrategy &replacement, const AstroData::Observation &observation, std::vector< DataType > &time_series, const float sigmaCut, const unsigned int padding)
Compute time domain sigma cut. Not optimized, just for testing purpose.
Definition: RFIm.hpp:318
std::uint64_t frequencyDomainSigmaCut(const bool subbandDedispersion, const DataOrdering &ordering, const ReplacementStrategy &replacement, const AstroData::Observation &observation, std::vector< DataType > &time_series, const unsigned int nrBins, const float sigmaCut, const unsigned int padding)
Compute frequency domain sigma cut. Not optimized, just for testing purpose.
Definition: RFIm.hpp:766