13 #pragma once
15 #include <seqan3/std/iterator>
16 #include <memory>
17 #include <optional>
18 #include <seqan3/std/ranges>
19 #include <type_traits>
41 namespace seqan3::detail
42 {
77 template <typename config_t, typename ...algorithm_policies_t>
79  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
80 {
81 private:
// Type of the object returned by current_alignment_column() when called on this algorithm type.
86  using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
// Iterator type used to walk over the cells of one alignment column.
88  using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
// Compile-time guard: the alignment result type must not be the empty_type placeholder.
92  static_assert(!std::same_as<alignment_result_t, empty_type>, "Alignment result type was not configured.");
100  empty_type>;
107  empty_type>;
109 public:
// Special member functions visible in this excerpt; all of them are compiler-generated.
113  constexpr alignment_algorithm() = default; // default constructor
114  constexpr alignment_algorithm(alignment_algorithm const &) = default; // copy constructor
115  constexpr alignment_algorithm(alignment_algorithm &&) = default; // move constructor
116  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default; // copy assignment
118  ~alignment_algorithm() = default; // destructor
128  explicit constexpr alignment_algorithm(config_t const & cfg) :
129  invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>{cfg}...,
130  cfg_ptr{std::make_shared<config_t>(cfg)}
131  {
132  this->scoring_scheme = seqan3::get<align_cfg::scoring_scheme>(*cfg_ptr).scheme;
133  this->initialise_alignment_state(*cfg_ptr);
134  }
182  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
184  requires (!traits_t::is_vectorised) && std::invocable<callback_t, alignment_result_t>
186  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
187  {
188  using std::get;
190  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
191  compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair), callback);
192  }
195  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
197  requires traits_t::is_vectorised && std::invocable<callback_t, alignment_result_t>
199  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
200  {
201  assert(cfg_ptr != nullptr);
203  static_assert(simd_concept<typename traits_t::score_type>, "Expected simd score type.");
204  static_assert(simd_concept<typename traits_t::trace_type>, "Expected simd trace type.");
206  // Extract the batch of sequences for the first and the second sequence.
207  auto sequence1_range = indexed_sequence_pairs | views::get<0> | views::get<0>;
208  auto sequence2_range = indexed_sequence_pairs | views::get<0> | views::get<1>;
210  // Initialise the find_optimum policy in the simd case.
211  this->initialise_find_optimum_policy(sequence1_range,
212  sequence2_range,
213  this->scoring_scheme.padding_match_score());
215  // Convert batch of sequences to sequence of simd vectors.
216  auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
217  auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
219  max_size_in_collection = std::pair{simd_sequences1.size(), simd_sequences2.size()};
220  // Reset the alignment state's optimum between executions of the alignment algorithm.
221  this->alignment_state.reset_optimum();
223  compute_matrix(simd_sequences1, simd_sequences2);
225  make_alignment_result(indexed_sequence_pairs, callback);
226  }
229 private:
243  template <typename sequence_range_t>
244  constexpr auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
245  {
246  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
248  using simd_score_t = typename traits_t::score_type;
250  std::vector<simd_score_t, aligned_allocator<simd_score_t, alignof(simd_score_t)>> simd_sequence{};
252  for (auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(this->scoring_scheme.padding_symbol))
253  for (auto && simd_vector : simd_vector_chunk)
254  simd_sequence.push_back(std::move(simd_vector));
256  return simd_sequence;
257  }
276  template <std::ranges::forward_range sequence1_t,
277  std::ranges::forward_range sequence2_t,
278  typename callback_t>
279  constexpr void compute_single_pair(size_t const idx,
280  sequence1_t && sequence1,
281  sequence2_t && sequence2,
282  callback_t & callback)
283  {
284  assert(cfg_ptr != nullptr);
286  if constexpr (traits_t::is_debug)
287  initialise_debug_matrices(sequence1, sequence2);
289  // Reset the alignment state's optimum between executions of the alignment algorithm.
290  this->alignment_state.reset_optimum();
292  if constexpr (traits_t::is_banded)
293  {
294  using seqan3::get;
295  // Get the band and check if band configuration is valid.
296  auto const & band = get<align_cfg::band_fixed_size>(*cfg_ptr);
297  check_valid_band_parameter(sequence1, sequence2, band);
298  auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
299  // It would be great to use this interface here instead
300  compute_matrix(subsequence1, subsequence2, band);
301  make_alignment_result(idx, subsequence1, subsequence2, callback);
302  }
303  else
304  {
305  compute_matrix(sequence1, sequence2);
306  make_alignment_result(idx, sequence1, sequence2, callback);
307  }
308  }
// Checks that the configured band is compatible with the given pair of sequences.
//
// sequence1 - first sequence; its length is compared against band.lower_diagonal.
// sequence2 - second sequence; its negated length is compared against band.upper_diagonal.
// band      - the fixed-size band configuration to validate.
//
// NOTE(review): the declaration of diff_type and the statements that consume the two error
// messages below are not visible in this excerpt (the listing numbering skips them) —
// presumably an exception is thrown with the message; confirm against the full source.
326  template <typename sequence1_t, typename sequence2_t>
327  constexpr void check_valid_band_parameter(sequence1_t && sequence1,
328  sequence2_t && sequence2,
329  align_cfg::band_fixed_size const & band)
330  {
// The configuration type must contain a band element for this check to make sense.
331  static_assert(config_t::template exists<align_cfg::band_fixed_size>(),
332  "The band configuration is required for the banded alignment algorithm.");
// The comparisons below involve a negated distance, so diff_type has to be signed.
335  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
// Error case 1: the lower diagonal is larger than the length of sequence1.
337  if (static_cast<diff_type>(band.lower_diagonal) > std::ranges::distance(sequence1))
338  {
340  {
341  "Invalid band error: The lower diagonal excludes the whole alignment matrix."
342  };
343  }
// Error case 2: the upper diagonal is smaller than the negated length of sequence2.
345  if (static_cast<diff_type>(band.upper_diagonal) < -std::ranges::distance(sequence2))
346  {
348  {
349  "Invalid band error: The upper diagonal excludes the whole alignment matrix."
350  };
351  }
352  }
// Computes the dimensions for the debug matrices from the two sequences.
//
// NOTE(review): the statements that use rows and cols (listing lines 371-373) are not visible
// in this excerpt — presumably they size the score/trace debug matrices; confirm against the
// full source.
366  template <typename sequence1_t, typename sequence2_t>
367  constexpr void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
368  {
// One row per element of sequence2 plus one extra row.
369  size_t rows = std::ranges::distance(sequence2) + 1;
// One column per element of sequence1 plus one extra column.
370  size_t cols = std::ranges::distance(sequence1) + 1;
374  }
// Computes the alignment matrix column by column for the unbanded case.
//
// sequence1 - first sequence; one matrix column is computed per element.
// sequence2 - second sequence; passed to every column computation.
//
// NOTE(review): several statements of this function (listing lines 394, 403 and 410) are not
// visible in this excerpt — the initialisation of the first column and the wrap-up calls are
// presumably among them; confirm against the full source.
383  template <typename sequence1_t, typename sequence2_t>
384  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
386  requires (!traits_t::is_banded)
388  {
389  // ----------------------------------------------------------------------------
390  // Initialisation phase: allocate memory and initialise first column.
391  // ----------------------------------------------------------------------------
393  this->allocate_matrix(sequence1, sequence2);
396  // ----------------------------------------------------------------------------
397  // Recursion phase: compute column-wise the alignment matrix.
398  // ----------------------------------------------------------------------------
400  for (auto const & alphabet1 : sequence1)
401  {
// <true>: the first cell of every column is initialised as a first-row cell. The column is
// scored through the scoring scheme's profile column for the current symbol of sequence1.
402  compute_alignment_column<true>(this->scoring_scheme_profile_column(alphabet1), sequence2);
404  }
406  // ----------------------------------------------------------------------------
407  // Wrap up phase: track score in last column and prepare the alignment result.
408  // ----------------------------------------------------------------------------
411  }
// Computes the alignment matrix column by column for the banded case.
//
// sequence1 - first sequence; one matrix column is computed per element.
// sequence2 - second sequence; each column only covers the part of it selected by the band.
// band      - the fixed-size band configuration, forwarded to allocate_matrix.
//
// The recursion is split into two phases at score_matrix.band_col_index: the columns before
// that index start in the first row of the matrix, the columns from that index on do not.
//
// NOTE(review): the wrap-up statement (listing line 458) is not visible in this excerpt;
// confirm against the full source.
414  template <typename sequence1_t, typename sequence2_t>
415  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band)
417  requires traits_t::is_banded
419  {
420  // ----------------------------------------------------------------------------
421  // Initialisation phase: allocate memory and initialise first column.
422  // ----------------------------------------------------------------------------
424  // Allocate and initialise first column.
425  this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
// last_row_index tracks how many elements of sequence2 the current column reaches.
426  size_t last_row_index = this->score_matrix.band_row_index;
427  initialise_first_alignment_column(sequence2 | views::take(last_row_index));
429  // ----------------------------------------------------------------------------
430  // 1st recursion phase: iterate as long as the band intersects with the first row.
431  // ----------------------------------------------------------------------------
433  size_t sequence2_size = std::ranges::distance(sequence2);
434  for (auto const & seq1_value : sequence1 | views::take(this->score_matrix.band_col_index))
435  {
// Every column takes one more element of sequence2 than the previous one (pre-increment).
436  compute_alignment_column<true>(seq1_value, sequence2 | views::take(++last_row_index));
437  // Only if band reached last row of matrix the last cell might be tracked.
438  finalise_last_cell_in_column(last_row_index >= sequence2_size);
439  }
441  // ----------------------------------------------------------------------------
442  // 2nd recursion phase: iterate until the end of the matrix.
443  // ----------------------------------------------------------------------------
445  size_t first_row_index = 0;
446  for (auto const & seq1_value : sequence1 | views::drop(this->score_matrix.band_col_index))
447  {
448  // In the second phase the band moves in every column one base down on the second sequence.
// The slice start is post-incremented and the slice end pre-incremented, so both move by one.
449  compute_alignment_column<false>(seq1_value, sequence2 | views::slice(first_row_index++, ++last_row_index));
450  // Only if band reached last row of matrix the last cell might be tracked.
451  finalise_last_cell_in_column(last_row_index >= sequence2_size);
452  }
454  // ----------------------------------------------------------------------------
455  // Wrap up phase: track score in last column and prepare the alignment result.
456  // ----------------------------------------------------------------------------
459  }
// Initialises the first column of the alignment matrix.
//
// sequence2 - the part of the second sequence covered by the first column; one column cell
//             is initialised per element, in addition to the origin cell.
//
// NOTE(review): the statement that positions alignment_column_it (listing line 481) is not
// visible in this excerpt — presumably it is set to the begin of alignment_column; confirm
// against the full source.
473  template <typename sequence2_t>
474  auto initialise_first_alignment_column(sequence2_t && sequence2)
475  {
476  // Get the initial column.
477  alignment_column = this->current_alignment_column();
478  assert(!alignment_column.empty()); // Must contain at least one element.
480  // Initialise first cell.
482  this->init_origin_cell(*alignment_column_it, this->alignment_state);
484  // Initialise the remaining cells of this column.
// The sequence iterator only counts the cells here; its value is never dereferenced.
485  for (auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
486  this->init_column_cell(*++alignment_column_it, this->alignment_state);
488  // Finalise the last cell of the initial column.
// Unbanded: the flag stays true. Banded: true only if band_row_index equals num_rows - 1.
489  bool at_last_row = true;
490  if constexpr (traits_t::is_banded) // If the band reaches until the last row of the matrix.
491  at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
493  finalise_last_cell_in_column(at_last_row);
494  }
// Computes the next column of the alignment matrix.
//
// initialise_first_cell - if true, the first cell of the column is initialised with
//                         init_row_cell; otherwise it is computed with compute_first_band_cell
//                         and consumes the first element of sequence2.
// seq1_value            - the value of the first sequence belonging to this column.
// sequence2             - the part of the second sequence covered by this column.
//
// NOTE(review): the statements at listing lines 516/517 are not visible in this excerpt —
// presumably alignment_column_it is repositioned to the begin of the new column there;
// confirm against the full source.
511  template <bool initialise_first_cell, typename sequence1_value_t, typename sequence2_t>
512  void compute_alignment_column(sequence1_value_t const & seq1_value, sequence2_t && sequence2)
513  {
514  this->next_alignment_column(); // move to next column and set alignment column iterator accordingly.
515  alignment_column = this->current_alignment_column();
518  auto seq2_it = std::ranges::begin(sequence2);
520  if constexpr (initialise_first_cell) // Initialise first cell if it intersects with the first row of the matrix.
521  {
522  this->init_row_cell(*alignment_column_it, this->alignment_state);
523  }
524  else // Compute first cell of banded column if it does not intersect with the first row of the matrix.
525  {
526  this->compute_first_band_cell(*alignment_column_it,
527  this->alignment_state,
528  this->scoring_scheme.score(seq1_value, *seq2_it));
// The first element of sequence2 has been consumed by the first band cell.
529  ++seq2_it;
530  }
// Remaining cells: advance the column iterator first, then score against the current element.
532  for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
533  this->compute_cell(*++alignment_column_it,
534  this->alignment_state,
535  this->scoring_scheme.score(seq1_value, *seq2_it));
536  }
// Finalises the column that was just computed.
//
// at_last_row - if true, the cell alignment_column_it currently points to is passed to
//               check_score_of_last_row_cell together with the alignment state.
//
// NOTE(review): the statement guarded by traits_t::is_debug (listing line 554) is not visible
// in this excerpt; confirm against the full source.
548  constexpr void finalise_last_cell_in_column(bool const at_last_row) noexcept
549  {
550  if (at_last_row)
551  this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
553  if constexpr (traits_t::is_debug)
555  }
558  constexpr void finalise_alignment() noexcept
559  {
560  // ----------------------------------------------------------------------------
561  // Check for the optimum in last cell/column.
562  // ----------------------------------------------------------------------------
564  this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
565  this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
566  }
// Builds the alignment result for one sequence pair (non-vectorised case) and hands it to
// the callback.
//
// idx       - index of the sequence pair; stored as sequence1_id and/or sequence2_id when
//             the corresponding output is enabled.
// sequence1 - first sequence; only used when begin positions are computed.
// sequence2 - second sequence; only used when begin positions are computed.
// callback  - invoked once with the finished result, which is moved into the call.
//
// Which members of the result are filled is selected at compile time through traits_t.
//
// NOTE(review): the head of the static_assert whose message appears below and the statements
// at listing lines 642/643 are not visible in this excerpt; confirm against the full source.
594  template <typename index_t, typename sequence1_t, typename sequence2_t, typename callback_t>
596  requires (!traits_t::is_vectorised)
598  constexpr void make_alignment_result([[maybe_unused]] index_t const idx,
599  [[maybe_unused]] sequence1_t & sequence1,
600  [[maybe_unused]] sequence2_t & sequence2,
601  callback_t & callback)
602  {
603  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
605  // ----------------------------------------------------------------------------
606  // Build the alignment result
607  // ----------------------------------------------------------------------------
610  "The configuration must contain at least one align_cfg::output_* element.");
612  result_value_t res{};
// Both ids are taken from the same pair index.
614  if constexpr (traits_t::output_sequence1_id)
615  res.sequence1_id = idx;
617  if constexpr (traits_t::output_sequence2_id)
618  res.sequence2_id = idx;
620  // Choose what needs to be computed.
621  if constexpr (traits_t::compute_score)
622  res.score = this->alignment_state.optimum.score;
624  if constexpr (traits_t::compute_end_positions)
625  {
// The end positions are the column and row index of the tracked optimum.
626  res.end_positions = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
627  row_index_type{this->alignment_state.optimum.row_index}};
628  // At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
// Banded case: shift the row coordinate by (column - band_col_index).
629  if constexpr (traits_t::is_banded)
630  res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
631  }
633  if constexpr (traits_t::compute_begin_positions)
634  {
635  // Get a aligned sequence builder for banded or un-banded case.
636  aligned_sequence_builder builder{sequence1, sequence2};
637  auto optimum_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
638  row_index_type{this->alignment_state.optimum.row_index}};
// Follow the trace path starting at the optimum and let the builder turn it into a result.
639  auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
640  res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
641  res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
644  res.alignment = std::move(trace_res.alignment);
645  }
647  // Store the matrices in debug mode.
648  if constexpr (traits_t::is_debug)
649  {
// The debug matrices are moved out of the algorithm into the result.
650  res.score_debug_matrix = std::move(score_debug_matrix);
651  if constexpr (traits_t::compute_sequence_alignment) // compute alignment
652  res.trace_debug_matrix = std::move(trace_debug_matrix);
653  }
655  callback(std::move(res));
656  }
683  template <typename indexed_sequence_pair_range_t, typename callback_t>
685  requires traits_t::is_vectorised
687  constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs,
688  callback_t & callback)
689  {
690  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
692  size_t simd_index = 0;
693  for (auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
694  {
695  (void) sequence_pairs;
696  result_value_t res{};
698  if constexpr (traits_t::output_sequence1_id)
699  res.sequence1_id = alignment_index;
701  if constexpr (traits_t::output_sequence2_id)
702  res.sequence2_id = alignment_index;
704  if constexpr (traits_t::compute_score)
705  res.score = this->alignment_state.optimum.score[simd_index]; // Just take this
707  if constexpr (traits_t::compute_end_positions)
708  {
709  res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
710  res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
711  }
713  callback(std::move(res));
714  ++simd_index;
715  }
716  }
727  {
728  using std::get;
730  auto column = this->current_alignment_column();
732  auto coord = get<1>(column.front()).coordinate;
733  if constexpr (traits_t::is_banded)
734  coord.second += coord.first - this->score_matrix.band_col_index;
736  matrix_offset offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
737  column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
739  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
740  {
741  using std::get;
742  return get<0>(tpl).current;
743  }), score_debug_matrix.begin() + offset);
745  // if traceback is enabled.
747  {
748  auto trace_matrix_it = trace_debug_matrix.begin() + offset;
749  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
750  {
751  using std::get;
752  return get<1>(tpl).current;
753  }), trace_debug_matrix.begin() + offset);
754  }
755  }
769 };
771 } // namespace seqan3::detail
