146 class alignment_file_input_format<format_sam>
150 using format_tag = format_sam;
155 alignment_file_input_format() noexcept =
default;
156 alignment_file_input_format(alignment_file_input_format
const &) =
delete;
159 alignment_file_input_format & operator=(alignment_file_input_format
const &) =
delete;
160 alignment_file_input_format(alignment_file_input_format &&) noexcept =
default;
161 alignment_file_input_format & operator=(alignment_file_input_format &&) noexcept =
default;
162 ~alignment_file_input_format() noexcept =
default;
166 template <
typename stream_type,
167 typename seq_legal_alph_type,
168 typename ref_seqs_type,
169 typename ref_ids_type,
172 typename offset_type,
173 typename ref_seq_type,
174 typename ref_id_type,
175 typename ref_offset_type,
181 typename tag_dict_type,
182 typename e_value_type,
183 typename bit_score_type>
184 void read(stream_type & stream,
186 ref_seqs_type & ref_seqs,
191 offset_type & offset,
192 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
193 ref_id_type & ref_id,
194 ref_offset_type & ref_offset,
199 tag_dict_type & tag_dict,
200 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
201 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
203 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
204 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
205 "The ref_offset must be a specialisation of std::optional.");
211 int32_t ref_offset_tmp{};
213 [[maybe_unused]] int32_t offset_tmp{};
214 [[maybe_unused]] int32_t soft_clipping_end{};
216 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
222 read_header(stream_view, header, ref_seqs);
230 read_field(field_view,
id);
232 read_field(field_view, flag);
234 read_field(field_view, ref_id_tmp);
235 check_and_assign_ref_id(ref_id, ref_id_tmp, header, ref_seqs);
237 read_field(field_view, ref_offset_tmp);
240 if (ref_offset_tmp == -1)
241 ref_offset = std::nullopt;
242 else if (ref_offset_tmp > -1)
243 ref_offset = ref_offset_tmp;
244 else if (ref_offset_tmp < -1)
245 throw format_error{
"No negative values are allowed for field::REF_OFFSET."};
247 read_field(field_view, mapq);
251 if constexpr (!detail::decays_to_ignore_v<align_type>)
255 std::tie(
cigar, ref_length, seq_length, offset_tmp, soft_clipping_end) = parse_cigar(field_view);
264 detail::consume(field_view);
271 if constexpr (!detail::decays_to_ignore_v<mate_type>)
274 read_field(field_view, tmp_mate_ref_id);
276 if (tmp_mate_ref_id ==
"=")
278 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
279 get<0>(mate) = ref_id;
281 check_and_assign_ref_id(get<0>(mate), ref_id_tmp, header, ref_seqs);
285 check_and_assign_ref_id(get<0>(mate), tmp_mate_ref_id, header, ref_seqs);
289 read_field(field_view, tmp_pnext);
292 get<1>(mate) = --tmp_pnext;
293 else if (tmp_pnext < 0)
294 throw format_error{
"No negative values are allowed at the mate mapping position."};
297 read_field(field_view, get<2>(mate));
301 for (
size_t i = 0; i < 3u; ++i)
303 detail::consume(field_view);
311 auto constexpr is_legal_alph = is_in_alphabet<seq_legal_alph_type>;
314 if (!is_legal_alph(c))
316 is_legal_alph.msg.str() +
317 " evaluated to false on " +
318 detail::make_printable(c)};
322 if constexpr (detail::decays_to_ignore_v<seq_type>)
324 if constexpr (!detail::decays_to_ignore_v<align_type>)
327 "If you want to read ALIGNMENT but not SEQ, the alignment" 328 " object must store a sequence container at the second (query) position.");
336 for (; seq_length > 0; --seq_length)
338 get<1>(align).push_back(
value_type_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
351 detail::consume(seq_stream);
356 read_field(seq_stream, seq);
358 if constexpr (!detail::decays_to_ignore_v<align_type>)
376 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
379 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
397 detail::consume(stream_view |
view::take_until(!(is_char<'\r'> || is_char<'\n'>)));
403 if constexpr (!detail::decays_to_ignore_v<align_type>)
405 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
407 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
409 if (!ref_id_tmp.empty())
411 assert(header.
ref_dict.count(ref_id_tmp) != 0);
412 ref_idx = header.
ref_dict[ref_id_tmp];
416 construct_alignment(align,
cigar, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
426 bool ref_info_present_in_header{
false};
438 template <
typename ref_id_type,
439 typename ref_id_tmp_type,
440 typename header_type,
441 typename ref_seqs_type>
442 void check_and_assign_ref_id(ref_id_type & ref_id,
443 ref_id_tmp_type & ref_id_tmp,
444 header_type & header,
449 auto search = header.ref_dict.find(ref_id_tmp);
451 if (
search == header.ref_dict.end())
453 if constexpr(detail::decays_to_ignore_v<ref_seqs_type>)
455 if (ref_info_present_in_header)
457 throw format_error{
"Unknown reference id found in record which is not present in the header."};
461 header.ref_ids().push_back(ref_id_tmp);
463 header.ref_dict[header.ref_ids()[pos]] = pos;
469 throw format_error{
"Unknown reference id found in record which is not present in the given ids."};
489 template <
typename align_type,
typename ref_seqs_type>
490 void construct_alignment(align_type & align,
492 [[maybe_unused]] int32_t rid,
493 [[maybe_unused]] ref_seqs_type & ref_seqs,
494 [[maybe_unused]] int32_t ref_start,
497 if (rid > -1 && ref_start > -1 &&
501 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
503 assert(static_cast<size_t>(ref_start + ref_length) <=
std::ranges::size(ref_seqs[rid]));
512 static_assert(
std::Same<unaligned_t, decltype(dummy_seq)>,
513 "No reference information was given so the type of the first alignment tuple position" 514 "must have an unaligned sequence type of a dummy sequence (" 515 "view::repeat_n(dna5{}, size_t{}) | " 516 "std::view::transform(detail::access_restrictor_fn{}))");
522 detail::alignment_from_cigar(align,
cigar);
526 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
546 template <
typename stream_view_type>
547 void read_field(stream_view_type && stream_view,
detail::ignore_t const & SEQAN3_DOXYGEN_ONLY(target))
549 detail::consume(stream_view);
559 template <
typename stream_view_type, std::ranges::ForwardRange target_range_type>
560 void read_field(stream_view_type && stream_view, target_range_type & target)
579 template <
typename stream_view_type, Arithmetic target_type>
580 void read_field(stream_view_type && stream_view, target_type & target)
587 if (res.ec == std::errc::invalid_argument || res.ptr !=
end)
589 "' could not be cast into type " +
590 detail::get_display_name_v<target_type>.str()};
592 if (res.ec == std::errc::result_out_of_range)
594 "' into type " + detail::get_display_name_v<target_type>.str() +
595 " would cause an overflow."};
608 template <
typename stream_view_type,
typename optional_value_type>
611 optional_value_type tmp;
612 read_field(std::forward<stream_view_type>(stream_view), tmp);
633 template <
typename stream_view_type,
typename value_type>
635 stream_view_type && stream_view,
647 variant = std::move(tmp_vector);
667 template <
typename stream_view_type>
695 read_field(stream_view, tmp);
702 read_field(stream_view, tmp);
722 switch (array_value_type_id)
726 read_sam_dict_vector(target[tag], stream_view, int8_t{});
731 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
736 read_sam_dict_vector(target[tag], stream_view, int16_t{});
741 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
746 read_sam_dict_vector(target[tag], stream_view, int32_t{});
751 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
756 read_sam_dict_vector(target[tag], stream_view,
float{});
761 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
768 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
788 template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
789 void read_header(stream_view_type && stream_view,
793 auto parse_tag_value = [&stream_view,
this] (
auto & value)
828 parse_tag_value(*who);
840 ref_info_present_in_header =
true;
846 parse_tag_value(get<0>(info));
857 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
862 throw format_error{to_string(
"Unknown reference name '",
id,
"' found in SAM header ",
863 "(header.ref_ids(): ", hdr.
ref_ids(),
").")};
865 auto & given_ref_info = hdr.
ref_id_info[id_it->second];
867 if (std::get<0>(given_ref_info) != std::get<0>(info))
868 throw format_error{
"Provided reference has unequal length as specified in the header."};
874 static_assert(!detail::is_type_specialisation_of_v<decltype(hdr.
ref_ids()),
std::deque>,
875 "The range over reference ids must be of type std::deque such that " 876 "pointers are not invalidated.");
887 parse_tag_value(get<0>(tmp));
902 parse_tag_value(tmp.id);
921 who = &tmp.command_line_call;
925 who = &tmp.description;
933 parse_tag_value(*who);
962 class alignment_file_output_format<format_sam>
966 using format_tag = format_sam;
971 alignment_file_output_format() noexcept =
default;
972 alignment_file_output_format(alignment_file_output_format
const &) =
delete;
975 alignment_file_output_format & operator=(alignment_file_output_format
const &) =
delete;
976 alignment_file_output_format(alignment_file_output_format &&) noexcept =
default;
977 alignment_file_output_format & operator=(alignment_file_output_format &&) noexcept =
default;
978 ~alignment_file_output_format() noexcept =
default;
982 template <
typename stream_type,
983 typename header_type,
986 typename ref_seq_type,
987 typename ref_id_type,
991 typename tag_dict_type>
992 void write(stream_type & stream,
994 header_type && header,
999 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
1000 ref_id_type && ref_id,
1002 align_type && align,
1006 tag_dict_type && tag_dict,
1007 double SEQAN3_DOXYGEN_ONLY(e_value),
1008 double SEQAN3_DOXYGEN_ONLY(bit_score))
1028 "The seq object must be a std::ranges::ForwardRange over " 1029 "letters that model seqan3::Alphabet.");
1033 "The id object must be a std::ranges::ForwardRange over " 1034 "letters that model seqan3::Alphabet.");
1038 "The ref_seq object must be a std::ranges::ForwardRange " 1039 "over letters that model seqan3::Alphabet.");
1041 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
1046 "The ref_id object must be a std::ranges::ForwardRange " 1047 "over letters that model seqan3::Alphabet.");
1051 static_assert(!detail::decays_to_ignore_v<header_type>,
1052 "If you give indices as reference id information the header must also be present.");
1056 "The align object must be a std::pair of two ranges whose " 1057 "value_type is comparable to seqan3::gap");
1062 "The align object must be a std::pair of two ranges whose " 1063 "value_type is comparable to seqan3::gap");
1067 "The qual object must be a std::ranges::ForwardRange " 1068 "over letters that model seqan3::Alphabet.");
1071 "The mate object must be a std::tuple of size 3 with " 1072 "1) a std::ranges::ForwardRange with a value_type modelling seqan3::Alphabet, " 1073 "2) a std::Integral or std::optional<std::Integral>, and " 1074 "3) a std::Integral.");
1082 "The mate object must be a std::tuple of size 3 with " 1083 "1) a std::ranges::ForwardRange with a value_type modelling seqan3::Alphabet, " 1084 "2) a std::Integral or std::optional<std::Integral>, and " 1085 "3) a std::Integral.");
1089 static_assert(!detail::decays_to_ignore_v<header_type>,
1090 "If you give indices as mate reference id information the header must also be present.");
1093 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
1098 if constexpr (!detail::decays_to_ignore_v<header_type> &&
1099 !detail::decays_to_ignore_v<ref_id_type> &&
1104 "The ref_id type is not convertible to the reference id information stored in the " 1105 "reference dictionary of the header object.");
1109 if ((header.ref_dict).count(ref_id) == 0)
1111 "' was not in the list of references"};
1116 throw format_error{
"The ref_offset object must be an std::Integral >= 0."};
1121 if constexpr (!detail::decays_to_ignore_v<header_type>)
1125 write_header(stream, options, header);
1126 written_header =
true;
1134 char const separator{
'\t'};
1136 write_range(stream_it, std::forward<id_type>(
id));
1138 stream << separator;
1140 stream << flag << separator;
1142 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
1146 write_range(stream_it, (header.ref_ids())[ref_id]);
1150 if (ref_id.has_value())
1151 write_range(stream_it, (header.ref_ids())[ref_id.value()]);
1157 write_range(stream_it, std::forward<ref_id_type>(ref_id));
1165 stream << separator;
1168 stream << (ref_offset.
value_or(-1) + 1) << separator;
1170 stream << static_cast<unsigned>(mapq) << separator;
1179 for (
auto chr : get<1>(align))
1184 write_range(stream_it, detail::get_cigar_string(std::forward<align_type>(align), offset, off_end));
1191 stream << separator;
1195 write_range(stream_it, (header.ref_ids())[get<0>(mate)]);
1199 if (get<0>(mate).has_value())
1202 write_range(stream_it, header.ref_ids()[get<0>(mate).value_or(0)]);
1208 write_range(stream_it, get<0>(mate));
1211 stream << separator;
1216 stream << (get<1>(mate).value_or(-1) + 1) << separator;
1220 stream << get<1>(mate) << separator;
1223 stream << get<2>(mate) << separator;
1225 write_range(stream_it, std::forward<seq_type>(
seq));
1227 stream << separator;
1229 write_range(stream_it, std::forward<qual_type>(qual));
1231 write_tag_fields(stream, std::forward<tag_dict_type>(tag_dict), separator);
1241 bool written_header{
false};
1250 template <
typename stream_it_t,
typename field_type>
1254 void write_range(stream_it_t & stream_it, field_type && field_value)
1268 template <
typename stream_it_t>
1269 void write_range(stream_it_t & stream_it,
char const *
const field_value)
1279 template <
typename stream_t, Arithmetic field_type>
1280 void write_field(stream_t & stream, field_type field_value)
1284 stream << static_cast<int16_t>(field_value);
1286 stream << field_value;
1296 template <
typename stream_t>
1297 void write_tag_fields(stream_t & stream,
sam_tag_dictionary const & tag_dict,
char const separator)
1299 auto stream_variant_fn = [
this, &stream] (
auto && arg)
1309 if (arg.begin() != arg.end())
1311 for (
auto it = arg.begin(); it != (arg.end() - 1); ++it)
1313 write_field(stream, *it);
1317 write_field(stream, *(arg.end() - 1));
1322 for (
auto & [tag, variant] : tag_dict)
1324 stream << separator;
1326 char char0 = tag / 256;
1327 char char1 = tag % 256;
1329 stream << char0 << char1 <<
':' << detail::sam_tag_type_char[variant.index()] <<
':';
1331 if (detail::sam_tag_type_char_extra[variant.index()] !=
'\0')
1332 stream << detail::sam_tag_type_char_extra[variant.index()] <<
',';
1354 template <
typename stream_t,
typename ref_
ids_type>
1355 void write_header(stream_t & stream,
1366 !(header.
sorting ==
"unknown" ||
1367 header.
sorting ==
"unsorted" ||
1368 header.
sorting ==
"queryname" ||
1369 header.
sorting ==
"coordinate" ))
1370 throw format_error{
"SAM format error: The header.sorting member must be " 1371 "one of [unknown, unsorted, queryname, coordinate]."};
1377 throw format_error{
"SAM format error: The header.grouping member must be " 1378 "one of [none, query, reference]."};
1397 stream <<
"@HD\tVN:";
1401 stream <<
"\tSO:" << header.
sorting;
1407 stream <<
"\tGO:" << header.
grouping;
1414 stream <<
"@SQ\tSN:";
1418 stream <<
"\tLN:" << get<0>(ref_info);
1420 if (!get<1>(ref_info).empty())
1421 stream <<
"\t" << get<1>(ref_info);
1427 for (
auto const & read_group : header.
read_groups)
1430 <<
"\tID:" << get<0>(read_group);
1432 if (!get<1>(read_group).empty())
1433 stream <<
"\t" << get<1>(read_group);
1442 <<
"\tID:" << program.id;
1444 if (!program.name.empty())
1445 stream <<
"\tPN:" << program.name;
1447 if (!program.command_line_call.empty())
1448 stream <<
"\tCL:" << program.command_line_call;
1450 if (!program.previous.empty())
1451 stream <<
"\tPP:" << program.previous;
1453 if (!program.description.empty())
1454 stream <<
"\tDS:" << program.description;
1456 if (!program.version.empty())
1457 stream <<
"\tVN:" << program.version;
1463 for (
auto const & comment : header.
comments)
1465 stream <<
"@CO\t" << comment;
Provides concepts for core language types and relations that don't have concepts in C++20 (yet)...
::ranges::distance distance
Alias for ranges::distance. Returns the number of hops from first to last.
Definition: iterator:321
void assign_unaligned(aligned_seq_t &aligned_seq, unaligned_sequence_type &&unaligned_seq)
An implementation of seqan3::AlignedSequence::assign_unaligned_sequence for sequence containers...
Definition: aligned_sequence_concept.hpp:345
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (TransformationTrait shortcut).
Definition: pre.hpp:48
::ranges::next next
Alias for ranges::next. Returns the nth successor of the given iterator.
Definition: iterator:331
Provides seqan3::view::istreambuf.
auto constexpr take_until
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:599
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
The alphabet of a gap character '-'.
Definition: gap.hpp:36
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
constexpr auto zip
A range adaptor that transforms a tuple of range into a range of tuples.
Definition: ranges:948
Result type of std::from_chars.
Definition: charconv:47
::ranges::ostreambuf_iterator ostreambuf_iterator
Alias for ranges::ostreambuf_iterator. Writes successive characters onto the output stream from which...
Definition: iterator.hpp:56
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
Provides seqan3::type_list and auxiliary type traits.
auto search(queries_t &&queries, index_t const &index, configuration_t const &cfg)
Search a query or a range of queries in an index.
Definition: search.hpp:56
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf.hpp:245
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:27
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
auto constexpr take_until_or_throw_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:641
Auxiliary for pretty printing of exception messages.
Auxiliary functions for the alignment IO.
Provides std::from_chars and std::to_chars if not defined in the stl <charconv> header.
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:144
Provides seqan3::alignment_file_output_options.
The concept Integral is satisfied if and only if T is an integral type.
Provides seqan3::view::take_until and seqan3::view::take_until_or_throw.
Provides seqan3::TupleLike.
Provides seqan3::view::repeat_n.
Provides various utility functions.
std::from_chars_result from_chars(char const *first, char const *last, value_type &value, int base) noexcept
Parse a char sequence into an integral.
Definition: charconv:174
Provides various utility functions.
Provides seqan3::view::char_to.
Adaptations of concepts from the Ranges TS.
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
::ranges::advance advance
Alias for ranges::advance. Advances the iterator by the given distance.
Definition: iterator:316
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
::ranges::copy copy
Alias for ranges::copy. Copies a range of elements to a new location.
Definition: algorithm:44
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:66
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
The options type defines various option members that influence the behavior of all or some formats...
Definition: output_options.hpp:22
Whether a type behaves like a tuple.
Definition: aligned_sequence_concept.hpp:35
The cigar alphabet pairs a counter with a seqan3::cigar_op letter.
Definition: cigar.hpp:51
Provides character predicates for tokenisation.
The generic alphabet concept that covers most data types used in ranges.
Provides seqan3::ostream and seqan3::ostreambuf iterator.
auto constexpr take_until_or_throw
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:613
T back_inserter(T... args)
Adaptations of algorithms from the Ranges TS.
Resolves to std::ranges::ImplicitlyConvertibleTo<type1, type2>().
Provides seqan3::view::slice.
The (most general) container concept as defined by the standard library.
::ranges::empty empty
Alias for ranges::empty. Checks whether a range is empty.
Definition: ranges:194
Provides seqan3::view::to_char.
Requires std::detail::WeaklyEqualityComparableWitht<t1,t2>, but also that t1 and t2, as well as their common_reference_t satisfy std::EqualityComparable.
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:97
Provides various transformation traits used by the range module.
typename reference< t >::type reference_t
Shortcut for seqan3::reference (TransformationTrait shortcut).
Definition: pre.hpp:77
Static reflection for arbitrary types.
Exposes the value_type of another type.
Definition: pre.hpp:41
Provides SeqAn version macros and global variables.
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
Provides seqan3::gap_decorator.
constexpr auto transform
A range adaptor that takes a invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
auto const char_to
A view over an alphabet, given a range of characters.
Definition: char_to.hpp:70
A more refined container concept than seqan3::Container.
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:324
T emplace_back(T... args)