Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions include/seqan3/io/sam_file/detail/format_sam_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <seqan3/io/detail/misc.hpp>
#include <seqan3/io/sam_file/detail/cigar.hpp>
#include <seqan3/io/sam_file/header.hpp>
#include <seqan3/io/sam_file/input_options.hpp>
#include <seqan3/io/sam_file/output_format_concept.hpp>
#include <seqan3/utility/detail/type_name_as_string.hpp>
#include <seqan3/utility/views/repeat_n.hpp>
Expand Down Expand Up @@ -82,10 +83,11 @@ class format_sam_base
template <arithmetic arithmetic_target_type>
void read_arithmetic_field(std::string_view const & str, arithmetic_target_type & arithmetic_target);

template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type, typename seq_legal_alph_type>
void read_header(stream_view_type && stream_view,
sam_file_header<ref_ids_type> & hdr,
ref_seqs_type & /*ref_id_to_pos_map*/);
ref_seqs_type & /*ref_id_to_pos_map*/,
sam_file_input_options<seq_legal_alph_type> const & options);

template <typename stream_t, typename header_type>
void write_header(stream_t & stream, sam_file_output_options const & options, header_type & header);
Expand Down Expand Up @@ -258,6 +260,7 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str,
* \tparam stream_view_type The type of the stream as a view.
* \param[in, out] stream_view The stream view to iterate over.
* \param[in, out] hdr The header (passed by reference) in which the parsed values are stored.
* \param[in] options The options to alter the parsing process.
*
* \throws seqan3::format_error if any unexpected character or format is encountered.
*
Expand All @@ -270,10 +273,11 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str,
* not in a correct state (e.g. required fields are not given), but throwing might occur downstream of the actual
* error.
*/
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type, typename seq_legal_alph_type>
inline void format_sam_base::read_header(stream_view_type && stream_view,
sam_file_header<ref_ids_type> & hdr,
ref_seqs_type & /*ref_id_to_pos_map*/)
ref_seqs_type & /*ref_id_to_pos_map*/,
sam_file_input_options<seq_legal_alph_type> const & options)
{
auto it = std::ranges::begin(stream_view);
auto end = std::ranges::end(stream_view);
Expand Down Expand Up @@ -335,9 +339,13 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
read_forward_range_field(string_buffer, value);
};

auto print_cerr_of_unspported_tag = [](char const * const header_tag, std::array<char, 2> raw_tag)
auto print_cerr_of_unspported_tag = [&options](char const * const header_tag, std::array<char, 2> raw_tag)
{
std::cerr << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0] << raw_tag[1] << '\n';
if (options.stream_warnings_to == nullptr)
return;

*options.stream_warnings_to << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0]
<< raw_tag[1] << '\n';
};

while (it != end && is_char<'@'>(*it))
Expand Down
41 changes: 20 additions & 21 deletions include/seqan3/io/sam_file/format_bam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class format_bam : private detail::format_sam_base
typename e_value_type,
typename bit_score_type>
void read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
sam_file_input_options<seq_legal_alph_type> const & options,
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
Expand Down Expand Up @@ -260,25 +260,24 @@ template <typename stream_type, // constraints checked by file
typename tag_dict_type,
typename e_value_type,
typename bit_score_type>
inline void
format_bam::read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
seq_type & seq,
qual_type & qual,
id_type & id,
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
mate_type & mate,
tag_dict_type & tag_dict,
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
inline void format_bam::read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & options,
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
seq_type & seq,
qual_type & qual,
id_type & id,
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
mate_type & mate,
tag_dict_type & tag_dict,
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
{
static_assert(detail::decays_to_ignore_v<ref_offset_type>
|| detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
Expand Down Expand Up @@ -308,7 +307,7 @@ format_bam::read_alignment_record(stream_type & stream,
read_integral_byte_field(stream_view, l_text);

if (l_text > 0) // header text is present
read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs);
read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs, options);

read_integral_byte_field(stream_view, n_ref);

Expand Down
41 changes: 20 additions & 21 deletions include/seqan3/io/sam_file/format_sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class format_sam : protected detail::format_sam_base
typename e_value_type,
typename bit_score_type>
void read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
sam_file_input_options<seq_legal_alph_type> const & options,
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
Expand Down Expand Up @@ -354,25 +354,24 @@ template <typename stream_type, // constraints checked by file
typename tag_dict_type,
typename e_value_type,
typename bit_score_type>
inline void
format_sam::read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
seq_type & seq,
qual_type & qual,
id_type & id,
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
mate_type & mate,
tag_dict_type & tag_dict,
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
inline void format_sam::read_alignment_record(stream_type & stream,
sam_file_input_options<seq_legal_alph_type> const & options,
ref_seqs_type & ref_seqs,
sam_file_header<ref_ids_type> & header,
stream_pos_type & position_buffer,
seq_type & seq,
qual_type & qual,
id_type & id,
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
ref_id_type & ref_id,
ref_offset_type & ref_offset,
cigar_type & cigar_vector,
flag_type & flag,
mapq_type & mapq,
mate_type & mate,
tag_dict_type & tag_dict,
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
{
static_assert(detail::decays_to_ignore_v<ref_offset_type>
|| detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
Expand All @@ -389,7 +388,7 @@ format_sam::read_alignment_record(stream_type & stream,
// -------------------------------------------------------------------------------------------------------------
if (is_char<'@'>(*stream_it)) // we always read the header if present
{
read_header(stream_view, header, ref_seqs);
read_header(stream_view, header, ref_seqs, options);

if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records
return;
Expand Down
14 changes: 13 additions & 1 deletion include/seqan3/io/sam_file/input_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

#pragma once

#include <iostream>

#include <seqan3/core/platform.hpp>

namespace seqan3
Expand All @@ -22,7 +24,17 @@ namespace seqan3
template <typename sequence_legal_alphabet>
struct sam_file_input_options
{
    /*!\brief The stream to write warnings to. Defaults to std::cerr.
     * \details
     * The pointer is not owning: the options never destroy the stream, and the stream must stay alive for as long
     * as warnings may be written to it. Set the pointer to `nullptr` to suppress warnings.
     *
     * ### Example
     * \include test/snippet/io/sam_file/sam_file_input_options.cpp
     * Output to std::cerr:
     * \include test/snippet/io/sam_file/sam_file_input_options.err
     * Output to std::cout:
     * \include test/snippet/io/sam_file/sam_file_input_options.out
     * \experimentalapi{Experimental since version 3.4.}
     */
    // Plain `&` is sufficient (std::ostream does not overload unary `&`) and avoids depending on <memory>,
    // which this header does not include.
    std::ostream * stream_warnings_to{&std::cerr};
};

} // namespace seqan3
85 changes: 85 additions & 0 deletions test/snippet/io/sam_file/sam_file_input_options.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0

#include <filesystem>
#include <fstream>
#include <sstream>
#include <system_error>

#include <seqan3/io/sam_file/input.hpp>

// A helper struct that creates an empty file in the system's temporary directory and removes it again when the
// object goes out of scope. The file name is fixed ("warnings.txt"), so all instances refer to the same file.
// The type is neither copyable nor movable: exactly one object is responsible for removing the file.
struct temporary_file
{
    // Location of the managed file.
    std::filesystem::path const path{std::filesystem::temp_directory_path() / "warnings.txt"};

    temporary_file()
    {
        std::ofstream file{path}; // Create file
    }
    temporary_file(temporary_file const &) = delete;
    temporary_file & operator=(temporary_file const &) = delete;
    temporary_file(temporary_file &&) = delete;
    temporary_file & operator=(temporary_file &&) = delete;
    ~temporary_file()
    {
        // Use the non-throwing overload: destructors are implicitly noexcept, so an exception escaping from the
        // throwing overload of std::filesystem::remove would end the program via std::terminate.
        // Failing to clean up a temporary file is not worth aborting for, hence the error is ignored.
        std::error_code ignored_error{};
        std::filesystem::remove(path, ignored_error);
    }

    // Returns the complete current content of the file.
    std::string read_content() const
    {
        std::ifstream file{path};
        return std::string{std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
    }
};

static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0
@SQ SN:ref LN:34
)";

// Builds a SAM file input that reads `sam_file_raw` from an in-memory stream.
// The stream is handed over as an rvalue, exactly as a temporary would be.
static auto get_sam_file_input()
{
    std::istringstream raw_stream{sam_file_raw};
    return seqan3::sam_file_input{std::move(raw_stream), seqan3::format_sam{}};
}

// Shows the default behaviour: without touching the options, warnings end up on std::cerr.
void defaults_to_cerr()
{
    auto sam_in = get_sam_file_input();

    std::cerr << "Written to cerr: ";
    // Calling begin() prints to cerr: "Unsupported SAM header tag in @HD: pb"
    [[maybe_unused]] auto record_it = sam_in.begin();
}

// Shows how to send warnings to std::cout instead of std::cerr.
void redirect_to_cout()
{
    auto sam_in = get_sam_file_input();
    sam_in.options.stream_warnings_to = &std::cout; // Equivalent to `= std::addressof(std::cout);`

    std::cout << "Written to cout: ";
    // Calling begin() prints to cout: "Unsupported SAM header tag in @HD: pb"
    [[maybe_unused]] auto record_it = sam_in.begin();
}

// Shows how to send warnings to a file and reads them back afterwards.
void redirect_to_file()
{
    temporary_file tmp_file{};

    { // Inner scope to close file before reading
        std::ofstream warning_file{tmp_file.path};

        // Declared after `warning_file` so that it is destroyed first: the options only store a raw pointer to
        // the stream, hence the file input must never outlive the stream it points to.
        auto fin = get_sam_file_input();
        fin.options.stream_warnings_to = std::addressof(warning_file); // Equivalent to `= &warning_file;`
        auto it = fin.begin(); // Prints to file: "Unsupported SAM header tag in @HD: pb"
    }

    std::cout << "Written to file: " << tmp_file.read_content();
}

// Shows how to switch the warning off: with a null stream pointer nothing is printed.
void silence_warnings()
{
    auto sam_in = get_sam_file_input();
    sam_in.options.stream_warnings_to = nullptr;

    // Calling begin() emits no warning
    [[maybe_unused]] auto record_it = sam_in.begin();
}

// Runs every scenario once. The call order determines the order of the lines in the expected output files
// (`sam_file_input_options.err` for std::cerr, `sam_file_input_options.out` for std::cout).
int main()
{
defaults_to_cerr();
redirect_to_cout();
redirect_to_file();
silence_warnings();
}
1 change: 1 addition & 0 deletions test/snippet/io/sam_file/sam_file_input_options.err
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Written to cerr: Unsupported SAM header tag in @HD: pb
3 changes: 3 additions & 0 deletions test/snippet/io/sam_file/sam_file_input_options.err.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
SPDX-License-Identifier: CC0-1.0
2 changes: 2 additions & 0 deletions test/snippet/io/sam_file/sam_file_input_options.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Written to cout: Unsupported SAM header tag in @HD: pb
Written to file: Unsupported SAM header tag in @HD: pb
3 changes: 3 additions & 0 deletions test/snippet/io/sam_file/sam_file_input_options.out.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
SPDX-License-Identifier: CC0-1.0
7 changes: 7 additions & 0 deletions test/unit/io/sam_file/format_bam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ struct sam_file_read<seqan3::format_bam> : public sam_file_data
'\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};

std::string unknown_tag_header{
'\x42', '\x41', '\x4d', '\x01', '\x25', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
'\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e',
'\x30', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09',
'\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00',
'\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};

std::string big_header_input{
'\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
'\x3A', '\x31', '\x2E', '\x36', '\x09', '\x53', '\x4F', '\x3A', '\x63', '\x6F', '\x6F', '\x72', '\x64', '\x69',
Expand Down
5 changes: 5 additions & 0 deletions test/unit/io/sam_file/format_sam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ struct sam_file_read<seqan3::format_sam> : public sam_file_data
std::string minimal_header{
R"(@HD VN:1.6
@SQ SN:ref LN:34
)"};

std::string unknown_tag_header{
R"(@HD VN:1.6 pb:5.0.0
@SQ SN:ref LN:34
)"};

std::string big_header_input{
Expand Down
Loading