Skip to content

Commit 3fbe403

Browse files
authored
Merge pull request #3246 from eseiler/feature/sam_warnings
[FEATURE] Allow setting warning stream for sam file
2 parents c44d090 + 54532ca commit 3fbe403

12 files changed

+237
-50
lines changed

include/seqan3/io/sam_file/detail/format_sam_base.hpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <seqan3/io/detail/misc.hpp>
2222
#include <seqan3/io/sam_file/detail/cigar.hpp>
2323
#include <seqan3/io/sam_file/header.hpp>
24+
#include <seqan3/io/sam_file/input_options.hpp>
2425
#include <seqan3/io/sam_file/output_format_concept.hpp>
2526
#include <seqan3/utility/detail/type_name_as_string.hpp>
2627
#include <seqan3/utility/views/repeat_n.hpp>
@@ -82,10 +83,11 @@ class format_sam_base
8283
template <arithmetic arithmetic_target_type>
8384
void read_arithmetic_field(std::string_view const & str, arithmetic_target_type & arithmetic_target);
8485

85-
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
86+
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type, typename seq_legal_alph_type>
8687
void read_header(stream_view_type && stream_view,
8788
sam_file_header<ref_ids_type> & hdr,
88-
ref_seqs_type & /*ref_id_to_pos_map*/);
89+
ref_seqs_type & /*ref_id_to_pos_map*/,
90+
sam_file_input_options<seq_legal_alph_type> const & options);
8991

9092
template <typename stream_t, typename header_type>
9193
void write_header(stream_t & stream, sam_file_output_options const & options, header_type & header);
@@ -258,6 +260,7 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str,
258260
* \tparam stream_view_type The type of the stream as a view.
259261
* \param[in, out] stream_view The stream view to iterate over.
260262
* \param[in, out] hdr The header (passed by reference) to store the parsed values.
263+
* \param[in] options The options to alter the parsing process.
261264
*
262265
* \throws seqan3::format_error if any unexpected character or format is encountered.
263266
*
@@ -270,10 +273,11 @@ inline void format_sam_base::read_arithmetic_field(std::string_view const & str,
270273
* not in a correct state (e.g. required fields are not given), but throwing might occur downstream of the actual
271274
* error.
272275
*/
273-
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
276+
template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type, typename seq_legal_alph_type>
274277
inline void format_sam_base::read_header(stream_view_type && stream_view,
275278
sam_file_header<ref_ids_type> & hdr,
276-
ref_seqs_type & /*ref_id_to_pos_map*/)
279+
ref_seqs_type & /*ref_id_to_pos_map*/,
280+
sam_file_input_options<seq_legal_alph_type> const & options)
277281
{
278282
auto it = std::ranges::begin(stream_view);
279283
auto end = std::ranges::end(stream_view);
@@ -335,9 +339,13 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
335339
read_forward_range_field(string_buffer, value);
336340
};
337341

338-
auto print_cerr_of_unspported_tag = [](char const * const header_tag, std::array<char, 2> raw_tag)
342+
auto print_cerr_of_unspported_tag = [&options](char const * const header_tag, std::array<char, 2> raw_tag)
339343
{
340-
std::cerr << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0] << raw_tag[1] << '\n';
344+
if (options.stream_warnings_to == nullptr)
345+
return;
346+
347+
*options.stream_warnings_to << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0]
348+
<< raw_tag[1] << '\n';
341349
};
342350

343351
while (it != end && is_char<'@'>(*it))

include/seqan3/io/sam_file/format_bam.hpp

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class format_bam : private detail::format_sam_base
8383
typename e_value_type,
8484
typename bit_score_type>
8585
void read_alignment_record(stream_type & stream,
86-
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
86+
sam_file_input_options<seq_legal_alph_type> const & options,
8787
ref_seqs_type & ref_seqs,
8888
sam_file_header<ref_ids_type> & header,
8989
stream_pos_type & position_buffer,
@@ -260,25 +260,24 @@ template <typename stream_type, // constraints checked by file
260260
typename tag_dict_type,
261261
typename e_value_type,
262262
typename bit_score_type>
263-
inline void
264-
format_bam::read_alignment_record(stream_type & stream,
265-
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
266-
ref_seqs_type & ref_seqs,
267-
sam_file_header<ref_ids_type> & header,
268-
stream_pos_type & position_buffer,
269-
seq_type & seq,
270-
qual_type & qual,
271-
id_type & id,
272-
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
273-
ref_id_type & ref_id,
274-
ref_offset_type & ref_offset,
275-
cigar_type & cigar_vector,
276-
flag_type & flag,
277-
mapq_type & mapq,
278-
mate_type & mate,
279-
tag_dict_type & tag_dict,
280-
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
281-
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
263+
inline void format_bam::read_alignment_record(stream_type & stream,
264+
sam_file_input_options<seq_legal_alph_type> const & options,
265+
ref_seqs_type & ref_seqs,
266+
sam_file_header<ref_ids_type> & header,
267+
stream_pos_type & position_buffer,
268+
seq_type & seq,
269+
qual_type & qual,
270+
id_type & id,
271+
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
272+
ref_id_type & ref_id,
273+
ref_offset_type & ref_offset,
274+
cigar_type & cigar_vector,
275+
flag_type & flag,
276+
mapq_type & mapq,
277+
mate_type & mate,
278+
tag_dict_type & tag_dict,
279+
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
280+
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
282281
{
283282
static_assert(detail::decays_to_ignore_v<ref_offset_type>
284283
|| detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
@@ -308,7 +307,7 @@ format_bam::read_alignment_record(stream_type & stream,
308307
read_integral_byte_field(stream_view, l_text);
309308

310309
if (l_text > 0) // header text is present
311-
read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs);
310+
read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs, options);
312311

313312
read_integral_byte_field(stream_view, n_ref);
314313

include/seqan3/io/sam_file/format_sam.hpp

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class format_sam : protected detail::format_sam_base
165165
typename e_value_type,
166166
typename bit_score_type>
167167
void read_alignment_record(stream_type & stream,
168-
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
168+
sam_file_input_options<seq_legal_alph_type> const & options,
169169
ref_seqs_type & ref_seqs,
170170
sam_file_header<ref_ids_type> & header,
171171
stream_pos_type & position_buffer,
@@ -354,25 +354,24 @@ template <typename stream_type, // constraints checked by file
354354
typename tag_dict_type,
355355
typename e_value_type,
356356
typename bit_score_type>
357-
inline void
358-
format_sam::read_alignment_record(stream_type & stream,
359-
sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
360-
ref_seqs_type & ref_seqs,
361-
sam_file_header<ref_ids_type> & header,
362-
stream_pos_type & position_buffer,
363-
seq_type & seq,
364-
qual_type & qual,
365-
id_type & id,
366-
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
367-
ref_id_type & ref_id,
368-
ref_offset_type & ref_offset,
369-
cigar_type & cigar_vector,
370-
flag_type & flag,
371-
mapq_type & mapq,
372-
mate_type & mate,
373-
tag_dict_type & tag_dict,
374-
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
375-
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
357+
inline void format_sam::read_alignment_record(stream_type & stream,
358+
sam_file_input_options<seq_legal_alph_type> const & options,
359+
ref_seqs_type & ref_seqs,
360+
sam_file_header<ref_ids_type> & header,
361+
stream_pos_type & position_buffer,
362+
seq_type & seq,
363+
qual_type & qual,
364+
id_type & id,
365+
ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
366+
ref_id_type & ref_id,
367+
ref_offset_type & ref_offset,
368+
cigar_type & cigar_vector,
369+
flag_type & flag,
370+
mapq_type & mapq,
371+
mate_type & mate,
372+
tag_dict_type & tag_dict,
373+
e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
374+
bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
376375
{
377376
static_assert(detail::decays_to_ignore_v<ref_offset_type>
378377
|| detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
@@ -389,7 +388,7 @@ format_sam::read_alignment_record(stream_type & stream,
389388
// -------------------------------------------------------------------------------------------------------------
390389
if (is_char<'@'>(*stream_it)) // we always read the header if present
391390
{
392-
read_header(stream_view, header, ref_seqs);
391+
read_header(stream_view, header, ref_seqs, options);
393392

394393
if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // file has no records
395394
return;

include/seqan3/io/sam_file/input_options.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
#pragma once
1111

12+
#include <iostream>
13+
1214
#include <seqan3/core/platform.hpp>
1315

1416
namespace seqan3
@@ -22,7 +24,17 @@ namespace seqan3
2224
template <typename sequence_legal_alphabet>
struct sam_file_input_options
{
    /*!\brief The stream to write warnings to. Defaults to std::cerr.
     * \details
     * The pointer is non-owning: the options never create or destroy the stream they point to, so the stream
     * must stay alive for as long as warnings may be written through it.
     *
     * Set this member to `nullptr` to silence warnings.
     *
     * ### Example
     * \include test/snippet/io/sam_file/sam_file_input_options.cpp
     * Output to std::cerr:
     * \include test/snippet/io/sam_file/sam_file_input_options.err
     * Output to std::cout:
     * \include test/snippet/io/sam_file/sam_file_input_options.out
     * \experimentalapi{Experimental since version 3.4.}
     */
    // The built-in address-of operator is used on purpose: std::ostream does not overload unary `&`, and this
    // keeps the header independent of <memory> (which would be required for std::addressof).
    std::ostream * stream_warnings_to{&std::cerr};
};
2739

2840
} // namespace seqan3
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: CC0-1.0
4+
5+
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <system_error>

#include <seqan3/io/sam_file/input.hpp>
8+
9+
// A helper struct that creates a temporary file on construction and removes it again when it goes out of scope.
// The object is neither copyable nor movable, so exactly one instance is responsible for deleting the file.
// Note: the file name is fixed ("warnings.txt" inside the system's temporary directory).
struct temporary_file
{
    // Full path of the temporary file.
    std::filesystem::path const path{std::filesystem::temp_directory_path() / "warnings.txt"};

    // Creates the file at `path` (an already existing file is truncated).
    temporary_file()
    {
        std::ofstream file{path}; // Create file
    }
    temporary_file(temporary_file const &) = delete;
    temporary_file & operator=(temporary_file const &) = delete;
    temporary_file(temporary_file &&) = delete;
    temporary_file & operator=(temporary_file &&) = delete;

    // Removes the file. The std::error_code overload is used because the throwing overload would call
    // std::terminate if removal failed inside this (implicitly noexcept) destructor. Cleanup is best effort,
    // so a failure is deliberately ignored.
    ~temporary_file()
    {
        std::error_code ignored_error{};
        std::filesystem::remove(path, ignored_error);
    }

    // Returns the complete content of the file; the string is empty if the file cannot be opened or is empty.
    std::string read_content() const
    {
        std::ifstream file{path};
        return std::string{std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
    }
};
33+
34+
static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0
35+
@SQ SN:ref LN:34
36+
)";
37+
38+
static auto get_sam_file_input()
39+
{
40+
return seqan3::sam_file_input{std::istringstream{sam_file_raw}, seqan3::format_sam{}};
41+
}
42+
43+
void defaults_to_cerr()
44+
{
45+
auto fin = get_sam_file_input();
46+
std::cerr << "Written to cerr: ";
47+
auto it = fin.begin(); // Prints to cerr: "Unsupported SAM header tag in @HD: pb"
48+
}
49+
50+
void redirect_to_cout()
51+
{
52+
auto fin = get_sam_file_input();
53+
fin.options.stream_warnings_to = std::addressof(std::cout); // Equivalent to `= &std::cout;`
54+
std::cout << "Written to cout: ";
55+
auto it = fin.begin(); // Prints to cout: "Unsupported SAM header tag in @HD: pb"
56+
}
57+
58+
void redirect_to_file()
59+
{
60+
temporary_file tmp_file{};
61+
auto fin = get_sam_file_input();
62+
63+
{ // Inner scope to close file before reading
64+
std::ofstream warning_file{tmp_file.path};
65+
fin.options.stream_warnings_to = std::addressof(warning_file); // Equivalent to `= &warning_file;`
66+
auto it = fin.begin(); // Prints to file: "Unsupported SAM header tag in @HD: pb"
67+
}
68+
69+
std::cout << "Written to file: " << tmp_file.read_content();
70+
}
71+
72+
void silence_warnings()
73+
{
74+
auto fin = get_sam_file_input();
75+
fin.options.stream_warnings_to = nullptr;
76+
auto it = fin.begin(); // No warning emitted
77+
}
78+
79+
int main()
80+
{
81+
defaults_to_cerr();
82+
redirect_to_cout();
83+
redirect_to_file();
84+
silence_warnings();
85+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Written to cerr: Unsupported SAM header tag in @HD: pb
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2+
SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3+
SPDX-License-Identifier: CC0-1.0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Written to cout: Unsupported SAM header tag in @HD: pb
2+
Written to file: Unsupported SAM header tag in @HD: pb
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2+
SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3+
SPDX-License-Identifier: CC0-1.0

test/unit/io/sam_file/format_bam_test.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ struct sam_file_read<seqan3::format_bam> : public sam_file_data
3737
'\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
3838
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
3939

40+
std::string unknown_tag_header{
41+
'\x42', '\x41', '\x4d', '\x01', '\x25', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
42+
'\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e',
43+
'\x30', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09',
44+
'\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00',
45+
'\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
46+
4047
std::string big_header_input{
4148
'\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
4249
'\x3A', '\x31', '\x2E', '\x36', '\x09', '\x53', '\x4F', '\x3A', '\x63', '\x6F', '\x6F', '\x72', '\x64', '\x69',

0 commit comments

Comments
 (0)