Skip to content

Commit 5698c75

Browse files
authored
Merge branch 'tskit-dev:main' into main
2 parents bf0f9c1 + 6fce699 commit 5698c75

38 files changed

+851
-130
lines changed

.github/workflows/python.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ jobs:
3838
- name: run JSON metadata example
3939
run: |
4040
cargo run --example json_metadata --features derive
41+
- name: run bincode metadata example
42+
run: |
43+
cargo run --example bincode_metadata --features derive
4144
- name: setup Python and run tests
4245
run: |
4346
uv venv -p ${{ matrix.python }}
4447
source .venv/bin/activate
4548
uv pip install -r python/requirements_locked_3_13.txt
49+
uv pip install python/tskit_glue
4650
python -m pytest python
4751

.github/workflows/test32bit.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ jobs:
3838
- name: Install rust tooling for 32 bit builds
3939
run: |
4040
rustup target install i686-unknown-linux-gnu
41+
- name: Install rust toolchain
42+
run: |
43+
rustup toolchain install
4144
- name: cargo check
4245
run: |
4346
cargo hack check --all-targets --target=i686-unknown-linux-gnu --feature-powerset

.github/workflows/tests.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ jobs:
3333
- uses: taiki-e/install-action@cargo-hack
3434
- run: sudo apt-get update -y
3535
if: matrix.os == 'ubuntu-24.04'
36+
- name: update toolchain
37+
run: rustup toolchain install
3638
- name: cargo check (powerset)
3739
run: cargo hack check --feature-powerset --no-dev-deps
3840
- name: cargo check examples (powerset)
@@ -76,6 +78,8 @@ jobs:
7678
components: clippy
7779
- uses: Swatinem/[email protected]
7880
- uses: taiki-e/install-action@cargo-hack
81+
- name: update toolchain
82+
run: rustup toolchain install
7983
- name: clippy (all targets, feature powerset)
8084
run: cargo hack clippy --all-targets --feature-powerset -- -D warnings
8185

@@ -85,7 +89,7 @@ jobs:
8589
strategy:
8690
matrix:
8791
rust:
88-
- 1.71.0
92+
- 1.75.0
8993
steps:
9094
- uses: actions/[email protected]
9195
with:

Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ description = "rust interface to tskit"
88
license = "MIT"
99
homepage = "https://github.com/tskit-dev/tskit-rust"
1010
repository = "https://github.com/tskit-dev/tskit-rust"
11-
rust-version = "1.71.0"
11+
rust-version = "1.75.0"
1212

1313
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1414
[lints.rust]
@@ -60,3 +60,10 @@ name = "tree_traversals"
6060
[[example]]
6161
name = "json_metadata"
6262
required-features = ["derive"]
63+
64+
[[example]]
65+
name = "bincode_metadata"
66+
required-features = ["derive"]
67+
68+
[[example]]
69+
name = "manual_metadata_encoding"

book/src/SUMMARY.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* [Metadata](./metadata.md)
2020
- [Defining metadata types in rust](./metadata_derive.md)
2121
- [Metadata and tables](./metadata_tables.md)
22-
- [Metadata schema](./metadata_schema.md)
22+
- [Metadata processing with Python](./metadata_python.md)
2323
- [Advanced topics](./metadata_advanced.md)
2424

2525
* [Error handling](./error_handling.md)

book/src/metadata_python.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Metadata processing with Python
2+
3+
## `JSON` metadata
4+
5+
If your metadata are generated in `JSON` format via `serde` (see [here](metadata_derive.md)), then the metadata are simple to access from Python.
6+
The code repository for `tskit-rust` contains examples in the `python/` subdirectory.
7+
8+
You may work with `JSON` metadata with or without a metadata schema (see [here](https://tskit.dev/tskit/docs/stable/metadata.html)).
9+
A schema is useful for data validation but there is an unfortunate inefficiency if your input to Python is a tree sequence rather than a table collection.
10+
You will have to copy the tables, add the metadata schema, and regenerate a tree sequence.
11+
See the examples mentioned above.
12+
13+
## Other formats
14+
15+
The `tskit-python` API only supports `JSON` and Python's `struct` data formats.
16+
It is useful to use a format other than `JSON` in order to minimize storage requirements.
17+
However, doing so will require that you provide a method to convert the data into a valid Python object.
18+
19+
An easy way to provide conversion methods is to use [pyo3](https://pyo3.rs) to create a small Python module to deserialize your metadata into Python objects.
20+
The `tskit-rust` code repository contains an example of this in the `python/` subdirectory.
21+
The module is shown in its entirety below:
22+
23+
```rust, noplayground, ignore
24+
{{#include ../../python/tskit_glue/src/lib.rs}}
25+
```
26+
27+
Using it in Python is just a matter of importing the module:
28+
29+
```python
30+
{{#include ../../python/test_bincode_metadata.py}}
31+
```

book/src/metadata_schema.md

Lines changed: 0 additions & 17 deletions
This file was deleted.

examples/bincode_metadata.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#[derive(serde::Serialize, serde::Deserialize, tskit::metadata::MutationMetadata)]
2+
#[serializer("bincode")]
3+
struct MutationMetadata {
4+
effect_size: f64,
5+
dominance: f64,
6+
}
7+
8+
#[derive(serde::Serialize, serde::Deserialize, tskit::metadata::IndividualMetadata)]
9+
#[serializer("bincode")]
10+
struct IndividualMetadata {
11+
name: String,
12+
phenotypes: Vec<i32>,
13+
}
14+
15+
fn main() {
16+
let ts = make_treeseq().unwrap();
17+
ts.dump("with_bincode_metadata.trees", 0).unwrap();
18+
}
19+
20+
fn make_tables() -> anyhow::Result<tskit::TableCollection> {
21+
let mut tables = tskit::TableCollection::new(100.0)?;
22+
let pop0 = tables.add_population()?;
23+
let ind0 = tables.add_individual_with_metadata(
24+
0,
25+
None,
26+
None,
27+
&IndividualMetadata {
28+
name: "Jerome".to_string(),
29+
phenotypes: vec![0, 1, 2, 0],
30+
},
31+
)?;
32+
let node0 = tables.add_node(tskit::NodeFlags::new_sample(), 0.0, pop0, ind0)?;
33+
let site0 = tables.add_site(50.0, Some("A".as_bytes()))?;
34+
let _ = tables.add_mutation_with_metadata(
35+
site0,
36+
node0,
37+
tskit::MutationId::NULL,
38+
1.0,
39+
Some("G".as_bytes()),
40+
&MutationMetadata {
41+
effect_size: -1e-3,
42+
dominance: 0.1,
43+
},
44+
)?;
45+
tables.build_index()?;
46+
Ok(tables)
47+
}
48+
49+
fn make_treeseq() -> anyhow::Result<tskit::TreeSequence> {
50+
Ok(make_tables()?.tree_sequence(0)?)
51+
}

examples/haploid_wright_fisher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ fn stress_test_total_branch_length() {
156156
// stress test the branch length fn b/c it is not a trivial
157157
// wrapper around the C API.
158158
{
159-
use streaming_iterator::StreamingIterator;
159+
use tskit::StreamingIterator;
160160
let mut x = f64::NAN;
161161
if let Ok(mut tree_iter) = ts.tree_iterator(0) {
162162
// We will only do the first tree to save time.
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
use core::str;
2+
3+
use tskit::metadata::MetadataRoundtrip;
4+
5+
struct MutationMetadata {
6+
effect_size: f64,
7+
dominance: f64,
8+
}
9+
10+
impl MetadataRoundtrip for MutationMetadata {
11+
fn encode(&self) -> Result<Vec<u8>, tskit::metadata::MetadataError> {
12+
let mut rv = vec![];
13+
rv.extend_from_slice(&self.effect_size.to_le_bytes());
14+
rv.extend_from_slice(&self.dominance.to_le_bytes());
15+
Ok(rv)
16+
}
17+
18+
fn decode(md: &[u8]) -> Result<Self, tskit::metadata::MetadataError>
19+
where
20+
Self: Sized,
21+
{
22+
let slice: [u8; 8] = md[0..8].try_into().unwrap();
23+
let effect_size = f64::from_le_bytes(slice);
24+
let slice: [u8; 8] = md[8..].try_into().unwrap();
25+
let dominance = f64::from_le_bytes(slice);
26+
Ok(Self {
27+
effect_size,
28+
dominance,
29+
})
30+
}
31+
}
32+
33+
impl tskit::metadata::MutationMetadata for MutationMetadata {}
34+
35+
struct IndividualMetadata {
36+
name: String,
37+
phenotypes: Vec<i32>,
38+
}
39+
40+
impl MetadataRoundtrip for IndividualMetadata {
41+
fn encode(&self) -> Result<Vec<u8>, tskit::metadata::MetadataError> {
42+
let mut rv = vec![];
43+
rv.extend_from_slice(self.name.len().to_le_bytes().as_slice());
44+
rv.extend_from_slice(self.name.as_bytes());
45+
rv.extend_from_slice(self.phenotypes.len().to_le_bytes().as_slice());
46+
for i in self.phenotypes.iter() {
47+
rv.extend_from_slice(i.to_le_bytes().as_slice());
48+
}
49+
Ok(rv)
50+
}
51+
fn decode(md: &[u8]) -> Result<Self, tskit::metadata::MetadataError>
52+
where
53+
Self: Sized,
54+
{
55+
let size: [u8; std::mem::size_of::<usize>()] =
56+
md[0..std::mem::size_of::<usize>()].try_into().unwrap();
57+
let size = usize::from_le_bytes(size);
58+
let md = &md[std::mem::size_of::<usize>()..];
59+
let name = str::from_utf8(&md[0..size]).unwrap().to_string();
60+
let md = &md[size..];
61+
let md = &md[std::mem::size_of::<usize>()..];
62+
let mut phenotypes = vec![];
63+
// NOTE: production code would want to validate that
64+
// the remaining number of bytes are correct
65+
let chunks = md.chunks_exact(std::mem::size_of::<i32>());
66+
for c in chunks {
67+
// Unwrap b/c the conversion cannot fail b/c the chunk size is correct!
68+
let a: [u8; std::mem::size_of::<i32>()] = c.try_into().unwrap();
69+
phenotypes.push(i32::from_le_bytes(a));
70+
}
71+
Ok(Self { name, phenotypes })
72+
}
73+
}
74+
75+
impl tskit::metadata::IndividualMetadata for IndividualMetadata {}
76+
77+
fn main() {
78+
let ts = make_treeseq().unwrap();
79+
ts.dump("with_manual_metadata.trees", 0).unwrap();
80+
}
81+
82+
fn make_tables() -> anyhow::Result<tskit::TableCollection> {
83+
let mut tables = tskit::TableCollection::new(100.0)?;
84+
let pop0 = tables.add_population()?;
85+
let ind0 = tables.add_individual_with_metadata(
86+
0,
87+
None,
88+
None,
89+
&IndividualMetadata {
90+
name: "Jerome".to_string(),
91+
phenotypes: vec![0, 1, 2, 0],
92+
},
93+
)?;
94+
let node0 = tables.add_node(tskit::NodeFlags::new_sample(), 0.0, pop0, ind0)?;
95+
let site0 = tables.add_site(50.0, Some("A".as_bytes()))?;
96+
let _ = tables.add_mutation_with_metadata(
97+
site0,
98+
node0,
99+
tskit::MutationId::NULL,
100+
1.0,
101+
Some("G".as_bytes()),
102+
&MutationMetadata {
103+
effect_size: -1e-3,
104+
dominance: 0.1,
105+
},
106+
)?;
107+
tables.build_index()?;
108+
Ok(tables)
109+
}
110+
111+
fn make_treeseq() -> anyhow::Result<tskit::TreeSequence> {
112+
Ok(make_tables()?.tree_sequence(0)?)
113+
}
114+
115+
/// Encoding and then decoding mutation metadata must reproduce both fields.
#[test]
fn test_mutation_metadata_roundtrip() {
    let original = MutationMetadata {
        effect_size: 0.1,
        dominance: 0.25,
    };
    let bytes = original.encode().unwrap();
    let MutationMetadata {
        effect_size,
        dominance,
    } = MutationMetadata::decode(&bytes).unwrap();
    assert_eq!(original.effect_size, effect_size);
    assert_eq!(original.dominance, dominance);
}
126+
127+
/// Encoding and then decoding individual metadata must reproduce the name
/// and the phenotype list.
#[test]
fn test_individual_metadata_roundtrip() {
    let original = IndividualMetadata {
        name: String::from("Jerome"),
        phenotypes: vec![10, 9],
    };
    let bytes = original.encode().unwrap();
    let IndividualMetadata { name, phenotypes } = IndividualMetadata::decode(&bytes).unwrap();
    assert_eq!(original.name, name);
    assert_eq!(original.phenotypes, phenotypes);
}

0 commit comments

Comments
 (0)