From e7f7177391bb42dbc919916fe9282761648cf369 Mon Sep 17 00:00:00 2001 From: Kai Zhang Date: Mon, 30 Sep 2024 17:39:31 +0800 Subject: [PATCH] work on seqspec --- .gitignore | 2 +- Cargo.toml | 7 + docs/api.rst | 2 + precellar/Cargo.toml | 3 +- precellar/src/align.rs | 10 +- precellar/src/io.rs | 38 ++++- precellar/src/lib.rs | 1 - python/Cargo.toml | 6 +- python/src/lib.rs | 34 +++- python/src/pyseqspec.rs | 135 +++++++++++++++ seqspec/Cargo.toml | 10 ++ .../src/seqspec.rs => seqspec/src/lib.rs | 159 ++++++++++-------- 12 files changed, 314 insertions(+), 93 deletions(-) create mode 100644 Cargo.toml create mode 100644 python/src/pyseqspec.rs create mode 100644 seqspec/Cargo.toml rename precellar/src/seqspec.rs => seqspec/src/lib.rs (82%) diff --git a/.gitignore b/.gitignore index dd297c2..7289f4e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -*/target +target *.gz # Byte-compiled / optimized / DLL files diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..af5cd0e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[workspace] +members = ["seqspec", "precellar", "python"] +resolver = "2" + +[workspace.dependencies] +seqspec = { version = "0.1", path = "seqspec" } +precellar = { version = "0.1", path = "precellar" } \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index a399b2e..e0ae857 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -10,6 +10,8 @@ methods. .. autosummary:: :toctree: _autosummary + SeqSpec + make_genome_index align make_fragment diff --git a/precellar/Cargo.toml b/precellar/Cargo.toml index 4424351..e33b3d1 100644 --- a/precellar/Cargo.toml +++ b/precellar/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] anyhow = "1.0" bed-utils = "0.5.1" -#bwa = { git = "https://github.com/regulatory-genomics/bwa-rust.git", rev = "69d482501956039588f94ce9f87367d7ae8f19af" } bwa-mem2 = { git = "https://github.com/regulatory-genomics/bwa-mem2-rust.git", rev = "07eda9b9c2815ae52b3fa30b01de0e19fae31fe0" } bstr = "1.0" cached-path = "0.6" @@ -22,6 +21,6 @@ kdam = "0.5.2" rayon = "1.10" smallvec = "1.13" serde = "1.0" -serde_yaml = "0.9" +seqspec = { version = "0.1", workspace = true } regex = "1.6" zstd = { version = "0.13", features = ["zstdmt"] } \ No newline at end of file diff --git a/precellar/src/align.rs b/precellar/src/align.rs index 46b79ab..184270e 100644 --- a/precellar/src/align.rs +++ b/precellar/src/align.rs @@ -1,5 +1,5 @@ use crate::barcode::{BarcodeCorrector, Whitelist}; -use crate::seqspec::{Assay, Modality, Read, Region, RegionType, SequenceType}; +use seqspec::{Assay, Modality, Read, Region, RegionType, SequenceType}; use crate::qc::{AlignQC, Metrics}; use bstr::BString; @@ -207,7 +207,7 @@ impl FastqProcessor { pub fn gen_raw_fastq_records(&self) -> FastqRecords { let modality = self.modality(); let data = self.assay.get_index_of(modality) - .map(|(read, regions)| (read, regions, read.read_fastq(self.base_dir.clone()))); + .map(|(read, regions)| (read, regions, crate::io::read_fastq(read, self.base_dir.clone()))); FastqRecords::new(data) } @@ -219,7 +219,7 @@ impl FastqProcessor { .find(|(_, index)| index.into_iter().any(|x| x.0.region_type.is_barcode())).unwrap(); let range = index.into_iter().find(|x| x.0.region_type.is_barcode()).unwrap().1; - read.read_fastq(&self.base_dir).records().for_each(|record| { + crate::io::read_fastq(read, &self.base_dir).records().for_each(|record| { let mut record = record.unwrap(); record = slice_fastq_record(&record, range.start as usize, range.end as usize); if read.is_reverse() { @@ -242,7 +242,7 @@ impl FastqProcessor { } let region = regions[0]; if region.sequence_type == SequenceType::Onlist { - Ok(Whitelist::new(region.onlist.as_ref().unwrap().read()?)) + Ok(Whitelist::new(crate::io::read_onlist(region.onlist.as_ref().unwrap())?)) } else { Ok(Whitelist::empty()) } @@ -270,7 +270,7 @@ impl FastqRecords { { let records = iter.map(|(read, regions, reader)| FastqRecord { - id: read.id().to_string(), + id: read.read_id.to_string(), is_reverse: read.is_reverse(), subregion: regions.into_iter().filter_map(|x| { let region_type = x.0.region_type; diff --git a/precellar/src/io.rs b/precellar/src/io.rs index 1af315e..260009f 100644 --- a/precellar/src/io.rs +++ b/precellar/src/io.rs @@ -1,5 +1,7 @@ -use std::{fs::File, io::{BufWriter, Write}, path::{Path, PathBuf}, str::FromStr}; +use std::{fs::File, io::{BufRead, BufReader, BufWriter, Write}, path::{Path, PathBuf}, str::FromStr}; use anyhow::{Context, Result, anyhow}; +use noodles::fastq; +use cached_path::Cache; /// Open a file, possibly compressed. Supports gzip and zstd. pub fn open_file_for_read>(file: P) -> Box { @@ -80,4 +82,38 @@ pub fn open_file_for_write>( }, }; Ok(writer) +} + +pub fn read_fastq>(read: &seqspec::Read, base_dir: P) -> fastq::Reader { + let base_dir = base_dir.as_ref().to_path_buf(); + let reader = multi_reader::MultiReader::new( + read.files.clone().unwrap().into_iter().map(move |file| open_file(&file, &base_dir)) + ); + fastq::Reader::new(BufReader::new(reader)) +} + +pub fn read_onlist(onlist: &seqspec::Onlist) -> Result> { + let cache = Cache::new()?; + let file = cache.cached_path(&onlist.url)?; + let reader = std::io::BufReader::new(open_file_for_read(file)); + Ok(reader.lines().map(|x| x.unwrap()).collect()) +} + +fn open_file>(file: &seqspec::File, base_dir: P) -> Box { + match file.urltype { + seqspec::UrlType::Local => { + let mut path = PathBuf::from(&file.url); + path = if path.is_absolute() { + path + } else { + base_dir.as_ref().join(path) + }; + Box::new(open_file_for_read(path)) + } + _ => { + let cache = Cache::new().unwrap(); + let file = cache.cached_path(&file.url).unwrap(); + Box::new(open_file_for_read(file)) + } + } } \ No newline at end of file diff --git a/precellar/src/lib.rs b/precellar/src/lib.rs index 6718006..b06dfcb 100644 --- a/precellar/src/lib.rs +++ b/precellar/src/lib.rs @@ -1,4 +1,3 @@ -pub mod seqspec; pub mod barcode; pub mod align; pub mod fragment; diff --git a/python/Cargo.toml b/python/Cargo.toml index 4acdeb1..c160fe6 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -10,13 +10,15 @@ crate-type = ["cdylib"] [dependencies] anyhow = "1.0" -#bwa = { git = "https://github.com/regulatory-genomics/bwa-rust.git", rev = "69d482501956039588f94ce9f87367d7ae8f19af" } bwa-mem2 = { git = "https://github.com/regulatory-genomics/bwa-mem2-rust.git", rev = "07eda9b9c2815ae52b3fa30b01de0e19fae31fe0" } bstr = "1.0" either = "1.13" itertools = "0.13" noodles = { version = "0.80", features = ["core", "fastq", "bam", "sam", "bgzf"] } -precellar = { path = "../precellar" } +seqspec = { version = "0.1", workspace = true } +serde_yaml = "0.9" +termtree = "0.5" +precellar = { version = "0.1", workspace = true } regex = "1.6" log = "0.4" env_logger = "0.11" diff --git a/python/src/lib.rs b/python/src/lib.rs index 6027f42..36841d6 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,4 +1,5 @@ mod utils; +mod pyseqspec; use std::{collections::HashMap, path::PathBuf, str::FromStr}; use bwa_mem2::{AlignerOpts, BurrowsWheelerAligner, FMIndex, PairedEndStats}; @@ -13,8 +14,10 @@ use ::precellar::{ align::{Alinger, FastqProcessor, NameCollatedRecords}, fragment::FragmentGenerator, io::{open_file_for_write, Compression}, - qc::{FragmentQC, Metrics, AlignQC}, seqspec::{Assay, Modality}, + qc::{FragmentQC, Metrics, AlignQC}, }; +use pyseqspec::SeqSpec; +use seqspec::{Assay, Modality}; #[cfg(not(target_env = "msvc"))] use tikv_jemallocator::Jemalloc; @@ -46,8 +49,9 @@ fn make_genome_index( /// Parameters /// ---------- /// -/// seqspec: Path -/// File path to the sequencing specification, see https://github.com/pachterlab/seqspec. +/// seqspec: SeqSpec | Path +/// A SeqSpec object or file path to the yaml sequencing specification file, see +/// https://github.com/pachterlab/seqspec. /// genom_index: Path /// File path to the genome index. The genome index can be created by the `make_genome_index` function. /// modality: str @@ -95,7 +99,7 @@ fn make_genome_index( )] fn align( py: Python<'_>, - seqspec: PathBuf, + seqspec: Bound<'_, PyAny>, genome_index: PathBuf, modality: &str, output_bam: Option, @@ -108,11 +112,23 @@ fn align( temp_dir: Option, num_threads: u32, ) -> Result> { - let modality = Modality::from_str(modality).unwrap(); - assert!(output_bam.is_some() || output_fragment.is_some(), "either output_bam or output_fragment must be provided"); - let spec = Assay::from_path(&seqspec).unwrap(); + let modality = Modality::from_str(modality).unwrap(); + let spec; + let base_dir; + match seqspec.extract::() { + Ok(p) => { + spec = Assay::from_path(&p).unwrap(); + base_dir = p.parent().unwrap().to_path_buf(); + }, + _ => { + let s: PyRef = seqspec.extract()?; + spec = s.0.clone(); + base_dir = ".".into(); + } + } + let aligner = BurrowsWheelerAligner::new( FMIndex::read(genome_index).unwrap(), AlignerOpts::default().with_n_threads(num_threads as usize), @@ -121,7 +137,7 @@ fn align( let header = aligner.header(); let mut processor = FastqProcessor::new(spec, aligner).with_modality(modality) .with_barcode_correct_prob(0.9) - .with_base_dir(seqspec.parent().unwrap()); + .with_base_dir(base_dir); let mut fragment_qc = FragmentQC::default(); mito_dna.into_iter().for_each(|x| { processor.add_mito_dna(&x); @@ -255,6 +271,8 @@ fn precellar(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; + m.add_class::().unwrap(); + m.add_function(wrap_pyfunction!(make_genome_index, m)?)?; m.add_function(wrap_pyfunction!(align, m)?)?; m.add_function(wrap_pyfunction!(make_fragment, m)?)?; diff --git a/python/src/pyseqspec.rs b/python/src/pyseqspec.rs new file mode 100644 index 0000000..caeb97c --- /dev/null +++ b/python/src/pyseqspec.rs @@ -0,0 +1,135 @@ +use std::{path::PathBuf, str::FromStr}; + +use pyo3::prelude::*; +use seqspec::{Assay, File, Modality, Read, Region, Strand, UrlType}; +use anyhow::Result; +use termtree::Tree; + +/** A SeqSpec object. + + A SeqSpec object is used to annotate sequencing libraries produced by genomics assays. + Genomic library structure depends on both the assay and sequencer (and kits) used to + generate and bind the assay-specific construct to the sequencing adapters to generate + a sequencing library. SeqSpec is specific to both a genomics assay and sequencer + and provides a standardized format for describing the structure of sequencing + libraries and the resulting sequencing reads. See https://github.com/pachterlab/seqspec for more details. + + Parameters + ---------- + path + Path to the AnnData file. + + See Also + -------- + align +*/ +#[pyclass] +#[repr(transparent)] +pub struct SeqSpec(pub(crate) Assay); + +#[pymethods] +impl SeqSpec { + #[new] + #[pyo3(signature = (path))] + pub fn new(path: &str) -> Result { + let assay = Assay::from_path(path)?; + Ok(SeqSpec(assay)) + } + + /// Add a fastq file containing reads to the AnnData object. + /// + /// Parameters + /// ---------- + /// read_id: str + /// The id of the read. + /// modality: str + /// The modality of the read. + /// primer_id: str + /// The id of the primer. + /// is_reverse: bool + /// Whether the read is reverse. + /// fastq: Path | list[Path] + /// The path to the fastq file containing the reads. + #[pyo3( + signature = (read_id, *, modality, primer_id, is_reverse, fastq), + text_signature = "($self, read_id, *, modality, primer_id, is_reverse, fastq)", + )] + pub fn add_read( + &mut self, + read_id: &str, + modality: &str, + primer_id: &str, + is_reverse: bool, + fastq: Bound<'_, PyAny>, + ) -> Result<()> { + let fastq = if fastq.is_instance_of::() { + fastq.extract::>()? + } else { + vec![fastq.extract::()?] + }; + + let assay = &mut self.0; + + let mut reads = assay.sequence_spec.take().unwrap_or(Vec::new()); + reads = reads.into_iter().filter(|r| r.read_id != read_id).collect(); + + let mut read = Read::default(); + if is_reverse { + read.strand = Strand::Neg; + } else { + read.strand = Strand::Pos; + } + read.modality = Modality::from_str(modality)?; + read.primer_id = primer_id.to_string(); + read.files = Some(fastq.into_iter().map(|path| make_file_path(path)).collect::>>()?); + + reads.push(read); + assay.sequence_spec = Some(reads); + Ok(()) + } + + /* + /// Identify the position of elements in a spec. + /// + /// Parameters + /// ---------- + /// read_id: str + /// The id of the read. + #[pyo3( + signature = (modality=None), + text_signature = "($self, modality=None)", + )] + pub fn index(&mut self, modality: &str) -> Result<()> { + */ + + #[pyo3(text_signature = "($self)")] + pub fn to_yaml(&self) -> String { + serde_yaml::to_string(&self.0).unwrap() + } + + fn __repr__(&self) -> String { + let assay = &self.0; + let tree = Tree::new("".to_string()) + .with_leaves(assay.library_spec.as_ref().unwrap_or(&Vec::new()).iter().map(|region| build_tree(region))); + format!("{}", tree) + } +} + +fn build_tree(region: &Region) -> Tree { + Tree::new(region.region_id.clone()) + .with_leaves(region.regions.as_ref().unwrap_or(&Vec::new()) + .iter().map(|child| build_tree(child))) +} + +fn make_file_path(path: PathBuf) -> Result { + let file = std::fs::File::open(&path)?; + Ok(File { + file_id: path.file_name().unwrap().to_str().unwrap().to_string(), + filename: path.file_name().unwrap().to_str().unwrap().to_string(), + filetype: "fastq".to_string(), + filesize: file.metadata()?.len(), + url: path.to_str().unwrap().to_string(), + urltype: UrlType::Local, + md5: "0".to_string(), + }) +} \ No newline at end of file diff --git a/seqspec/Cargo.toml b/seqspec/Cargo.toml new file mode 100644 index 0000000..b503080 --- /dev/null +++ b/seqspec/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "seqspec" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0" +log = "0.4" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" \ No newline at end of file diff --git a/precellar/src/seqspec.rs b/seqspec/src/lib.rs similarity index 82% rename from precellar/src/seqspec.rs rename to seqspec/src/lib.rs index d452dd9..a29ef22 100644 --- a/precellar/src/seqspec.rs +++ b/seqspec/src/lib.rs @@ -3,13 +3,9 @@ use serde::{Deserialize, Deserializer, Serialize}; use serde_yaml::{self, Value}; use std::{fs, ops::Range, str::FromStr}; use anyhow::{bail, Result}; -use noodles::fastq; -use std::{io::{BufRead, BufReader}, path::{Path, PathBuf}}; -use cached_path::Cache; +use std::path::Path; -use crate::io::open_file_for_read; - -#[derive(Deserialize, Serialize, Debug)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Assay { pub seqspec_version: String, pub assay_id: String, @@ -27,6 +23,27 @@ pub struct Assay { pub library_spec: Option>, } +impl Default for Assay { + fn default() -> Self { + Self { + seqspec_version: "0.3.0".to_string(), + assay_id: "".to_string(), + name: "".to_string(), + doi: "".to_string(), + date: "".to_string(), + description: "".to_string(), + modalities: Vec::new(), + lib_struct: "".to_string(), + library_protocol: LibraryProtocol::Standard("Custom".to_string()), + library_kit: LibraryKit::Standard("Custom".to_string()), + sequence_protocol: SequenceProtocol::Standard("Custom".to_string()), + sequence_kit: SequenceKit::Standard("Custom".to_string()), + sequence_spec: None, + library_spec: None, + } + } +} + impl Assay { pub fn from_path>(path: P) -> Result { let yaml_str = fs::read_to_string(path)?; @@ -87,13 +104,13 @@ impl FromStr for Modality { } } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum LibraryProtocol { Standard(String), Custom(Vec), } -#[derive(Deserialize, Serialize, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)] pub struct ProtocolItem { pub protocol_id: String, pub name: Option, @@ -130,13 +147,13 @@ impl Serialize for LibraryProtocol { } } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum LibraryKit { Standard(String), Custom(Vec), } -#[derive(Deserialize, Serialize, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)] pub struct KitItem { pub kit_id: String, pub name: Option, @@ -173,7 +190,7 @@ impl Serialize for LibraryKit { } } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum SequenceProtocol { Custom(Vec), Standard(String), @@ -209,7 +226,7 @@ impl Serialize for SequenceProtocol { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Clone)] pub enum SequenceKit { Standard(String), Custom(Vec), @@ -245,9 +262,9 @@ impl Serialize for SequenceKit { } } -#[derive(Deserialize, Serialize, Debug)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Read { - read_id: String, + pub read_id: String, pub name: Option, pub modality: Modality, pub primer_id: String, @@ -257,19 +274,22 @@ pub struct Read { pub files: Option>, } -impl Read { - pub fn id(&self) -> &str { - &self.read_id - } - - pub fn read_fastq>(&self, base_dir: P) -> fastq::Reader { - let base_dir = base_dir.as_ref().to_path_buf(); - let reader = multi_reader::MultiReader::new( - self.files.clone().unwrap().into_iter().map(move |file| file.open(&base_dir)) - ); - fastq::Reader::new(BufReader::new(reader)) +impl Default for Read { + fn default() -> Self { + Self { + read_id: "".to_string(), + name: None, + modality: Modality::Dna, + primer_id: "".to_string(), + min_len: 0, + max_len: 0, + strand: Strand::Pos, + files: None, + } } +} +impl Read { fn get_index<'a>(&'a self, region: &'a Region) -> Option)>> { if region.sequence_type != SequenceType::Joined { return None; @@ -347,14 +367,14 @@ impl Read { } } -#[derive(Deserialize, Serialize, Debug)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] #[serde(rename_all = "lowercase")] pub enum Strand { Pos, Neg, } -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct File { pub file_id: String, pub filename: String, @@ -365,28 +385,7 @@ pub struct File { pub md5: String, } -impl File { - pub fn open>(&self, base_dir: P) -> Box { - match self.urltype { - UrlType::Local => { - let mut path = PathBuf::from(&self.url); - path = if path.is_absolute() { - path - } else { - base_dir.as_ref().join(path) - }; - Box::new(open_file_for_read(path)) - } - _ => { - let cache = Cache::new().unwrap(); - let file = cache.cached_path(&self.url).unwrap(); - Box::new(open_file_for_read(file)) - } - } - } -} - -#[derive(Deserialize, Serialize, Debug, Copy, Clone)] +#[derive(Deserialize, Serialize, Debug, Copy, Clone, PartialEq)] #[serde(rename_all = "lowercase")] pub enum UrlType { Local, @@ -395,10 +394,11 @@ pub enum UrlType { Https, } -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Region { pub region_id: String, pub region_type: RegionType, + pub name: String, pub sequence_type: SequenceType, pub sequence: String, pub min_len: u32, @@ -407,6 +407,22 @@ pub struct Region { pub regions: Option>, } +impl Default for Region { + fn default() -> Self { + Self { + region_id: "".to_string(), + region_type: RegionType::Named, + name: "".to_string(), + sequence_type: SequenceType::Fixed, + sequence: "".to_string(), + min_len: 0, + max_len: 0, + onlist: None, + regions: None, + } + } +} + impl Region { /// Return an iterator over all regions in the region tree. pub fn iter_regions(&self) -> impl Iterator { @@ -502,28 +518,19 @@ pub enum SequenceType { Joined, // the sequence is created from nested regions and the regions property must contain Regions } -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub struct Onlist { pub file_id: String, pub filename: String, pub filetype: String, pub filesize: u64, - url: String, + pub url: String, pub urltype: UrlType, pub location: Option, pub md5: String, } -impl Onlist { - pub fn read(&self) -> Result> { - let cache = Cache::new()?; - let file = cache.cached_path(&self.url)?; - let reader = std::io::BufReader::new(open_file_for_read(file)); - Ok(reader.lines().map(|x| x.unwrap()).collect()) - } -} - -#[derive(Deserialize, Serialize, Debug, Clone, Copy)] +#[derive(Deserialize, Serialize, Debug, Clone, Copy, PartialEq)] #[serde(rename_all = "lowercase")] pub enum Location { Local, @@ -536,20 +543,26 @@ mod tests { #[test] fn test_parse() { - let protocol: LibraryProtocol = serde_yaml::from_str("10X").unwrap(); - println!("{:?}", protocol); - - let protocol: LibraryProtocol = serde_yaml::from_str( - "- !LibProtocol - protocol_id: CG000338 Chromium Next GEM Multiome ATAC + Gene Expression Rev. D protocol (10x Genomics) - name: DogmaSeq-DIG - modality: rna" - ).unwrap(); - println!("{:?}", protocol); + let yaml_str = fs::read_to_string("tests/data/spec.yaml").expect("Failed to read file"); + let assay: Assay = serde_yaml::from_str(&yaml_str).expect("Failed to parse YAML"); + + println!("{:?}", assay); + } + + #[test] + fn test_serialize() { + fn se_de(yaml_str: &str) { + let assay: Assay = serde_yaml::from_str(&yaml_str).expect("Failed to parse YAML"); + let yaml_str_ = serde_yaml::to_string(&assay).unwrap(); + let assay_ = serde_yaml::from_str(&yaml_str_).expect("Failed to parse YAML"); + assert_eq!(assay, assay_); + } + + se_de(&fs::read_to_string("tests/data/spec.yaml").expect("Failed to read file")); } #[test] - fn test_parse_yaml() { + fn test_index() { let yaml_str = fs::read_to_string("tests/data/spec.yaml").expect("Failed to read file"); let assay: Assay = serde_yaml::from_str(&yaml_str).expect("Failed to parse YAML");