Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: calculate cds coverage #1514

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/nextclade-web/src/components/Common/TableSlim.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import styled from 'styled-components'

export const TableSlim = styled(ReactstrapTable)`
& td {
padding: 0 0.5rem;
padding: 0.1rem 0.5rem;
}

& tr {
Expand Down
102 changes: 97 additions & 5 deletions packages/nextclade-web/src/components/Results/ColumnCoverage.tsx
Original file line number Diff line number Diff line change
@@ -1,21 +1,113 @@
import { round } from 'lodash'
import React, { useMemo } from 'react'
import { round, sortBy } from 'lodash'
import React, { useCallback, useMemo, useState } from 'react'
import { useRecoilValue } from 'recoil'
import { TableSlim } from 'src/components/Common/TableSlim'
import { Tooltip } from 'src/components/Results/Tooltip'
import { useTranslationSafe } from 'src/helpers/useTranslationSafe'
import { cdsesAtom } from 'src/state/results.state'

import type { AnalysisResult } from 'src/types'
import { getSafeId } from 'src/helpers/getSafeId'
import styled from 'styled-components'

const MAX_ROWS = 10

export interface ColumnCoverageProps {
analysisResult: AnalysisResult
}

export function ColumnCoverage({ analysisResult }: ColumnCoverageProps) {
const { index, seqName, coverage } = analysisResult
const { t } = useTranslationSafe()
const [showTooltip, setShowTooltip] = useState(false)
const onMouseEnter = useCallback(() => setShowTooltip(true), [])
const onMouseLeave = useCallback(() => setShowTooltip(false), [])

const { index, seqName, coverage, cdsCoverage } = analysisResult

const id = getSafeId('col-coverage', { index, seqName })
const coveragePercentage = useMemo(() => `${round(coverage * 100, 1).toFixed(1)}%`, [coverage])

const coveragePercentage = useMemo(() => formatCoveragePercentage(coverage), [coverage])

const { rows, isTruncated } = useMemo(() => {
const cdsCoverageSorted = sortBy(Object.entries(cdsCoverage), ([_, coverage]) => coverage)
const { head, tail } = truncateMiddle(cdsCoverageSorted, MAX_ROWS * 2)
let rows = head.map(([cds, coverage]) => <CdsCoverageRow key={cds} cds={cds} coverage={coverage} />)
if (tail) {
const tailRows = tail.map(([cds, coverage]) => <CdsCoverageRow key={cds} cds={cds} coverage={coverage} />)
rows = [...rows, <Spacer key="spacer" />, ...tailRows]
}
return { rows, isTruncated: !!tail }
}, [cdsCoverage])

return (
<div id={id} className="w-100">
<div id={id} className="w-100" onMouseEnter={onMouseEnter} onMouseLeave={onMouseLeave}>
{coveragePercentage}
<Tooltip isOpen={showTooltip} target={id}>
<div className="w-100">
<h6>{t('Nucleotide coverage: {{ value }}', { value: coveragePercentage })}</h6>
</div>

<div className="mt-3 w-100">
<h6>{t('CDS coverage')}</h6>
{isTruncated && (
<p className="small">
{t('Showing only the {{ num }} CDS with lowest and {{ num }} CDS with highest coverage', {
num: MAX_ROWS,
})}
</p>
)}
<TableSlim striped className="mb-1">
<thead>
<tr>
<th className="text-center">{t('CDS')}</th>
<th className="text-center">{t('Coverage')}</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</TableSlim>
</div>
</Tooltip>
</div>
)
}

function CdsCoverageRow({ cds, coverage }: { cds: string; coverage: number }) {
const cdses = useRecoilValue(cdsesAtom)
const color = cdses.find((c) => c.name === cds)?.color ?? '#aaa'
return (
<tr key={cds}>
<td>
<CdsText $background={color}>{cds}</CdsText>
</td>
<td className="text-monospace text-right">{formatCoveragePercentage(coverage)}</td>
</tr>
)
}

function Spacer() {
return (
<tr>
<td colSpan={2} className="text-center">
{'...'}
</td>
</tr>
)
}

const CdsText = styled.span<{ $background?: string; $color?: string }>`
padding: 1px 2px;
background-color: ${(props) => props.$background};
color: ${(props) => props.$color ?? props.theme.gray100};
font-weight: 700;
border-radius: 3px;
`

function formatCoveragePercentage(coverage: number) {
return `${round(coverage * 100, 1).toFixed(1)}%`
}

function truncateMiddle<T>(arr: T[], n: number) {
if (n < 3 || arr.length <= n) return { head: arr, tail: undefined }
const half = Math.floor((n - 2) / 2)
return { head: arr.slice(0, half), tail: arr.slice(arr.length - (n - half - 1)) }
}
12 changes: 12 additions & 0 deletions packages/nextclade/src/io/nextclade_csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use itertools::{chain, Either, Itertools};
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fmt::Display;
use std::path::Path;
use std::str::FromStr;
Expand Down Expand Up @@ -160,6 +161,7 @@ lazy_static! {
o!("alignmentStart") => true,
o!("alignmentEnd") => true,
o!("coverage") => true,
o!("cdsCoverage") => true,
o!("isReverseComplement") => true,
},
CsvColumnCategory::RefMuts => indexmap! {
Expand Down Expand Up @@ -414,6 +416,7 @@ impl<W: VecWriter> NextcladeResultsCsvWriter<W> {
missing_cdses,
// divergence,
coverage,
cds_coverage,
phenotype_values,
qc,
custom_node_attributes,
Expand Down Expand Up @@ -599,6 +602,7 @@ impl<W: VecWriter> NextcladeResultsCsvWriter<W> {
self.add_entry("alignmentStart", &(alignment_range.begin + 1).to_string())?;
self.add_entry("alignmentEnd", &alignment_range.end.to_string())?;
self.add_entry("coverage", coverage)?;
self.add_entry("cdsCoverage", &format_cds_coverage(cds_coverage, ARRAY_ITEM_DELIMITER))?;
self.add_entry_maybe(
"qc.missingData.missingDataThreshold",
qc.missing_data.as_ref().map(|md| md.missing_data_threshold.to_string()),
Expand Down Expand Up @@ -992,6 +996,14 @@ pub fn format_stop_codons(stop_codons: &[StopCodonLocation], delimiter: &str) ->
.join(delimiter)
}

#[inline]
pub fn format_cds_coverage(cds_coverage: &BTreeMap<String, f64>, delimiter: &str) -> String {
cds_coverage
.iter()
.map(|(cds, coverage)| format!("{cds}:{coverage}"))
.join(delimiter)
}

#[inline]
pub fn format_failed_cdses(failed_cdses: &[String], delimiter: &str) -> String {
failed_cdses.join(delimiter)
Expand Down
28 changes: 27 additions & 1 deletion packages/nextclade/src/run/nextclade_run_one.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use crate::analyze::pcr_primer_changes::get_pcr_primer_changes;
use crate::analyze::phenotype::calculate_phenotype;
use crate::analyze::virus_properties::PhenotypeData;
use crate::coord::coord_map_global::CoordMapGlobal;
use crate::coord::range::AaRefRange;
use crate::coord::range::{AaRefRange, Range};
use crate::graph::node::GraphNodeKey;
use crate::qc::qc_run::qc_run;
use crate::run::nextclade_wasm::{AnalysisOutput, Nextclade};
Expand Down Expand Up @@ -67,6 +67,7 @@ struct NextcladeResultWithAa {
total_unknown_aa: usize,
aa_alignment_ranges: BTreeMap<String, Vec<AaRefRange>>,
aa_unsequenced_ranges: BTreeMap<String, Vec<AaRefRange>>,
cds_coverage: BTreeMap<String, f64>,
}

#[derive(Default)]
Expand Down Expand Up @@ -170,6 +171,7 @@ pub fn nextclade_run_one(
total_unknown_aa,
aa_alignment_ranges,
aa_unsequenced_ranges,
cds_coverage,
} = if !gene_map.is_empty() {
let coord_map_global = CoordMapGlobal::new(&alignment.ref_seq);

Expand Down Expand Up @@ -235,6 +237,28 @@ pub fn nextclade_run_one(
aa_unsequenced_ranges,
} = gather_aa_alignment_ranges(&translation, gene_map);

let cds_coverage = translation
.cdses()
.map(|cds| {
let ref_peptide_len = ref_translation.get_cds(&cds.name)?.seq.len();
let num_aligned_aa = cds.alignment_ranges.iter().map(Range::len).sum::<usize>();
let num_unknown_aa = unknown_aa_ranges
.iter()
.filter(|r| r.cds_name == cds.name)
.map(|r| r.length)
.sum();
let total_covered_aa = num_aligned_aa.saturating_sub(num_unknown_aa);

let coverage_aa = if ref_peptide_len == 0 {
0.0
} else {
total_covered_aa as f64 / ref_peptide_len as f64
};

Ok((cds.name.clone(), coverage_aa))
})
.collect::<Result<BTreeMap<String, f64>, Report>>()?;

Comment on lines +240 to +261
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relevant algo code

NextcladeResultWithAa {
translation,
aa_changes_groups,
Expand All @@ -253,6 +277,7 @@ pub fn nextclade_run_one(
total_unknown_aa,
aa_alignment_ranges,
aa_unsequenced_ranges,
cds_coverage,
}
} else {
NextcladeResultWithAa::default()
Expand Down Expand Up @@ -441,6 +466,7 @@ pub fn nextclade_run_one(
warnings,
missing_cdses: missing_genes,
coverage,
cds_coverage,
aa_motifs,
aa_motifs_changes,
qc,
Expand Down
1 change: 1 addition & 0 deletions packages/nextclade/src/types/outputs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ pub struct NextcladeOutputs {
pub missing_cdses: Vec<String>,
pub divergence: f64,
pub coverage: f64,
pub cds_coverage: BTreeMap<String, f64>,
pub qc: QcResult,
pub custom_node_attributes: BTreeMap<String, String>,
pub nearest_node_id: GraphNodeKey,
Expand Down
Loading