Skip to content

Commit

Permalink
Merge pull request #1 from scbirlab/wobble
Browse files Browse the repository at this point in the history
wobble
  • Loading branch information
eachanjohnson authored Jun 2, 2023
2 parents 995a65f + 54cffa4 commit 00e3738
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 42 deletions.
16 changes: 10 additions & 6 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@

name: Python package

on:
push:
branches: [ $default-branch ]
pull_request:
branches: [ $default-branch ]
on: [push]

jobs:
build:
Expand All @@ -29,6 +25,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install flake8 pytest pytest-cov
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install -e .
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand All @@ -37,4 +34,11 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest streq --doctest-modules --junitxml=tests/test-results.xml --cov=com --cov-report=xml --cov-report=html
pytest streq --doctest-modules --junitxml=test/test-results.xml --cov=com --cov-report=xml --cov-report=html
- name: Upload pytest test results
  uses: actions/upload-artifact@v3
  with:
    # The artifact name is versioned so each matrix job uploads separately.
    name: pytest-results-${{ matrix.python-version }}
    # Must match the --junitxml path passed to pytest in the test step;
    # otherwise there is no file to upload.
    path: test/test-results.xml
  # Use always() to always run this step to publish test results when there are test failures
  if: ${{ always() }}
19 changes: 6 additions & 13 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,18 @@
*.so
*.egg-info
*.whl
/build/lib
/build/bazel*
/dist/
.ipynb_checkpoints
/bazel-*
.jax_configure.bazelrc
/tensorflow
.DS_Store
.mypy_cache/
.pytype/
/docs/build
*_pb2.py
/docs/notebooks/.ipynb_checkpoints/
/docs/_autosummary
docs/build
docs/_autosummary
.idea
.vscode
.envrc
jax.iml
.bazelrc.user
/*/__pycache__/
/test/*.xml
.coverage
.pytest_cache

# virtualenv/venv directories
/venv/
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# 🧬 streq

![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/scbirlab/streq/python-publish.yml)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/streq)
![PyPI](https://img.shields.io/pypi/v/streq)

Python utilities for working with nucleotide sequence strings.

## Installation
Expand Down Expand Up @@ -85,6 +89,17 @@ Get autocorrelation (rough indicator for secondary structure).
1.9238095238095236
```

Wobble base-pairing can be taken into account.

```python
>>> correlation('GGGTTT')
0.0
>>> correlation('GGGTTT', wobble=True)
2.3
>>> correlation('GGGUUU', wobble=True)
2.3
```

Provide a second sequence to get correlation between sequences.

```python
Expand Down
11 changes: 11 additions & 0 deletions docs/source/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ Get autocorrelation (rough indicator for secondary structure).
1.9238095238095236
```

Wobble base-pairing can be taken into account.

```python
>>> correlation('GGGTTT')
0.0
>>> correlation('GGGTTT', wobble=True)
2.3
>>> correlation('GGGUUU', wobble=True)
2.3
```

Provide a second sequence to get correlation between sequences.

```python
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[project]
name = "streq"
version = "0.0.1"
version = "0.0.2"
authors = [
{ name="Eachan Johnson", email="eachan.johnson@crick.ac.uk" },
]
description = "Basic utilities for working with nucleotide sequence strings."
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.8"
license = {file = "LICENSE"}
keywords = ["nucleotide", "sequence", "science"]

Expand All @@ -20,6 +20,8 @@ classifiers = [

"License :: OSI Approved :: MIT License",

"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3 :: Only",
Expand Down
71 changes: 51 additions & 20 deletions streq/distance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Functions for calculating distances and similarities between sequences."""

from __future__ import annotations

from collections.abc import Callable
from difflib import SequenceMatcher
from functools import partial

from .seqtools import reverse_complement
from .utils import _normalize_case
Expand Down Expand Up @@ -69,24 +75,20 @@ def levenshtein(x: str, y: str) -> int:


@_normalize_case(nargs=2)
def hamming(x: str, y: str,
wobble: bool = False) -> int:
def hamming(x: str, y: str) -> int:

"""Calculate the Hamming distance between two sequences.
The Hamming distance is the number of mismatches.
Note: the `wobble` parameter is not yet implemented.
The Hamming distance is the number of mismatches for two sequences
of identical length. This function truncates the longer sequence
to the shortest length.
Parameters
----------
x : str
Sequence.
y : str, optional
Second sequence for correlation with x.
wobble : bool, optional
Whether to calulate correlations taking into account G.U wobble.
Not yet implemented.
Returns
-------
Expand All @@ -103,7 +105,7 @@ def hamming(x: str, y: str,
3
"""

return sum(a != b for a, b in zip(x, y))


Expand Down Expand Up @@ -147,19 +149,35 @@ def ratcliff_obershelp(x: str,

return sum(code[0] != 'equal' for code in sm)

def mismatch_fun(x, y, n, wobble):

    """Count mismatches between `x[n:]` and `y`, discounting wobble pairs.

    The Hamming distance between `x` offset by `n` and `y` is computed,
    and when `wobble` is truthy every aligned position holding G in `x`
    against A in `y`, or T/U in `x` against C in `y`, is subtracted from it.
    `correlation` passes a reverse complement as `y`, in which case these
    positions correspond to G.U wobble base pairs.

    Parameters
    ----------
    x : str
        First sequence; only the part from index `n` onwards is compared.
    y : str
        Second sequence, compared from its start.
    n : int
        Offset into `x`.
    wobble : bool
        Whether to discount wobble positions from the mismatch count.

    Returns
    -------
    int
        Hamming distance minus the number of wobble positions.
    """

    tail = x[n:]

    def _is_wobble_pair(base_x, base_y):
        # G opposite A, or T/U opposite C, in the aligned sequences.
        if base_x == 'G' and base_y == 'A':
            return True
        return base_x in 'TU' and base_y == 'C'

    wobble_count = sum(wobble and _is_wobble_pair(base_x, base_y)
                       for base_x, base_y in zip(tail, y))

    return hamming(tail, y) - wobble_count


def _mismatch_fun(wobble: bool = False) -> Callable[[str, str, int], int]:

    """Build a mismatch counter with the wobble setting fixed.

    Parameters
    ----------
    wobble : bool, optional
        Value bound to the `wobble` argument of `mismatch_fun`.
        Default: False.

    Returns
    -------
    Callable
        A `functools.partial` of `mismatch_fun` that takes the remaining
        arguments `(x, y, n)`.
    """

    bound_kwargs = {'wobble': wobble}

    return partial(mismatch_fun, **bound_kwargs)


@_normalize_case(nargs=2)
def correlation(x: str,
y: str = '',
wobble: bool = False) -> float:

"""Get autocorrelation of a single sequence or
"""Calculate autocorrelation of a single sequence or
correlation between two sequences.
If a single sequence is provided, it's autocorrelation
is calculated, which might be an indicator of secondary structure.
If a single sequence is provided, its correlation with its
reverse complement is calculated, which might be an indicator
of secondary structure.
Note: the `wobble` parameter is not yet implemented.
If two sequences are provided, then the correlation between the
first sequence and the reverse complement of the second sequence
is calculated. This might be an indicator of binding affinity.
If wobble is True, then the G.U basepairing is also taken into
account.
Parameters
----------
Expand All @@ -169,7 +187,6 @@ def correlation(x: str,
Second sequence for correlation with x.
wobble : bool, optional
Whether to calculate correlations taking into account G.U wobble.
Not yet implemented.
Returns
-------
Expand All @@ -187,16 +204,30 @@ def correlation(x: str,
>>> correlation('AAACTTT')
1.9238095238095236
>>> correlation('AAA', 'TTT')
0.0
3.0
>>> correlation('AAA', 'AAA')
0.0
>>> correlation('GGGTTT')
0.0
>>> correlation('GGGTTT', wobble=True)
2.3
>>> correlation('GGGUUU', wobble=True)
2.3
>>> correlation('GGG', 'UUU')
0.0
>>> correlation('GGG', 'UUU', wobble=True)
3.0
"""

x = x
y = y or reverse_complement(x)
y = reverse_complement(y or x)

max_len = min(len(x), len(y))

return sum(((max_len - n) - hamming(x[n:], y, wobble)) / (max_len - n)
for n in range(len(x)))
len_x = len(x)

max_len = min(len_x, len(y))

mismatch_fun = _mismatch_fun(wobble=wobble)

return sum(((max_len - n) - mismatch_fun(x, y, n)) / (max_len - n)
for n in range(len_x))
3 changes: 2 additions & 1 deletion streq/seqtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
"""

from __future__ import annotations

from collections.abc import Generator, Sequence
from difflib import SequenceMatcher
import re

from .utils import (sequences,
Expand Down
2 changes: 2 additions & 0 deletions streq/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Miscellaneous utilities used in streq."""

from __future__ import annotations

from collections import namedtuple
from collections.abc import Callable, Sequence
from functools import wraps
Expand Down

0 comments on commit 00e3738

Please sign in to comment.