-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
623 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from transformers import pipeline | ||
from argparse import ArgumentParser | ||
import torch | ||
import gradio as gr | ||
import numpy as np | ||
|
||
# Command-line interface: the model identifier is the only (mandatory) option.
argparse = ArgumentParser()
argparse.add_argument(
    "-m",
    "--model",
    required=True,
    help="HuggingFace Model identifier, such as 'google/flan-t5-base'",
)
args = argparse.parse_args()

# Remove every single or double quote character from the identifier.
mod = args.model.translate(str.maketrans("", "", "\"'"))
model_checkpoint = mod

# Audio classification pipeline built for the requested model.
classifier = pipeline(task="audio-classification", model=mod)
|
||
def classify_text(audio):
    """Classify an audio clip and return the label of the first prediction.

    Args:
        audio: A ``(sample_rate, samples)`` pair, or ``None`` when no clip
            was supplied.

    Returns:
        The ``"label"`` entry of the first result produced by the
        module-level ``classifier``, or a short notice when ``audio`` is
        ``None``.
    """
    if audio is None:
        # Unpacking None below would raise a TypeError.
        return "No audio provided."

    sr, data = audio
    samples = np.asarray(data)

    # Integer PCM has to be rescaled to [-1, 1]; a bare astype() would keep
    # the raw integer magnitudes.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # NOTE(review): assumes multi-channel clips are laid out as
    # (samples, channels) — confirm against the audio component in use.
    # They are averaged down to a single channel.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Hand the sampling rate to the pipeline so it can resample to the rate
    # the model expects instead of assuming the clip already matches.
    # NOTE(review): pipeline-side resampling presumably needs torchaudio —
    # confirm it is installed where this runs.
    res = classifier({"sampling_rate": sr, "raw": samples})
    return res[0]["label"]
|
||
# Waveform appearance used by the audio input widget.
_waveform_options = gr.WaveformOptions(
    waveform_color="#01C6FF",
    waveform_progress_color="#0066B4",
    skip_length=2,
    show_controls=False,
)

# Audio can be uploaded or recorded from the microphone.
input_audio = gr.Audio(
    sources=["upload", "microphone"],
    waveform_options=_waveform_options,
)

# One audio input, one text output, handled by classify_text.
demo = gr.Interface(
    fn=classify_text,
    inputs=input_audio,
    outputs="text",
    title="everything-ai-audioclass",
)

if __name__ == "__main__":
    demo.launch(share=False, server_name="0.0.0.0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import subprocess as sp | ||
import gradio as gr | ||
import subprocess as sp | ||
|
||
|
||
def build_command(hf_usr, hf_token, configpath):
    """Run ``autotrain`` with the given credentials and echo the command.

    Args:
        hf_usr: Username handed to the autotrain process as ``HF_USERNAME``.
        hf_token: Token handed to the autotrain process as ``HF_TOKEN``.
        configpath: Path given to ``autotrain --config``.

    Returns:
        The shell-style text of the equivalent commands, or a short notice
        when the ``autotrain`` executable cannot be found.
    """
    import os

    # Every sp.run(..., shell=True) call starts its own shell, so a variable
    # set by a separate `export` call is gone before autotrain starts. Put
    # the credentials straight into the environment of the autotrain process.
    env = dict(os.environ, HF_USERNAME=hf_usr, HF_TOKEN=hf_token)

    try:
        # An argument list with no shell passes the path verbatim, so it can
        # never be interpreted as shell syntax.
        sp.run(["autotrain", "--config", configpath], env=env)
    except FileNotFoundError:
        return "autotrain executable not found on PATH; nothing was run."

    return f"export HF_USERNAME={hf_usr}\nexport HF_TOKEN={hf_token}\nautotrain --config {configpath}"
|
||
|
||
# Three text fields, passed to build_command in this order:
# username, token, configuration path.
demo = gr.Interface(
    fn=build_command,
    inputs=[
        gr.Textbox(
            value="your-cute-name",
            label="HF username",
            info="Your HF username",
            lines=3,
        ),
        gr.Textbox(
            value="your-powerful-token",
            label="HF write token",
            info="An HF token that has write permissions on your repository",
            lines=3,
        ),
        gr.Textbox(
            value="/path/to/config.yaml",
            label="Yaml configuration file",
            info="Path to the yaml configuration file containing the information to use autotrain",
            lines=3,
        ),
    ],
    outputs="textbox",
    title="everything-ai-autotrain",
    theme=gr.themes.Base(),
)

if __name__ == "__main__":
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from transformers import AutoTokenizer, EsmForProteinFolding | ||
from transformers.models.esm.openfold_utils.protein import to_pdb, Protein as OFProtein | ||
from transformers.models.esm.openfold_utils.feats import atom14_to_atom37 | ||
from proteins_viz import * | ||
import gradio as gr | ||
|
||
def convert_outputs_to_pdb(outputs):
    """Turn a batch of folding outputs into PDB-format strings.

    Args:
        outputs: Mapping of output name to tensor. The keys read here are
            "positions", "atom37_atom_exists", "aatype", "residue_index",
            "plddt" and, when present, "chain_index".

    Returns:
        A list with one ``to_pdb`` result per entry along the first axis of
        ``outputs["aatype"]``.
    """
    # atom14_to_atom37 is called with the tensor-valued outputs, so it has
    # to run before the mapping is converted to NumPy arrays.
    atom_positions = atom14_to_atom37(outputs["positions"][-1], outputs).cpu().numpy()

    arrays = {name: tensor.to("cpu").numpy() for name, tensor in outputs.items()}
    atom_mask = arrays["atom37_atom_exists"]
    has_chain_index = "chain_index" in arrays

    def as_pdb(idx):
        # residue_index is shifted by one before it is stored on the protein.
        protein = OFProtein(
            aatype=arrays["aatype"][idx],
            atom_positions=atom_positions[idx],
            atom_mask=atom_mask[idx],
            residue_index=arrays["residue_index"][idx] + 1,
            b_factors=arrays["plddt"][idx],
            chain_index=arrays["chain_index"][idx] if has_chain_index else None,
        )
        return to_pdb(protein)

    return [as_pdb(idx) for idx in range(arrays["aatype"].shape[0])]
|
||
import torch

# Tokenizer and folding model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("facebook/esmfold_v1")

# Load the model and move it to the CUDA device in one step.
model = EsmForProteinFolding.from_pretrained(
    "facebook/esmfold_v1",
    low_cpu_mem_usage=True,
).cuda()

# Only the `esm` submodule is converted to half precision.
model.esm = model.esm.half()

# Chunk size for the model trunk.
model.trunk.set_chunk_size(64)

# Allow TF32 for CUDA matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True
|
||
def fold_protein(test_protein):
    """Fold one sequence and return the rendered image of its structure.

    Args:
        test_protein: Sequence string handed to the tokenizer.

    Returns:
        Whatever ``take_care`` returns for the PDB file written here.
    """
    encoded = tokenizer(
        [test_protein],
        return_tensors="pt",
        add_special_tokens=False,
    )
    token_ids = encoded["input_ids"].cuda()

    # Inference only: no gradients are tracked.
    with torch.no_grad():
        prediction = model(token_ids)

    # The predicted structure(s) are written to a fixed file name, which
    # take_care then reads back.
    pdb_path = "output_structure.pdb"
    pdb_text = "".join(convert_outputs_to_pdb(prediction))
    with open(pdb_path, "w") as handle:
        handle.write(pdb_text)

    return take_care(pdb_path)
|
||
# Text in, image out, handled by fold_protein.
iface = gr.Interface(
    fn=fold_protein,
    inputs="text",
    outputs="image",
    title="everything-ai-proteinfold",
)

# Launched unconditionally, on all interfaces, without a public share link.
iface.launch(share=False, server_name="0.0.0.0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import pandas as pd | ||
from biopandas.pdb import PandasPdb | ||
from prody import parsePDBHeader | ||
|
||
|
||
|
||
|
||
def read_pdb_to_dataframe(
    pdb_path,
    model_index: int = 1,
    parse_header: bool = True,
) -> "tuple[pd.DataFrame, dict | None]":
    """
    Read a PDB file and return its atom records together with its header.

    Args:
        pdb_path (str): Path to a local PDB file to read.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header and extract metadata.
            Defaults to True.

    Returns:
        tuple: ``(atoms, header)``. ``atoms`` is a DataFrame holding the ATOM records
            followed by the HETATM records of the selected model. ``header`` is the value
            returned by ``parsePDBHeader`` (presumably a dict — verify against ProDy), or
            None when ``parse_header`` is False.

    Raises:
        ValueError: If the selected model contains no ATOM records.
    """
    atomic_df = PandasPdb().read_pdb(pdb_path)
    header = parsePDBHeader(pdb_path) if parse_header else None

    atomic_df = atomic_df.get_model(model_index)
    if atomic_df.df["ATOM"].empty:
        raise ValueError(f"No model found for index: {model_index}")

    return pd.concat([atomic_df.df["ATOM"], atomic_df.df["HETATM"]]), header
|
||
from graphein.protein.graphs import label_node_id | ||
|
||
def process_dataframe(df: pd.DataFrame, granularity='CA') -> pd.DataFrame:
    """
    Reduce a DataFrame of protein structure data to one row per selected atom.

    This function performs the following steps:
    1. Handles alternate locations: an empty 'alt_loc' is treated as 'A' and only
       'A' rows are kept.
    2. Labels the rows through the helper function label_node_id.
    3. Keeps only the rows whose 'atom_name' equals `granularity`.

    The input frame is not modified; all work happens on a copy.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing protein structure data to process. It must contain the
        column 'atom_name'; 'alt_loc' is handled when present.
    granularity : str, optional
        Atom name to keep, also passed to label_node_id. Defaults to 'CA' (alpha carbon).

    Returns
    -------
    pd.DataFrame
        The filtered, labelled rows.
    """
    # Work on a private copy so the 'alt_loc' rewrite below never leaks into
    # the caller's frame.
    df = df.copy()

    # handle the case of alternative locations,
    # if so default to the 1st one = A
    if 'alt_loc' in df.columns:
        df['alt_loc'] = df['alt_loc'].replace('', 'A')
        # Copy the selection: label_node_id may write to the frame it is
        # given, and writing to a bare .loc slice is ambiguous in pandas.
        df = df.loc[df['alt_loc'] == 'A'].copy()

    df = label_node_id(df, granularity)
    return df.loc[df['atom_name'] == granularity]
|
||
|
||
from graphein.protein.graphs import initialise_graph_with_metadata | ||
from graphein.protein.graphs import add_nodes_to_graph | ||
from graphein.protein.visualisation import plotly_protein_structure_graph | ||
from PIL import Image | ||
import networkx as nx | ||
|
||
def _add_backbone_edges(G: "nx.Graph") -> "nx.Graph":
    """Link residues with adjacent numbers in each chain by a 'backbone_bond' edge."""
    # Iterate over every chain
    for chain_id in G.graph["chain_ids"]:
        # Find chain residues
        chain_residues = [
            (n, v) for n, v in G.nodes(data=True) if v["chain_id"] == chain_id
        ]
        # Walk consecutive residue pairs. Pairing stops before the chain
        # terminus, so no index can run past the end, and both residues are
        # on the same chain by construction of chain_residues.
        for (node, attrs), (next_node, next_attrs) in zip(chain_residues, chain_residues[1:]):
            # Only residues with adjacent numbers are bonded
            if abs(attrs["residue_number"] - next_attrs["residue_number"]) != 1:
                continue
            # Look the edge up by node id; the loop position is not a node key.
            if G.has_edge(node, next_node):
                G.edges[node, next_node].setdefault("kind", set()).add('backbone_bond')
            else:
                G.add_edge(node, next_node, kind={'backbone_bond'})
    return G


def take_care(pdb_path):
    """Render the backbone graph of a PDB file and return it as an image.

    Args:
        pdb_path: Path to the PDB file to read.

    Returns:
        The PIL image opened from "protein_graph.png", which this function
        writes in the current working directory.
    """
    df, _header = read_pdb_to_dataframe(pdb_path)
    process_df = process_dataframe(df)

    g = initialise_graph_with_metadata(
        protein_df=process_df,
        raw_pdb_df=df,  # Store this for traceability
        # NOTE(review): fixed label, not derived from pdb_path — confirm
        # whether it should follow the input file.
        pdb_code='3nir',
        granularity='CA',  # Store this so we know what kind of graph we have
    )
    g = add_nodes_to_graph(g)
    g = _add_backbone_edges(g)

    p = plotly_protein_structure_graph(
        g,
        colour_edges_by="kind",
        colour_nodes_by="seq_position",
        label_node_ids=False,
        plot_title="Backbone Protein Graph",
        node_size_multiplier=1,
    )
    image_file = "protein_graph.png"
    p.write_image(image_file, format='png')

    # Load the PNG image into a PIL image
    return Image.open(image_file)
Oops, something went wrong.