Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/TheBotiverse/Botiverse into…
Browse files Browse the repository at this point in the history
… main
  • Loading branch information
YousefAtefB committed Jul 24, 2023
2 parents b83b646 + 05b7b4f commit 847eac1
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 14 deletions.
23 changes: 22 additions & 1 deletion botiverse/Theorizer/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,22 @@
from botiverse.Theorizer import generate
import os
# from botiverse.Theorizer import generate
import gdown
import zipfile

# Resolve every location relative to this package so the checks below do not
# depend on the current working directory.
curr_dir = os.path.dirname(os.path.abspath(__file__))

model_dir = os.path.join(curr_dir, "model")
squad_dir = os.path.join(curr_dir, "squad")

# Fetch the sample-probabilities pickle on first import if it is missing.
sample_probs_path = os.path.join(squad_dir, "sample_probs.pkl")
if not os.path.exists(sample_probs_path):
    print("Sample probs not found. Downloading Theorizer sample probs...")
    # Make sure the destination directory exists before gdown writes into it.
    os.makedirs(squad_dir, exist_ok=True)
    url = "https://drive.google.com/uc?id=1UjZaqM9jf9nzeK1R7WdSSLNEVKdxJOHO"
    gdown.download(url, sample_probs_path, quiet=False)
    print("Done.")

# Fetch the pretrained weights folder on first import if it is missing.
model_path = os.path.join(model_dir, "pretrained-model")
if not os.path.exists(model_path):
    print("Weights not found. Downloading Theorizer weights...")
    # Only the parent is created here: creating model_path itself would make
    # the existence check above pass even when no weights were downloaded.
    os.makedirs(model_dir, exist_ok=True)
    url = "https://drive.google.com/drive/folders/1rUvMP1HdE_H4TAMG8HxHT6z5Y0ZOnBsg"
    gdown.download_folder(url, output=model_path, quiet=False)
    print("Done.")
4 changes: 2 additions & 2 deletions botiverse/Theorizer/squad/sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,14 +226,14 @@ def read_sample_probs(sample_probs_path):
return sample_probs

def select_with_default_sampel_probs(sentence):
    """
    Run `select` on `sentence` using the default sample probabilities.

    The probabilities are read from `sample_probs.pkl` located in the same
    directory as this module, so the lookup does not depend on the current
    working directory.

    :param sentence: the text handed to `select`.
    :return: the value returned by `select` for `sentence`.
    """
    import os  # local import keeps this function self-contained

    sample_probs_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "sample_probs.pkl"
    )
    sample_probs = read_sample_probs(sample_probs_path)
    # Use the caller's sentence; previously it was ignored and a hard-coded
    # example sentence was always processed instead.
    return select(sentence, sample_probs)

def test():
sample_probs_path = "squad/dataset/train.probs.pkl"
sample_probs_path = "botiverse/Theorizer/squad/sample_probs.pkl"
sample_probs = read_sample_probs(sample_probs_path)
selection = select(
"Bob is eating a delicious cake in Vancouver.", sample_probs)
Expand Down
37 changes: 28 additions & 9 deletions botiverse/Theorizer/squad/squad_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import multiprocess as mp
import math
import numpy as np

import pickle as pkl

@dataclass
class SquadExample:
Expand Down Expand Up @@ -81,11 +81,30 @@ def create_squad_example_with_info(raw_ex: List[SquadExample]) -> List[SquadAugm
Augment the raw examples with question-type and clue info.
"""

examples_with_info = []
for e in tqdm(raw_ex):
new_e = extract_clue_and_question_info(
sentence=e.context_text, question=e.question_text, answer=e.answer_text, answer_start=e.answer_start)
examples_with_info.append(new_e)
num_process = 1
start_index = 0
end_index = len(raw_ex)
batch_size = len(raw_ex) // num_process


def task(j):
    # Bounds of the j-th batch; the upper bound is clamped to end_index.
    lo = start_index + j * batch_size
    hi = min(start_index + (j + 1) * batch_size, end_index)
    progress = tqdm(raw_ex[lo:hi], desc=f"Process {j}", position=j, leave=False)
    # Build one augmented example per raw example in the batch.
    return [
        extract_clue_and_question_info(
            sentence=ex.context_text,
            question=ex.question_text,
            answer=ex.answer_text,
            answer_start=ex.answer_start,
        )
        for ex in progress
    ]

# examples_list = []
# with mp.Pool(num_process) as pool:
# examples_list = pool.map(task, range(num_process))

examples_with_info = task(0)
# for e in examples_list:
# examples_with_info += e

return examples_with_info

Expand Down Expand Up @@ -238,20 +257,20 @@ def pipeline(input_file: str):
"""
Pipeline for processing squad examples.
"""
# mp.set_start_method("spawn")
mp.set_start_method("spawn")

raw_ex = read_squad_examples(input_file)
raw_ex = raw_ex[:1000]

processed_ex = create_process_squad_examples(raw_ex)
print(processed_ex[:6])
with_info_ex = create_squad_example_with_info(raw_ex)
with_info_ex = create_squad_example_with_info(raw_ex[:1000])

sample_probs = calculate_probability_distribution(with_info_ex)

return sample_probs


if __name__ == "__main__":
data = pipeline("squad/dataset/train.txt")
data = pipeline("botiverse/Theorizer/squad/dataset/train.txt")
print(data)
Binary file not shown.
3 changes: 2 additions & 1 deletion botiverse/Theorizer/squad/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import benepar
import numpy as np
import math

NLP = spacy.load('en_core_web_sm')
benepar.download('benepar_en3')
PARSER = benepar.Parser("benepar_en3")
Expand All @@ -13,7 +14,7 @@
[
word.rstrip().lower()
for word in open(
"botiverse/Theorizer/squad/info_extractor.py", "r", encoding="utf-8"
"botiverse/Theorizer/squad/function_words.txt", "r", encoding="utf-8"
).readlines()
]
)
Expand Down
47 changes: 47 additions & 0 deletions examples/Theorizer/example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from botiverse.Theorizer import generate\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"context = \"Bob is eating a delicious cake in Vancouver.\"\n",
"qa_dict = generate(context)\n",
"print(json.dumps(qa_dict,indent=4))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"Programming Language :: Python :: 3.9",
"Operating System :: OS Independent"
],
packages=["botiverse", "botiverse.bots", "botiverse.models", "botiverse.preprocessors"],
packages=["botiverse", "botiverse.bots", "botiverse.models", "botiverse.preprocessors","botiverse.Theorizer"],
include_package_data=True,
install_requires=["numpy", "torch"] # just as was in requirements.txt
)
Expand Down

0 comments on commit 847eac1

Please sign in to comment.