-
Notifications
You must be signed in to change notification settings - Fork 0
/
02-multi-label-classification-pipeline.py
46 lines (37 loc) · 1.44 KB
/
02-multi-label-classification-pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
import json
# ----------------------------------------------------------------------------------------
# configuration
# Hugging Face model id of the base checkpoint (the tokenizer is loaded from it)
model_name = "FacebookAI/xlm-roberta-base"
# directory where the fine-tuned model was saved
model_name_fine_tuned = (
    "models/multi-label-classification/FacebookAI/"
    "xlm-roberta-base-fine-tuned-en-it-6epochs-32batch-model"
)
# True runs on the CPU; leave False to run on an NVIDIA GPU (CUDA)
use_cpu = False
# ----------------------------------------------------------------------------------------
# load tokenizer (taken from the base checkpoint named by model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# load labels mapping; the JSON object is used as label -> id
# explicit utf-8 so label names decode the same way regardless of the platform's locale encoding
with open('data/multi-label-classification/labels-mapping-en-it.json', 'r', encoding='utf-8') as file:
    label2id = json.load(file)
# invert the mapping to id -> label (loop variable renamed so it does not shadow the builtin `id`)
id2label = {label_id: label for label, label_id in label2id.items()}
# load the fine-tuned model, configured for multi-label classification
# and carrying both directions of the label mapping
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_fine_tuned,
    problem_type='multi_label_classification',
    num_labels=len(id2label),
    label2id=label2id,
    id2label=id2label,
)
# set device: decide once so the model and the pipeline always agree
device = 'cpu' if use_cpu else 'cuda'
if not use_cpu:
    # move the model to the GPU before handing it to the pipeline
    model.to(device)
# load pipeline
# top_k=None asks the pipeline for the score of every label, not only the best one
text_classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=None,
)
# use pipeline
# classify one text by passing a single string
single_prediction = text_classifier('put some text here')
# classify several texts in one call by passing a list of strings
multiple_predictions = text_classifier(
    [
        'put first text here',
        'put second text here',
        '...',
    ]
)