-
Notifications
You must be signed in to change notification settings - Fork 4
/
ocr.py
69 lines (54 loc) · 2.28 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import cv2
import os
import keras_ocr
import numpy as np
import pytesseract
import easyocr
import matplotlib.pyplot as plt
class OCR():
    """Run three OCR engines (keras-ocr, EasyOCR, Tesseract) over a folder of images.

    On construction every readable image in ``image_folder`` is loaded into
    ``self.images`` (RGB channel order) and the keras-ocr pipeline and the
    EasyOCR reader are instantiated once so they can be reused by the
    ``*_works`` methods.
    """

    def __init__(self, image_folder):
        """Load the images and build the OCR models.

        Args:
            image_folder: Path of the directory whose files are read as images.
        """
        self.image_folder = image_folder
        self.images = self.load_images_from_folder()
        self.pipeline = self._keras_model_load()
        self.text_reader = self._easyocr_model_load()

    def load_images_from_folder(self):
        """Read every decodable image in ``self.image_folder``.

        Entries that ``cv2.imread`` cannot decode (it returns ``None`` for
        them) are skipped instead of crashing the colour conversion.

        Returns:
            A list of images converted from OpenCV's BGR order to RGB, in
            sorted file-name order so that image indices are reproducible.
        """
        images = []
        # os.listdir() order is arbitrary; sort so indices used in plot
        # titles are stable from run to run.
        for filename in sorted(os.listdir(self.image_folder)):
            img = cv2.imread(os.path.join(self.image_folder, filename))
            if img is None:
                # Must be checked BEFORE cvtColor, which raises on None.
                continue
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        return images

    def _keras_model_load(self):
        """Create and return a ``keras_ocr.pipeline.Pipeline`` instance."""
        return keras_ocr.pipeline.Pipeline()

    def keras_ocr_works(self, visualization=True):
        """Run the keras-ocr pipeline on the loaded images.

        Uses the images already held in ``self.images`` (like the other
        ``*_works`` methods) rather than re-reading the folder from disk.

        Args:
            visualization: When true, draw the predictions of each image on
                its own subplot row. The figure is created but ``plt.show()``
                is not called here.
        """
        images = [keras_ocr.tools.read(img) for img in self.images]
        if not images:
            # Nothing to recognise; plt.subplots(nrows=0) would also raise.
            return
        prediction_groups = self.pipeline.recognize(images)
        if visualization:
            # squeeze=False always yields a 2-D array of Axes, so a folder
            # with a single image works the same as one with many.
            _, axs = plt.subplots(
                nrows=len(images), figsize=(20, 20), squeeze=False
            )
            for ax, image, predictions in zip(
                axs.ravel(), images, prediction_groups
            ):
                keras_ocr.tools.drawAnnotations(
                    image=image, predictions=predictions, ax=ax
                )

    def _easyocr_model_load(self):
        """Create and return an ``easyocr.Reader`` for the 'tr' and 'en' languages."""
        return easyocr.Reader(['tr', 'en'])

    def easyocr_model_works(self, visualization=True):
        """Run EasyOCR on each loaded image and print every recognised text.

        Args:
            visualization: When true, show each image after its texts are
                printed, titled with its index.
        """
        for i, image in enumerate(self.images):
            results = self.text_reader.readtext(image)
            # Each result is unpacked as (bbox, text, prob); only text is used.
            for _bbox, text, _prob in results:
                print(text)
            if visualization:
                plt.imshow(image)
                plt.title(f"{i} Image")
                plt.show()

    def pytesseract_model_works(self, visualization=True):
        """Run Tesseract on each loaded image and print the extracted string.

        All images are processed first, then the results are printed (and
        optionally shown) in order.

        Args:
            visualization: When true, show each image after its text is
                printed, titled with its index.
        """
        tesseract_preds = [
            pytesseract.image_to_string(img) for img in self.images
        ]
        for i, (image, pred) in enumerate(zip(self.images, tesseract_preds)):
            print(pred)
            if visualization:
                plt.imshow(image)
                plt.title(f"{i} Image")
                plt.show()