-
Notifications
You must be signed in to change notification settings - Fork 1
/
python.py
35 lines (25 loc) · 898 Bytes
/
python.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os

import cv2
import pytesseract
from PIL import Image
def preprocess_image(image_path):
    """Load an image from disk and binarize it for OCR.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel, and thresholded using Otsu's method.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The thresholded image returned by ``cv2.threshold`` (called with a
        maximum value of 255 and ``THRESH_BINARY + THRESH_OTSU``).

    Raises:
        FileNotFoundError: If ``image_path`` does not name an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(
            "No such image file: {!r}".format(image_path)
        )

    # cv2.imread reports failure by returning None rather than raising, so
    # check here instead of letting cvtColor fail with an opaque assertion.
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(
            "OpenCV could not decode image: {!r}".format(image_path)
        )

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply Gaussian blur to reduce noise before thresholding
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # With THRESH_OTSU the threshold argument (0) is ignored; OpenCV picks
    # the threshold itself and returns it as the first tuple element.
    _, thresholded = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return thresholded
def ocr(image_path):
    """Return the text Tesseract extracts from the image at ``image_path``.

    The image is first passed through ``preprocess_image``; the resulting
    array is wrapped in a PIL image and handed to
    ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to run OCR on.

    Returns:
        The value returned by ``pytesseract.image_to_string``.
    """
    binary = preprocess_image(image_path)
    # Wrap the OpenCV array in a PIL image and run Tesseract on it.
    return pytesseract.image_to_string(Image.fromarray(binary))
# Example usage: runs only when the file is executed as a script, so that
# importing this module for its functions does not trigger OCR on a
# hard-coded file.
if __name__ == "__main__":
    image_path = "2.jpg"
    text = ocr(image_path)
    print("Extracted text:", text)