-
Notifications
You must be signed in to change notification settings - Fork 4
/
ocr.py
27 lines (24 loc) · 871 Bytes
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from openai import OpenAI
import os
def ocr(base64_image=None, *, url=None):
    """Transcribe the text in an image using the OpenAI chat completions API.

    The image is supplied either as base64-encoded data or as a URL; exactly
    one of the two must be given. The API key is read from the ``OPENAI_API``
    environment variable.

    Args:
        base64_image: Base64-encoded image data as a string. It is embedded
            in a ``data:image/jpeg;base64,...`` URL before being sent.
        url: Keyword-only. An image URL that is passed to the API unchanged.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        ValueError: If neither or both of ``base64_image`` and ``url`` are
            provided.
    """
    # Validate before creating a client so a bad call fails fast instead of
    # sending a meaningless "data:...;base64,None" image to the API.
    if (base64_image is None) == (url is None):
        raise ValueError("ocr() requires exactly one of base64_image or url")

    if url is not None:
        image_url = url
    else:
        image_url = f"data:image/jpeg;base64,{base64_image}"

    api_key = os.getenv('OPENAI_API')
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "user",
                "content": [
                    # NOTE(review): "triple backslash quote" presumably means
                    # triple backticks -- confirm before changing the prompt.
                    {"type": "text", "text": "Transcribe the text on here. Describe diagram if there are any. Be concise. If there are any blue highlighting, only return that text inside triple backslash quote with no preamble"},
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url},
                    },
                ],
            }
        ],
        max_tokens=300,
    )
    return response.choices[0].message.content
if __name__ == "__main__":
    # Manual smoke test: ocr() takes base64-encoded image data as its first
    # argument, so download the sample image and encode it before calling.
    import base64
    import urllib.request

    sample_url = "https://github.com/EllAchE/llama-out-loud/blob/ocr/image/zoom_book.png?raw=true"
    with urllib.request.urlopen(sample_url, timeout=30) as image_response:
        encoded_image = base64.b64encode(image_response.read()).decode("ascii")
    # NOTE(review): the sample is a PNG while ocr() labels its data URL as
    # image/jpeg -- confirm the API tolerates the mismatch.
    print(ocr(encoded_image))