-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_ocr.py
More file actions
35 lines (27 loc) · 1.01 KB
/
test_ocr.py
File metadata and controls
35 lines (27 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import fitz # PyMuPDF
import os
def ocr_file(path: str, language="eng") -> str:
    """Return the OCR text of a PDF's first page or of a single image file.

    The file type is chosen from the extension of *path* (case-insensitive).
    For a PDF, only the first page is OCRed. For an image, the pixmap is
    converted to RGB without alpha, OCRed into a one-page in-memory PDF, and
    the text of that page is read back.

    Args:
        path: Path of the PDF or image file to process.
        language: Language code forwarded to the PyMuPDF OCR calls.

    Returns:
        The extracted text of the page.

    Raises:
        ValueError: If the extension is neither ".pdf" nor one of the
            supported image extensions.
    """
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        # The context manager closes the document even when OCR raises, so
        # the file handle is not leaked.
        with fitz.open(path) as doc:
            page = doc[0]  # only the first page is processed
            tp = page.get_textpage_ocr(language=language)
            return page.get_text("text", textpage=tp)

    if ext in (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp"):
        pix = fitz.Pixmap(path)
        # Ensure RGB, no alpha
        if pix.colorspace is None or pix.colorspace.n != 3:
            pix = fitz.Pixmap(fitz.csRGB, pix)
        if pix.alpha:
            pix = fitz.Pixmap(pix, 0)
        # OCR as a one-page PDF in memory
        img_pdf_bytes = pix.pdfocr_tobytes(language=language)
        with fitz.open("pdf", img_pdf_bytes) as doc:
            return doc[0].get_text("text")

    raise ValueError(f"Unsupported file type: {ext}")
# Example usage: OCR the file named on the command line, or the sample image
# when no argument is given.
if __name__ == "__main__":
    import sys

    # Keep the original sample path as the default so running the script
    # without arguments behaves as before.
    if len(sys.argv) > 1:
        sample_path = sys.argv[1]
    else:
        sample_path = "/Users/nyeung/Downloads/stock_q.png"
    text = ocr_file(sample_path, language="eng")
    print(text)