clearSilverMarten
Val Town is a collaborative website to build and scale JavaScript apps.
Deploy APIs, crons, & store data – all from the browser, and deployed in milliseconds.
import os

import cv2
import pytesseract
from PIL import Image
from pytesseract import Output
# Point pytesseract at the Tesseract binary in its usual Windows install
# location, but only when that file is actually present. Otherwise
# pytesseract's own default command is left in place, so a `tesseract`
# executable reachable through PATH keeps working.
_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
def process_image(image_path, output_format="txt"):
    """Run OCR on an image and save the result as a text file or a PDF.

    The image is loaded with OpenCV, converted to grayscale and smoothed
    with a 5x5 Gaussian blur before being passed to Tesseract. The
    recognised text is always printed. Depending on ``output_format`` the
    text is then written to ``output_text.txt`` or a PDF is written to
    ``output_document.pdf``.

    Args:
        image_path: Path of the image file to read.
        output_format: ``"txt"`` (default) or ``"pdf"``. For any other
            value nothing is saved and a notice is printed.

    Returns:
        None. Failures are reported on stdout as ``Error: ...`` instead of
        being raised to the caller.
    """
    # One set of OCR settings shared by both Tesseract calls, so the PDF is
    # recognised with the same languages and page segmentation mode as the
    # printed text.
    ocr_lang = "eng+hin+ben"
    ocr_config = '--psm 6'

    try:
        # Load the image and prepare it for OCR.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; fail here with a clear message instead of letting
            # cvtColor raise an obscure assertion error.
            raise FileNotFoundError(
                "Could not read image: {!r}".format(image_path)
            )
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Noise removal.
        processed_image = cv2.GaussianBlur(gray, (5, 5), 0)

        # Recognise the text with OCR.
        text_data = pytesseract.image_to_string(
            processed_image, lang=ocr_lang, config=ocr_config
        )

        # Show the recognised text.
        print("\nExtracted Text:\n", text_data)

        # Save a file according to the requested output format.
        if output_format == "txt":
            with open("output_text.txt", "w", encoding="utf-8") as file:
                file.write(text_data)
            print("Text saved as 'output_text.txt'.")
        elif output_format == "pdf":
            pdf_output = pytesseract.image_to_pdf_or_hocr(
                processed_image,
                lang=ocr_lang,
                config=ocr_config,
                extension='pdf',
            )
            with open("output_document.pdf", "wb") as file:
                file.write(pdf_output)
            print("PDF saved as 'output_document.pdf'.")
        else:
            # Make it visible that nothing was written for an unknown format.
            print(
                "Unsupported output format {!r}; nothing was saved.".format(
                    output_format
                )
            )
    except Exception as e:
        # Best-effort script behaviour: report the problem and carry on.
        print("Error:", e)
# Script entry point: run OCR on a sample image when executed directly.
if __name__ == "__main__":
    image_path = "sample_image.jpg"  # set this to the path of your image
    output_format = "pdf"  # choose either 'txt' or 'pdf'
    process_image(image_path, output_format)