import { Maybe } from '@lib/utils';
import { getDocument, GlobalWorkerOptions, PDFDocumentProxy, PDFPageProxy } from 'pdfjs-dist';

// Tell pdf.js where to load its worker script from. The path is root-relative
// ('/p/...'), so resolving it against this module's URL keeps only the module's
// scheme/host and replaces the path.
// NOTE(review): presumably the host app serves the worker bundle under /p/ —
// confirm against the static-asset/build setup.
GlobalWorkerOptions.workerSrc = new URL('/p/pdf.worker.min.mjs', import.meta.url).toString();

/**
 * Extracts the text of every page of a PDF and returns it as a single string.
 *
 * Despite the name, no optical character recognition happens here: only the
 * text items that pdf.js reports via `getTextContent()` are read. A page for
 * which pdf.js reports no text items contributes an empty string.
 *
 * All pages are requested concurrently; the result keeps page order.
 *
 * @param pdfBuffer - Raw bytes of the PDF document.
 * @returns The text items of all pages joined with single spaces, or `null`
 *   when the document cannot be loaded or any page cannot be read.
 */
export const performOCRonPDF = async (pdfBuffer: ArrayBuffer): Promise<Maybe<string>> => {
  // Kept outside the `try` so the `finally` block can release it on every path.
  let loadingTask: ReturnType<typeof getDocument> | undefined;

  try {
    loadingTask = getDocument({ data: new Uint8Array(pdfBuffer) });

    const pdf: PDFDocumentProxy = await loadingTask.promise;

    const texts = await Promise.all(
      // pdf.js page numbers are 1-based, hence `i + 1`.
      Array.from({ length: pdf.numPages }, async (_, i) => {
        const page: PDFPageProxy = await pdf.getPage(i + 1);

        const textContent = await page.getTextContent();

        // Items without a `str` property (non-text entries) map to ''.
        return textContent.items.map(el => ('str' in el ? el.str : '')).join(' ');
      })
    );

    return texts.join(' ');
  } catch {
    // Best-effort contract: any load/parse failure is reported as `null`.
    return null;
  } finally {
    // Release the document and its worker-side resources. A failure during
    // teardown must not replace the value being returned, so it is ignored.
    await loadingTask?.destroy().catch(() => undefined);
  }
};
