label-studio.git

# Template metadata: display title, type, group, order value, preview image
# path, and an HTML description (`details`) listing industry applications,
# associated models, and domain terminology for this template.
title: OCR Labeling for PDFs
type: enterprise
group: Computer Vision
order: 10
image: /static/templates/pdf-ocr.png
details: |
  <h1>Perform region-level OCR directly on native PDFs</h1>
  <dl>
    <dt>Industry Applications</dt>
    <dd>document intelligence, invoice processing, form extraction, contract analysis, receipt digitization, document QA, structured data capture, automated data entry, compliance document review, financial document processing</dd>
    <dt>Associated Models</dt>
    <dd>OCR text detection, bounding box detection, document classification, text recognition</dd>
    <dt>Domain Terminology</dt>
    <dd>PDF text extraction, region-based OCR, bounding boxes, text correction, document parsing, normalized coordinates, multi-page documents</dd>
  </dl>
# Labeling interface: an <OcrLabels> control named "ocr" (labels: Typo,
# Incorrect Amount, Incorrect Name) attached to the <Pdf> object named "pdf",
# whose source is read from the task field `pdf` ($pdf). The <Style> rule
# sizes the element with class .htx-pdf relative to the viewport height.
# The trailing XML comment holds a JSON object with example task data.
config: |
  <View>
    <Style>
      .htx-pdf { height: calc(100vh - 250px); }
    </Style>
    <Header value="Select text to correct" size="4"/>
    <OcrLabels name="ocr" toName="pdf">
      <Label value="Typo" />
      <Label value="Incorrect Amount" />
      <Label value="Incorrect Name" />
    </OcrLabels>
    <Pdf name="pdf" value="$pdf"/>
  </View>
 
  <!-- {
    "data": {
      "pdf": "/static/samples/opossum-cuteness.pdf"
    }
  } -->