Skip to content

Commit 2cdd189

Browse files
committed
[RELEASE] iText pdfOCR 5.0.0
2 parents 233d93b + 9edfff0 commit 2cdd189

622 files changed

Lines changed: 331342 additions & 5468 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/createpr.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
name: CreatePR
2+
3+
on:
4+
create:
5+
6+
jobs:
7+
call-createpr:
8+
uses: XodoDocs/itext-github-workflows/.github/workflows/createpr.yml@master
9+
secrets: inherit

.github/workflows/createrebase.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
name: "Create rebased branch"
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- develop
7+
types:
8+
- assigned
9+
- synchronize
10+
11+
jobs:
12+
call-createrebase:
13+
uses: XodoDocs/itext-github-workflows/.github/workflows/createrebase.yml@master
14+
secrets: inherit

.github/workflows/licensecheck.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: LicenseCheck
2+
3+
on:
4+
push:
5+
# 'branches-ignore' or 'branches' can be used to filter specific branches.
6+
# By default, without any filters, it runs on every push to all branches.
7+
# To be explicit, you can use:
8+
branches-ignore:
9+
- 'develop'
10+
- 'master'
11+
- 'rebased/*'
12+
13+
jobs:
14+
call-licensecheck:
15+
uses: XodoDocs/itext-github-workflows/.github/workflows/licensecheck.yml@master
16+
secrets: inherit

easyOcr_to_onnx_export/README.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# easyOcr_to_onnx_export script
2+
3+
### Disclaimer
4+
5+
There is no official method for converting EasyOCR models to ONNX, so a custom approach was required.
6+
The converted recognition models retain the same inputs and outputs as the original versions, while the detection models were slightly adjusted to better fit our use case.
7+
<br>
8+
9+
10+
## Setup Instructions
11+
12+
Follow these steps to set up a virtual environment and install the required dependencies.
13+
14+
### 1. Create a virtual environment
15+
16+
```bash
17+
python -m venv .venv
18+
```
19+
20+
---
21+
22+
### 2. Activate the virtual environment
23+
24+
* **Linux / macOS:**
25+
26+
```bash
27+
source .venv/bin/activate
28+
```
29+
30+
* **Windows:**
31+
32+
```bash
33+
.venv\Scripts\activate
34+
```
35+
36+
---
37+
38+
### 3. Install dependencies
39+
40+
Install all required packages using the `requirements.txt` file:
41+
42+
```bash
43+
pip install -r requirements.txt
44+
```
45+
46+
---
47+
48+
### 4. Run the script
49+
50+
```bash
51+
python easyOcr_to_onnx_export.py <model_dir>
52+
```
53+
54+
Replace `<model_dir>` with the path to your EasyOCR model directory.
55+
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import os.path
4+
5+
import easyocr
6+
from easyocr import config
7+
from easyocr.craft import CRAFT
8+
from easyocr.detection import copyStateDict
9+
10+
import torch
11+
12+
13+
# Names of the EasyOCR models handled by this script.
# Detection models (text localisation).
detection_models = ('craft',)

# First-generation recognition models.
# FIXME: 'tamil_g1' is left out because it causes issues during export.
recognition_models_gen1 = (
    'arabic_g1', 'bengali_g1', 'cyrillic_g1', 'devanagari_g1', 'japanese_g1',
    'korean_g1', 'latin_g1', 'thai_g1', 'zh_sim_g1', 'zh_tra_g1',
)

# Second-generation recognition models.
recognition_models_gen2 = (
    'cyrillic_g2', 'english_g2', 'japanese_g2', 'kannada_g2',
    'korean_g2', 'latin_g2', 'telugu_g2', 'zh_sim_g2',
)

# Every recognition model to export: generation 1 first, then generation 2.
recognition_models = (*recognition_models_gen1, *recognition_models_gen2)
41+
42+
43+
# Detection model
44+
class TrimmedCRAFT(CRAFT):
    """CRAFT detector whose forward pass yields a single, re-ordered tensor."""

    def forward(self, x):
        """Run the parent forward pass and return only its first output.

        The parent returns a pair; the second element ("feature") is
        discarded. The first element has its axes permuted with
        ``(0, 3, 1, 2)``, which moves it back to BCHW order.
        """
        score_maps, _feature = super().forward(x)
        bchw_maps = score_maps.permute(0, 3, 1, 2)
        return bchw_maps
50+
51+
52+
def get_detector(trained_model, device='cpu'):
    """Create a TrimmedCRAFT network initialised from a saved checkpoint.

    Args:
        trained_model: Checkpoint location handed to ``torch.load``.
        device: Value used as ``map_location`` while loading the checkpoint.

    Returns:
        The network after loading the weights, applying in-place dynamic
        ``qint8`` quantization and switching to evaluation mode.
    """
    detector = TrimmedCRAFT()
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects, so only use checkpoints that come from a trusted source.
    checkpoint = torch.load(trained_model, map_location=device, weights_only=False)
    # presumably copyStateDict normalises the checkpoint's key names -- confirm
    # against easyocr.detection.
    state_dict = copyStateDict(checkpoint)
    detector.load_state_dict(state_dict)
    torch.quantization.quantize_dynamic(detector, dtype=torch.qint8, inplace=True)
    detector.eval()
    return detector
58+
59+
60+
def main():
    """Export the EasyOCR recognition models and the CRAFT detector to ONNX.

    Takes one positional command-line argument, ``model_dir``. That directory
    is used as EasyOCR's model storage directory, as the location of the
    CRAFT weights file and as the output directory: every ``.onnx`` file is
    written under the same base name as the weights file it was built from.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('model_dir', help='directory with EasyOCR models')
    model_dir = cli.parse_args().model_dir

    def onnx_target(weights_name):
        # Replace whatever follows the last dot with ".onnx".
        stem = weights_name.rsplit('.', 1)[0]
        return os.path.join(model_dir, stem + '.onnx')

    for network_name in recognition_models:
        print(f'Exporting {network_name}...')
        if network_name.endswith('_g1'):
            generation = 'gen1'
        else:
            generation = 'gen2'
        weights_name = config.recognition_models[generation][network_name]['filename']
        reader = easyocr.Reader(
            lang_list=['en'],
            gpu=False,
            model_storage_directory=model_dir,
            recog_network=network_name,
            quantize=False,
        )
        # AdaptiveAvgPool2d cannot be exported to ONNX, so a static pooling
        # layer is substituted; its kernel size assumes imgH=64.
        reader.recognizer.AdaptiveAvgPool = torch.nn.AvgPool2d((1, 3))
        # Sample inputs used for the export (image first, then text).
        sample_image = torch.randn(1, 1, 64, 512)
        sample_text = torch.randn(1, 512)
        recognition_axes = {
            "input": {0: 'batch_size', 3: 'width'},
            "text": {0: 'batch_size', 1: 'batch_max_length'},
        }
        torch.onnx.export(
            reader.recognizer,
            (sample_image, sample_text),
            onnx_target(weights_name),
            export_params=True,
            input_names=('input', 'text',),
            output_names=('preds',),
            dynamic_axes=recognition_axes,
        )

    print('Exporting CRAFT...')
    craft_weights = config.detection_models['craft']['filename']
    # The sample image is created before the detector is built, keeping the
    # original order of operations.
    sample_images = (torch.randn(1, 3, 2560, 2560),)
    detector = get_detector(os.path.join(model_dir, craft_weights))
    detection_axes = {
        "images": {0: 'batch_size', 2: 'height', 3: 'width'},
    }
    torch.onnx.export(
        detector,
        sample_images,
        onnx_target(craft_weights),
        export_params=True,
        input_names=('images',),
        output_names=('y',),
        dynamic_axes=detection_axes,
    )


if __name__ == '__main__':
    main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
easyocr==1.7.2
2+
torch==2.8.0
3+
onnxscript

pdfocr-api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<parent>
66
<groupId>com.itextpdf</groupId>
77
<artifactId>pdfocr-root</artifactId>
8-
<version>4.1.2</version>
8+
<version>5.0.0</version>
99
</parent>
1010

1111
<properties>

pdfocr-api/src/main/java/com/itextpdf/pdfocr/AbstractPdfOcrEventHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2025 Apryse Group NV
3+
Copyright (c) 1998-2026 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IImageRotationHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2025 Apryse Group NV
3+
Copyright (c) 1998-2026 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.

pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
This file is part of the iText (R) project.
3-
Copyright (c) 1998-2025 Apryse Group NV
3+
Copyright (c) 1998-2026 Apryse Group NV
44
Authors: Apryse Software.
55
66
This program is offered under a commercial and under the AGPL license.
@@ -40,6 +40,7 @@ public interface IOcrEngine {
4040
* in the format described below.
4141
*
4242
* @param input input image {@link java.io.File}
43+
*
4344
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
4445
* representing the number of the page and value is
4546
* {@link java.util.List} of {@link TextInfo} elements where each
@@ -63,6 +64,35 @@ public interface IOcrEngine {
6364
*/
6465
Map<Integer, List<TextInfo>> doImageOcr(File input, OcrProcessContext ocrProcessContext);
6566

67+
/**
68+
* Reads data from the provided list of input image files and returns retrieved data
69+
* in the format described below.
70+
*
71+
* @param inputs list of {@link java.io.File} input images
72+
*
73+
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
74+
* representing the number of the page and value is
75+
* {@link java.util.List} of {@link TextInfo} elements where each
76+
* {@link TextInfo} element contains a word or a line and its 4
77+
* coordinates (bbox)
78+
*/
79+
Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs);
80+
81+
/**
82+
* Reads data from the provided list of input image files and returns retrieved data
83+
* in the format described below.
84+
*
85+
* @param inputs list of {@link java.io.File} input images
86+
* @param ocrProcessContext ocr processing context
87+
*
88+
* @return {@link java.util.Map} where key is {@link java.lang.Integer}
89+
* representing the number of the page and value is
90+
* {@link java.util.List} of {@link TextInfo} elements where each
91+
* {@link TextInfo} element contains a word or a line and its 4
92+
* coordinates (bbox)
93+
*/
94+
Map<Integer, List<TextInfo>> doImageOcr(List<File> inputs, OcrProcessContext ocrProcessContext);
95+
6696
/**
6797
* Performs OCR using provided {@link IOcrEngine} for the given list of
6898
* input images and saves output to a text file using provided path.

0 commit comments

Comments
 (0)