Skip to content

Commit 945f5d3

Browse files
committed
Update rectangles to rectangle as only one region can be assigned, fix #378
1 parent b8aba2e commit 945f5d3

File tree

4 files changed

+97
-28
lines changed

4 files changed

+97
-28
lines changed

docs/api.md

+3-5
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ Figures out what words are in `image`, where the words are in `image`, etc.
179179

180180
- `image` see [Image Format](./image-format.md) for more details.
181181
- `options` an object of customized options
182-
- `rectangles` an array of objects to specify the region you want to recognized in the image, the object should contain top, left, width and height, see example below.
182+
- `rectangle` an object specifying the region of the image you want recognized; the object should contain top, left, width and height (see the example below).
183183
- `jobId` Please see details above
184184

185185
**Output:**
@@ -198,7 +198,7 @@ const { createWorker } = Tesseract;
198198
})();
199199
```
200200

201-
With rectangles
201+
With rectangle
202202

203203
```javascript
204204
const { createWorker } = Tesseract;
@@ -208,9 +208,7 @@ const { createWorker } = Tesseract;
208208
await worker.loadLanguage('eng');
209209
await worker.initialize('eng');
210210
const { data: { text } } = await worker.recognize(image, {
211-
rectangles: [
212-
{ top: 0, left: 0, width: 100, height: 100 },
213-
],
211+
rectangle: { top: 0, left: 0, width: 100, height: 100 },
214212
});
215213
console.log(text);
216214
})();

docs/examples.md

+87-12
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
55
### basic
66

77
```javascript
8-
import { createWorker } from 'tesseract.js';
8+
const { createWorker } = require('tesseract.js');
99

1010
const worker = createWorker();
1111

@@ -22,7 +22,7 @@ const worker = createWorker();
2222
### with detailed progress
2323

2424
```javascript
25-
import { createWorker } from 'tesseract.js';
25+
const { createWorker } = require('tesseract.js');
2626

2727
const worker = createWorker({
2828
logger: m => console.log(m), // Add logger here
@@ -41,7 +41,7 @@ const worker = createWorker({
4141
### with multiple languages, separated by '+'
4242

4343
```javascript
44-
import { createWorker } from 'tesseract.js';
44+
const { createWorker } = require('tesseract.js');
4545

4646
const worker = createWorker();
4747

@@ -57,7 +57,7 @@ const worker = createWorker();
5757
### with whitelist char (^2.0.0-beta.1)
5858

5959
```javascript
60-
import { createWorker } from 'tesseract.js';
60+
const { createWorker } = require('tesseract.js');
6161

6262
const worker = createWorker();
6363

@@ -79,7 +79,7 @@ const worker = createWorker();
7979
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
8080

8181
```javascript
82-
import { createWorker, PSM } from 'tesseract.js';
82+
const { createWorker, PSM } = require('tesseract.js');
8383

8484
const worker = createWorker();
8585

@@ -103,30 +103,105 @@ Please check **examples** folder for details.
103103
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
104104
Node: [download-pdf.js](../examples/node/download-pdf.js)
105105

106-
### with only part of the image (^2.0.0-beta.1)
106+
### with only part of the image (^2.0.1)
107+
108+
**One rectangle**
107109

108110
```javascript
109-
import { createWorker } from 'tesseract.js';
111+
const { createWorker } = require('tesseract.js');
112+
113+
const worker = createWorker();
114+
const rectangle = { left: 0, top: 0, width: 500, height: 250 };
115+
116+
(async () => {
117+
await worker.load();
118+
await worker.loadLanguage('eng');
119+
await worker.initialize('eng');
120+
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
121+
console.log(text);
122+
await worker.terminate();
123+
})();
124+
```
125+
126+
**Multiple Rectangles**
127+
128+
```javascript
129+
const { createWorker } = require('tesseract.js');
110130

111131
const worker = createWorker();
112132
const rectangles = [
113-
{ left: 0, top: 0, width: 500, height: 250 },
133+
{
134+
left: 0,
135+
top: 0,
136+
width: 500,
137+
height: 250,
138+
},
139+
{
140+
left: 500,
141+
top: 0,
142+
width: 500,
143+
height: 250,
144+
},
114145
];
115146

116147
(async () => {
117148
await worker.load();
118149
await worker.loadLanguage('eng');
119150
await worker.initialize('eng');
120-
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { rectangles });
121-
console.log(text);
151+
const values = [];
152+
for (let i = 0; i < rectangles.length; i++) {
153+
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] });
154+
values.push(text);
155+
}
156+
console.log(values);
122157
await worker.terminate();
123158
})();
124159
```
125160

161+
**Multiple Rectangles (with scheduler to do recognition in parallel)**
162+
163+
```javascript
164+
const { createWorker, createScheduler } = require('tesseract.js');
165+
166+
const scheduler = createScheduler();
167+
const worker1 = createWorker();
168+
const worker2 = createWorker();
169+
const rectangles = [
170+
{
171+
left: 0,
172+
top: 0,
173+
width: 500,
174+
height: 250,
175+
},
176+
{
177+
left: 500,
178+
top: 0,
179+
width: 500,
180+
height: 250,
181+
},
182+
];
183+
184+
(async () => {
185+
await worker1.load();
186+
await worker2.load();
187+
await worker1.loadLanguage('eng');
188+
await worker2.loadLanguage('eng');
189+
await worker1.initialize('eng');
190+
await worker2.initialize('eng');
191+
scheduler.addWorker(worker1);
192+
scheduler.addWorker(worker2);
193+
const results = await Promise.all(rectangles.map((rectangle) => (
194+
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle })
195+
)));
196+
console.log(results.map(r => r.data.text));
197+
await scheduler.terminate();
198+
})();
199+
```
200+
126201
### with multiple workers to speed up (^2.0.0-beta.1)
127202

128203
```javascript
129-
import { createWorker, createScheduler } from 'tesseract.js';
204+
const { createWorker, createScheduler } = require('tesseract.js');
130205

131206
const scheduler = createScheduler();
132207
const worker1 = createWorker();
@@ -143,7 +218,7 @@ const worker2 = createWorker();
143218
scheduler.addWorker(worker2);
144219
/** Add 10 recognition jobs */
145220
const results = await Promise.all(Array(10).fill(0).map(() => (
146-
await scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
221+
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
147222
)))
148223
console.log(results);
149224
await scheduler.terminate(); // It also terminates all workers.

src/worker-script/index.js

+4-6
Original file line numberDiff line numberDiff line change
@@ -187,14 +187,12 @@ const initialize = ({
187187
}
188188
};
189189

190-
const recognize = ({ payload: { image, options: { rectangles = [] } } }, res) => {
190+
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
191191
try {
192192
const ptr = setImage(TessModule, api, image);
193-
rectangles.forEach(({
194-
left, top, width, height,
195-
}) => {
196-
api.SetRectangle(left, top, width, height);
197-
});
193+
if (typeof rec === 'object') {
194+
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
195+
}
198196
api.Recognize(null);
199197
res.resolve(dump(TessModule, api, params));
200198
TessModule._free(ptr);

tests/recognize.test.js

+3-5
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,9 @@ describe('recognize()', () => {
6969
const { data: { text } } = await worker.recognize(
7070
`${IMAGE_PATH}/${name}`,
7171
{
72-
rectangles: [
73-
{
74-
top, left, width, height,
75-
},
76-
],
72+
rectangle: {
73+
top, left, width, height,
74+
},
7775
},
7876
);
7977
expect(text).to.be(ans);

0 commit comments

Comments
 (0)