@@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
5
5
### basic
6
6
7
7
``` javascript
8
- import { createWorker } from ' tesseract.js' ;
8
+ const { createWorker } = require ( ' tesseract.js' ) ;
9
9
10
10
const worker = createWorker ();
11
11
@@ -22,7 +22,7 @@ const worker = createWorker();
22
22
### with detailed progress
23
23
24
24
``` javascript
25
- import { createWorker } from ' tesseract.js' ;
25
+ const { createWorker } = require ( ' tesseract.js' ) ;
26
26
27
27
const worker = createWorker ({
28
28
logger : m => console .log (m), // Add logger here
@@ -41,7 +41,7 @@ const worker = createWorker({
41
41
### with multiple languages, separated by '+'
42
42
43
43
``` javascript
44
- import { createWorker } from ' tesseract.js' ;
44
+ const { createWorker } = require ( ' tesseract.js' ) ;
45
45
46
46
const worker = createWorker ();
47
47
@@ -57,7 +57,7 @@ const worker = createWorker();
57
57
### with whitelist char (^2.0.0-beta.1)
58
58
59
59
``` javascript
60
- import { createWorker } from ' tesseract.js' ;
60
+ const { createWorker } = require ( ' tesseract.js' ) ;
61
61
62
62
const worker = createWorker ();
63
63
@@ -79,7 +79,7 @@ const worker = createWorker();
79
79
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
80
80
81
81
``` javascript
82
- import { createWorker , PSM } from ' tesseract.js' ;
82
+ const { createWorker , PSM } = require ( ' tesseract.js' ) ;
83
83
84
84
const worker = createWorker ();
85
85
@@ -103,30 +103,105 @@ Please check **examples** folder for details.
103
103
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
104
104
Node: [download-pdf.js](../examples/node/download-pdf.js)
105
105
106
- ### with only part of the image (^2.0.0-beta.1)
106
+ ### with only part of the image (^2.0.1)
107
+
108
+ **One rectangle**
107
109
108
110
``` javascript
109
- import { createWorker } from ' tesseract.js' ;
111
+ const { createWorker } = require (' tesseract.js' );
112
+
113
+ const worker = createWorker ();
114
+ const rectangle = { left: 0 , top: 0 , width: 500 , height: 250 };
115
+
116
+ (async () => {
117
+ await worker .load ();
118
+ await worker .loadLanguage (' eng' );
119
+ await worker .initialize (' eng' );
120
+ const { data: { text } } = await worker .recognize (' https://tesseract.projectnaptha.com/img/eng_bw.png' , { rectangle });
121
+ console .log (text);
122
+ await worker .terminate ();
123
+ })();
124
+ ```
125
+
126
+ **Multiple Rectangles**
127
+
128
+ ``` javascript
129
+ const { createWorker } = require (' tesseract.js' );
110
130
111
131
const worker = createWorker ();
112
132
const rectangles = [
113
- { left: 0 , top: 0 , width: 500 , height: 250 },
133
+ {
134
+ left: 0 ,
135
+ top: 0 ,
136
+ width: 500 ,
137
+ height: 250 ,
138
+ },
139
+ {
140
+ left: 500 ,
141
+ top: 0 ,
142
+ width: 500 ,
143
+ height: 250 ,
144
+ },
114
145
];
115
146
116
147
(async () => {
117
148
await worker .load ();
118
149
await worker .loadLanguage (' eng' );
119
150
await worker .initialize (' eng' );
120
- const { data: { text } } = await worker .recognize (' https://tesseract.projectnaptha.com/img/eng_bw.png' , ' eng' , { rectangles });
121
- console .log (text);
151
+ const values = [];
152
+ for (let i = 0 ; i < rectangles .length ; i++ ) {
153
+ const { data: { text } } = await worker .recognize (' https://tesseract.projectnaptha.com/img/eng_bw.png' , { rectangle: rectangles[i] });
154
+ values .push (text);
155
+ }
156
+ console .log (values);
122
157
await worker .terminate ();
123
158
})();
124
159
```
125
160
161
+ **Multiple Rectangles (with scheduler to do recognition in parallel)**
162
+
163
+ ``` javascript
164
+ const { createWorker , createScheduler } = require (' tesseract.js' );
165
+
166
+ const scheduler = createScheduler ();
167
+ const worker1 = createWorker ();
168
+ const worker2 = createWorker ();
169
+ const rectangles = [
170
+ {
171
+ left: 0 ,
172
+ top: 0 ,
173
+ width: 500 ,
174
+ height: 250 ,
175
+ },
176
+ {
177
+ left: 500 ,
178
+ top: 0 ,
179
+ width: 500 ,
180
+ height: 250 ,
181
+ },
182
+ ];
183
+
184
+ (async () => {
185
+ await worker1 .load ();
186
+ await worker2 .load ();
187
+ await worker1 .loadLanguage (' eng' );
188
+ await worker2 .loadLanguage (' eng' );
189
+ await worker1 .initialize (' eng' );
190
+ await worker2 .initialize (' eng' );
191
+ scheduler .addWorker (worker1);
192
+ scheduler .addWorker (worker2);
193
+ const results = await Promise .all (rectangles .map ((rectangle ) => (
194
+ scheduler .addJob (' recognize' , ' https://tesseract.projectnaptha.com/img/eng_bw.png' , { rectangle })
195
+ )));
196
+ console .log (results .map (r => r .data .text ));
197
+ await scheduler .terminate ();
198
+ })();
199
+ ```
200
+
126
201
### with multiple workers to speed up (^2.0.0-beta.1)
127
202
128
203
``` javascript
129
- import { createWorker , createScheduler } from ' tesseract.js' ;
204
+ const { createWorker , createScheduler } = require ( ' tesseract.js' ) ;
130
205
131
206
const scheduler = createScheduler ();
132
207
const worker1 = createWorker ();
@@ -143,7 +218,7 @@ const worker2 = createWorker();
143
218
scheduler .addWorker (worker2);
144
219
/** Add 10 recognition jobs */
145
220
const results = await Promise .all (Array (10 ).fill (0 ).map (() => (
146
- await scheduler .addJob (' recognize' , ' https://tesseract.projectnaptha.com/img/eng_bw.png' )
221
+ scheduler .addJob (' recognize' , ' https://tesseract.projectnaptha.com/img/eng_bw.png' )
147
222
)))
148
223
console .log (results);
149
224
await scheduler .terminate (); // It also terminates all workers.
0 commit comments