I want to run OCR on some images on a web page.
I found Tesseract.js, along with the example code below.
(I load the library by creating a script tag, like this:)
// Load the Tesseract.js library at runtime by building a <script> element and
// appending it to the document's <head> (via jQuery).
var s = document.createElement("script")
// The script is loaded as a classic script; the "module" alternative on the
// next line is left disabled.
s.type = "text/javascript"
//s.type = "module"
// Only one src is active at a time: the commented-out URL is the build the
// author labels "1.0", the live assignment below is the v2.1.0 build ("2.0").
//s.src = "https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js" // 1.0
s.src = "https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js" // 2.0
// Appending the element to <head> is what starts the download.
jQuery("head").append(s)
- With version 1, loaded from:
https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js
// Version 1 usage: start recognition for `url`, append the recognized text to
// the page when it finishes, and show progress updates while it runs.
// `url` and `ocrFinal` are declared outside this snippet.
Tesseract.recognize(url)
.then(function(result) {
// Accumulate the recognized text and display the running total in #ocr_results.
ocrFinal += result.text
document.getElementById("ocr_results")
.innerText = ocrFinal
// .progress() is chained on whatever .then() returns here, so in this version
// that value is evidently not a plain Promise (Promises have no .progress()).
}).progress(function(result) {
// Show "<status> (<progress * 100>%)" in #ocr_status for each progress message.
document.getElementById("ocr_status")
.innerText = result["status"] + " (" +
(result["progress"] * 100) + "%)";
});
- With version 2, loaded from:
https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js
2.1)
// Version 2 usage via the global Tesseract object: recognize `img` in language
// `lang` (both declared outside this snippet).
Tesseract.recognize(
img,
lang,
// Every message passed to `logger` is written to the console.
{ logger: m => console.log(m) }
// The resolved value is destructured down to `data.text`.
).then(({ data: { text } }) => {
console.log("risultato:");
console.log(text);
// This becomes the resolution value of the promise returned by .then();
// the snippet does not capture that promise, so the text is only logged.
return text
})
2.2)
// Version 2 usage via an explicit worker: create a worker, prepare it for
// `lang`, recognize `img`, log the text, then terminate the worker.
// `img` and `lang` are declared outside this snippet.
//
// NOTE(review): this is a static import declaration, so the snippet can only
// parse when the code is loaded as an ES module (e.g. <script type="module">).
// It also requires the target file to be an ES module that exports
// `createWorker` — confirm that dist/tesseract.min.js is such a build.
import { createWorker } from 'https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js';
// Every message passed to `logger` is written to the console.
const worker = createWorker({
logger: m => console.log(m)
});
// Async IIFE so the worker calls below can be awaited one after another.
(async () => {
await worker.load();
await worker.loadLanguage(lang);
await worker.initialize(lang);
await worker.setParameters({
// NOTE(review): `PSM` is neither imported nor declared in this snippet;
// presumably it must come from the library as well — verify.
tessedit_pageseg_mode: PSM.AUTO,
})
// NOTE(review): .progress() is chained onto the value returned by
// worker.recognize() *before* it is awaited. If that value is a plain Promise
// this call will throw; the `logger` option above appears to cover progress
// reporting already — confirm against the v2 API.
const { data: { text } } = await worker.recognize(img).progress(function(message){ console.log(message) })
await worker.terminate()
console.log("risultato:");
console.log(text)
// Resolves the IIFE's promise, which nothing captures; the text is only logged.
return text
})();
So, 1) works well.
With v2, 2.1 works fine, but I can't get 2.2 to work, even though it seems to offer better performance.
I get the error below from Chrome (I have no experience with modules, `import`, etc.) when using this code:
// Call site: run ocr2 on the image's URL with the "eng" language code.
// `myImage` is declared outside this snippet.
ocr2(myImage.src, "eng")
/**
 * Attempts to recognize the text in `img` with a tesseract.js v2 worker and
 * logs the result to the console.
 *
 * @param img  Image to recognize (the call site above passes an image URL).
 * @param lang Language code handed to loadLanguage() and initialize().
 * @returns {undefined} The function has no return statement of its own; the
 *   promise produced by the inner async IIFE is discarded, so the recognized
 *   text never reaches the caller.
 */
function ocr2(img, lang) {
// NOTE(review): this line is the source of the SyntaxError quoted below.
// Static import declarations are only allowed at the top level of an ES
// module; here the declaration sits inside a function body, and the loader
// shown earlier injects code as a classic script ("text/javascript").
import { createWorker } from 'https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js';
// Every message passed to `logger` is written to the console.
const worker = createWorker({
logger: m => console.log(m)
});
// Async IIFE so the worker calls below can be awaited one after another.
(async () => {
await worker.load()
await worker.loadLanguage(lang)
await worker.initialize(lang)
await worker.setParameters({
// NOTE(review): `PSM` is neither imported nor declared in this snippet;
// presumably it must come from the library as well — verify.
tessedit_pageseg_mode: PSM.AUTO,
})
// NOTE(review): .progress() is chained onto the value returned by
// worker.recognize() *before* it is awaited. If that value is a plain Promise
// this call will throw; the `logger` option above appears to cover progress
// reporting already — confirm against the v2 API.
const { data: { text } } = await worker.recognize(img).progress(function(message){
console.log(message) })
await worker.terminate()
console.log("result:")
console.log(text)
// Returns from the async IIFE only, not from ocr2 itself.
return text
})();
}
Uncaught SyntaxError: Cannot use import statement outside a module
How can I make it work?