Trying to use Tesseract OCR in a web page

I want to run OCR on some images in a web page.
I found Tesseract.js, together with these code examples
(the script tag is created accordingly, like this):

// Build the <script> element that loads Tesseract.js and attach it to <head>.
// Alternatives kept from the original snippet:
//   type: "module"
//   src:  "https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js" // 1.0
var s = Object.assign(document.createElement("script"), {
  type: "text/javascript",
  src: "https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js", // 2.0
});
jQuery("head").append(s);
  1. With version 1, from:
    https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/tesseract.js
// Version 1 example: recognize the image at `url`, append the recognized text
// to the running `ocrFinal` string and show it in #ocr_results, while progress
// notifications update #ocr_status as "<status> (<percent>%)".
Tesseract.recognize(url)
  .then(({ text }) => {
    ocrFinal += text;
    const resultsElement = document.getElementById("ocr_results");
    resultsElement.innerText = ocrFinal;
  })
  .progress(({ status, progress }) => {
    const percent = progress * 100;
    const statusElement = document.getElementById("ocr_status");
    statusElement.innerText = `${status} (${percent}%)`;
  });
  2. With version 2, from:
    https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js

2.1)

// Version 2 example (2.1): recognize `img` using language `lang`, logging every
// message passed to the logger callback, then print and pass on the recognized text.
Tesseract.recognize(img, lang, {
  logger: (message) => {
    console.log(message);
  },
}).then((result) => {
  const recognizedText = result.data.text;
  console.log("risultato:");
  console.log(recognizedText);
  return recognizedText;
});

2.2)

// Version 2 example (2.2), worker API: create a worker, load and initialize the
// language, set the page segmentation mode, recognize `img`, terminate the
// worker and log the recognized text.
// `img` and `lang` are not defined in this snippet; they must be provided by
// the surrounding code.
// NOTE(review): this is a static ES-module import of the same file that is
// loaded through a plain <script> tag elsewhere in this post — confirm that
// this build really provides a named `createWorker` export.
import { createWorker } from 'https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js';

                // The logger callback prints every message it receives to the console.
                const worker = createWorker({
                  logger: m => console.log(m)
                });

                // Async IIFE so that `await` can be used for the sequential worker calls.
                (async () => {
                  await worker.load();
                  await worker.loadLanguage(lang);
                  await worker.initialize(lang);
                    // NOTE(review): `PSM` is neither imported nor defined in this
                    // snippet — presumably it comes from the library; verify.
                    await worker.setParameters({
                        tessedit_pageseg_mode: PSM.AUTO,
                      })
                  // NOTE(review): `.progress()` is called on the value returned by
                  // worker.recognize() before it is awaited — confirm that the v2 API
                  // provides it (the version 1 example chains `.progress()` the same way).
                  const { data: { text } } = await worker.recognize(img).progress(function(message){ console.log(message) })  
                  await worker.terminate()
				  console.log("risultato:");
				  console.log(text)
				  // Becomes the resolved value of the IIFE's promise; nothing in
				  // this snippet consumes that promise.
				  return text             
                })();

So, 1) works well.
With v2, on the other hand, 2.1 works, but I can’t get 2.2 to work, even though it seems to have better performance.
I get the error below from Chrome (I have no experience with modules, imports, etc.) when using this:

// Call site: run OCR on the image's `src` with the "eng" language.
ocr2(myImage.src, "eng")

/**
 * Attempt at wrapping the version 2 worker example (2.2) in a function.
 *
 * @param img  image passed to worker.recognize()
 * @param lang language passed to worker.loadLanguage() and worker.initialize()
 *
 * The function itself has no return statement: the `return text` below belongs
 * to the inner async function, whose promise is neither returned nor awaited.
 */
function ocr2(img, lang) {

    // The import declaration sits inside a function body here; this is the code
    // for which Chrome reports the "Cannot use import statement outside a
    // module" SyntaxError quoted below.
    import { createWorker } from 'https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js';

                // The logger callback prints every message it receives to the console.
                const worker = createWorker({
                  logger: m => console.log(m)
                });

                // Async IIFE so that `await` can be used for the sequential worker calls.
                (async () => {
                  await worker.load()
                  await worker.loadLanguage(lang)
                  await worker.initialize(lang)
                    // NOTE(review): `PSM` is neither imported nor defined in this
                    // snippet — presumably it comes from the library; verify.
                    await worker.setParameters({
                        tessedit_pageseg_mode: PSM.AUTO,
                      })
                  // NOTE(review): `.progress()` is called on the value returned by
                  // worker.recognize() before it is awaited — confirm that the v2 API
                  // provides it.
                  const { data: { text } } = await worker.recognize(img).progress(function(message){ 
                 console.log(message) })  
                  await worker.terminate()
				  console.log("result:")
				  console.log(text)
				  // Resolves the IIFE's promise only; it is not the return value of ocr2.
				  return text             
                })();

    }

Uncaught SyntaxError: Cannot use import statement outside a module

How can I make it work?

This topic was automatically closed 91 days after the last reply. New replies are no longer allowed.