added ocr tutorial

x4nth055 · x4nth055 · commit 4f94ee0b2d9d · 2019-10-22T12:34:12.000+02:00
diff --git a/machine-learning/optical-character-recognition/README.md b/machine-learning/optical-character-recognition/README.md
@@ -0,0 +1,28 @@
+# [How to Recognize Optical Characters in Images in Python](https://www.thepythoncode.com/article/optical-character-recognition-pytesseract-python)
+To run this:
+- `pip3 install -r requirements.txt` (note: `pytesseract` also requires the Tesseract OCR engine to be installed on your system)
+- To recognize the text in the image `test.png`:
+    ```
+    python extracting_text.py test.png
+    ```
+    **Output:**
+    ```
+    This is a lot of 12 point text to test the
+    ocr code and see if it works on all types
+    of file format.
+
+    The quick brown dog jumped over the
+    lazy fox. The quick brown dog jumped
+    over the lazy fox. The quick brown dog
+    jumped over the lazy fox. The quick
+    brown dog jumped over the lazy fox.
+    ```
+- To draw boxes around every occurrence of the word `"dog"` in the image `test.png`:
+    ```
+    python draw-boxes.py test.png dog
+    ```
+    **Output:**
+
+    <img src="detected-words-ocr.png" align="center">
+- For live camera recognition, use the `live_recognizer.py` script.
+
diff --git a/machine-learning/optical-character-recognition/detected-words-ocr.png b/machine-learning/optical-character-recognition/detected-words-ocr.png
diff --git a/machine-learning/optical-character-recognition/draw-boxes.py b/machine-learning/optical-character-recognition/draw-boxes.py
@@ -0,0 +1,43 @@
+import pytesseract
+import cv2
+import sys
+import matplotlib.pyplot as plt
+from PIL import Image
+
+# read the image using OpenCV
+image = cv2.imread(sys.argv[1])
+
+# make a copy of this image to draw in
+image_copy = image.copy()
+
+# the target word to search for
+target_word = sys.argv[2]
+
+# get all data from the image
+data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+
+# get all occurences of the that word
+word_occurences = [ i for i, word in enumerate(data["text"]) if word.lower() == target_word ]
+
+for occ in word_occurences:
+    # extract the width, height, top and left position for that detected word
+    w = data["width"][occ]
+    h = data["height"][occ]
+    l = data["left"][occ]
+    t = data["top"][occ]
+    # define all the surrounding box points
+    p1 = (l, t)
+    p2 = (l + w, t)
+    p3 = (l + w, t + h)
+    p4 = (l, t + h)
+    # draw the 4 lines (rectangular)
+    image_copy = cv2.line(image_copy, p1, p2, color=(255, 0, 0), thickness=2)
+    image_copy = cv2.line(image_copy, p2, p3, color=(255, 0, 0), thickness=2)
+    image_copy = cv2.line(image_copy, p3, p4, color=(255, 0, 0), thickness=2)
+    image_copy = cv2.line(image_copy, p4, p1, color=(255, 0, 0), thickness=2)
+
+plt.imsave("all_dog_words.png", image_copy)
+plt.imshow(image_copy)
+plt.show()
+
+
diff --git a/machine-learning/optical-character-recognition/extracting_text.py b/machine-learning/optical-character-recognition/extracting_text.py
@@ -0,0 +1,21 @@
+import pytesseract
+import cv2
+import matplotlib.pyplot as plt
+import sys
+from PIL import Image
+
+# read the image using OpenCV 
+# from the command line first argument
+image = cv2.imread(sys.argv[1])
+# or you can use Pillow
+# image = Image.open(sys.argv[1])
+
+# get the string
+string = pytesseract.image_to_string(image)
+# print it
+print(string)
+
+# get all data
+# data = pytesseract.image_to_data(image)
+
+# print(data)
diff --git a/machine-learning/optical-character-recognition/live_recognizer.py b/machine-learning/optical-character-recognition/live_recognizer.py
@@ -0,0 +1,52 @@
+import pytesseract
+import cv2
+import matplotlib.pyplot as plt
+from PIL import Image
+
+# the target word to search for
+target_word = "your"
+
+cap = cv2.VideoCapture(0)
+
+while True:
+    # read the image from the cam
+    _, image = cap.read()
+
+    # make a copy of this image to draw in
+    image_copy = image.copy()
+
+    # get all data from the image
+    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
+
+    # print the data
+    print(data["text"])
+
+    # get all occurences of the that word
+    word_occurences = [ i for i, word in enumerate(data["text"]) if word.lower() == target_word ]
+
+    for occ in word_occurences:
+        # extract the width, height, top and left position for that detected word
+        w = data["width"][occ]
+        h = data["height"][occ]
+        l = data["left"][occ]
+        t = data["top"][occ]
+        # define all the surrounding box points
+        p1 = (l, t)
+        p2 = (l + w, t)
+        p3 = (l + w, t + h)
+        p4 = (l, t + h)
+        # draw the 4 lines (rectangular)
+        image_copy = cv2.line(image_copy, p1, p2, color=(255, 0, 0), thickness=2)
+        image_copy = cv2.line(image_copy, p2, p3, color=(255, 0, 0), thickness=2)
+        image_copy = cv2.line(image_copy, p3, p4, color=(255, 0, 0), thickness=2)
+        image_copy = cv2.line(image_copy, p4, p1, color=(255, 0, 0), thickness=2)
+
+    if cv2.waitKey(1) == ord("q"):
+        break
+
+    cv2.imshow("image_copy", image_copy)
+
+cap.release()
+cv2.destroyAllWindows()
+
+
diff --git a/machine-learning/optical-character-recognition/requirements.txt b/machine-learning/optical-character-recognition/requirements.txt
@@ -0,0 +1,5 @@
+pytesseract
+numpy
+matplotlib
+opencv-python
+pillow
diff --git a/machine-learning/optical-character-recognition/test.png b/machine-learning/optical-character-recognition/test.png