|  | 
|  | 1 | +"""dojo module. | 
|  | 2 | +
 | 
|  | 3 | +1. **Inverted Images** | 
|  | 4 | +2. Rescaling | 
|  | 5 | +3. Binarization | 
|  | 6 | +4. Noise Removal | 
|  | 7 | +5. Dilation and Erosion | 
|  | 8 | +6. Rotation / Deskewing | 
|  | 9 | +7. Removing Borders | 
|  | 10 | +8. Missing Borders | 
|  | 11 | +9. Transparency / Alpha Channel | 
|  | 12 | +""" | 
|  | 13 | + | 
|  | 14 | +from __future__ import annotations | 
|  | 15 | + | 
|  | 16 | +import logging | 
|  | 17 | +import sys | 
|  | 18 | +from copy import copy | 
|  | 19 | +from pathlib import Path | 
|  | 20 | +from typing import TYPE_CHECKING, NoReturn | 
|  | 21 | + | 
|  | 22 | +import cv2 | 
|  | 23 | +from deprecated import deprecated | 
|  | 24 | +from icecream import ic | 
|  | 25 | +from matplotlib import pyplot as plt | 
|  | 26 | + | 
|  | 27 | +if sys.version_info >= (3, 11): | 
|  | 28 | +    from typing import Self | 
|  | 29 | +else: | 
|  | 30 | +    from typing_extensions import Self | 
|  | 31 | + | 
|  | 32 | +if TYPE_CHECKING: | 
|  | 33 | +    import numpy as np | 
|  | 34 | + | 
# Directory with the sample text images: ``generic_data/text_img`` located
# one level above the directory that contains this file.
IMG_DIR: Path = Path(__file__).parents[1] / 'generic_data' / 'text_img'
|  | 36 | + | 
|  | 37 | + | 
def who(cls):
    """Store the decorated class's own name on it as ``class_name``.

    Meant to be applied as a class decorator; the very same class object is
    handed back so the decorated name keeps pointing at it.
    """
    setattr(cls, 'class_name', cls.__name__)
    return cls
|  | 42 | + | 
|  | 43 | + | 
def add_method(method):
    """Build a class decorator that attaches ``method`` to a class.

    The callable is stored under its own ``__name__``. Callables that expose
    no ``__name__`` are stored under the attribute name ``method``.

    Args:
        method: Callable to install on the decorated class.

    Returns:
        A decorator that takes a class, sets the attribute on it and returns
        that same class.
    """
    attr_name = getattr(method, '__name__', 'method')

    def wrapper(cls):
        """Set method into class and hand the class back."""
        setattr(cls, attr_name, method)
        # A class decorator has to return the class; returning nothing
        # would rebind the decorated name to None.
        return cls

    # Without this return, ``@add_method(f)`` would try to call None.
    return wrapper
|  | 50 | + | 
|  | 51 | + | 
@deprecated(
    version='1.69.0a5',
    reason='use another implementation updated into dojo20250114.',
)
class PreprocessImageOCR:
    """Load an image and keep a working copy that can be shown or saved.

    The pixels read from disk are kept untouched in ``_img_data`` while
    ``img`` is the working copy that other steps modify; ``reset`` restores
    the working copy from the untouched data.
    """

    def __init__(self, img_path: Path | None = None):
        """Initialise the instance and load ``img_path`` when one is given.

        Args:
            img_path: Image file to load. ``None`` creates an empty instance
                with no image loaded.
        """
        # Dots per inch used by ``display`` to size the figure.
        self.dpi: float = 80.0
        self._img_path: Path | None = None
        # Pixels exactly as read from disk.
        self._img_data: np.ndarray | None = None
        # Working copy of the pixels.
        self.img: np.ndarray | None = None
        # Goes through the property setter, which performs the actual load.
        self.img_path = img_path

    @property
    def img_path(self) -> Path | None:
        """Image file; assigning a path loads it as the current image."""
        return self._img_path

    @img_path.setter
    def img_path(self, value: Path | None) -> None:
        self._img_path = value
        if value is None:
            # Nothing to read: keep whatever image data is already held
            # instead of relying on a swallowed exception from the reader.
            return
        self._img_data = plt.imread(value)
        self.img = copy(self._img_data)
        logging.info(ic('Image load'))
        logging.debug(ic(self._img_path))

    def save(self, fout: Path | None = None) -> Path:
        """Save current image.

        Args:
            fout: Destination file. When omitted, the file is written to the
                current working directory as ``<stem>_latest<suffix>`` of
                the loaded image path, so a path must have been set.

        Returns:
            The resolved path the image was written to.

        Raises:
            OSError: If OpenCV reports that the file was not written.
        """
        if fout:
            target = Path(fout)
        else:
            source = self.img_path
            target = Path.cwd() / f'{source.stem}_latest{source.suffix}'
        target = target.resolve()
        # ``parents=True`` so a nested destination directory is created too.
        target.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the data comes from ``plt.imread`` while OpenCV
        # writers presumably expect BGR channel order -- confirm whether
        # colour images need a conversion before being written.
        # ``str`` keeps this working on OpenCV builds that reject ``Path``.
        if not cv2.imwrite(str(target), self.img):
            raise OSError(f'Could not write image to {target}')
        logging.info(ic('Image saved.'))
        logging.debug(ic(target))
        return target

    def reset(self) -> Self:
        """Reset to original image.

        Returns:
            This instance, so calls can be chained.
        """
        self.img = copy(self._img_data)
        logging.info(ic('Image reseted.'))
        return self

    def display(self, img_path: Path | None = None) -> bool:
        """Display image on screen.

        https://stackoverflow.com/questions/28816046/displaying-different-images-with-actual-size-in-matplotlib-subplot

        Args:
            img_path: Optional image file to load before displaying. When
                omitted, the image already held is shown and the stored
                path is left untouched.

        Returns:
            ``True`` once the image has been shown, ``False`` when there is
            no image to show.
        """
        # Only reload when a path is really given; assigning ``None`` here
        # would wipe the stored path and break a later ``save()``.
        if img_path is not None:
            self.img_path = img_path

        if self.img is None:
            logging.warning('No image loaded; nothing to display.')
            return False

        height, width = self.img.shape[:2]

        # What size does the figure need to be in inches to fit the image?
        figsize = width / self.dpi, height / self.dpi

        # Create a figure of the right size with
        # one axes that takes up the full figure
        fig = plt.figure(figsize=figsize)
        ax = fig.add_axes([0, 0, 1, 1])

        # Hide spines, ticks, etc.
        ax.axis('off')

        # Display the image.
        ax.imshow(self.img, cmap='gray')

        plt.show()
        return True
|  | 125 | + | 
|  | 126 | + | 
@who
class PPIOCR(PreprocessImageOCR):
    """Image preprocessing class that adds an inversion step."""

    @deprecated(
        version='1.69.0a6',
        reason='use another implementation updated into dojo20250114.',
    )
    def inverted(self) -> Self:
        """Replace the working image with its bitwise complement.

        Returns:
            This instance, so calls can be chained.
        """
        flipped = cv2.bitwise_not(self.img)
        self.img = flipped
        return self
|  | 139 | + | 
|  | 140 | + | 
if __name__ == '__main__':
    # Demo run: show the sample letter, invert it, then show it again.
    sample = IMG_DIR / 'letter.jpg'
    ocr = PPIOCR(sample)
    ocr.display()
    ocr.inverted()
    ocr.display()
0 commit comments