Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2 #2

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open

V2 #2

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions MathCaptchaSolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,53 @@ def __init__(self, image_path):

def enhance_legibility(self, cropped_image):
gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
_, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
return cv2.erode(cv2.blur(mask, (2, 2)), self.kernel, iterations=1)
return gray

def math_operation(self, left_number, right_number, operation='+'):
if left_number.isdigit() and right_number.isdigit():
return eval(f"{left_number} {operation} {right_number}")
else:
return None
def math_operation_for_both_signs(self, left_number, right_number):
if left_number.isdigit() and right_number.isdigit():
def math_operation(self, left_number, right_number):
if right_number.isdigit():
return eval(f"{left_number} + {right_number}")
return None

def resolve(self, left_image, right_image,left_image_twice,right_image_twice):
left_number = pipe(left_image_twice)[0]['generated_text']
if left_number.isdigit():
left_number = int(left_number)
right_number = int(right_number)
return [left_number - right_number, left_number + right_number]
else:
return None
def resolve(self, left_image, right_image, sign_image, negative_sign_right_image):
sign = pipe(sign_image)[0]['generated_text']
left_number = pipe(left_image)[0]['generated_text']

if sign in {'+', '@', '4','*'}:
right_number = pipe(right_image)[0]['generated_text']
return self.math_operation(left_number, right_number)
elif sign in {'-', '='}:
right_number = pipe(negative_sign_right_image)[0]['generated_text']
return self.math_operation(left_number, right_number, '-')
if left_number<10 or left_number==None or left_number=="":
left_number = pipe(left_image)[0]['generated_text']
right_number = pipe(right_image_twice)[0]['generated_text']
if right_number.isdigit() and int(right_number)>10:
return self.math_operation(left_number, right_number)
else:
right_number = pipe(right_image)[0]['generated_text']
return self.math_operation(left_number, right_number)
elif left_number>=10 :
right_number = pipe(right_image_twice)[0]['generated_text']
return self.math_operation(left_number, right_number)
else:
unfixed_right_number = ''.join(char for char in pipe(right_image)[0]['generated_text'] if char.isdigit())
return self.math_operation_for_both_signs(left_number, unfixed_right_number)
left_number = pipe(left_image)[0]['generated_text']
if left_number.isdigit():
right_number = pipe(right_image)[0]['generated_text']
return self.math_operation(left_number, right_number)


def solve_captcha(self):
positions = {'left': 5, 'right': 60, 'sign': 39, 'negative_sign_right': 56}
dimensions = {'width': 25, 'height': 20, 'width_sign': 15, 'height_sign': 15, 'width_negative_sign': 18}

left_image = self.image[7:27, positions['left']:positions['left']+dimensions['width']]
right_image = self.image[7:27, positions['right']:positions['right']+dimensions['width']]
sign_image = self.image[10:25, positions['sign']:positions['sign']+dimensions['width_sign']]
negative_sign_right_image = self.image[7:27, positions['negative_sign_right']:positions['negative_sign_right']+dimensions['width_negative_sign']]
positions = {'left': 5, 'right_unit': 57 , 'right_twice' : 71}
dimensions = {'width_twice': 31, 'width_unit': 19, 'height': 20}

left_enhanced = self.enhance_legibility(left_image)
right_enhanced = self.enhance_legibility(right_image)
negative_sign_right_enhanced = self.enhance_legibility(negative_sign_right_image)
left_image_for_unit_number = self.image[7:30, positions['left']:positions['left']+dimensions['width_unit']]
left_image_for_twice_number = self.image[7:30, positions['left']:positions['left']+dimensions['width_twice']]
right_image_for_left_twice_number = self.image[7:30, positions['right_twice']:positions['right_twice']+dimensions['width_twice']]
right_image_for_left_unit_number = self.image[7:30, positions['right_unit']:positions['right_unit']+dimensions['width_twice']]

left_enhanced = self.enhance_legibility(left_image_for_unit_number)
left_enhanced_for_twice_number = self.enhance_legibility(left_image_for_twice_number)
right_enhanced = self.enhance_legibility(right_image_for_left_unit_number)
right_enhanced_for_twice_number = self.enhance_legibility(right_image_for_left_twice_number)

cv2.imwrite('left_number.png', left_enhanced)
cv2.imwrite('left_image_for_twice_number.png', left_enhanced_for_twice_number)
cv2.imwrite('right_number.png', right_enhanced)
cv2.imwrite('right_image_for_twice_number.png', right_enhanced_for_twice_number)

cv2.imwrite('left_number.jpg', left_enhanced)
cv2.imwrite('right_number.jpg', right_enhanced)
cv2.imwrite('sign.jpg', sign_image)
cv2.imwrite('negative_sign_right_number.jpg', negative_sign_right_enhanced)

return self.resolve('left_number.jpg', 'right_number.jpg', 'sign.jpg', 'negative_sign_right_number.jpg')
return self.resolve('left_number.png', 'right_number.png','left_image_for_twice_number.png',"right_image_for_twice_number.png")
129 changes: 6 additions & 123 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,148 +1,35 @@
<div align="center">
<a href="https://github.com/AmireNoori/MathCaptchaSolver">
<img src="images/logo.jpg" alt="Logo" width="160" height="160">
</a>

<h3 align="center">Math Captcha Solver</h3>

<p align="center">
A great tool for solving math captchas!
</p>
</div>


<!-- TABLE OF CONTENTS -->
<details>
<summary>Table of Contents</summary>
<ol>
<li>
<a href="#about-the-project">About The Project</a>
<ul>
<li><a href="#built-with">Built With</a></li>
</ul>
</li>
<li>
<a href="#how-it-processes">How it processes</a>
<ul>
<li><a href="#installation">Installation</a></li>
</ul>
</li>
<li><a href="#usage">Usage</a></li>
<li><a href="#license">License</a></li>
<li><a href="#contact">Contact</a></li>
<li><a href="#kindness">Kindness</a></li>

</ol>
</details>


## About The Project

This project is a tool that processes mathematical captcha images and calculates the answer, using deep learning models and image processing in Python.

An example of captchas used in this project:

![captcha example](images/captcha-example.jpg)

If you are looking for solving captchas of the following types:

![captcha example](images/other-type.png)

go to this repo:

[MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver-v2)


### Built With

The deep learning model used in this tool is `TrOCR (large-sized model, fine-tuned on SROIE)`; you can download it and read more about it at [https://huggingface.co/microsoft/trocr-large-printed](https://huggingface.co/microsoft/trocr-large-printed).
The TrOCR model is an encoder-decoder model, consisting of an image Transformer as encoder, and a text Transformer as decoder.
Python language and the OpenCV library
* https://huggingface.co
* https://opencv.org
* https://www.python.org

## How it processes

The numbers and signs in the image are recognized by the powerful model introduced above. However, for this type of captcha image, the model alone was not able to extract the numbers correctly and calculate the result, so we had to use image processing to improve the outcome.

* First, let's see what will be returned if we give the complete captcha image to the model:

![captcha example](images/first-cap.jpg)

* Output
```sh
[{'generated_text': '40 % & @'}]
```

As you can see, the output returned to us is `40 % & @`, which is not a very good result.

So we first used morphological operations to erode the image and erase the extra lines to some extent, and then smoothed the image using the blur method.
Finally, because each number sits at fixed coordinates in all captcha images, we cropped the two numbers and the sign between them and saved them as separate images.

Now we have three pictures, two numbers and a math symbol:

![left number](images/left_number.jpg) ![sign](images/sign.jpg) ![right number](images/right_number.jpg)


Next, according to the extracted numbers in the image, the function performs the subtraction or addition operation and returns the answer to us.
![captcha example](images/captcha-example.png)

The output of the tool is in 3 different types.
This type of captcha is very similar to the one handled on the main branch of [MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver).
So if you want to know more about the details of the project, refer to this link:

* 1-The first type of output is an `integer` that is the sum or difference of the two numbers. This output is displayed when the math sign is correctly recognized.

Example:

<div align="center">
<img src="images/captcha-1.jpg" alt="captcha example">
</div>


* Output
```sh
99
```

* 2-The second type of output is a `list` containing two numbers.

This output is displayed when the tool is able to recognize the numbers but, despite the filters that have been set for the sign, is not able to recognize the sign. In that case it both subtracts and adds the numbers, and the list it displays contains the two results. You can try both answers in the captcha input.

Example:

<div align="center">
<img src="images/captcha-2.jpg" alt="captcha example">
</div>


* Output
```sh
[3, 19]
```

* 3-The third type of output is `None`.

This output is displayed when the tool is not able to recognize one of the two numbers and as a result could not calculate the numbers together.
This case happens very rarely and the tool detects the numbers correctly in most cases, but it may still show you such an output.


<div align="center">
<img src="images/captcha-3.jpg" alt="captcha example">
</div>


* Output
```sh
None
```

* Execution Time
On a system without a GPU, captcha processing and calculation take between `16` and `20` seconds from start to finish; on systems with a GPU the processing is considerably faster.
[MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver)

### Installation

1. Clone the repo
```sh
git clone https://github.com/AmireNoori/MathCaptchaSolver
git clone -b v2 https://github.com/AmireNoori/MathCaptchaSolver
```

You must have the `transformers`, `opencv-python` and `numpy` libraries installed. You can install each of them separately, or install them all at once using the `requirements.txt` file by entering the following command in the terminal.
Expand Down Expand Up @@ -177,7 +64,7 @@ _Note: When you run the program for the first time, your system must be connecte

This screenshot is an example of the results obtained from the 20 captcha images shared for you
<div align="center">
<img src="images/examples.jpg" alt="screenshot">
<img src="images/results.jpg" alt="screenshot">
</div>

## License
Expand Down Expand Up @@ -215,7 +102,3 @@ SOFTWARE.
Amir Noori - [@AmireNoori1](https://t.me/AmireNoori1) - [email protected]

Project Link: [https://github.com/AmireNoori/MathCaptchaSolver](https://github.com/AmireNoori/MathCaptchaSolver)

## Kindness

Thank you for using this tool. If it was useful for you, give it a star ⭐ and make me happy by following my profile ❤️.
Binary file added images/captcha-example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/results.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.