-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolution.py
114 lines (86 loc) · 3.21 KB
/
solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import argparse
import numpy as np
from PIL import Image
# Command-line interface: a single optional ``--path`` option naming the
# directory that holds the images to compare.
parser = argparse.ArgumentParser(description='Test task on images similarity.')
parser.add_argument("--path", help="Path to dataset with images")
# NOTE: the arguments are parsed at module level, so this also runs when the
# module is imported, not only when it is executed as a script.
args = parser.parse_args()
# Directory to scan; None when --path is not supplied (no default is set).
dataset = args.path
# Number of bins intended for the pixel-value histogram.
quantize = 16
# Side length intended for the square thumbnail used by avg_hash.
hash_size = 8
def findpeaks(y):
    """
    Generate the indices of the peaks in a one-dimensional data line.

    A sample is a peak when the line rises into it and does not rise out of
    it. The first sample is treated as if it were preceded by a rise and the
    last sample as if it were followed by a flat step. A plateau is reported
    once, at its first sample. A leading plateau that is followed by a rise
    is not a peak and is dropped.

    :param y: :class:`numpy.ndarray` histogram values (one-dimensional)
    :return: :class:`numpy.ndarray` of integer indices of the peaks in ``y``,
        in ascending order; empty when ``y`` is constant or has fewer than
        two samples
    """
    dy = np.diff(y)

    # A constant (or too short) line has no peaks. Return an empty array, not
    # a list, so every code path yields the same type.
    if np.all(dy == 0):
        return np.array([], dtype=np.intp)

    # Candidates: positions that are not followed by a rise ...
    not_rising_after = np.flatnonzero(np.append(dy, [0]) <= 0)
    # ... and that are preceded by a strict rise.
    rising_before = np.flatnonzero(np.append([1], dy) > 0)
    index = np.intersect1d(not_rising_after, rising_before)
    if index.size == 0:
        return index

    # Index 0 qualifies only through the artificial leading rise. When the
    # line starts flat and then climbs, that start is a foot, not a peak.
    if index[0] == 0 and dy[0] == 0:
        first_change = dy[np.flatnonzero(dy != 0)[0]]
        if first_change > 0:
            index = index[1:]

    # No trailing-edge correction is needed: a sample inside a flat tail is
    # never selected above, because a peak must be entered by a strict rise.

    # Drop any index that directly follows another one, keeping only the
    # first index of each consecutive run.
    gaps = np.append([0], np.diff(index))
    return np.compress(gaps != 1, index)
def avg_hash(img):
    """
    Return binary hash of image using average of pixels.

    The image is converted to greyscale and shrunk to ``hash_size`` x
    ``hash_size`` pixels; each pixel of that thumbnail is then marked True
    when it is brighter than the thumbnail's mean value.

    :param img: PIL image
    :return: class 'numpy.ndarray' of bool with shape (hash_size**2,)
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS names the same
    # resampling filter and is available in older releases as well.
    thumbnail = img.convert("L").resize((hash_size, hash_size), Image.LANCZOS)
    pixels = np.asarray(thumbnail)
    return (pixels > pixels.mean()).ravel()
def compare_avg_hash(hash_1, hash_2):
    """
    Count the positions at which the average hashes of two inputs differ.

    Both arguments are passed through ``avg_hash`` before the comparison, so
    they must be whatever ``avg_hash`` accepts rather than precomputed hash
    arrays.

    :param hash_1: first input, hashed with ``avg_hash``
    :param hash_2: second input, hashed with ``avg_hash``
    :return: class: 'int' number of hash elements that differ (0 means the
        two hashes are identical)
    """
    first_bits = avg_hash(hash_1)
    second_bits = avg_hash(hash_2)
    mismatches = first_bits != second_bits
    return np.count_nonzero(mismatches)
def hist_hash(img, bins=None):
    """
    Return the bin counts of a histogram over all values of an image.

    The image is converted to an array and flattened, so every value in it
    contributes to the same histogram.

    :param img: Pillow image (or anything ``numpy.asarray`` accepts)
    :param bins: number of equal-width bins; when omitted, the module-level
        ``quantize`` setting is used
    :return: numpy array with one count per bin
    """
    if bins is None:
        # Looked up at call time so the module-level setting stays the
        # single source of truth for the default bin count.
        bins = quantize
    counts, _edges = np.histogram(np.asarray(img).ravel(), bins=bins)
    return counts
if __name__ == '__main__':
    # Open every file once, inside a context manager so the handle is closed,
    # and cache what the pairwise comparisons need. The previous version
    # reopened both images for every pair and never closed them.
    # NOTE: the decoded pixels of all images are kept in memory at once.
    # Raises whatever Image.open raises for an entry that is not an image.
    fingerprints = []
    for filename in os.listdir(dataset):
        with Image.open(os.path.join(dataset, filename)) as img:
            fingerprints.append((
                filename,
                np.array(img),
                avg_hash(img),
                findpeaks(hist_hash(img)),
            ))

    # Visit each unordered pair exactly once. The previous version removed
    # entries from the list it was iterating over, which made the loop skip
    # files and therefore miss pairs.
    for position, (name, pixels, ahash, peaks) in enumerate(fingerprints):
        for other_name, other_pixels, other_ahash, other_peaks in fingerprints[position + 1:]:
            if (
                # duplicate: identical pixel data
                np.array_equal(pixels, other_pixels)
                # modification: identical average hash
                or np.count_nonzero(ahash != other_ahash) == 0
                # similar: histogram peaks at the same bins
                or np.array_equal(peaks, other_peaks)
            ):
                print(name, other_name)