-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmain.py
136 lines (110 loc) · 5.47 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
DCT compressor.
Reconstructs an image by using only the first K coefficients of its 8x8 DCT,
or by quantizing all coefficients of the 8x8 DCT.
Originally tried in Python 2.7.5; the current imports (skimage.metrics) require Python 3.
!! DEPENDENCIES !!
This script depends on the following Python packages:
- argparse (standard library), to parse command line arguments
- OpenCV (cv2), for the DCT and I/O of images
- numpy, to speed up array manipulation
- scikit-image, for SSIM computation
"""
import argparse
import itertools
import math
from skimage.metrics import structural_similarity as compute_ssim
import numpy as np
import cv2 as cv
import utils
def main():
    """
    Command line entry point of the DCT compressor.

    Reads the image given by --input, converts it to YCrCb, approximates every
    channel separately with approximate_mono_image(), converts the result back
    to BGR, prints the PSNR and SSIM of the approximation against the original
    image and finally shows the approximation in a window until a key is
    pressed or the window is closed.

    Exits through argparse's error reporting if the input image cannot be read.
    """
    # parse the command line arguments to attributes of 'args'
    parser = argparse.ArgumentParser(description='DCT compressor. '
                                     'Reconstructs an image by using only the first K coefficients '
                                     'of its 8x8 DCT, or by quantizing all coefficients of the 8x8 DCT.')
    parser.add_argument('--input', dest='image_path', required=True, type=str,
                        help='Path to the image to be analyzed.')
    parser.add_argument('--coeffs', dest='num_coeffs', required=False, type=int,
                        help='Number of coefficients that will be used to reconstruct the original image, '
                             'without quantization.')
    parser.add_argument('--scale-factor', dest='scale_factor', required=False, type=float, default=1,
                        help='Scale factor for the quantization step (the higher, the more quantization loss).')
    args = parser.parse_args()

    # read image; cv.imread signals failure by returning None instead of raising
    orig_img = cv.imread(args.image_path, cv.IMREAD_COLOR)
    if orig_img is None:
        parser.error('Could not read image: %s' % args.image_path)
    img = np.float32(orig_img)

    # get YCC components
    img_ycc = cv.cvtColor(img, code=cv.COLOR_BGR2YCrCb)

    # compress and decompress every channel separately
    rec_img = np.empty_like(img)
    for channel_num in range(3):
        rec_img[:, :, channel_num] = approximate_mono_image(img_ycc[:, :, channel_num],
                                                            num_coeffs=args.num_coeffs,
                                                            scale_factor=args.scale_factor)

    # convert back to BGR and saturate to the valid 8-bit range before casting
    rec_img_rgb = cv.cvtColor(rec_img, code=cv.COLOR_YCrCb2BGR)
    rec_img_rgb = np.uint8(np.clip(rec_img_rgb, 0, 255))

    # show PSNR and SSIM of the approximation
    diff = rec_img_rgb.astype(float) - orig_img.astype(float)
    mse = (diff ** 2).mean()
    # a perfect reconstruction has zero error: report infinite PSNR explicitly
    # instead of dividing by zero
    psnr = float('inf') if mse == 0 else 10 * math.log10((255 ** 2) / mse)
    # the grayscale images are float32 on a 0..255 scale, so the dynamic range
    # must be given explicitly (it cannot be inferred from a float dtype)
    ssim = compute_ssim(cv.cvtColor(np.float32(rec_img_rgb), code=cv.COLOR_BGR2GRAY),
                        cv.cvtColor(np.float32(orig_img), code=cv.COLOR_BGR2GRAY),
                        data_range=255)
    print('PSNR: %s dB' % psnr)
    print('SSIM: %s' % ssim)

    # visualize until the user presses a key or closes the window
    window_name = 'Approximation image'
    cv.imshow(window_name, rec_img_rgb)
    while True:
        # waitKey also pumps the GUI event loop; it returns -1 when no key was pressed
        if cv.waitKey(33) >= 0:
            break
        if cv.getWindowProperty(window_name, cv.WND_PROP_VISIBLE) < 1:
            break
    cv.destroyAllWindows()
def approximate_mono_image(img, num_coeffs=None, scale_factor=1):
    """
    Approximates a single channel image by using only the first coefficients of the DCT.
    First, the image is chopped into 8x8 pixels patches and the DCT is applied to each patch.
    Then, if num_coeffs is provided, only the first K DCT coefficients are kept.
    If not, all the elements are quantized using the JPEG quantization matrix and the scale_factor.
    Finally, the resulting coefficients are used to approximate the original patches with the IDCT, and the image is
    reconstructed back again from these patches.
    :param img: Image to be approximated. Must be a 2D array whose height and width are multiples of 8.
    :param num_coeffs: Number of DCT coefficients to use. If None, quantization is used instead.
    :param scale_factor: Scale factor to use in the quantization step (ignored when num_coeffs is given).
    :return: The approximated image, as a 2D array with the same height and width as img.
    :raises ValueError: If img is not 2D or its dimensions are not multiples of 8.
    """
    # prevent against multiple-channel images
    if len(img.shape) != 2:
        raise ValueError('Input image must be a single channel 2D array')

    # shape of image
    height, width = img.shape
    if (height % 8 != 0) or (width % 8 != 0):
        raise ValueError("Image dimensions (%s, %s) must be multiple of 8" % (height, width))

    # number of 8x8 blocks per column / row; floor division keeps these integers
    # (true division would yield floats under Python 3)
    block_rows = height // 8
    block_cols = width // 8

    # split into 8 x 8 pixels blocks, in row-major block order
    img_blocks = [img[j:j + 8, i:i + 8]
                  for (j, i) in itertools.product(range(0, height, 8),
                                                  range(0, width, 8))]

    # DCT transform every 8x8 block
    dct_blocks = [cv.dct(img_block) for img_block in img_blocks]

    if num_coeffs is not None:
        # keep only the first K DCT coefficients of every block
        reduced_dct_coeffs = [utils.zig_zag(dct_block, num_coeffs) for dct_block in dct_blocks]
    else:
        # the quantization step is the same for every block, so compute it once
        quant_step = utils.jpeg_quantiz_matrix * scale_factor
        # quantize all the DCT coefficients and get the (lossy) coefficients back
        reduced_dct_coeffs = [np.round(dct_block / quant_step) * quant_step
                              for dct_block in dct_blocks]

    # IDCT of every block
    rec_img_blocks = [cv.idct(coeff_block) for coeff_block in reduced_dct_coeffs]

    # stitch the blocks back together:
    # (block_row, block_col, row, col) -> (block_row, row, block_col, col) -> (height, width)
    rec_img = (np.array(rec_img_blocks)
               .reshape(block_rows, block_cols, 8, 8)
               .swapaxes(1, 2)
               .reshape(height, width))
    return rec_img
# Run the compressor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()