Skip to content

Commit f7cffcb

Browse files
committed
Provide an option to show/hide Shannon entropy
The concept of information entropy was introduced by Claude Shannon in his 1948 paper "A Mathematical Theory of Communication", and is also referred to as Shannon entropy. Shannon's theory defines a data communication system composed of three elements: a source of data, a communication channel, and a receiver. The "fundamental problem of communication" – as expressed by Shannon – is for the receiver to be able to identify what data was generated by the source, based on the signal it receives through the channel. In qtest, execute "option entropy 1" before the "show" command to display both the value of each element and its Shannon entropy. For the sake of performance, the core Shannon entropy computation uses integer-only arithmetic.
1 parent a68ceaa commit f7cffcb

7 files changed

+332
-1
lines changed

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ $(GIT_HOOKS):
3939

4040
OBJS := qtest.o report.o console.o harness.o queue.o \
4141
random.o dudect/constant.o dudect/fixture.o dudect/ttest.o \
42+
shannon_entropy.o \
4243
linenoise.o web.o
4344

4445
deps := $(OBJS:%.o=.%.o.d)

console.c

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
/* Some global values */
1919
int simulation = 0;
20+
int show_entropy = 0;
2021
static cmd_element_t *cmd_list = NULL;
2122
static param_element_t *param_list = NULL;
2223
static bool block_flag = false;
@@ -435,6 +436,7 @@ void init_cmd()
435436
add_param("verbose", &verblevel, "Verbosity level", NULL);
436437
add_param("error", &err_limit, "Number of errors until exit", NULL);
437438
add_param("echo", &echo, "Do/don't echo commands", NULL);
439+
add_param("entropy", &show_entropy, "Show/Hide Shannon entropy", NULL);
438440

439441
init_in();
440442
init_time(&last_time);

log2_lshift16.h

+282
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
/*
2+
* Generate precalculated values of log2 with assumption that arg will be left
3+
* shifted by 16 bits and the return value of log2_lshift16() will be left shifted
4+
* by 3 bits. All of these shifts are used to avoid floating point in the
5+
* calculation.
6+
*/
7+
8+
#include <stdint.h>
9+
10+
/*
 * Fixed-point scale applied to the argument of log2_lshift16(): 1 << 16.
 * Despite the name this is the multiplier itself (65536), not a shift count.
 */
#define LOG2_ARG_SHIFT (1 << 16)
11+
/*
 * Fixed-point scale of the value returned by log2_lshift16(): 1 << 3.
 * As above, this is the multiplier (8), not a shift count.
 */
#define LOG2_RET_SHIFT (1 << 3)
12+
13+
/*
 * Look up the precalculated function (log2(arg << 16)) << 3.
 *
 * @lshift16: the argument, already scaled by LOG2_ARG_SHIFT (1 << 16).
 *
 * Returns the tabulated logarithm scaled by LOG2_RET_SHIFT (1 << 3). The
 * result lies in [-136, 0]; every argument >= 60097 maps to 0.
 *
 * The table is sorted by ascending exclusive upper bound, so the answer is
 * the value attached to the first bound that is strictly greater than the
 * argument. A binary search keeps the number of comparisons logarithmic
 * without hand-written nested branches that repeat the same condition.
 */
static inline int log2_lshift16(uint64_t lshift16)
{
    static const struct {
        uint16_t limit; /* exclusive upper bound of the interval */
        int16_t value;  /* result for arguments below that bound */
    } steps[] = {
        {1, -136},    {2, -123},    {3, -117},    {4, -113},
        {5, -110},    {6, -108},    {7, -106},    {8, -104},
        {9, -103},    {10, -102},   {11, -100},   {12, -99},
        {13, -98},    {15, -97},    {16, -96},    {17, -95},
        {19, -94},    {21, -93},    {23, -92},    {25, -91},
        {27, -90},    {29, -89},    {32, -88},    {35, -87},
        {38, -86},    {41, -85},    {45, -84},    {49, -83},
        {54, -82},    {59, -81},    {64, -80},    {70, -79},
        {76, -78},    {83, -77},    {91, -76},    {99, -75},
        {108, -74},   {117, -73},   {128, -72},   {140, -71},
        {152, -70},   {166, -69},   {181, -68},   {197, -67},
        {215, -66},   {235, -65},   {256, -64},   {279, -63},
        {304, -62},   {332, -61},   {362, -60},   {395, -59},
        {431, -58},   {470, -57},   {512, -56},   {558, -55},
        {609, -54},   {664, -53},   {724, -52},   {790, -51},
        {861, -50},   {939, -49},   {1024, -48},  {1117, -47},
        {1218, -46},  {1328, -45},  {1448, -44},  {1579, -43},
        {1722, -42},  {1878, -41},  {2048, -40},  {2233, -39},
        {2435, -38},  {2656, -37},  {2896, -36},  {3158, -35},
        {3444, -34},  {3756, -33},  {4096, -32},  {4467, -31},
        {4871, -30},  {5312, -29},  {5793, -28},  {6317, -27},
        {6889, -26},  {7512, -25},  {8192, -24},  {8933, -23},
        {9742, -22},  {10624, -21}, {11585, -20}, {12634, -19},
        {13777, -18}, {15024, -17}, {16384, -16}, {17867, -15},
        {19484, -14}, {21247, -13}, {23170, -12}, {25268, -11},
        {27554, -10}, {30048, -9},  {32768, -8},  {35734, -7},
        {38968, -6},  {42495, -5},  {46341, -4},  {50535, -3},
        {55109, -2},  {60097, -1},
    };
    const unsigned n_steps = sizeof(steps) / sizeof(steps[0]);

    /* Find the first entry whose bound is strictly above the argument. */
    unsigned lo = 0;
    unsigned hi = n_steps;
    while (lo < hi) {
        const unsigned mid = lo + (hi - lo) / 2;
        if (lshift16 < steps[mid].limit)
            hi = mid;
        else
            lo = mid + 1;
    }

    /* Arguments at or beyond the last bound are treated as log2 == 0. */
    return lo < n_steps ? steps[lo].value : 0;
}

qtest.c

+11-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
#include "dudect/fixture.h"
1818
#include "list.h"
1919

20+
/* Shannon entropy */
21+
extern double shannon_entropy(const uint8_t *input_data);
22+
extern int show_entropy;
23+
2024
/* Our program needs to use regular malloc/free */
2125
#define INTERNAL 1
2226
#include "harness.h"
@@ -803,8 +807,14 @@ static bool show_queue(int vlevel)
803807
if (exception_setup(true)) {
804808
while (ok && ori != cur && cnt < lcnt) {
805809
element_t *e = list_entry(cur, element_t, list);
806-
if (cnt < big_list_size)
810+
if (cnt < big_list_size) {
807811
report_noreturn(vlevel, cnt == 0 ? "%s" : " %s", e->value);
812+
if (show_entropy) {
813+
report_noreturn(
814+
vlevel, "(%3.2f%%)",
815+
shannon_entropy((const uint8_t *) e->value));
816+
}
817+
}
808818
cnt++;
809819
cur = cur->next;
810820
ok = ok && !error_check();

scripts/aspell-pws

+1
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,4 @@ alloc
289289
adjtime
290290
perf
291291
uring
292+
Shannon

scripts/pre-commit.hook

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
CPPCHECK_suppresses="--inline-suppr harness.c --suppress=unmatchedSuppression:harness.c --suppress=missingIncludeSystem \
44
--suppress=unusedFunction:linenoise.c \
55
--suppress=ConfigurationNotChecked:random.c \
6+
--suppress=identicalInnerCondition:log2_lshift16.h \
67
--suppress=nullPointerRedundantCheck:report.c \
78
--suppress=nullPointerRedundantCheck:harness.c \
89
--suppress=nullPointer:qtest.c \

shannon_entropy.c

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#include <assert.h>
2+
#include <stdint.h>
3+
#include <string.h>
4+
5+
/* Precalculated log2 realization */
6+
#include "log2_lshift16.h"
7+
8+
/* Shannon full integer entropy calculation */
9+
/* Number of histogram buckets: one per possible uint8_t value (1 << 8). */
#define BUCKET_SIZE (1 << 8)
10+
11+
double shannon_entropy(const uint8_t *s)
12+
{
13+
assert(s);
14+
const uint64_t count = strlen((char *) s);
15+
uint64_t entropy_sum = 0;
16+
const uint64_t entropy_max = 8 * LOG2_RET_SHIFT;
17+
18+
uint32_t bucket[256];
19+
memset(&bucket, 0, sizeof(bucket));
20+
21+
for (uint32_t i = 0; i < count; i++)
22+
bucket[s[i]]++;
23+
24+
for (uint32_t i = 0; i < BUCKET_SIZE; i++) {
25+
if (bucket[i]) {
26+
uint64_t p = bucket[i];
27+
p *= LOG2_ARG_SHIFT / count;
28+
entropy_sum += -p * log2_lshift16(p);
29+
}
30+
}
31+
32+
entropy_sum /= LOG2_ARG_SHIFT;
33+
return entropy_sum * 100.0 / entropy_max;
34+
}

0 commit comments

Comments
 (0)