-
Notifications
You must be signed in to change notification settings - Fork 346
/
Copy pathtest_validate_json.sh
executable file
·331 lines (275 loc) · 11.8 KB
/
test_validate_json.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/usr/bin/env bash
# vim:ts=4:sts=4:sw=4:et
#
# Author: Hari Sekhon
# Date: 2015-12-22 23:39:33 +0000 (Tue, 22 Dec 2015)
#
# https://github.com/HariSekhon/DevOps-Python-tools
#
# License: see accompanying Hari Sekhon LICENSE file
#
# If you're using my code you're welcome to connect with me on LinkedIn and optionally send me feedback to help improve or steer this or other code I publish
#
# https://www.linkedin.com/in/HariSekhon
#
# Strict mode: stop on unhandled errors, unset variables and failed pipeline stages
set -o errexit -o nounset -o pipefail

# Any non-empty DEBUG value switches on command tracing
if [[ -n "${DEBUG:-}" ]]; then
    set -x
fi

# Locate the directory holding this script and work from its parent, so the
# relative paths used below (./validate_json.py, ./tests/...) resolve
srcdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$srcdir/.."

# shellcheck disable=SC1091
source ./tests/utils.sh

section "Testing validate_json.py"

# Keep a non-empty TIMEOUT from the caller's environment, otherwise default to 3
: "${TIMEOUT:=3}"
export TIMEOUT

# Arguments given to this script are first handed straight to validate_json.py
if (( $# > 0 )); then
    echo "validate_json.py $*"
    ./validate_json.py "$@"
    echo
fi
# Sample data lives under tests/data; files generated by this script go into a scratch sub-directory
data_dir="tests/data"
broken_dir="$data_dir/broken_json_data"

# Pattern handed to --exclude for the whole-tree scan: skips unpacked Spark distributions under tests/
# and any path containing 'broken' or 'error'.
# Every literal dot in the version numbers is escaped so that '.' cannot match an arbitrary character.
# NOTE(review): assumes --exclude takes a Python-style regex (it already relies on \d) - confirm against validate_json.py
exclude='/tests/spark-\d+\.\d+\.\d+-bin-hadoop\d+\.\d+$|broken|error'

# Start from an empty scratch directory; a failed removal is deliberately ignored
rm -fr "$broken_dir" || :
mkdir "$broken_dir"

echo "checking all JSON files in local directory"
./validate_json.py --exclude "$exclude" .
echo
# ==================================================
hr2
# multirecord.json is given as a file argument with no extra switch;
# under 'set -e' a non-zero exit here aborts the whole test run
echo "checking multirecord json"
./validate_json.py "$data_dir/multirecord.json"
echo
# ==================================================
hr2
# A single invocation receives both an explicit file and the current directory
echo "checking directory recursion (mixed with explicit file given)"
./validate_json.py "$data_dir/test.json" .
echo
# ==================================================
hr2
echo "checking json file without an extension"
# Take the first *.json file (case-insensitive) found under the script's first argument, or '.' when
# none is given, skipping Spark distribution, 'broken' and 'error' paths, and copy it to a name that
# has no extension before validating the copy.
# NOTE(review): the find | grep | head pipeline sits inside a command substitution used as an argument
# to cp, so errexit/pipefail do not act on its exit status - hoisting it into a variable assignment
# would change that; confirm before refactoring.
cp -iv "$(find "${1:-.}" -iname '*.json' | grep -v -e '/spark-.*-bin-hadoop.*/' -e 'broken' -e 'error' | head -n1)" "$broken_dir/no_extension_testfile"
./validate_json.py -t 1 "$broken_dir/no_extension_testfile"
echo
# ==================================================
hr2
echo "testing stdin"
# The same file is fed on stdin twice: once with an explicit '-' argument, once with no arguments at all
./validate_json.py - < "$data_dir/test.json"
./validate_json.py < "$data_dir/test.json"
echo "testing stdin and file mix"
# The file is named as an argument and also redirected to stdin in the same invocation
# shellcheck disable=SC2094
./validate_json.py "$data_dir/test.json" - < "$data_dir/test.json"
echo "testing stdin with multirecord"
./validate_json.py -m - < "$data_dir/multirecord.json"
echo
# ==================================================
hr2
echo "checking symlink handling"
# Create (or overwrite) a relative symlink next to test.json, validate through it, then remove it.
# Under 'set -e' the rm is only reached when the validation succeeds.
ln -sfv "test.json" "$data_dir/testlink.json"
./validate_json.py "$data_dir/testlink.json"
rm "$data_dir/testlink.json"
echo
#######################################
# Assert that validate_json.py rejects an input with a specific exit code.
# Globals:
#   exitcode - overwritten with the status returned by validate_json.py
# Arguments:
#   $1  - file to validate, passed as the last argument to validate_json.py
#   $2  - exit code that counts as a pass (defaults to 2)
#   $3+ - extra validate_json.py options; they are joined into one string
#         and deliberately re-split on whitespace when passed on
# Outputs:
#   a "successfully detected" or "FAILED" line on stdout
# Returns:
#   0 when the exit code matches; otherwise terminates the shell with 'exit 1'
#######################################
check_broken(){
    local target="$1"
    local want="${2:-2}"
    local extra_opts="${*:3}"
    # errexit is suspended only around the call so that its status can be
    # captured, and is switched back on straight afterwards
    set +e
    # shellcheck disable=SC2086
    ./validate_json.py $extra_opts "$target"
    exitcode=$?
    set -e
    # Guard clause: anything other than the expected status fails the whole run
    if [ "$exitcode" != "$want" ]; then
        echo "FAILED, returned unexpected exit code $exitcode for broken json in '$target'"
        exit 1
    fi
    echo "successfully detected broken json in '$target', returned exit code $exitcode"
    echo
}
# ==================================================
hr2
# A normal JSON document on stdin combined with -m must be rejected with exit code 2
echo "checking normal json stdin breakage using --multi-record switch"
exitcode=0
./validate_json.py - -m < "$data_dir/test.json" || exitcode=$?
if (( exitcode != 2 )); then
    echo "FAILED to detect breakage when feeding normal json doc to stdin with --multi-record (expecting one json doc per line), returned unexpected exit code $exitcode"
    exit 1
fi
echo "successfully detected breakage for --multi-record stdin vs normal json"
echo

# The reverse case: multirecord JSON on stdin without -m must also be rejected with exit code 2
echo "checking multirecord json stdin breakage without using --multi-record switch"
exitcode=0
./validate_json.py - < "$data_dir/multirecord.json" || exitcode=$?
if (( exitcode != 2 )); then
    echo "FAILED to detect breakage when feeding multirecord json to stdin without using --multi-record, returned unexpected exit code $exitcode"
    exit 1
fi
echo "successfully detected breakage for multirecord json on stdin without using --multi-record switch"
echo
# ==================================================
hr2
# Generate a file whose entire content is a bare word
echo blah > "$broken_dir/blah.json"

# Each of these must be rejected with check_broken's default expected exit
# code when validated without any extra switches
for notjson_file in \
    "$broken_dir/blah.json" \
    "$data_dir/single_quotes.notjson" \
    "$data_dir/multirecord_single_quotes.notjson" \
    "$data_dir/multirecord_single_quotes_embedded_double_quotes.notjson" \
    "$data_dir/multirecord_single_quotes_embedded_double_quotes_unescaped.notjson"
do
    check_broken "$notjson_file"
done
# ==================================================
hr2
# The block below is a disabled check that looked for a 'single quotes detected' message in the
# output for single_quotes.notjson; it is kept commented out until the TODO is resolved.
# TODO: make this check pass again - the problem is it'll be more expensive to run this check just to give better feedback to the user
#echo "checking invalid single quote detection"
# # Alpine's busybox grep doesn't have color
# if grep --help 2>&1 | grep -q -- --color; then
# grep_opts="--color"
# else
# grep_opts="-o"
# fi
#set +o pipefail
#./validate_json.py "$data_dir/single_quotes.notjson" 2>&1 |
# tee /dev/stderr |
# grep $grep_opts 'JSON INVALID.*single quotes detected' ||
# { echo "Failed to find single quote message in output"; exit 1; }
#set -o pipefail
#echo
# The same kind of single-quoted input that check_broken expects to fail must pass once -s is given
# (-s is presumably the short form of --permit-single-quotes named in the messages - confirm against validate_json.py)
echo "checking --permit-single-quotes mode works"
./validate_json.py -s "$data_dir/single_quotes.notjson"
echo
echo "checking --permit-single-quotes mode works with embedded double quotes"
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes.notjson"
echo
echo "checking --permit-single-quotes mode works with unescaped embedded double quotes"
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes_unescaped.notjson"
echo
# ==================================================
hr2
# Each multirecord sample below is validated twice with -s: first with an explicit -m,
# then without it, where the run only passes if validate_json.py accepts the file anyway
# (described by the messages as inferring multirecord)
echo "checking --permit-single-quotes mode works with multirecord single quoted json"
./validate_json.py -s "$data_dir/multirecord_single_quotes.notjson" -m
echo
echo "checking --permit-single-quotes mode infers multirecord single quoted json"
./validate_json.py -s "$data_dir/multirecord_single_quotes.notjson"
echo
# ==================================================
hr2
# Same pair of runs for the sample with embedded double quotes
echo "checking --permit-single-quotes mode works with multirecord single quoted json with embedded double quotes"
./validate_json.py -s "$data_dir/multirecord_single_quotes_embedded_double_quotes.notjson" -m
echo
echo "checking --permit-single-quotes mode infers multirecord single quoted json with embedded double quotes"
./validate_json.py -s "$data_dir/multirecord_single_quotes_embedded_double_quotes.notjson"
echo
# ==================================================
hr2
# Same pair of runs for the sample with unescaped embedded double quotes
echo "checking --permit-single-quotes mode works with multirecord json with unescaped embedded double quotes"
./validate_json.py -s "$data_dir/multirecord_single_quotes_embedded_double_quotes_unescaped.notjson" -m
echo
echo "checking --permit-single-quotes mode infers multirecord single quoted json with unescaped embedded double quotes"
./validate_json.py -s "$data_dir/multirecord_single_quotes_embedded_double_quotes_unescaped.notjson"
echo
# ==================================================
hr2
# The mixed-quoting sample must validate with -s, both with an explicit -m and without it
echo "checking --permit-single-quotes mode works with multirecord single quoted json with mixed quoting"
./validate_json.py -s "$data_dir/multirecord_single_double_mixed_quotes.notjson" -m
echo
echo "checking --permit-single-quotes mode infers multirecord single quoted json with mixed quoting"
./validate_json.py -s "$data_dir/multirecord_single_double_mixed_quotes.notjson"
echo

# The same two runs must also emit a WARNING on stdout or stderr.
# grep is run without -q on purpose: -q exits at the first match, and if validate_json.py then
# writes more output it hits a closed pipe and exits non-zero, which 'set -o pipefail' would report
# as a failure of the whole pipeline even though the WARNING was found. Reading the stream to the
# end and discarding the matches avoids that race; a genuine validate_json.py failure still fails
# the pipeline.
echo "checking --permit-single-quotes mode works with multirecord single quoted json with mixed quoting (should result in a WARNING message)"
if ./validate_json.py -s "$data_dir/multirecord_single_double_mixed_quotes.notjson" -m 2>&1 | grep WARNING > /dev/null; then
    echo "Found warning message"
else
    echo "failed to raise a WARNING message for mixed quoting"
    exit 1
fi
echo
echo "checking --permit-single-quotes mode infers multirecord single quoted json with mixed quoting (should result in a WARNING message)"
if ./validate_json.py -s "$data_dir/multirecord_single_double_mixed_quotes.notjson" 2>&1 | grep WARNING > /dev/null; then
    echo "Found warning message"
else
    echo "failed to raise a WARNING message while inferring mixed quoting"
    exit 1
fi
echo
# ============================================================================ #
# Print Mode Passthrough Tests
# ============================================================================ #
# With -p the validated input is expected back on stdout unchanged; each check
# compares the cksum of that stdout with the cksum of the source file.
hr2
echo "# Print Mode Passthrough Tests"
hr2

echo "testing print mode"
if [[ "$(./validate_json.py -p "$data_dir/test.json" | cksum)" != "$(cksum < "$data_dir/test.json")" ]]; then
    echo "print test failed!"
    exit 1
fi
echo "successfully passed test json to stdout"
echo

# Invalid input in print mode must exit with code 2 and write nothing to stdout
echo "testing print mode failed"
result=0
output="$(./validate_json.py -p "$data_dir/single_quotes.notjson")" || result=$?
if (( result != 2 )); then
    echo "print test failed with wrong exit code $result instead of 2!"
    exit 1
fi
if [[ -n "$output" ]]; then
    echo "print test failed by passing output to stdout for records that should be broken!"
    exit 1
fi
echo "successfully passed test of print mode failure"
echo

echo "testing print mode with multirecord"
if [[ "$(./validate_json.py -mp "$data_dir/multirecord.json" | cksum)" != "$(cksum < "$data_dir/multirecord.json")" ]]; then
    echo "print multirecord test failed!"
    exit 1
fi
echo "successfully passed multirecord json to stdout"
echo

echo "testing print mode with --permit-single-quotes"
if [[ "$(./validate_json.py -sp "$data_dir/single_quotes.notjson" | cksum)" != "$(cksum < "$data_dir/single_quotes.notjson")" ]]; then
    echo "print single quote json test failed!"
    exit 1
fi
echo "successfully passed single quoted json to stdout"
echo

echo "testing print mode with --permit-single-quotes multirecord"
if [[ "$(./validate_json.py -sp "$data_dir/multirecord_single_quotes.notjson" | cksum)" != "$(cksum < "$data_dir/multirecord_single_quotes.notjson")" ]]; then
    echo "print single quote multirecord singled quoted json test failed!"
    exit 1
fi
echo "successfully passed multirecord single quoted json stdout test"
echo

echo "testing print mode with --permit-single-quotes multirecord with embedded double quotes"
if [[ "$(./validate_json.py -sp "$data_dir/multirecord_single_quotes_embedded_double_quotes.notjson" | cksum)" != "$(cksum < "$data_dir/multirecord_single_quotes_embedded_double_quotes.notjson")" ]]; then
    echo "print single quote multirecord json with embedded double quotes test failed!"
    exit 1
fi
echo "successfully passed multirecord single quoted with embedded double quotes to stdout"
echo

echo "testing print mode with --permit-single-quotes multirecord with unescaped embedded double quotes"
if [[ "$(./validate_json.py -sp "$data_dir/multirecord_single_quotes_embedded_double_quotes_unescaped.notjson" | cksum)" != "$(cksum < "$data_dir/multirecord_single_quotes_embedded_double_quotes_unescaped.notjson")" ]]; then
    echo "print single quote multirecord json with unescaped embedded double quotes test failed!"
    exit 1
fi
echo "successfully passed multirecord single quoted with embedded unescaped double quotes to stdout"
echo
echo
# ============================================================================ #
hr2
# Fixtures: an object that is never closed, and a file holding the test.json document twice over
printf '%s\n' '{ "name": "hari" ' > "$broken_dir/missing_end_quote.json"
cat "$data_dir/test.json" "$data_dir/test.json" >> "$broken_dir/multi-broken.json"

# All three inputs must be rejected with check_broken's default expected exit code
for invalid_input in \
    "$broken_dir/missing_end_quote.json" \
    README.md \
    "$broken_dir/multi-broken.json"
do
    check_broken "$invalid_input"
done
echo

echo "checking for non-existent file"
check_broken nonexistentfile 2
echo

# ==================================================
hr2
# A file containing only a newline must be rejected as a file argument and on stdin, with and without -m
echo "checking blank content is invalid"
printf '\n' > "$broken_dir/blank.json"
check_broken "$broken_dir/blank.json"

echo "checking blank content is invalid for multirecord"
check_broken "$broken_dir/blank.json" 2 -m

echo "checking blank content is invalid via stdin"
check_broken - 2 < "$broken_dir/blank.json"

echo "checking blank content is invalid for multirecord via stdin"
check_broken - 2 -m < "$broken_dir/blank.json"

# Completely empty stdin must be rejected as well
echo "checking blank content is invalid for multirecord via stdin piped from /dev/null"
check_broken - 2 -m < /dev/null
echo

check_broken_sample_files json

# The scratch directory is only removed once every check above has passed
rm -fr "$broken_dir"

printf '%s\n' "=======" "SUCCESS" "=======" "" ""