-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdump_all_au.sh
executable file
·89 lines (67 loc) · 1.92 KB
/
dump_all_au.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/bin/bash
# Logging mode.
#   (default)  all messages are written to log files.
#   --nolog    given as the first argument: print everything to stdout
#              instead. This is mainly for testing.
case "${1:-}" in
  --nolog) LOG_TO_FILE=false ;;
  *)       LOG_TO_FILE=true ;;
esac
# Configuration for dumping all FxOS AU records from START_DATE up to the
# present.
# A rolling window can be used instead of the fixed date:
#START_DATE=`date +%Y%m%d -d "-9 months"`
START_DATE=20150101

# Directory the script was invoked from, and the directory holding this
# script.
CURRENT_DIR=$(pwd)
SRC_DIR=$(cd "$(dirname "$0")"; pwd)
TELEMETRY_SERVER_DIR=${HOME}/telemetry-server

# Everything the run produces lives under OUTPUT_DIR.
OUTPUT_DIR="${CURRENT_DIR}/output"
OUTPUT_FILE="${OUTPUT_DIR}/au_raw.out"   # passed to the job as --output
LOG_FILE="${OUTPUT_DIR}/au_raw.log"      # main script log
JOB_LOG="${OUTPUT_DIR}/mapred.log"       # log for the job's own output
TARBALL=au_raw_dump.tar.gz

# Create the output directory on first use.
[ -d "${OUTPUT_DIR}" ] || mkdir "${OUTPUT_DIR}"
# Write output to log for debugging.
# Unless --nolog was given, stdout and stderr go to the main log file from
# here on. The path is quoted so a working directory containing whitespace
# does not cause an "ambiguous redirect".
$LOG_TO_FILE && exec > "$LOG_FILE" 2>&1

echo "It is now $(date)"
echo "Preparing job..."

# Directories passed to the job as --work-dir and --data-dir.
WORK_DIR=$CURRENT_DIR/work
DATA_DIR=$CURRENT_DIR/data
if [ ! -d "$WORK_DIR" ]; then
  mkdir "$WORK_DIR"
fi
if [ ! -d "$DATA_DIR" ]; then
  mkdir "$DATA_DIR"
fi

JOB_FILE=$SRC_DIR/dump_all.py
FILTER=$SRC_DIR/filter.json

# Build the input filter from the template by filling in its placeholders.
cp "$SRC_DIR/all_fxos_date.json" "$FILTER"
# Set the reason string.
sed -i'' "s/__REASON__/appusage/" "$FILTER"
# Set the date range: only a lower bound ("min") is supplied.
DATE_STRING="\"min\": \"$START_DATE\""
sed -i'' "s/__DATES__/$DATE_STRING/" "$FILTER"

echo "Job setup complete."
echo "Running job."
# The job is launched from the telemetry-server directory (presumably so
# that `python -m mapreduce.job` can locate the module -- confirm).
# Abort if the directory is unavailable rather than launching the job from
# the wrong place.
cd "$TELEMETRY_SERVER_DIR" || {
  echo "Cannot change directory to $TELEMETRY_SERVER_DIR"
  exit 1
}

# Switch logging to separate file for job output.
$LOG_TO_FILE && exec > "$JOB_LOG" 2>&1

python -m mapreduce.job "$JOB_FILE" \
  --input-filter "$FILTER" \
  --num-mappers 16 \
  --num-reducers 4 \
  --work-dir "$WORK_DIR" \
  --data-dir "$DATA_DIR" \
  --output "$OUTPUT_FILE" \
  --bucket "telemetry-published-v2" \
  --verbose
JOB_EXIT_CODE=$?

# Back to main log file.
# Reopen in append mode (>>): the file already holds the messages written
# before the job started, and a plain '>' would truncate them away.
$LOG_TO_FILE && exec >> "$LOG_FILE" 2>&1
echo "Mapreduce job exited with code: $JOB_EXIT_CODE"
echo "It is now $(date)"
echo "Packaging output..."

# Return to the invocation directory. The tarball name is relative, so the
# steps below still work from the current directory if this fails; warn and
# carry on.
cd "$CURRENT_DIR" || echo "Warning: cannot change directory to $CURRENT_DIR"

# Archive everything in the output directory (job output and logs).
tar cvzf "$TARBALL" -C "$OUTPUT_DIR" .
TAR_STATUS=$?

# GNU tar exits with 1 for non-fatal conditions such as "file changed as we
# read it" (the log file inside OUTPUT_DIR may still be written to while it
# is archived) and with 2 for fatal errors. Only a fatal error stops the
# cleanup, so the raw output is never deleted without a usable archive.
if [ "$TAR_STATUS" -gt 1 ]; then
  echo "tar failed with code $TAR_STATUS; leaving $OUTPUT_DIR untouched."
  exit 1
fi

# Replace the raw files with the tarball. ${OUTPUT_DIR:?} aborts instead of
# expanding to '/*' should the variable ever be empty or unset.
rm -f -- "${OUTPUT_DIR:?}"/*
mv -- "$TARBALL" "$OUTPUT_DIR"

echo "Done. Exiting..."
# The script reports success even when the mapreduce job failed; the job's
# exit code is only recorded in the log.
# NOTE(review): consider propagating JOB_EXIT_CODE if callers should see it.
exit 0