-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunjob.sh
executable file
·154 lines (134 loc) · 4.42 KB
/
runjob.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/bin/bash
# runjob.sh -- run a telemetry-server mapreduce job over FxOS ping data.
#
# Pass as arguments jobscript filename (relative to awsjobs dir),
# output filename,
# and optionally filter filename relative to filter dir
# (default is no filter).
#
# Initial command-line option -l/--local will use local data if any.
# Command-line option --since <date> gives start date
# Command-line option --until <date> gives end date (default is none).
# Command-line option --ndays n gives the number of days.
# --ndays can be combined with either of the other two.
# Option --reason can be used to specify the reason string (default is 'ftu').
# Earliest date to consider is 2014-04-01.
# Stored as yyyymmdd to match the `date +%Y%m%d` format used throughout.
START_DATE_DEFAULT="20140401"
# Default filter file is template.
# Basename only here; later expanded to a full path under awsjobs/filters
# and populated with the reason string and date range via sed.
FILTER_TEMPLATE="all_fxos_date.json"
# Require at least the two mandatory positional arguments
# (jobscript name and output filename); otherwise print usage and bail.
if [ $# -lt 2 ]; then
  # Usage is a diagnostic: send it to stderr so it is never mistaken
  # for job output when the script is used in a pipeline.
  {
    echo "Usage: $(basename "$0") <opts> jobscript_name output_filename"
    echo " (jobscript_name is relative to fxos-metrics/awsjobs dir)"
    echo " --filter <filename> : use custom filter file in filters dir"
    echo " --local : use local data, if any"
    echo " --since <yyyy-mm-dd> : earliest date to include"
    echo " --until <yyyy-mm-dd> : latest date to include"
    echo " --ndays <n> : number of days to count"
    echo " --reason : reason string ('ftu' or 'appusage' for FxOS)"
  } >&2
  exit 1
fi
# Consume leading options; the final two positional args (jobscript and
# output filename) are left untouched by the $# -gt 2 guard.
while [ $# -gt 2 ]; do
  case "$1" in
    -l|--local)
      # Flag passed through to the job runner to use locally cached data.
      LOCAL="--local-only"
      ;;
    --since)
      shift
      # Normalize to yyyymmdd; abort if the date string is unparseable
      # rather than silently continuing with an empty start date.
      START_DATE=$(date +%Y%m%d -d "$1") || exit 1
      ;;
    --until)
      shift
      END_DATE=$(date +%Y%m%d -d "$1") || exit 1
      ;;
    --ndays)
      shift
      NDAYS="$1"
      ;;
    --filter)
      shift
      FILTER_FILE="$1"
      ;;
    --reason)
      shift
      REASON_STRING="$1"
      ;;
    *)
      echo "Invalid option: $1" >&2
      exit 1
      ;;
  esac
  shift
done
# Resolve the effective date range from the --since/--until/--ndays options.
if [ -z "$NDAYS" ]; then
  # No day count given: fall back to the default start date when unset.
  # The end date stays whatever --until supplied (possibly nothing).
  START_DATE=${START_DATE:-$START_DATE_DEFAULT}
else
  if [ -z "$START_DATE" ]; then
    # Count backwards from the end date, or from today when none was given
    # (an empty END_DATE leaves "-N days", which date resolves against now).
    START_DATE=$(date +%Y%m%d -d "$END_DATE-$NDAYS days")
  else
    if [ -n "$END_DATE" ]; then
      # Start, end, and day count together are over-determined; refuse.
      echo "Can't use --since, --until, and --ndays all at the same time"
      exit 1
    fi
    # Count forwards from the start date; lexicographic comparison works
    # because both sides are yyyymmdd. A future end date is dropped.
    END_DATE=$(date +%Y%m%d -d "$START_DATE+$NDAYS days")
    [[ "$END_DATE" > "$(date +%Y%m%d)" ]] && END_DATE=''
  fi
fi
# Remember where we were invoked from (the output path is relative to it)
# and where this script lives (jobs and filters are relative to it).
CURRENT_DIR=$(pwd)
SRC_DIR=$(cd "$(dirname "$0")" && pwd)
TELEMETRY_SERVER_DIR=$HOME/telemetry-server
# Set up temporary storage for the mapreduce job and its data cache.
WORK_DIR=/mnt/telemetry/work
DATA_CACHE=$WORK_DIR/cache
# mkdir -p creates missing parents (plain mkdir failed if /mnt/telemetry
# did not exist) and is a no-op when the directories are already present,
# replacing the racy test-then-create pattern.
mkdir -p "$WORK_DIR" "$DATA_CACHE"
# Job script and output locations derived from the positional arguments.
JOB_FILE=$SRC_DIR/awsjobs/$1
OUTPUT_FILE=$CURRENT_DIR/$2
# Path to the filter template that will be populated with custom values,
# if required.
FILTER_TEMPLATE=$SRC_DIR/awsjobs/filters/$FILTER_TEMPLATE
# Path to filter file that will be used in the job --
# either a custom file or one generated from the template.
FILTER=$SRC_DIR/awsjobs/filters/${FILTER_FILE:-_date_filter.json}
# Symlink utils dir into job dir so that the telemetry-server job runner
# can see it. All expansions are quoted so paths containing spaces do not
# word-split in the test, dirname, or ln arguments.
UTILS_LINK="$(dirname "$JOB_FILE")/utils"
if [ ! -e "$UTILS_LINK" ]; then
  ln -s "$SRC_DIR/utils" "$UTILS_LINK"
fi
# Generate a filter file from the template when no custom filter was given.
if [ -z "$FILTER_FILE" ]; then
  # File operands are quoted: with the old unquoted form an empty or
  # space-containing $FILTER made cp/sed operate on the wrong operands.
  cp "$FILTER_TEMPLATE" "$FILTER"
  # Set the reason string (defaults to 'ftu').
  sed -i'' "s/__REASON__/${REASON_STRING:-ftu}/" "$FILTER"
  # Build the JSON date-range fragment: always a "min", plus a "max"
  # when an end date was resolved. yyyymmdd dates contain no sed
  # metacharacters, so direct substitution into the pattern is safe.
  DATE_STRING="\"min\": \"$START_DATE\""
  if [ -n "$END_DATE" ]; then
    DATE_STRING="$DATE_STRING, \"max\": \"$END_DATE\""
  fi
  sed -i'' "s/__DATES__/$DATE_STRING/" "$FILTER"
fi
echo "Running job $JOB_FILE with filter $FILTER"
echo "Dumping output to $OUTPUT_FILE"
# The job must run from the telemetry-server checkout so that the
# mapreduce.job module is importable; abort rather than run from the
# wrong directory if the cd fails.
cd "$TELEMETRY_SERVER_DIR" || exit 1
echo "Starting fxosping export"
# $LOCAL is intentionally unquoted: it is either empty (expands to no
# argument) or the single flag --local-only.
python -m mapreduce.job "$JOB_FILE" \
  --input-filter "$FILTER" \
  --num-mappers 16 \
  --num-reducers 4 \
  --work-dir "$WORK_DIR" \
  --data-dir "$DATA_CACHE" \
  $LOCAL \
  --output "$OUTPUT_FILE" \
  --bucket "telemetry-published-v2" \
  --verbose
# Capture the job's exit status immediately, before echo clobbers $?,
# and propagate it so callers can detect failure (the script previously
# always exited 0).
RC=$?
echo "Mapreduce job exited with code: $RC"
echo "Output is located in $OUTPUT_FILE"
exit "$RC"