-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_au_dogfood_data.sh
executable file
·130 lines (104 loc) · 3.63 KB
/
update_au_dogfood_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
## Fetch the AU records, sanitize them and build the CSV datasets,
## then copy the resulting datasets to the web server.
## Passing --nocopy as the first argument skips the copy to the web server;
## this is intended for testing.

# Load the environment needed to communicate with AWS and app1.
source ~/.bash_profile
source /etc/profile.d/mozilla.sh
# Absolute path of the directory holding this script; the processing module
# is launched from here.
SRC_DIR=$(cd "$(dirname "$0")"; pwd)
# . $SRC_DIR/settings.env

# Root directory for the processing script, which doubles as the working
# directory for the dashboard data.
WORK_DIR="${HOME}/fxos-data/au"
# Location where the dump is unpacked and processed.
DUMP_WORK_DIR="${WORK_DIR}/aws_job"
# Holds the processed data files that get copied to the web server.
DATA_DIR="${WORK_DIR}/data_files"

# Name of the dump tarball and the local path it is downloaded to.
DUMP_TARBALL="au_dump.tar.gz"
TARBALL="${DUMP_WORK_DIR}/${DUMP_TARBALL}"

# Data file and job log expected in the dump working directory after unpacking.
# JOB_OUTPUT=$DUMP_WORK_DIR
# OUTPUT_DATA=$JOB_OUTPUT/au_data.out
DUMP_FILE="au_data.out"
OUTPUT_DATA="${DUMP_WORK_DIR}/${DUMP_FILE}"
# OUTPUT_LOG=$JOB_OUTPUT/au_job.log
OUTPUT_LOG="${DUMP_WORK_DIR}/au_job.log"

# Python module that turns the dump into the data files.
PYTHON_MODULE="postprocessing.au_data_tables"
# Log written by this script.
LOG_FILE="${WORK_DIR}/processing.log"
# Marker file recording when the local data was last refreshed.
LAST_UPDATED_PATH="${DATA_DIR}/last_updated"
# DASHBOARD_CSV_PATH=$DATA_DIR/$CSV_FILE
# DUMP_CSV_PATH=$DATA_DIR/$DUMP_CSV
ADDR="${USER}"
# From here on, append everything this script writes to stdout and stderr to
# the processing log. The path is quoted so a working directory containing
# whitespace does not turn this into an "ambiguous redirect".
exec >> "$LOG_FILE" 2>&1

# Flush log file once per day: truncate it when its last modification falls on
# an earlier calendar day than today. Both dates are formatted as YYYYMMDD, so
# a plain string comparison orders them correctly.
if [[ -e "$LOG_FILE" ]] \
    && [[ "$(date +%Y%m%d)" > "$(date -r "$LOG_FILE" +%Y%m%d)" ]]; then
  : > "$LOG_FILE"
fi

echo "------------"
echo
echo "Starting processing script: $(date)."
# Check whether new data is available: extract the leading YYYY-MM-DD date
# from the S3 listing of the dump tarball.
# S3_FXOS_AU is not set in this script; presumably it comes from the sourced
# environment - verify.
SERVER_LAST_UPDATED=$(aws s3 ls "$S3_FXOS_AU/$DUMP_TARBALL" \
  | grep -Eo "^[0-9]{4}(-[0-9]{2}){2}")

if [[ -z "$SERVER_LAST_UPDATED" ]]; then
  # The listing failed or carried no date. An empty pattern would make the
  # grep below match every line and wrongly report the data as current, so
  # skip the shortcut and fall through to the download step, which has its
  # own failure handling.
  echo "Could not determine the last-updated date of $DUMP_TARBALL on the server."
elif [[ -e "$LAST_UPDATED_PATH" ]] \
    && grep -qF -- "$SERVER_LAST_UPDATED" "$LAST_UPDATED_PATH"; then
  # The recorded update time already contains the server's date: nothing to do.
  echo "Current data is up-to-date."
  echo "Done: $(date)."
  echo
  exit 0
fi
# Download new data, if available, process, and copy to server.
# Remove any earlier tarball first so the existence check below can only be
# satisfied by this download.
rm -f -- "$TARBALL"
echo "Downloading latest output from AWS."
aws s3 cp "$S3_FXOS_AU/$DUMP_TARBALL" "$DUMP_WORK_DIR"
if [[ ! -e "$TARBALL" ]]; then
  echo "Failed to download tarball from AWS."
  # The recipient must follow the subject on the continued line; without it
  # the line continuation would pass the next echo's words to mailx as
  # recipients and the log message would never be printed.
  # NOTE(review): the address is the same literal used by the script's other
  # failure notices; it looks like a redacted placeholder - confirm the real
  # recipient.
  echo "" | mailx -s "FAILED: FxOS AU data - unable to download $DUMP_TARBALL" \
    "[email protected]"
  echo "Sent email notice. Exiting..."
  exit 1
fi
# Discard the data and log files left by an earlier run, so that the checks
# below only see what this tarball actually provides. Otherwise a failed or
# incomplete extraction would go unnoticed and stale data would be processed.
rm -f -- "$OUTPUT_DATA" "$OUTPUT_LOG"
# Extract tarball into the dump working directory.
# NOTE(review): the data and log files are looked up directly under
# $DUMP_WORK_DIR, so the archive is presumably expected to unpack them there
# rather than into a subdirectory - confirm against the job that builds it.
tar xvzf "$TARBALL" -C "$DUMP_WORK_DIR"
if [[ ! -s "$OUTPUT_DATA" ]]; then
  # Something went wrong - the data file is missing or empty.
  echo "No data file."
  # Check for log file.
  if [[ ! -e "$OUTPUT_LOG" ]]; then
    echo "No log file either."
    # Write a placeholder so the notice below still has a message body.
    echo "-- No log file --" > "$OUTPUT_LOG"
  fi
  # Send email notice with log file as text.
  mailx -s "FAILED: FxOS AU data - no data file $DUMP_FILE" "[email protected]" \
    < "$OUTPUT_LOG"
  echo "Sent email notice. Exiting..."
  exit 1
fi
# At this point we should have the latest data.
echo "Processing data..."
# The module is run with the script's directory as the current directory.
# Stop if that directory cannot be entered instead of running from wherever
# the script happens to be.
cd "$SRC_DIR" || { echo "Cannot change to $SRC_DIR."; exit 1; }
python -m "$PYTHON_MODULE" "$OUTPUT_DATA" "$DATA_DIR"
# Everything below works on relative file names inside the data directory.
cd "$DATA_DIR" || { echo "Cannot change to $DATA_DIR."; exit 1; }

# Look for at least one *.csv entry in the data directory. This must be an
# actual check of the directory contents: testing a quoted command string
# only tests that the string is non-empty, which never reports a failure.
csv_found=false
for csv_candidate in *.csv; do
  # With no match the pattern stays literal, so confirm the entry exists.
  if [[ -e "$csv_candidate" || -L "$csv_candidate" ]]; then
    csv_found=true
    break
  fi
done

if [[ "$csv_found" != true ]]; then
  echo "Something went wrong - no CSV files generated."
  echo "" | mailx -s "FAILED: FxOS AU data - no csv files" "[email protected]"
  exit 1
fi
# Store the modification time of the data file as the "last updated" marker.
echo "Done. Recording data update time."
stamp_format="%Y-%m-%d %H:%M:%S"
date -r $OUTPUT_DATA +"$stamp_format" > $LAST_UPDATED_PATH

# Test mode: when the first argument is --nocopy, finish here without
# touching the web server.
case "$1" in
  --nocopy)
    echo "Data files will not be copied to web server."
    echo "Done: $(date)."
    exit 0
    ;;
esac
# Publish the results: bundle the CSV files together with the last_updated
# marker, send the bundle to the web server, unpack it there, and remove the
# bundle on both ends.
# WWW is not set in this script; presumably it comes from the sourced
# environment - verify. It is left unquoted where the original left it so
# that its word-splitting behaviour is unchanged.
echo "Copying data files to web server."
tar -c -v -z -f csvs.tar.gz *.csv last_updated

# Ask the web server which directory its own profile assigns to AU_CSV, and
# copy the bundle into it.
remote_csv_dir=$(ssh $WWW ". .bash_profile; echo \$AU_CSV")
scp csvs.tar.gz "$WWW:$remote_csv_dir"

# On the web server: enter that directory, unpack the bundle, delete it, and
# set mode 644 on every entry in the directory.
REMOTE_CMD=". .bash_profile; cd \$AU_CSV; tar xvfz csvs.tar.gz; rm csvs.tar.gz; chmod 644 *"
ssh $WWW "$REMOTE_CMD"

# Remove the local copy of the bundle.
rm csvs.tar.gz
echo "Done: $(date)."
exit 0