Skip to content

Commit ee00f9a

Browse files
authored
Merge pull request #287 from MinecraftServerControl/automatic-restarts
Add automatic restart on crash
2 parents 21cdd21 + 8bfef14 commit ee00f9a

File tree

1 file changed

+157
-3
lines changed

1 file changed

+157
-3
lines changed

msctl

100755100644
Lines changed: 157 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ WGET=$(which wget)
4747
RDIFF_BACKUP=$(which rdiff-backup)
4848
RSYNC=$(which rsync)
4949
SOCAT=$(which socat)
50+
FLOCK=$(which flock)
5051

5152
# Script Usage
5253
# ---------------------------------------------------------------------------
@@ -276,6 +277,9 @@ mscs_defaults() {
276277
; for the world server selected.
277278
# mscs-default-server-command=\$JAVA -Xms\$INITIAL_MEMORY -Xmx\$MAXIMUM_MEMORY \$JVM_ARGS -jar \$SERVER_LOCATION/\$SERVER_JAR \$SERVER_ARGS
278279
280+
; Default behavior if to restart the server after crash is detected (default disabled).
281+
# mscs-default-restart-after-crash=false
282+
279283
; Location to store backup files.
280284
# mscs-backup-location=/opt/mscs/backups
281285
@@ -1315,6 +1319,123 @@ serverConsole() {
13151319
done
13161320
}
13171321

1322+
# ---------------------------------------------------------------------------
# Produce a timestamp for the current moment.
#
# @return The current date and time, formatted as YYYY-MM-DD_HH-MM-SS,
#         written to standard output.
# ---------------------------------------------------------------------------
timestamp() {
  local FORMAT
  # Underscore and dashes only, so the value is safe inside log lines and
  # file names alike.
  FORMAT='%Y-%m-%d_%H-%M-%S'
  date "+$FORMAT"
}
1330+
1331+
# ---------------------------------------------------------------------------
# Stop the server monitor.
#
# Does nothing when the world has no monitor PID file. Otherwise the monitor
# lock file is probed with a non-blocking flock:
#   - lock is held (flock exits 1): the process recorded in the PID file is
#     killed and, on success, the PID file is removed;
#   - lock is free (flock exits 0): no monitor holds the lock, so the PID
#     file is stale and is removed;
#   - any other flock exit status: nothing is done.
# Progress and errors are appended to the world's mscs.monitor.log.
#
# @param 1 The world server to stop.
# @return 1 if the monitor process could not be killed, 0 otherwise.
# ---------------------------------------------------------------------------
stopServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID_FILE MONITOR_PID MONITOR_LOCK_FILE
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # Without a PID file there is no process that could be signalled. Bail out
  # before touching the lock file so worlds that never ran a monitor do not
  # produce "No such file" noise or a stray monitor.lock.
  if [ ! -f "$MONITOR_PID_FILE" ]; then
    return 0
  fi
  MONITOR_PID=$(cat "$MONITOR_PID_FILE")
  # Check if server monitor instance currently running.
  (
    $FLOCK -n 9
    case $? in
      0)
        # Lock acquired: nothing else holds it, so the PID file is stale.
        rm -f "$MONITOR_PID_FILE"
        ;;
      1)
        # Lock is held: server monitor is running.
        printf '[%s] [INFO]: Stop command received for server monitor. Attempting to kill server monitor...\n' "$(timestamp)" >> "$MONITOR_LOG"
        # Kill the server monitor and verify it was actually killed. Any
        # non-zero status from kill counts as failure.
        if kill -9 "$MONITOR_PID"; then
          printf '[%s] [INFO]: Server monitor process killed successfully.\n' "$(timestamp)" >> "$MONITOR_LOG"
          # Remove the monitor PID file.
          rm -f "$MONITOR_PID_FILE"
        else
          printf '[%s] [ERROR]: Unable to kill monitor process.\n' "$(timestamp)" >> "$MONITOR_LOG"
          # Only leaves this subshell; its status becomes the function's.
          exit 1
        fi
        ;;
    esac
  ) 9>"$MONITOR_LOCK_FILE"
}
1362+
# ---------------------------------------------------------------------------
# Run the server monitor.
#
# Polls the world server until it is no longer running AND its PID file
# ($WORLDS_LOCATION/<world>.pid) is gone, which is treated as a clean
# shutdown. While polling:
#   - server not running but PID file present: treated as a crash; the
#     server is restarted via start, and a notice is written to
#     logs/last-start-status.log. If the restart fails the monitor is stopped.
#   - server running but PID file missing: reported as an error and the
#     monitor is stopped.
# All progress messages go to standard output; the caller decides where that
# is redirected.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
serverMonitor() {
  local WORLD_DIR LAST_START_STATUS_LOG MONITOR_PID POLL_INTERVAL
  WORLD_DIR="$WORLDS_LOCATION/$1"
  LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
  MONITOR_PID=$(cat "$WORLD_DIR/monitor.pid")
  # Seconds to wait between polls, so the loop does not spin a CPU core by
  # calling serverRunning back-to-back.
  POLL_INTERVAL=1
  touch "$LAST_START_STATUS_LOG"

  printf '[%s] [INFO]: Server monitoring started for %s. Server PID: %s. Monitor PID: %s.\n' \
    "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
  # Run monitor until the server is stopped and the PID file is removed (i.e. clean shutdown).
  until ! serverRunning "$1" && [ ! -f "$WORLDS_LOCATION/$1.pid" ]; do
    # If server isn't running and server PID file exists, server crashed.
    if ! serverRunning "$1" && [ -f "$WORLDS_LOCATION/$1.pid" ]; then
      printf '[%s] [WARN]: Server crash detected. Attempting to restart %s...\n' "$(timestamp)" "$1"
      # Restart and verify that the server restarted successfully.
      if start "$1"; then
        printf '[%s] [INFO]: Server monitoring resumed for %s. Server PID: %s. Monitor PID: %s.\n' \
          "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
        # Overwrite the status notice in one go; the file is truncated once
        # for the whole group.
        {
          printf ' %s automatically restarted from a crash (or in-game stop command)\n' "$1"
          printf ' on %s. See\n' "$(timestamp)"
          printf ' %s/logs/mscs.monitor.log and\n' "$WORLD_DIR"
          printf ' %s/crash_reports/ and\n' "$WORLD_DIR"
          printf ' %s/logs/ for more information.\n' "$WORLD_DIR"
        } > "$LAST_START_STATUS_LOG"
      else
        printf '[%s] [ERROR]: Failed to restart %s.\n' "$(timestamp)" "$1"
        stopServerMonitor "$1"
      fi
    # If server is running and server PID file doesn't exist, error occurred.
    elif serverRunning "$1" && [ ! -f "$WORLDS_LOCATION/$1.pid" ]; then
      printf '[%s] [ERROR]: PID file doesn'"'"'t exist.\n' "$(timestamp)"
      stopServerMonitor "$1"
    fi
    sleep "$POLL_INTERVAL"
  done
}
1402+
1403+
# ---------------------------------------------------------------------------
# Start the server monitor.
#
# Reads the world's mscs-restart-after-crash setting (falling back to
# $DEFAULT_RESTART_AFTER_CRASH). When it is enabled and the monitor lock file
# can be locked (i.e. no monitor already holds it), an old monitor log is
# pruned and serverMonitor is launched in the background with its PID
# recorded in monitor.pid.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
startServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID MONITOR_LOCK_FILE
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  # Note: this holds the PATH of the PID file, not the PID itself.
  MONITOR_PID="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # NOTE(review): assigned without `local`, so this overwrites any variable
  # named RESTART_AFTER_CRASH in the calling scope. Left as-is in case other
  # code reads it; confirm before making it local.
  RESTART_AFTER_CRASH=$(getMSCSValue "$1" "mscs-restart-after-crash" "$DEFAULT_RESTART_AFTER_CRASH")

  # Verify option is enabled.
  if true_value "$RESTART_AFTER_CRASH"; then
    # Verify that there is no monitor instance currently running.
    (
      # Non-blocking lock on fd 9; success means no monitor holds the lock.
      if $FLOCK -n 9; then
        # Delete old log file greater than $LOG_DURATION, if it exists.
        # An unset LOG_DURATION is treated as 0 (no pruning).
        if [ -f "$MONITOR_LOG" ] && [ "${LOG_DURATION:-0}" -gt 0 ]; then
          find "$MONITOR_LOG" -type f -mtime +"$LOG_DURATION" -delete
        fi
        # Run the server monitor.
        # Nohup does not allow you to pass functions. However, the code below mimics nohup behavior by doing the following:
        # Start subshell, ignore HUP signal, redirect stdin to /dev/null, redirect stdout and stderr to log file, run in background.
        # Also store the PID of this process for later use.
        # Redirection order matters: stdout must be pointed at the log BEFORE
        # stderr is duplicated onto it, otherwise stderr stays attached to
        # the caller's stdout instead of the log.
        (
          trap "" HUP
          # The exec'd sh reports its parent, which is this background subshell.
          echo "$(exec sh -c 'echo "$PPID"')" > "$MONITOR_PID"
          serverMonitor "$1"
        ) </dev/null >>"$MONITOR_LOG" 2>&1 &
      fi
    ) 9>"$MONITOR_LOCK_FILE"
  fi
}
1438+
13181439
# ---------------------------------------------------------------------------
13191440
# Start the world server. Generate the appropriate environment for the
13201441
# server if it doesn't already exist.
@@ -1418,6 +1539,8 @@ start() {
14181539
fi
14191540
# Create a PID file for the world server.
14201541
echo $PID >"$WORLDS_LOCATION/$1.pid"
1542+
# Start the server crash monitor, if enabled.
1543+
startServerMonitor $1
14211544
}
14221545

14231546
# ---------------------------------------------------------------------------
@@ -1426,6 +1549,8 @@ start() {
14261549
# @param 1 The world server to stop.
14271550
# ---------------------------------------------------------------------------
14281551
stop() {
1552+
# Stop the server monitor if it is running.
1553+
stopServerMonitor $1
14291554
# Tell the server to stop.
14301555
sendCommand $1 "stop"
14311556
sendCommand $1 "end"
@@ -1454,6 +1579,8 @@ stop() {
14541579
# ---------------------------------------------------------------------------
14551580
forceStop() {
14561581
local WAIT
1582+
# Stop the server monitor if it is running.
1583+
stopServerMonitor $1
14571584
# Try to stop the server cleanly first.
14581585
sendCommand $1 "stop"
14591586
sendCommand $1 "end"
@@ -2039,7 +2166,10 @@ queryNumUsers() {
20392166
# @param 1 The world server of interest.
20402167
# ---------------------------------------------------------------------------
20412168
worldStatus() {
2042-
local STATUS NUM MAX PLAYERS COUNTER VERSION
2169+
local WORLD_DIR LAST_START_STATUS_LOG MONITOR_PID STATUS NUM MAX PLAYERS COUNTER VERSION
2170+
WORLD_DIR="$WORLDS_LOCATION/$1"
2171+
MONITOR_PID="$WORLD_DIR/monitor.pid"
2172+
LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
20432173
if serverRunning $1; then
20442174
STATUS=$(queryDetailedStatus $1)
20452175
if [ -n "$STATUS" ]; then
@@ -2069,6 +2199,17 @@ worldStatus() {
20692199
printf " Memory used: $(getJavaMemory "$1" | awk '{$1=int(100 * $1/1024/1024)/100"GB";}{ print;}')"
20702200
printf " ($(getMSCSValue "$1" "mscs-maximum-memory" "$DEFAULT_MAXIMUM_MEMORY" | rev | cut -c 2- | rev | awk '{$1=int($1/1024)"GB";}{ print;}') allocated).\n"
20712201
printf " Process ID: %d.\n" $(getJavaPID "$1")
2202+
# Display crash monitor PID if it's running (i.e. monitor.pid file exists and not empty).
2203+
if [ -f "$MONITOR_PID" ] && [ -s "$MONITOR_PID" ]; then
2204+
printf " Crash Monitor PID: $(cat $MONITOR_PID)\n"
2205+
fi
2206+
# If the last-status log exists and not empty, then last restart was from a crash.
2207+
# Display notice once.
2208+
if [ -f "$LAST_START_STATUS_LOG" ] && [ -s "$LAST_START_STATUS_LOG" ]; then
2209+
printf "$(cat $LAST_START_STATUS_LOG)\n"
2210+
# Remove it so user doesn't see it next time they run the status command.
2211+
rm -f $LAST_START_STATUS_LOG
2212+
fi
20722213
elif ! true_value "$(getMSCSValue $1 'mscs-enabled')"; then
20732214
printf "disabled.\n"
20742215
else
@@ -2103,7 +2244,7 @@ worldStatusJSON() {
21032244
# ---------------------------------------------------------------------------
21042245

21052246
# Make sure that Java, Perl, libjson-perl, libwww-perl, Python, Wget,
2106-
# Rdiff-backup, Rsync, and Socat are installed.
2247+
# Rdiff-backup, Rsync, Socat, and Flock are installed.
21072248
# ---------------------------------------------------------------------------
21082249
if [ ! -e "$JAVA" ]; then
21092250
echo "ERROR: Java not found!"
@@ -2168,6 +2309,12 @@ if [ ! -e "$SOCAT" ]; then
21682309
echo "sudo apt-get install socat"
21692310
exit 1
21702311
fi
2312+
if [ ! -e "$FLOCK" ]; then
2313+
echo "ERROR: flock not found!"
2314+
echo "Try installing this with:"
2315+
echo "sudo apt-get install util-linux"
2316+
exit 1
2317+
fi
21712318

21722319
# Parse command-line options
21732320
# ---------------------------------------------------------------------------
@@ -2259,8 +2406,9 @@ fi
22592406
# mscs-default-maximum-memory - Default maximum amount of memory for a world server.
22602407
# mscs-default-server-location - Default location of the server .jar file.
22612408
# mscs-default-server-command - Default command to run for a world server.
2409+
# mscs-default-restart-after-crash - Whether to restart the server after a crash is detected (default disabled).
22622410
# mscs-backup-location - Location to store backup files.
2263-
# mscs-backup-log - Lcation of the backup log file.
2411+
# mscs-backup-log - Location of the backup log file.
22642412
# mscs-backup-excluded-files - Comma separated list of files and directories excluded from backups.
22652413
# mscs-backup-duration - Length in days that backups survive.
22662414
# mscs-log-duration - Length in days that logs survive.
@@ -2309,6 +2457,7 @@ fi
23092457
# mscs-default-maximum-memory=2048M
23102458
# mscs-default-server-location=/opt/mscs/server
23112459
# mscs-default-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2460+
# mscs-default-restart-after-crash=false
23122461
# mscs-backup-location=/opt/mscs/backups
23132462
# mscs-backup-log=/opt/mscs/backups/backup.log
23142463
# mscs-backup-excluded_files=
@@ -2356,6 +2505,7 @@ DEFAULT_INITIAL_MEMORY=$(getDefaultsValue 'mscs-default-initial-memory' '128M')
23562505
DEFAULT_MAXIMUM_MEMORY=$(getDefaultsValue 'mscs-default-maximum-memory' '2048M')
23572506
DEFAULT_SERVER_LOCATION=$(getDefaultsValue 'mscs-default-server-location' $LOCATION'/server')
23582507
DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS')
2508+
DEFAULT_RESTART_AFTER_CRASH=$(getDefaultsValue 'mscs-default-restart-after-crash' 'false')
23592509
# Each world server can override the default values in a similar manner by
23602510
# adding certain key/value pairs to the world's mscs.properties file.
23612511
#
@@ -2375,6 +2525,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
23752525
# mscs-maximum-memory - Assign the maximum amount of memory for the server.
23762526
# mscs-server-location - Assign the location of the server .jar file.
23772527
# mscs-server-command - Assign the command to run for the server.
2528+
# mscs-restart-after-crash - Restart the server after a crash (default disabled).
23782529
#
23792530
# Like above, the following variables may be used in some of the key values:
23802531
# $JAVA - The Java virtual machine.
@@ -2404,6 +2555,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
24042555
# mscs-maximum-memory=2048M
24052556
# mscs-server-location=/opt/mscs/server
24062557
# mscs-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2558+
# mscs-restart-after-crash=false
24072559

24082560
# World (Server Instance) Configuration
24092561
# ---------------------------------------------------------------------------
@@ -2415,6 +2567,8 @@ VERSIONS_JSON=$(getDefaultsValue 'mscs-versions-json' $LOCATION'/version_manifes
24152567
VERSIONS_DURATION=$(getDefaultsValue 'mscs-versions-duration' '30')
24162568
# The duration (in minutes) to keep lock files before removing.
24172569
LOCKFILE_DURATION=$(getDefaultsValue 'mscs-lockfile-duration' '1440')
2570+
# Enable the option to restart the server after a crash is detected (default disabled).
2571+
RESTART_AFTER_CRASH=$(getDefaultsValue 'mscs-restart-after-crash' 'false')
24182572

24192573
# Backup Configuration
24202574
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)