@@ -47,6 +47,7 @@ WGET=$(which wget)
4747RDIFF_BACKUP=$( which rdiff-backup)
4848RSYNC=$( which rsync)
4949SOCAT=$( which socat)
50+ FLOCK=$( which flock)
5051
5152# Script Usage
5253# ---------------------------------------------------------------------------
@@ -276,6 +277,9 @@ mscs_defaults() {
276277; for the world server selected.
277278# mscs-default-server-command=\$ JAVA -Xms\$ INITIAL_MEMORY -Xmx\$ MAXIMUM_MEMORY \$ JVM_ARGS -jar \$ SERVER_LOCATION/\$ SERVER_JAR \$ SERVER_ARGS
278279
280+ ; Default behavior if to restart the server after crash is detected (default disabled).
281+ # mscs-default-restart-after-crash=false
282+
279283; Location to store backup files.
280284# mscs-backup-location=/opt/mscs/backups
281285
@@ -1315,6 +1319,123 @@ serverConsole() {
13151319 done
13161320}
13171321
# ---------------------------------------------------------------------------
# Retrieve the timestamp.
#
# The format (YYYY-MM-DD_HH-MM-SS) contains no whitespace, so the value can
# be embedded in log lines and file names without quoting problems.
#
# @return The current date and time.
# ---------------------------------------------------------------------------
timestamp() {
  date +"%Y-%m-%d_%H-%M-%S"
}
1330+
# ---------------------------------------------------------------------------
# Stop the server monitor.
#
# A running monitor keeps the world's monitor.lock file locked, so a failed
# non-blocking lock attempt means a monitor is alive and must be killed.  If
# the lock can be taken, no monitor is running and any PID file left behind
# is stale and removed.  Nothing is done (and nothing is created on disk)
# when the world has no monitor PID file at all.
#
# @param 1 The world server to stop.
# @return 0 if there was nothing to stop or the monitor was killed, 1 if the
#         monitor process could not be killed.
# ---------------------------------------------------------------------------
stopServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID_FILE MONITOR_PID MONITOR_LOCK_FILE
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # The monitor records its PID when it is launched; without a (non-empty)
  # PID file there is nothing that could be stopped.
  [ -s "$MONITOR_PID_FILE" ] || return 0
  MONITOR_PID=$(cat "$MONITOR_PID_FILE")
  # Check if server monitor instance currently running.
  (
    "$FLOCK" -n 9
    case $? in
      0)
        # Lock was free: no monitor is running, the PID file is stale.
        rm -f "$MONITOR_PID_FILE"
        ;;
      1)
        # Lock is held: server monitor is running.
        printf '[%s] [INFO]: Stop command received for server monitor. Attempting to kill server monitor...\n' \
          "$(timestamp)" >> "$MONITOR_LOG"
        # Kill the server monitor and verify it was actually killed.
        if ! kill -9 "$MONITOR_PID"; then
          printf '[%s] [ERROR]: Unable to kill monitor process.\n' \
            "$(timestamp)" >> "$MONITOR_LOG"
          exit 1
        fi
        printf '[%s] [INFO]: Server monitor process killed successfully.\n' \
          "$(timestamp)" >> "$MONITOR_LOG"
        # Remove the monitor PID file.
        rm -f "$MONITOR_PID_FILE"
        ;;
    esac
  ) 9> "$MONITOR_LOCK_FILE"
}
# ---------------------------------------------------------------------------
# Run the server monitor.
#
# Polls the world server until it has been shut down cleanly (server not
# running and its PID file removed).  A server that is not running while its
# PID file still exists is treated as crashed and restarted.  All messages go
# to stdout; the caller is expected to redirect them to the monitor log.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
serverMonitor() {
  local WORLD_DIR SERVER_PID_FILE LAST_START_STATUS_LOG MONITOR_PID POLL_INTERVAL
  WORLD_DIR="$WORLDS_LOCATION/$1"
  SERVER_PID_FILE="$WORLDS_LOCATION/$1.pid"
  LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
  MONITOR_PID=$(cat "$WORLD_DIR/monitor.pid")
  # Seconds to wait between checks; without a pause the loop would spin and
  # keep a CPU core busy for as long as the server is up.
  POLL_INTERVAL=5
  touch "$LAST_START_STATUS_LOG"

  printf '[%s] [INFO]: Server monitoring started for %s. Server PID: %s. Monitor PID: %s.\n' \
    "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
  # Run monitor until the server is stopped and the PID file is removed (i.e. clean shutdown).
  until ! serverRunning "$1" && [ ! -f "$SERVER_PID_FILE" ]; do
    # If server isn't running and server PID file exists, server crashed.
    if ! serverRunning "$1" && [ -f "$SERVER_PID_FILE" ]; then
      printf '[%s] [WARN]: Server crash detected. Attempting to restart %s...\n' \
        "$(timestamp)" "$1"
      # File descriptor 9 is the monitor lock inherited from startServerMonitor.
      # Close it for the restarted server so the lock is not kept alive by the
      # server process after the monitor itself has gone away.
      # NOTE(review): assumes start does not rely on an inherited fd 9 - confirm.
      if start "$1" 9>&-; then
        # The server restarted successfully.
        printf '[%s] [INFO]: Server monitoring resumed for %s. Server PID: %s. Monitor PID: %s.\n' \
          "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
        # Leave a one-time notice for the status command.
        {
          printf '%s automatically restarted from a crash (or in-game stop command)\n' "$1"
          printf 'on %s. See\n' "$(timestamp)"
          printf '%s/logs/mscs.monitor.log and\n' "$WORLD_DIR"
          printf '%s/crash_reports/ and\n' "$WORLD_DIR"
          printf '%s/logs/ for more information.\n' "$WORLD_DIR"
        } > "$LAST_START_STATUS_LOG"
      else
        printf '[%s] [ERROR]: Failed to restart %s.\n' "$(timestamp)" "$1"
        stopServerMonitor "$1"
      fi
    # If server is running and server PID file doesn't exist, error occurred.
    elif serverRunning "$1" && [ ! -f "$SERVER_PID_FILE" ]; then
      printf "[%s] [ERROR]: PID file doesn't exist.\n" "$(timestamp)"
      stopServerMonitor "$1"
    fi
    sleep "$POLL_INTERVAL"
  done
}
1402+
# ---------------------------------------------------------------------------
# Start the server monitor.
#
# Does nothing unless mscs-restart-after-crash is enabled for the world.  The
# world's monitor.lock file is locked (non-blocking) on file descriptor 9; the
# background monitor inherits that descriptor, so the lock stays held for as
# long as the monitor lives and a second monitor is never started.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
startServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID MONITOR_LOCK_FILE RESTART_AFTER_CRASH
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # Kept local so the per-world value does not overwrite the global setting.
  RESTART_AFTER_CRASH=$(getMSCSValue "$1" "mscs-restart-after-crash" "$DEFAULT_RESTART_AFTER_CRASH")

  # Verify option is enabled.
  true_value "$RESTART_AFTER_CRASH" || return 0

  # Verify that there is no monitor instance currently running.
  (
    # Failing to take the lock means a server monitor already exists.
    "$FLOCK" -n 9 || exit 0
    # Delete old log file greater than $LOG_DURATION, if it exists.
    if [ -f "$MONITOR_LOG" ] && [ "${LOG_DURATION:-0}" -gt 0 ]; then
      find "$MONITOR_LOG" -type f -mtime +"$LOG_DURATION" -delete
    fi
    # Run the server monitor.
    # Nohup does not allow you to pass functions. However, the code below
    # mimics nohup behavior by doing the following: start subshell, ignore HUP
    # signal, redirect stdin to /dev/null, redirect stdout and stderr to log
    # file, run in background.
    (
      trap "" HUP
      # Store the PID of this subshell for later use: the sh started by exec
      # replaces the command-substitution process, so its parent is this
      # subshell.
      echo "$(exec sh -c 'echo "$PPID"')" > "$MONITOR_PID"
      serverMonitor "$1"
      # The monitor ended on its own; do not leave a stale PID file behind.
      rm -f "$MONITOR_PID"
    ) < /dev/null >> "$MONITOR_LOG" 2>&1 &
  ) 9> "$MONITOR_LOCK_FILE"
}
1438+
13181439# ---------------------------------------------------------------------------
13191440# Start the world server. Generate the appropriate environment for the
13201441# server if it doesn't already exist.
@@ -1418,6 +1539,8 @@ start() {
14181539 fi
14191540 # Create a PID file for the world server.
14201541 echo $PID > " $WORLDS_LOCATION /$1 .pid"
1542+ # Start the server crash monitor, if enabled.
1543+ startServerMonitor $1
14211544}
14221545
14231546# ---------------------------------------------------------------------------
@@ -1426,6 +1549,8 @@ start() {
14261549# @param 1 The world server to stop.
14271550# ---------------------------------------------------------------------------
14281551stop () {
1552+ # Stop the server monitor if it is running.
1553+ stopServerMonitor $1
14291554 # Tell the server to stop.
14301555 sendCommand $1 " stop"
14311556 sendCommand $1 " end"
@@ -1454,6 +1579,8 @@ stop() {
14541579# ---------------------------------------------------------------------------
14551580forceStop () {
14561581 local WAIT
1582+ # Stop the server monitor if it is running.
1583+ stopServerMonitor $1
14571584 # Try to stop the server cleanly first.
14581585 sendCommand $1 " stop"
14591586 sendCommand $1 " end"
@@ -2039,7 +2166,10 @@ queryNumUsers() {
20392166# @param 1 The world server of interest.
20402167# ---------------------------------------------------------------------------
20412168worldStatus () {
2042- local STATUS NUM MAX PLAYERS COUNTER VERSION
2169+ local WORLD_DIR LAST_START_STATUS_LOG MONITOR_PID STATUS NUM MAX PLAYERS COUNTER VERSION
2170+ WORLD_DIR=" $WORLDS_LOCATION /$1 "
2171+ MONITOR_PID=" $WORLD_DIR /monitor.pid"
2172+ LAST_START_STATUS_LOG=" $WORLD_DIR /logs/last-start-status.log"
20432173 if serverRunning $1 ; then
20442174 STATUS=$( queryDetailedStatus $1 )
20452175 if [ -n " $STATUS " ]; then
@@ -2069,6 +2199,17 @@ worldStatus() {
20692199 printf " Memory used: $( getJavaMemory " $1 " | awk ' {$1=int(100 * $1/1024/1024)/100"GB";}{ print;}' ) "
20702200 printf " ($( getMSCSValue " $1 " " mscs-maximum-memory" " $DEFAULT_MAXIMUM_MEMORY " | rev | cut -c 2- | rev | awk ' {$1=int($1/1024)"GB";}{ print;}' ) allocated).\n"
20712201 printf " Process ID: %d.\n" $( getJavaPID " $1 " )
2202+ # Display crash monitor PID if it's running (i.e. monitor.pid file exists and not empty).
2203+ if [ -f " $MONITOR_PID " ] && [ -s " $MONITOR_PID " ]; then
2204+ printf " Crash Monitor PID: $( cat $MONITOR_PID ) \n"
2205+ fi
2206+ # If the last-status log exists and not empty, then last restart was from a crash.
2207+ # Display notice once.
2208+ if [ -f " $LAST_START_STATUS_LOG " ] && [ -s " $LAST_START_STATUS_LOG " ]; then
2209+ printf " $( cat $LAST_START_STATUS_LOG ) \n"
2210+ # Remove it so user doesn't see it next time they run the status command.
2211+ rm -f $LAST_START_STATUS_LOG
2212+ fi
20722213 elif ! true_value " $( getMSCSValue $1 ' mscs-enabled' ) " ; then
20732214 printf " disabled.\n"
20742215 else
@@ -2103,7 +2244,7 @@ worldStatusJSON() {
21032244# ---------------------------------------------------------------------------
21042245
21052246# Make sure that Java, Perl, libjson-perl, libwww-perl, Python, Wget,
2106- # Rdiff-backup, Rsync, and Socat are installed.
2247+ # Rdiff-backup, Rsync, Socat and flock are installed.
21072248# ---------------------------------------------------------------------------
21082249if [ ! -e " $JAVA " ]; then
21092250 echo " ERROR: Java not found!"
@@ -2168,6 +2309,12 @@ if [ ! -e "$SOCAT" ]; then
21682309 echo " sudo apt-get install socat"
21692310 exit 1
21702311fi
# Abort with install instructions when the flock executable located at the top
# of the script ($FLOCK) does not exist; the server monitor functions use it
# to take the per-world monitor lock.
2312+ if [ ! -e " $FLOCK " ]; then
2313+ echo " ERROR: flock not found!"
2314+ echo " Try installing this with:"
2315+ echo " sudo apt-get install util-linux"
2316+ exit 1
2317+ fi
21712318
21722319# Parse command-line options
21732320# ---------------------------------------------------------------------------
22592406# mscs-default-maximum-memory - Default maximum amount of memory for a world server.
22602407# mscs-default-server-location - Default location of the server .jar file.
22612408# mscs-default-server-command - Default command to run for a world server.
# mscs-default-restart-after-crash - Whether to restart the server after a crash is detected (default disabled).
22622410# mscs-backup-location - Location to store backup files.
2263- # mscs-backup-log - Lcation of the backup log file.
2411+ # mscs-backup-log - Location of the backup log file.
22642412# mscs-backup-excluded-files - Comma separated list of files and directories excluded from backups.
22652413# mscs-backup-duration - Length in days that backups survive.
22662414# mscs-log-duration - Length in days that logs survive.
23092457# mscs-default-maximum-memory=2048M
23102458# mscs-default-server-location=/opt/mscs/server
23112459# mscs-default-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2460+ # mscs-default-restart-after-crash=false
23122461# mscs-backup-location=/opt/mscs/backups
23132462# mscs-backup-log=/opt/mscs/backups/backup.log
23142463# mscs-backup-excluded_files=
@@ -2356,6 +2505,7 @@ DEFAULT_INITIAL_MEMORY=$(getDefaultsValue 'mscs-default-initial-memory' '128M')
23562505DEFAULT_MAXIMUM_MEMORY=$( getDefaultsValue ' mscs-default-maximum-memory' ' 2048M' )
23572506DEFAULT_SERVER_LOCATION=$( getDefaultsValue ' mscs-default-server-location' $LOCATION ' /server' )
23582507DEFAULT_SERVER_COMMAND=$( getDefaultsValue ' mscs-default-server-command' ' $JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS' )
2508+ DEFAULT_RESTART_AFTER_CRASH=$( getDefaultsValue ' mscs-default-restart-after-crash' ' false' )
23592509# Each world server can override the default values in a similar manner by
23602510# adding certain key/value pairs to the world's mscs.properties file.
23612511#
@@ -2375,6 +2525,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
23752525# mscs-maximum-memory - Assign the maximum amount of memory for the server.
23762526# mscs-server-location - Assign the location of the server .jar file.
23772527# mscs-server-command - Assign the command to run for the server.
2528+ # mscs-restart-after-crash - Restart the server after a crash (default disabled).
23782529#
23792530# Like above, the following variables may be used in some of the key values:
23802531# $JAVA - The Java virtual machine.
@@ -2404,6 +2555,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
24042555# mscs-maximum-memory=2048M
24052556# mscs-server-location=/opt/mscs/server
24062557# mscs-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2558+ # mscs-restart-after-crash=false
24072559
24082560# World (Server Instance) Configuration
24092561# ---------------------------------------------------------------------------
@@ -2415,6 +2567,8 @@ VERSIONS_JSON=$(getDefaultsValue 'mscs-versions-json' $LOCATION'/version_manifes
24152567VERSIONS_DURATION=$( getDefaultsValue ' mscs-versions-duration' ' 30' )
24162568# The duration (in minutes) to keep lock files before removing.
24172569LOCKFILE_DURATION=$( getDefaultsValue ' mscs-lockfile-duration' ' 1440' )
2570+ # Enable the option to restart the server after a crash is detected (default disabled).
2571+ RESTART_AFTER_CRASH=$( getDefaultsValue ' mscs-restart-after-crash' ' false' )
24182572
24192573# Backup Configuration
24202574# ---------------------------------------------------------------------------
0 commit comments