@@ -47,6 +47,7 @@ WGET=$(which wget)
47
47
RDIFF_BACKUP=$( which rdiff-backup)
48
48
RSYNC=$( which rsync)
49
49
SOCAT=$( which socat)
50
+ FLOCK=$( which flock)
50
51
51
52
# Script Usage
52
53
# ---------------------------------------------------------------------------
@@ -276,6 +277,9 @@ mscs_defaults() {
276
277
; for the world server selected.
277
278
# mscs-default-server-command=\$ JAVA -Xms\$ INITIAL_MEMORY -Xmx\$ MAXIMUM_MEMORY \$ JVM_ARGS -jar \$ SERVER_LOCATION/\$ SERVER_JAR \$ SERVER_ARGS
278
279
280
+ ; Default behavior if to restart the server after crash is detected (default disabled).
281
+ # mscs-default-restart-after-crash=false
282
+
279
283
; Location to store backup files.
280
284
# mscs-backup-location=/opt/mscs/backups
281
285
@@ -1315,6 +1319,123 @@ serverConsole() {
1315
1319
done
1316
1320
}
1317
1321
1322
# ---------------------------------------------------------------------------
# Retrieve the timestamp.
#
# Prints the current local date and time as YYYY-MM-DD_HH-MM-SS, with no
# surrounding whitespace, so it can be embedded directly in log lines.
#
# @return The current date and time.
# ---------------------------------------------------------------------------
timestamp() {
  date '+%Y-%m-%d_%H-%M-%S'
}
1330
+
1331
# ---------------------------------------------------------------------------
# Stop the server monitor.
#
# The monitor is treated as running when a non-blocking lock on the world's
# monitor.lock file cannot be acquired.  In that case the process recorded in
# monitor.pid is killed and the PID file is removed.  Progress is appended to
# the world's logs/mscs.monitor.log file.
#
# @param 1 The world server to stop.
# @return 0 if no monitor was running or it was killed, 1 if the monitor
#         could not be killed.
# ---------------------------------------------------------------------------
stopServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID_FILE MONITOR_PID MONITOR_LOCK_FILE ACQUIRED_LOCK
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  # Without a lock file no monitor was ever started for this world.  Return
  # before the redirection below would create one (or fail because the world
  # directory is missing).
  if [ ! -f "$MONITOR_LOCK_FILE" ]; then
    return 0
  fi
  # Only read the PID file when it exists and is not empty, so that stopping a
  # world without a monitor does not print an error from cat.
  MONITOR_PID=""
  if [ -s "$MONITOR_PID_FILE" ]; then
    MONITOR_PID=$(cat "$MONITOR_PID_FILE")
  fi
  # Check if server monitor instance currently running.  The subshell keeps
  # file descriptor 9 (and any lock taken on it) scoped to this check; its
  # exit status becomes the return status of the function.
  (
    "$FLOCK" -n 9
    ACQUIRED_LOCK=$?
    if [ "$ACQUIRED_LOCK" -eq 1 ]; then # Server monitor is running.
      printf "[%s] [INFO]: Stop command received for server monitor. Attempting to kill server monitor...\n" "$(timestamp)" >> "$MONITOR_LOG"
      # Kill the server monitor and verify it was actually killed.  Any
      # failure counts, including an unknown (empty) PID.
      if [ -n "$MONITOR_PID" ] && kill -9 "$MONITOR_PID"; then
        printf "[%s] [INFO]: Server monitor process killed successfully.\n" "$(timestamp)" >> "$MONITOR_LOG"
        # Remove the monitor PID file.
        rm -f "$MONITOR_PID_FILE"
      else
        printf "[%s] [ERROR]: Unable to kill monitor process.\n" "$(timestamp)" >> "$MONITOR_LOG"
        exit 1
      fi
    fi
  ) 9> "$MONITOR_LOCK_FILE"
}
1362
# ---------------------------------------------------------------------------
# Run the server monitor.
#
# Polls the world server until it has shut down cleanly, i.e. the server is
# not running and its PID file has been removed.  A server that is not
# running while its PID file still exists is treated as crashed and is
# restarted.  All messages are written to standard output.
#
# The delay between polls is MONITOR_POLL_INTERVAL seconds (default 5).
#
# @param 1 The world server to monitor.
# @return 0 after a clean shutdown, 1 if monitoring was abandoned after an
#         error and the monitor process was not killed by stopServerMonitor.
# ---------------------------------------------------------------------------
serverMonitor() {
  local WORLD_DIR SERVER_PID_FILE LAST_START_STATUS_LOG MONITOR_PID POLL_INTERVAL
  WORLD_DIR="$WORLDS_LOCATION/$1"
  SERVER_PID_FILE="$WORLDS_LOCATION/$1.pid"
  LAST_START_STATUS_LOG="$WORLD_DIR/logs/last-start-status.log"
  MONITOR_PID=$(cat "$WORLD_DIR/monitor.pid")
  POLL_INTERVAL=${MONITOR_POLL_INTERVAL:-5}
  touch "$LAST_START_STATUS_LOG"

  printf "[%s] [INFO]: Server monitoring started for %s. Server PID: %s. Monitor PID: %s.\n" \
    "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
  # Run monitor until the server is stopped and the PID file is removed (i.e. clean shutdown).
  while true; do
    if serverRunning "$1"; then
      # If server is running and server PID file doesn't exist, error occurred.
      if [ ! -f "$SERVER_PID_FILE" ]; then
        printf "[%s] [ERROR]: PID file doesn't exist.\n" "$(timestamp)"
        stopServerMonitor "$1"
        # Only reached when the monitor process survived the stop request.
        return 1
      fi
    elif [ -f "$SERVER_PID_FILE" ]; then
      # If server isn't running and server PID file exists, server crashed.
      printf "[%s] [WARN]: Server crash detected. Attempting to restart %s...\n" "$(timestamp)" "$1"
      # Verify that the server restarted successfully.
      if start "$1"; then
        printf "[%s] [INFO]: Server monitoring resumed for %s. Server PID: %s. Monitor PID: %s.\n" \
          "$(timestamp)" "$1" "$(getJavaPID "$1")" "$MONITOR_PID"
        # Leave a short note describing the automatic restart, replacing any
        # previous note.
        {
          printf "%s automatically restarted from a crash (or in-game stop command)\n" "$1"
          printf "on %s. See\n" "$(timestamp)"
          printf "%s/logs/mscs.monitor.log and\n" "$WORLD_DIR"
          # Minecraft writes its crash reports to "crash-reports".
          printf "%s/crash-reports/ and\n" "$WORLD_DIR"
          printf "%s/logs/ for more information.\n" "$WORLD_DIR"
        } > "$LAST_START_STATUS_LOG"
      else
        printf "[%s] [ERROR]: Failed to restart %s.\n" "$(timestamp)" "$1"
        stopServerMonitor "$1"
        # Only reached when the monitor process survived the stop request;
        # give up instead of retrying the restart in a tight loop.
        return 1
      fi
    else
      # Server stopped and PID file removed: clean shutdown.
      break
    fi
    # Pause between polls so the monitor does not spin the CPU.
    sleep "$POLL_INTERVAL"
  done
}
1402
+
1403
# ---------------------------------------------------------------------------
# Start the server monitor.
#
# Does nothing unless the world's mscs-restart-after-crash value (falling
# back to DEFAULT_RESTART_AFTER_CRASH) is true.  A non-blocking lock on the
# world's monitor.lock file ensures that at most one monitor runs per world;
# the background monitor inherits the locked file descriptor.
#
# @param 1 The world server to monitor.
# ---------------------------------------------------------------------------
startServerMonitor() {
  local WORLD_DIR MONITOR_LOG MONITOR_PID_FILE MONITOR_LOCK_FILE RESTART_AFTER_CRASH
  WORLD_DIR="$WORLDS_LOCATION/$1"
  MONITOR_LOG="$WORLD_DIR/logs/mscs.monitor.log"
  MONITOR_PID_FILE="$WORLD_DIR/monitor.pid"
  MONITOR_LOCK_FILE="$WORLD_DIR/monitor.lock"
  RESTART_AFTER_CRASH=$(getMSCSValue "$1" "mscs-restart-after-crash" "$DEFAULT_RESTART_AFTER_CRASH")

  # Verify option is enabled.
  if true_value "$RESTART_AFTER_CRASH"; then
    # The monitor log lives in the world's logs directory; make sure it exists
    # before anything is redirected into it.
    mkdir -p "$WORLD_DIR/logs"
    # Verify that there is no monitor instance currently running.
    (
      if "$FLOCK" -n 9; then # Server monitor doesn't exist.
        # Delete old log file greater than $LOG_DURATION, if it exists.
        if [ -f "$MONITOR_LOG" ] && [ "${LOG_DURATION:-0}" -gt 0 ]; then
          find "$MONITOR_LOG" -type f -mtime +"$LOG_DURATION" -delete
        fi
        # Run the server monitor.
        # nohup cannot run shell functions, so mimic it: start a subshell,
        # ignore the HUP signal, take stdin from /dev/null, append stdout and
        # stderr to the log file, and run in the background.
        # The child sh reports its parent PID, which is the PID of the
        # background subshell; it is stored for later use.
        # Redirection order matters: stdout is pointed at the log first so
        # that "2>&1" sends stderr to the log as well.
        (
          trap "" HUP
          sh -c 'echo "$PPID"' > "$MONITOR_PID_FILE"
          serverMonitor "$1"
        ) < /dev/null >> "$MONITOR_LOG" 2>&1 &
      fi
    ) 9> "$MONITOR_LOCK_FILE"
  fi
}
1438
+
1318
1439
# ---------------------------------------------------------------------------
1319
1440
# Start the world server. Generate the appropriate environment for the
1320
1441
# server if it doesn't already exist.
@@ -1418,6 +1539,8 @@ start() {
1418
1539
fi
1419
1540
# Create a PID file for the world server.
1420
1541
echo $PID > " $WORLDS_LOCATION /$1 .pid"
1542
+ # Start the server crash monitor, if enabled.
1543
+ startServerMonitor $1
1421
1544
}
1422
1545
1423
1546
# ---------------------------------------------------------------------------
@@ -1426,6 +1549,8 @@ start() {
1426
1549
# @param 1 The world server to stop.
1427
1550
# ---------------------------------------------------------------------------
1428
1551
stop () {
1552
+ # Stop the server monitor if it is running.
1553
+ stopServerMonitor $1
1429
1554
# Tell the server to stop.
1430
1555
sendCommand $1 " stop"
1431
1556
sendCommand $1 " end"
@@ -1454,6 +1579,8 @@ stop() {
1454
1579
# ---------------------------------------------------------------------------
1455
1580
forceStop () {
1456
1581
local WAIT
1582
+ # Stop the server monitor if it is running.
1583
+ stopServerMonitor $1
1457
1584
# Try to stop the server cleanly first.
1458
1585
sendCommand $1 " stop"
1459
1586
sendCommand $1 " end"
@@ -2039,7 +2166,10 @@ queryNumUsers() {
2039
2166
# @param 1 The world server of interest.
2040
2167
# ---------------------------------------------------------------------------
2041
2168
worldStatus () {
2042
- local STATUS NUM MAX PLAYERS COUNTER VERSION
2169
+ local WORLD_DIR LAST_START_STATUS_LOG MONITOR_PID STATUS NUM MAX PLAYERS COUNTER VERSION
2170
+ WORLD_DIR=" $WORLDS_LOCATION /$1 "
2171
+ MONITOR_PID=" $WORLD_DIR /monitor.pid"
2172
+ LAST_START_STATUS_LOG=" $WORLD_DIR /logs/last-start-status.log"
2043
2173
if serverRunning $1 ; then
2044
2174
STATUS=$( queryDetailedStatus $1 )
2045
2175
if [ -n " $STATUS " ]; then
@@ -2069,6 +2199,17 @@ worldStatus() {
2069
2199
printf " Memory used: $( getJavaMemory " $1 " | awk ' {$1=int(100 * $1/1024/1024)/100"GB";}{ print;}' ) "
2070
2200
printf " ($( getMSCSValue " $1 " " mscs-maximum-memory" " $DEFAULT_MAXIMUM_MEMORY " | rev | cut -c 2- | rev | awk ' {$1=int($1/1024)"GB";}{ print;}' ) allocated).\n"
2071
2201
printf " Process ID: %d.\n" $( getJavaPID " $1 " )
2202
+ # Display crash monitor PID if it's running (i.e. monitor.pid file exists and not empty).
2203
+ if [ -f " $MONITOR_PID " ] && [ -s " $MONITOR_PID " ]; then
2204
+ printf " Crash Monitor PID: $( cat $MONITOR_PID ) \n"
2205
+ fi
2206
+ # If the last-status log exists and not empty, then last restart was from a crash.
2207
+ # Display notice once.
2208
+ if [ -f " $LAST_START_STATUS_LOG " ] && [ -s " $LAST_START_STATUS_LOG " ]; then
2209
+ printf " $( cat $LAST_START_STATUS_LOG ) \n"
2210
+ # Remove it so user doesn't see it next time they run the status command.
2211
+ rm -f $LAST_START_STATUS_LOG
2212
+ fi
2072
2213
elif ! true_value " $( getMSCSValue $1 ' mscs-enabled' ) " ; then
2073
2214
printf " disabled.\n"
2074
2215
else
@@ -2103,7 +2244,7 @@ worldStatusJSON() {
2103
2244
# ---------------------------------------------------------------------------
2104
2245
2105
2246
# Make sure that Java, Perl, libjson-perl, libwww-perl, Python, Wget,
2106
- # Rdiff-backup, Rsync, and Socat are installed.
2247
+ # Rdiff-backup, Rsync, Socat, and flock are installed.
2107
2248
# ---------------------------------------------------------------------------
2108
2249
if [ ! -e " $JAVA " ]; then
2109
2250
echo " ERROR: Java not found!"
@@ -2168,6 +2309,12 @@ if [ ! -e "$SOCAT" ]; then
2168
2309
echo " sudo apt-get install socat"
2169
2310
exit 1
2170
2311
fi
2312
# flock is used to make sure only one server crash monitor runs per world.
if [ ! -e "$FLOCK" ]; then
  echo "ERROR: flock not found!"
  echo "Try installing this with:"
  echo "sudo apt-get install util-linux"
  exit 1
fi
2171
2318
2172
2319
# Parse command-line options
2173
2320
# ---------------------------------------------------------------------------
2259
2406
# mscs-default-maximum-memory - Default maximum amount of memory for a world server.
2260
2407
# mscs-default-server-location - Default location of the server .jar file.
2261
2408
# mscs-default-server-command - Default command to run for a world server.
2409
+ # mscs-default-restart-after-crash - Whether to restart the server after a crash is detected (default: disabled).
2262
2410
# mscs-backup-location - Location to store backup files.
2263
- # mscs-backup-log - Lcation of the backup log file.
2411
+ # mscs-backup-log - Location of the backup log file.
2264
2412
# mscs-backup-excluded-files - Comma separated list of files and directories excluded from backups.
2265
2413
# mscs-backup-duration - Length in days that backups survive.
2266
2414
# mscs-log-duration - Length in days that logs survive.
2309
2457
# mscs-default-maximum-memory=2048M
2310
2458
# mscs-default-server-location=/opt/mscs/server
2311
2459
# mscs-default-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2460
+ # mscs-default-restart-after-crash=false
2312
2461
# mscs-backup-location=/opt/mscs/backups
2313
2462
# mscs-backup-log=/opt/mscs/backups/backup.log
2314
2463
# mscs-backup-excluded_files=
@@ -2356,6 +2505,7 @@ DEFAULT_INITIAL_MEMORY=$(getDefaultsValue 'mscs-default-initial-memory' '128M')
2356
2505
DEFAULT_MAXIMUM_MEMORY=$( getDefaultsValue ' mscs-default-maximum-memory' ' 2048M' )
2357
2506
DEFAULT_SERVER_LOCATION=$( getDefaultsValue ' mscs-default-server-location' $LOCATION ' /server' )
2358
2507
DEFAULT_SERVER_COMMAND=$( getDefaultsValue ' mscs-default-server-command' ' $JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS' )
2508
+ DEFAULT_RESTART_AFTER_CRASH=$( getDefaultsValue ' mscs-default-restart-after-crash' ' false' )
2359
2509
# Each world server can override the default values in a similar manner by
2360
2510
# adding certain key/value pairs to the world's mscs.properties file.
2361
2511
#
@@ -2375,6 +2525,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
2375
2525
# mscs-maximum-memory - Assign the maximum amount of memory for the server.
2376
2526
# mscs-server-location - Assign the location of the server .jar file.
2377
2527
# mscs-server-command - Assign the command to run for the server.
2528
+ # mscs-restart-after-crash - Restart the server after a crash (default disabled).
2378
2529
#
2379
2530
# Like above, the following variables may be used in some of the key values:
2380
2531
# $JAVA - The Java virtual machine.
@@ -2404,6 +2555,7 @@ DEFAULT_SERVER_COMMAND=$(getDefaultsValue 'mscs-default-server-command' '$JAVA -
2404
2555
# mscs-maximum-memory=2048M
2405
2556
# mscs-server-location=/opt/mscs/server
2406
2557
# mscs-server-command=$JAVA -Xms$INITIAL_MEMORY -Xmx$MAXIMUM_MEMORY $JVM_ARGS -jar $SERVER_LOCATION/$SERVER_JAR $SERVER_ARGS
2558
+ # mscs-restart-after-crash=false
2407
2559
2408
2560
# World (Server Instance) Configuration
2409
2561
# ---------------------------------------------------------------------------
@@ -2415,6 +2567,8 @@ VERSIONS_JSON=$(getDefaultsValue 'mscs-versions-json' $LOCATION'/version_manifes
2415
2567
VERSIONS_DURATION=$( getDefaultsValue ' mscs-versions-duration' ' 30' )
2416
2568
# The duration (in minutes) to keep lock files before removing.
2417
2569
LOCKFILE_DURATION=$( getDefaultsValue ' mscs-lockfile-duration' ' 1440' )
2570
+ # Enable the option to restart the server after a crash is detected (default disabled).
2571
+ RESTART_AFTER_CRASH=$( getDefaultsValue ' mscs-restart-after-crash' ' false' )
2418
2572
2419
2573
# Backup Configuration
2420
2574
# ---------------------------------------------------------------------------
0 commit comments