Skip to content

Commit 7867846

Browse files
pbailiebmcutler
authored andcommitted
[Feature:Logging] Preferred name logging sysadmin script (#16)
* Create preferred_name_logging.php Sysadmin tool portion as part of the logging request under Submitty/Submitty#3273 ("Preferred name is forgotten/removed without any indication why") Initial code entry. Testing and tweaking needed. WIP. * Update preferred_name_logging.php Psql can generate several logfiles that need to be parsed. * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php WIP * Update preferred_name_logging.php PR Candidate * Delete preferred_name_logging.php~ Unneeded temp file * Update submitty_student_auto_feed.php Add AUTH(entication) logging to student auto feed for preferred name change tracking. * Create readme.md Instructions on using the preferred name logging tool. * Update preferred_name_logging.php Update to log USER_ID token.
1 parent 45a3964 commit 7867846

File tree

3 files changed

+326
-0
lines changed

3 files changed

+326
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#!/usr/bin/env php
2+
<?php
3+
/**
4+
* Preferred Name Logging syadmin tool for Submitty
5+
*
6+
* This script is to be run on the same server as Postgresql. When run, it
7+
* will parse Postgresql's CSV logs for the previous day and compile a new
8+
* log of any changes to a user's preferred first and last names.
9+
*
10+
* @author Peter Bailie
11+
*/
12+
13+
/** Main class */
14+
class main {
15+
16+
/**
17+
* Config
18+
*
19+
* @var const array
20+
* @access private
21+
*/
22+
private const CONFIG = array(
23+
'timezone' => "America/New_York",
24+
'postgresql_logfile_path' => "/var/log/postgresql/",
25+
'submitty_logfile_path' => "/var/log/submitty/",
26+
'postgresql_logfile' => "postgresql",
27+
'submitty_logfile' => "submitty_preferred_names",
28+
'submitty_log_retention' => 7
29+
);
30+
31+
private const CONSTANTS = array(
32+
'postgresql_log_row_count' => 23,
33+
'psql_validation_update' => 7,
34+
'psql_validation_log' => 11,
35+
'psql_validation_pfn' => 13,
36+
'psql_data_date' => 8,
37+
'psql_data_auth' => 19,
38+
'psql_data_pfn' => 14
39+
);
40+
41+
/**
42+
* Method to invoke to run this program: main::run()
43+
*
44+
* @access public
45+
* @static
46+
*/
47+
public static function run() {
48+
// make sure we are running as root from cli
49+
switch (true) {
50+
case posix_getuid() !== 0:
51+
case PHP_SAPI !== 'cli':
52+
fprintf(STDERR, "This is a command line script. Root required.%s", PHP_EOL);
53+
exit(1);
54+
}
55+
56+
date_default_timezone_set(self::CONFIG['timezone']);
57+
58+
self::parse_and_write_logs();
59+
self::log_retention_and_deletion();
60+
61+
exit(0);
62+
}
63+
64+
/**
65+
* Process method
66+
*
67+
* @access private
68+
* @static
69+
*/
70+
private static function parse_and_write_logs() {
71+
//Check to make sure Submitty log directory path exists. Create it if needed.
72+
if (file_exists(self::CONFIG['submitty_logfile_path']) === false) {
73+
if (mkdir(self::CONFIG['submitty_logfile_path'], 0700) === false) {
74+
self::log("Submitty log folder needed, mkdir failed.");
75+
exit(1);
76+
}
77+
}
78+
79+
//Prepare submitty preferred name change log file for today.
80+
$submitty_logfile = sprintf("%s%s_%s.log", self::CONFIG['submitty_logfile_path'], self::CONFIG['submitty_logfile'], date("Y-m-d"));
81+
$submitty_fh = fopen($submitty_logfile, 'w');
82+
if ($submitty_fh === false) {
83+
self::log("Cannot create Submitty logfile.");
84+
exit(1);
85+
} else {
86+
fwrite($submitty_fh, "Log opened." . PHP_EOL);
87+
}
88+
89+
//There can be multiple psql log files that need to be read.
90+
//But we want the ones dated one day prior (hence subtract 86400 seconds which is one day)
91+
$preg_str = sprintf("~^%s\-%s_\d{6}\.csv$~", self::CONFIG['postgresql_logfile'], preg_quote(date("Y-m-d", time() - 86400)));
92+
$logfiles = preg_grep($preg_str, scandir(self::CONFIG['postgresql_logfile_path']));
93+
94+
foreach ($logfiles as $logfile) {
95+
$psql_fh = fopen(self::CONFIG['postgresql_logfile_path'] . $logfile, 'r');
96+
if ($psql_fh === false) {
97+
self::log("Cannot open {$logfile}.");
98+
continue;
99+
}
100+
101+
$psql_row = fgetcsv($psql_fh);
102+
$psql_row_num = 1;
103+
while($psql_row !== false) {
104+
//Validation. Any case is true, validation fails and row is ignored.
105+
switch(true) {
106+
case count($psql_row) !== self::CONSTANTS['postgresql_log_row_count']:
107+
self::log(sprintf("NOTICE: PSQL log row %d had %d columns. %d columns expected. Row ignored.%s", $psql_row_num, count($psql_row), self::CONFIG['postgresql_log_row_count']));
108+
case $psql_row[self::CONSTANTS['psql_validation_update']] !== "UPDATE":
109+
case $psql_row[self::CONSTANTS['psql_validation_log']] !== "LOG":
110+
case $psql_row[self::CONSTANTS['psql_validation_pfn']] !== "PREFERRED_NAME DATA UPDATE":
111+
$psql_row = fgetcsv($psql_fh);
112+
$psql_row_num++;
113+
continue 2;
114+
}
115+
116+
//Validation successful, process row.
117+
//Trim all values in the row
118+
$psql_row = array_map('trim', $psql_row);
119+
120+
//Get date token.
121+
$date = $psql_row[self::CONSTANTS['psql_data_date']];
122+
123+
//Get "AUTH" token (who logged the change).
124+
$key = array();
125+
if (preg_match("~/\* AUTH: \"[\w\-]+\" \*/~", $psql_row[19]) === 1) {
126+
$key['start'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], "/* AUTH: ") + 3;
127+
$key['end'] = strpos($psql_row[self::CONSTANTS['psql_data_auth']], " */");
128+
$auth = " | " . substr($psql_row[self::CONSTANTS['psql_data_auth']], $key['start'], $key['end'] - $key['start']);
129+
} else {
130+
$auth = " | AUTH NOT LOGGED ";
131+
//Anything sent to STDERR gets emailed by cron.
132+
fprintf(STDERR, "WARNING: AUTH NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true));
133+
}
134+
135+
//Get user_id and preferred name change tokens.
136+
$preferred_name = array();
137+
$preferred_names_data = explode(" ", $psql_row[self::CONSTANTS['psql_data_pfn']]);
138+
139+
//user_id token
140+
$key = array_search("USER_ID:", $preferred_names_data);
141+
if ($key !== false) {
142+
$user_id = " | USER_ID: {$preferred_names_data[$key+1]} ";
143+
} else {
144+
$user_id = " | USER_ID NOT LOGGED ";
145+
//Anything sent to STDERR gets emailed by cron.
146+
fprintf(STDERR, "WARNING: USER ID NOT LOGGED%s%s", PHP_EOL, var_export($psql_row, true));
147+
}
148+
149+
$key = array_search("PREFERRED_FIRSTNAME", $preferred_names_data);
150+
if ($key !== false) {
151+
$preferred_name['first']['old'] = $preferred_names_data[$key+2];
152+
$preferred_name['first']['new'] = $preferred_names_data[$key+4];
153+
}
154+
// It is possible that no Preferred Firstname was logged, in which we can ignore an move on.
155+
156+
$key = array_search("PREFERRED_LASTNAME", $preferred_names_data);
157+
if ($key !== false) {
158+
$preferred_name['last']['old'] = $preferred_names_data[$key+2];
159+
$preferred_name['last']['new'] = $preferred_names_data[$key+4];
160+
}
161+
// It is possible that no Preferred Lastname was logged, in which we can ignore an move on.
162+
163+
//Build preferred name change log entry.
164+
$submitty_log = $date . $auth . $user_id;
165+
if (isset($preferred_name['first'])) {
166+
$submitty_log .= " | PREFERRED_FIRSTNAME OLD: {$preferred_name['first']['old']} NEW: {$preferred_name['first']['new']}";
167+
} else {
168+
$submitty_log .= " | PREFERRED_FIRSTNAME UNCHANGED";
169+
}
170+
171+
if (isset($preferred_name['last'])) {
172+
$submitty_log .= " | PREFERRED_LASTNAME OLD: {$preferred_name['last']['old']} NEW: {$preferred_name['last']['new']}";
173+
} else {
174+
$submitty_log .= " | PREFERRED_LASTNAME UNCHANGED";
175+
}
176+
177+
//Write log entry and go to next row.
178+
fwrite($submitty_fh, $submitty_log . PHP_EOL);
179+
$psql_row = fgetcsv($psql_fh);
180+
$psql_row_num++;
181+
}
182+
183+
fclose($psql_fh);
184+
}
185+
186+
fwrite($submitty_fh, "Log closed." . PHP_EOL);
187+
fclose($submitty_fh);
188+
}
189+
190+
/**
191+
* Automatically remove old logs
192+
*
193+
* @access private
194+
* @static
195+
*/
196+
private static function log_retention_and_deletion() {
197+
$preg_str = sprintf("~^%s_%s.log$~", self::CONFIG['submitty_logfile'], preg_quote(date("m-d-Y")));
198+
$logfiles = preg_grep($preg_str, scandir(self::CONFIG['submitty_logfile_path']));
199+
$expiration_epoch = (int)(strtotime(date('Y-m-d')) / 86400) - self::CONFIG['submitty_log_retention'];
200+
201+
foreach($logfiles as $logfile) {
202+
$datestamp = substr($logfile, strpos($logfile, "_") + 1, 10);
203+
$datestamp_epoch = (int)(strtotime($datestamp) / 86400);
204+
if ($datestamp_epoch < $expiration_epoch) {
205+
if (unlink(self::CONFIG['submitty_logfile_path'] . $logfile) === false) {
206+
self::log("Could not delete old logfile: {$logfile}");
207+
}
208+
}
209+
}
210+
}
211+
212+
/**
213+
* Log messages to error log and STDERR.
214+
*
215+
* @access private
216+
* @static
217+
*/
218+
private static function log(string $msg) {
219+
$datestamp = date("m-d-Y");
220+
error_log(sprintf("%s %s", $datestamp, $msg), 0);
221+
fprintf(STDERR, "%s%s", $msg, PHP_EOL);
222+
}
223+
}
224+
225+
// Start processing.
226+
main::run();
227+
228+
// EOF
229+
?>

preferred_name_logging/readme.md

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Preferred Name Logging
2+
3+
This tool will help track when user preferred names are changed. It attempts to
4+
log who was authenticated for the change, and what change occurred.
5+
6+
It works by first having Postgresql log the required information as the data is
7+
updated in Submitty's databases. Then the sysadmin tool in this folder will
8+
scrape the Postgresql logfile and record only those entries that showed
9+
preferred name change.
10+
11+
## FERPA
12+
13+
Data processed and logged by this tool may be protected by
14+
[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html).
15+
Please consult and abide by your institute's data protection policies.
16+
17+
## Postgresql
18+
19+
Submitty's website code and database schema will instruct Postgresql to log any
20+
`UPDATE` query involving any user's preferred first or last name changes.
21+
22+
### Postgresql Configuration
23+
24+
Postgresql needs to be configured to produce the required logs as the necessary
25+
logging is not enabled by default.
26+
27+
#### Config file
28+
29+
- Config file is `Postgresql.conf`
30+
- In Ubuntu 18.04, it is located in `/etc/postgresql/10/main/postgresql.conf`
31+
32+
#### Config options
33+
34+
You will find these options under "ERROR REPORTING AND LOGGING". Please enable
35+
(remove the '#' symbol preceding the line) and set these values:
36+
37+
- `log_destination = 'csvlog'`
38+
- The sysadmin tool will scrape a CSV file.
39+
- `logging_collector = on`
40+
- Postgresql doesn't write logs without. this.
41+
- `log_directory = '/var/log/postgresql'`
42+
- You can use a different folder, if needed.
43+
- `log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'`
44+
- You can change the filename a little, but it **must** end with `-%Y-%m-%d_%H%M%S.log`
45+
- `log_file_mode = 0600`
46+
- Limits access to this logfile.
47+
- `log_rotation_age = 2d`
48+
- At least 2 days of logs will be needed since the sysadmin tool is intended
49+
to run the following day.
50+
- `log_rotation_size = 10MB`
51+
- This doesn't need to be changed. Any additional logfiles created for a
52+
single day will be picked up by the sysadmin tool.
53+
- `log_min_messages = log`
54+
- Preferred name changes are logged at the `log` level. You can set any
55+
higher level of detail, but not lower than `log`.
56+
- `log_min_duration_statement = 0`
57+
- We want all log instances regardless of process time.
58+
- `log_line_prefix = '%m [%p] %q%u@%d '`
59+
- This can be changed so long as the very first detail is a timestamp. The
60+
sysadmin tool expects to find a timestamp at the very first column.
61+
62+
## Sysadmin Tool
63+
64+
The sysadmin tool needs to be run on a machine with local file access to the
65+
Postgresql log file. It is written in PHP.
66+
67+
### Sysdamin Tool Config
68+
69+
The configuration is defined as a class constant. Near the top of the code is a
70+
small block starting as `private const CONFIG = array(`. Inside the block will
71+
be a config element in single quotes, an arrow like `=>`, a value possibly
72+
enclosed in double quotes, followed by a comma. (Do not lose the comma!)
73+
74+
- `'timezone' => "America/New_York",`
75+
- Set this to your local timezone. q.v.
76+
[https://www.php.net/manual/en/timezones.php](https://www.php.net/manual/en/timezones.php)
77+
for more information.
78+
- `'postgresql_logfile_path' => "/var/log/postgresql/",`
79+
- Set this to the same setting as `log_directory` in `postgresql.conf`.
80+
- `'submitty_logfile_path' => "/var/log/submitty/",`
81+
- Where the sysadmin tool will write the preferred name logfile.
82+
- `'postgresql_logfile' => "postgresql",`
83+
- The name of the logfile created by Postgresql. Do not include the time
84+
stamp. This only needs to be changed when `log_filename` in
85+
`postgresql.conf` is changed.
86+
- `'submitty_logfile' => "submitty_preferred_names",`
87+
- Name of the preferred name change logfile. You can leave this as is.
88+
- `'submitty_log_retention' => 7`
89+
- How many days of preferred name change logs to keep.
90+
91+
### Running the Sysadmin Tool
92+
93+
This tool is meant to be executed on the command line and can be scheduled as a
94+
cron job. Errors will be outputted to `STDERR`, which in a crontab can either
95+
be redirected to `/dev/null` or emailed to a sysadmin. Running as `root` is
96+
required, and there are no command line arguments.

student_auto_feed/submitty_student_auto_feed.php

+1
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ private function upsert_psql() {
534534
user_email=upsert_users.user_email
535535
FROM upsert_users
536536
WHERE users.user_id=upsert_users.user_id
537+
/* AUTH: "AUTO_FEED" */
537538
SQL;
538539

539540
$sql['registration_section']['update'] = null;

0 commit comments

Comments
 (0)