Skip to content

Commit a4d28d0

Browse files
Merge pull request #396 from AutomatingSciencePipeline/392-system-logs-larger-than-16mb-cannot-be-written-to-the-database
Update results, logs, and zips to use file buckets
2 parents af312b1 + 1de5205 commit a4d28d0

File tree

6 files changed: +128 additions, -55 deletions

apps/backend/modules/mongo.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,33 +12,33 @@ def verify_mongo_connection(mongoClient: pymongo.MongoClient):
1212
raise Exception("MongoDB server not available/unreachable") from err
1313

1414
def upload_experiment_aggregated_results(experimentId: str, results: str, mongoClient: pymongo.MongoClient):
15-
experimentResultEntry = {"experimentId": experimentId, "resultContent": results}
1615
# Get the results connection
17-
resultsCollection = mongoClient["gladosdb"].results
16+
resultsBucket = GridFSBucket(mongoClient["gladosdb"], bucket_name='resultsBucket')
1817
try:
19-
resultId = resultsCollection.insert_one(experimentResultEntry).inserted_id
18+
# Encode the results string to bytes
19+
results_bytes = results.encode('utf-8')
20+
# Now we need to store the results in the GridFS bucket
21+
resultId = resultsBucket.upload_from_stream(f"results{experimentId}", results_bytes, metadata={"experimentId": experimentId})
2022
# return the resultID
21-
return resultId
23+
return str(resultId)
2224

2325
except Exception as err:
2426
# Change to generic exception
2527
raise Exception("Encountered error while storing aggregated results in MongoDB") from err
2628

2729
def upload_experiment_zip(experimentId: str, encoded: Binary, mongoClient: pymongo.MongoClient):
28-
experimentZipEntry = {"experimentId": experimentId, "fileContent": encoded}
29-
zipCollection = mongoClient["gladosdb"].zips
30+
zipsBucket = GridFSBucket(mongoClient["gladosdb"], bucket_name='zipsBucket')
3031
try:
31-
resultZipId = zipCollection.insert_one(experimentZipEntry).inserted_id
32-
return resultZipId
32+
resultId = zipsBucket.upload_from_stream(f"results{experimentId}.zip", encoded, metadata={"experimentId": experimentId})
33+
return str(resultId)
3334
except Exception as err:
3435
raise Exception("Encountered error while storing results zip in MongoDB") from err
3536

3637
def upload_log_file(experimentId: str, contents: str, mongoClient: pymongo.MongoClient):
37-
logFileEntry = {"experimentId": experimentId, "fileContent": contents}
38-
logCollection = mongoClient["gladosdb"].logs
38+
logsBucket = GridFSBucket(mongoClient["gladosdb"], bucket_name='logsBucket')
3939
try:
40-
resultId = logCollection.insert_one(logFileEntry).inserted_id
41-
return resultId
40+
resultId = logsBucket.upload_from_stream(f"log{experimentId}.txt", contents.encode('utf-8'), metadata={"experimentId": experimentId})
41+
return str(resultId)
4242
except Exception as err:
4343
raise Exception("Encountered error while storing log file in MongoDB") from err
4444

apps/frontend/lib/mongodb_funcs.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,24 @@ export async function deleteDocumentById(expId: string) {
4545

4646
//Since we found it, make sure to delete data from logs, results, and zips
4747
const db = client.db(DB_NAME);
48-
//Delete logs
49-
await db.collection('logs').deleteMany({ "experimentId": expId });
50-
//Delete results
51-
await db.collection('results').deleteMany({ "experimentId": expId });
52-
//Delete zips
53-
await db.collection('zips').deleteMany({ "experimentId": expId });
48+
//Delete logs from bucket
49+
const logsBucket = new GridFSBucket(db, { bucketName: 'logsBucket' });
50+
const filesToDelete = await logsBucket.find({ "metadata.experimentId": expId }).toArray();
51+
for (const file of filesToDelete) {
52+
await logsBucket.delete(file._id);
53+
}
54+
//Delete results from bucket
55+
const resultsBucket = new GridFSBucket(db, { bucketName: 'resultsBucket' });
56+
const resultsToDelete = await resultsBucket.find({ "metadata.experimentId": expId }).toArray();
57+
for (const file of resultsToDelete) {
58+
await resultsBucket.delete(file._id);
59+
}
60+
//Delete zips from bucket
61+
const zipsBucket = new GridFSBucket(db, { bucketName: 'zipsBucket' });
62+
const zipsToDelete = await zipsBucket.find({ "metadata.experimentId": expId }).toArray();
63+
for (const file of zipsToDelete) {
64+
await zipsBucket.delete(file._id);
65+
}
5466

5567
return Promise.resolve();
5668
}
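(File path missing from this capture. Judging by the `mongoCSVHandler` definition and the `expIdToCsvDownload` identifier, this is presumably the CSV download API route that sits alongside the zip route shown further below; confirm against the commit.)

Lines changed: 30 additions & 11 deletions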
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import clientPromise, { DB_NAME, COLLECTION_RESULTS_CSVS } from '../../../../lib/mongodb';
22
import { NextApiHandler } from 'next';
33
import { ResultsCsv } from '../../../../lib/mongodb_types';
4+
import { GridFSBucket } from 'mongodb';
45

56
// TODO possible to extract a common function/class here? very little varies between this and zip
67

@@ -12,26 +13,44 @@ const mongoCSVHandler: NextApiHandler<ResultsCsv> = async (req, res) => {
1213
}
1314

1415
let results;
16+
1517
try {
1618
const client = await clientPromise;
1719
const db = client.db(DB_NAME);
20+
const resultsBucket = new GridFSBucket(db, { bucketName: 'resultsBucket' });
21+
//First check that the file exists
22+
results = await resultsBucket.find({ "metadata.experimentId": expIdToCsvDownload }).toArray();
23+
if (results.length === 0) {
24+
console.warn(`Experiment ${expIdToCsvDownload} CSV not found`);
25+
res.status(404).json({ response: 'Experiment CSV not found' } as any);
26+
return;
27+
}
28+
29+
if (results.length !== 1) {
30+
console.warn(`Experiment ${expIdToCsvDownload} CSV not found`);
31+
res.status(404).json({ response: 'Experiment CSV not found' } as any);
32+
} else {
33+
//Download the file
34+
const downloadStream = resultsBucket.openDownloadStream(results[0]._id);
35+
//This has to return the csv contents
36+
const chunks: Buffer[] = [];
37+
downloadStream.on('data', (chunk) => {
38+
chunks.push(chunk);
39+
});
40+
41+
downloadStream.on('end', () => {
42+
const csvContents = Buffer.concat(chunks as unknown as Uint8Array[]).toString('utf-8');
43+
var result = { resultContent: csvContents };
44+
res.json(result as unknown as ResultsCsv);
45+
});
46+
}
1847

19-
results = await db
20-
.collection(COLLECTION_RESULTS_CSVS)
21-
// TODO correct mongodb typescript type for id
22-
.find({ 'experimentId': expIdToCsvDownload as any }).toArray();
2348
} catch (error) {
2449
const message = 'Failed to download the csv';
2550
console.error('Error contacting server: ', error);
2651
res.status(500).json({ response: message } as any);
2752
}
28-
if (results.length !== 1) {
29-
console.warn(`Experiment ${expIdToCsvDownload} CSV not found`);
30-
res.status(404).json({ response: 'Experiment CSV not found' } as any);
31-
} else {
32-
// TODO correct way to typescript handle this?
33-
res.json(results[0] as unknown as ResultsCsv);
34-
}
53+
3554
};
3655

3756
export default mongoCSVHandler;
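(File path missing from this capture. Judging by the `mongoLogHandler` definition and the `idOfLogFile` identifier, this is presumably the log download API route; confirm against the commit.)

Lines changed: 37 additions & 14 deletions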
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { GridFSBucket } from 'mongodb';
12
import clientPromise, { COLLECTION_LOGS, DB_NAME } from '../../../../lib/mongodb';
23
import { NextApiHandler } from 'next';
34

@@ -11,27 +12,49 @@ const mongoLogHandler: NextApiHandler<String> = async (req, res) => {
1112
}
1213

1314
let results;
15+
const client = await clientPromise;
16+
const db = client.db(DB_NAME);
17+
const logsBucket = new GridFSBucket(db, { bucketName: 'logsBucket' });
1418
try {
15-
const client = await clientPromise;
16-
const db = client.db(DB_NAME);
19+
//First check that the file exists
20+
results = await logsBucket.find({ "metadata.experimentId": idOfLogFile }).toArray();
21+
if (results.length === 0) {
22+
console.warn(`Experiment ${idOfLogFile} Log not found`);
23+
res.status(404).json({ response: `Experiment Log '${idOfLogFile}' not found. Please contact the GLADOS team for further troubleshooting.` } as any);
24+
return;
25+
}
26+
27+
if (results.length !== 1) {
28+
console.warn(`Experiment ${idOfLogFile} Log not found`);
29+
res.status(404).json({ response: `Experiment Log '${idOfLogFile}' not found. Please contact the GLADOS team for further troubleshooting.` } as any);
30+
} else {
31+
//Download the file
32+
const downloadStream = logsBucket.openDownloadStream(results[0]._id);
33+
//This has to return the csv contents
34+
const chunks: Buffer[] = [];
35+
downloadStream.on('data', (chunk) => {
36+
chunks.push(chunk);
37+
});
38+
39+
downloadStream.on('end', () => {
40+
const contents = Buffer.concat(chunks as unknown as Uint8Array[]).toString('utf-8');
41+
if (contents.length === 0) {
42+
console.warn(`Experiment ${idOfLogFile} Log was empty`);
43+
res.send(`Experiment Log '${idOfLogFile}' was empty.`);
44+
}
45+
else {
46+
res.send(contents);
47+
}
48+
49+
});
50+
}
1751

18-
results = await db
19-
.collection(COLLECTION_LOGS)
20-
// TODO correct mongodb typescript type for id
21-
.find({ 'experimentId': idOfLogFile as any }).toArray();
2252
} catch (error) {
2353
const message = 'Failed to download the log file';
2454
console.error('Error contacting server: ', error);
2555
res.status(500).json({ response: message } as any);
2656
}
27-
if (results.length !== 1) {
28-
console.warn(`Experiment ${idOfLogFile} Log not found`);
29-
res.status(404).json({ response: `Experiment Log '${idOfLogFile}' not found. Remember, the production database doesn't have the logs of experiments from dev!` } as any);
30-
} else {
31-
const result = results[0];
32-
const contents = `${result?.fileContent ?? 'The log file was empty, or an error occurred'}`;
33-
res.send(contents);
34-
}
57+
3558
};
3659

3760
export default mongoLogHandler;

apps/frontend/pages/api/download/zip/[expIdToZipDownload].tsx

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import clientPromise, { COLLECTION_ZIPS, DB_NAME } from '../../../../lib/mongodb';
22
import { NextApiHandler } from 'next';
33
import { ProjectZip } from '../../../../lib/mongodb_types';
4+
import { GridFSBucket } from 'mongodb';
45

56
// TODO possible to extract a common function/class here? very little varies between this and csv
67

@@ -15,23 +16,41 @@ const mongoZipHandler: NextApiHandler<ProjectZip> = async (req, res) => {
1516
try {
1617
const client = await clientPromise;
1718
const db = client.db(DB_NAME);
19+
const zipsBucket = new GridFSBucket(db, { bucketName: 'zipsBucket' });
20+
21+
// First check that the file exists
22+
results = await zipsBucket.find({ 'metadata.experimentId': expIdToZipDownload }).toArray();
23+
24+
if (results.length === 0) {
25+
console.warn(`Experiment ${expIdToZipDownload} ZIP not found`);
26+
res.status(404).json({ response: 'Experiment ZIP not found' } as any);
27+
return;
28+
}
29+
30+
// Download the file
31+
const downloadStream = zipsBucket.openDownloadStream(results[0]._id);
32+
// This has to return the zip contents
33+
const chunks: Buffer[] = [];
34+
downloadStream.on('data', (chunk) => {
35+
chunks.push(chunk);
36+
});
37+
38+
downloadStream.on('end', () => {
39+
//Zip contents are b64 encoded and stored in binary format
40+
//So we need to concatenate the chunks and return them as a buffer
41+
const zipContents = Buffer.concat(chunks as unknown as Uint8Array[]);
42+
//Make sure this is returned as a string
43+
const zipContentsString = zipContents.toString('base64');
44+
var result = { fileContent: zipContentsString };
45+
res.json(result as unknown as ProjectZip);
46+
});
1847

19-
results = await db
20-
.collection(COLLECTION_ZIPS)
21-
// TODO correct mongodb typescript type for id
22-
.find({ 'experimentId': expIdToZipDownload as any }).toArray();
2348
} catch (error) {
2449
const message = 'Failed to download the zip';
2550
console.error('Error contacting server: ', error);
2651
res.status(500).json({ response: message } as any);
2752
}
28-
if (results.length !== 1) {
29-
console.warn(`Experiment ${expIdToZipDownload} ZIP not found`);
30-
res.status(404).json({ response: 'Experiment ZIP not found' } as any);
31-
} else {
32-
// TODO correct way to typescript handle this?
33-
res.json(results[0] as unknown as ProjectZip);
34-
}
53+
3554
};
3655

3756
// Remove the file size limit on the response

apps/runner/modules/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def _call_backend(url, payload, log_msg):
160160
try:
161161
response = requests.post(url, json=payload, timeout=10)
162162
if response.status_code == 200:
163-
result_id = response.json().get('_id')
163+
result_id = response.json().get('id')
164164
if result_id:
165165
explogger.info(f"{log_msg}: {result_id}")
166166
else:

0 commit comments

Comments
 (0)