Skip to content

Commit 97e3f37

Browse files
rmunn and myieye authored
Node script to copy projects from staging or prod (#1816)
* Node script to copy projects from staging or prod Often fails to copy assets because `kubectl cp` or `kubectl exec tar` get cut off partway through, but that should go away once Kubernetes version 1.30 is released. * Fetch project assets with rsync and retry failures This should ensure that the project assets eventually get copied over to the local Docker setup even under conditions where `kubectl exec` is flaky and fails every couple of minutes. * Only set up rsync if needed This will save a bit of time when kubectl cp is being reliable * Only include assets that are really there Before including pictures and audio in the tarball, make sure they're really there, and skip them if they are a broken symlink. * Make backup script slightly more cross-platform Windows has issues with single-quotes for quoting command-line params, but thankfully Linux handles double-quotes correctly in all the places I used single-quotes, so we'll just switch to double-quotes everywhere. * Clean up assets tarball when done Also use pod name instead of deploy/app since not every user account has access to deploy objects, at least on production * Rewrite path, to workaround kubectl interpreting Windows drive letter as pod name --------- Co-authored-by: Tim Haasdyk <[email protected]>
1 parent 037646b commit 97e3f37

File tree

3 files changed

+465
-0
lines changed

3 files changed

+465
-0
lines changed

backup.mjs

Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
import { execSync, spawn } from "child_process";
2+
import { existsSync, mkdtempSync, rmSync, statSync } from "fs";
3+
import { MongoClient, ObjectId } from "mongodb";
4+
import os from "os";
5+
import path from "path";
6+
import net from "net";
7+
8+
// Expected arguments: first arg is project ID (5dbf805650b51914727e06c4) or URL (http://localhost:8080/app/lexicon/5dbf805650b51914727e06c4)
// Second arg is "qa" or "staging" to copy from staging, "live" or "prod" or "production" to copy from production
// NOTE: You must edit the context names below if they don't match the context names you have (see `kubectl config get-contexts` output)

// ===== EDIT THIS =====

// kubectl context names for the two source environments this script can copy from
const stagingContext = "dallas-rke";
const prodContext = "aws-rke";

// ===== END of EDIT THIS =====

// Environment used when no second argument is given (see arg handling below)
let defaultContext = stagingContext;
let defaultContextName = "staging";

// Create a temp dir reliably
const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"))
  // Work around kubectl bug where Windows drive letters are interpreted as pod names by kubectl cp
  .replace(/^C:\\/, "\\\\localhost\\C$\\");
// Mutable module-level state, filled in as the script progresses so that
// cleanup() can tear down exactly what was actually created.
let portForwardProcess; // child process running `kubectl port-forward`
let localConn; // MongoClient connected to the local Docker Mongo
let remoteConn; // MongoClient connected through the port-forward
let remoteTarball = undefined; // path of the assets tarball inside the remote pod
let remotePodname = undefined; // name of the remote app pod
31+
32+
/**
 * Best-effort teardown: remove the temp dir, delete the assets tarball from
 * the remote pod, close both Mongo connections, and kill the port-forward
 * child. Every step has its own try/catch so one failure never prevents the
 * remaining steps from running.
 *
 * NOTE: this is registered on process "exit" (below), where only synchronous
 * work is guaranteed to finish — keep the sync steps (rmSync, execSync) first
 * and don't introduce awaits before them.
 */
async function cleanup() {
  try {
    if (existsSync(tempdir)) {
      console.warn(`Cleaning up temporary directory ${tempdir}...`);
      rmSync(tempdir, { recursive: true, force: true });
    }
  } catch (_) {}
  try {
    // Only attempt remote cleanup if we got far enough to create the tarball
    if (remotePodname && remoteTarball) {
      console.warn(`Cleaning up assets tarball from remote side...`);
      execSync(
        `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- rm -f ${remoteTarball}`,
      );
    }
  } catch (_) {}
  try {
    if (localConn) await localConn.close();
  } catch (_) {}
  try {
    if (remoteConn) await remoteConn.close();
  } catch (_) {}
  try {
    if (portForwardProcess) await portForwardProcess.kill();
  } catch (_) {}
}
57+
58+
/**
 * Pick an unused local TCP port. Listening on port 0 makes the OS choose a
 * free port for us; we read the number back, close the probe server, and
 * resolve with it.
 * @returns {Promise<number>} a port that was free a moment ago
 */
async function randomFreePort() {
  return new Promise((resolve) => {
    const probe = net.createServer();
    probe.listen(0, () => {
      // Asking for port 0 makes Node automatically find a free port
      const { port } = probe.address();
      probe.close(() => resolve(port));
    });
  });
}
68+
69+
process.on("exit", cleanup);
70+
process.on("uncaughtExceptionMonitor", cleanup);
71+
72+
/**
 * Run a shell command synchronously and return its stdout with the trailing
 * newline stripped.
 * @param {string} cmd - command line to execute
 * @returns {string} trimmed stdout of the command
 */
function run(cmd) {
  const stdout = execSync(cmd);
  return stdout.toString().trimEnd();
}
75+
76+
/**
 * List the kubectl context names configured on this machine.
 * Each line is trimmed so Windows-style \r\n output can't leave a stray \r
 * on a context name (which would break the .includes() sanity checks below),
 * and blank lines are dropped.
 * @returns {string[]} names from `kubectl config get-contexts -o name`
 */
function getContexts() {
  const stdout = run("kubectl config get-contexts -o name");
  return stdout
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}
80+
81+
/**
 * Check whether a path inside the remote app pod really exists.
 * Sometimes the audio and/or pictures folders in assets are symlinks, and
 * sometimes they're broken symlinks. `readlink -e` fails for missing or
 * dangling targets, so this returns true for a real file/directory *or* a
 * symlink with a valid target, and false if it doesn't exist or is broken.
 * @param {string} name - absolute path inside the pod
 * @returns {boolean}
 * @throws {Error} if the remote probe prints anything other than yes/no
 */
function reallyExists(name) {
  const result = execSync(
    // Context is quoted for consistency with every other kubectl call here
    `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "readlink -eq ${name} >/dev/null && echo yes || echo no"`,
  )
    .toString()
    .trimEnd();
  if (result === "yes") return true;
  if (result === "no") return false;
  throw new Error(`Unexpected result from readlink ${name}: ${result}`);
}
93+
94+
// Sanity check: verify both expected kubectl contexts exist before touching
// anything, so a typo in the names at the top of this file fails fast.

const contexts = getContexts();
if (!contexts.includes(stagingContext)) {
  console.warn("Staging context not found. Tried", stagingContext, "but did not find it in", contexts);
  console.warn("Might need to edit the top level of this file and try again");
  process.exit(1);
}
if (!contexts.includes(prodContext)) {
  console.warn("Prod context not found. Tried", prodContext, "but did not find it in", contexts);
  console.warn("Might need to edit the top level of this file and try again");
  process.exit(1);
}
107+
108+
// Process args

// Require at least the project ID/URL argument; exit code 2 marks a usage
// error (the sanity checks above use exit code 1).
if (process.argv.length < 3) {
  console.warn("Please pass project ID or URL as argument, e.g. node backup.mjs 5dbf805650b51914727e06c4");
  process.exit(2);
}
114+
115+
// The first argument may be a bare project ID or a full project URL;
// extract the ID either way.
let projId;
const arg = process.argv[2];
if (URL.canParse(arg)) {
  const { pathname } = new URL(arg);
  const prefix = "/app/lexicon/";
  if (pathname.startsWith(prefix)) {
    projId = pathname.substring(prefix.length);
  } else {
    projId = pathname; // Will probably fail, but worth a try
  }
} else {
  projId = arg;
}
127+
128+
// Environment selection: staging unless the optional second arg says otherwise
let context = defaultContext;
let contextName = defaultContextName;

if (process.argv.length > 3) {
  const env = process.argv[3];
  // Aliases for the same environment share a fall-through case group
  switch (env) {
    case "qa":
    case "staging":
      context = stagingContext;
      contextName = "staging";
      break;

    case "live":
    case "prod":
    case "production":
      context = prodContext;
      contextName = "production";
      break;

    default:
      console.warn(`Unknown environment ${env}`);
      console.warn(`Valid values are qa, staging, live, prod, or production`);
      process.exit(2);
  }
} else {
  console.warn("No environment selected. Defaulting to staging environment.");
  console.warn('Pass "prod" or "production" as second arg to copy projects from production environment instead.');
}
165+
166+
projId = projId.trim();
167+
168+
console.warn(`Fetching project with ID ${projId} from ${contextName} context, named "${context}"`);
169+
console.warn("If that looks wrong, hit Ctrl+C right NOW!");
170+
console.warn();
171+
console.warn("Pausing for 2 seconds to give you time to hit Ctrl+C...");
172+
await new Promise((resolve) => setTimeout(resolve, 2000));
173+
// Start running

console.warn("Setting up kubectl port forwarding for remote Mongo...");
// Random free local port so we never collide with an existing forward
const remoteMongoPort = await randomFreePort();
// portForwardingPromise resolves the first time kubectl writes to stdout,
// which is our signal that the forward is ready to accept connections.
let portForwardingReady;
const portForwardingPromise = new Promise((resolve) => {
  portForwardingReady = resolve;
});
portForwardProcess = spawn(
  "kubectl",
  [`--context=${context}`, "--namespace=languageforge", "port-forward", "deploy/db", `${remoteMongoPort}:27017`],
  {
    stdio: "pipe",
  },
);
portForwardProcess.stdout.on("data", (data) => {
  portForwardingReady();
});
// Any stderr output is treated as fatal: without the forward, the remote
// Mongo connection below could never succeed anyway.
portForwardProcess.stderr.on("data", (data) => {
  console.warn("Port forwarding failed:");
  console.warn(data.toString());
  console.warn("Exiting");
  process.exit(1);
});
197+
198+
console.warn("Setting up local Mongo connection...");

// Ask docker compose which host port the local Mongo container publishes
const localMongoPort = run("docker compose port db 27017").split(":")[1];
const localConnStr = `mongodb://admin:pass@localhost:${localMongoPort}/?authSource=admin`;
localConn = await MongoClient.connect(localConnStr);

// The copied project will be handed to the local admin user; look up its ID
const localAdmin = await localConn.db("scriptureforge").collection("users").findOne({ username: "admin" });
const adminId = localAdmin._id.toString();
console.log(`Local admin ID: ${adminId}`);
console.warn("If that doesn't look right, hit Ctrl+C NOW");

// Don't connect until kubectl port-forward has reported it's ready
await portForwardingPromise;
console.warn("Port forwarding is ready. Setting up remote Mongo connection...");

// Connection string carries no credentials — the tunnel goes straight to the
// in-cluster Mongo
const remoteConnStr = `mongodb://localhost:${remoteMongoPort}`;
remoteConn = await MongoClient.connect(remoteConnStr);
214+
215+
console.warn("Remote Mongo connection established. Fetching project record...");

// Get project record
const project = await remoteConn
  .db("scriptureforge")
  .collection("projects")
  .findOne({ _id: new ObjectId(projId) });
if (!project) {
  // Without this check a missing/mistyped project ID crashes below with an
  // opaque TypeError when reading project.projectCode
  console.warn(`Project ${projId} not found in ${contextName}`);
  process.exit(1);
}
console.log("Project code:", project.projectCode);

// The per-project database is named after the project code
const dbname = `sf_${project.projectCode}`;
// Reassign the copied project to the local admin so it's usable locally
project.users = { [adminId]: { role: "project_manager" } };
project.ownerRef = new ObjectId(adminId);
227+
228+
// Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that

console.warn(`Copying ${dbname} database...`);
const collections = await remoteConn.db(dbname).collections();
for (const remoteColl of collections) {
  const name = remoteColl.collectionName;
  console.log(`  Copying ${name} collection...`);
  // Pull the remote indexes and all documents up front, then rebuild the
  // local collection from scratch (drop data and indexes first)
  const indexes = await remoteColl.indexes();
  const cursor = remoteColl.find();
  const docs = await cursor.toArray();
  const localColl = await localConn.db(dbname).collection(name);
  try {
    await localColl.drop();
  } catch (_) {} // Throws if collection doesn't exist, which is fine
  try {
    await localColl.dropIndexes();
  } catch (_) {} // Throws if collection doesn't exist, which is fine
  if (indexes?.length) await localColl.createIndexes(indexes);
  if (docs?.length) await localColl.insertMany(docs);
  console.log(`  ${docs.length} documents copied`);
}
console.warn(`${dbname} database successfully copied`);

// Copy project record after its database has been copied, so there's never a race condition where the project exists but its entry database doesn't
console.warn("Copying project record...");
// Upsert so re-running the script refreshes an already-copied project
await localConn
  .db("scriptureforge")
  .collection("projects")
  .findOneAndReplace({ _id: new ObjectId(projId) }, project, { upsert: true });
257+
258+
// NOTE: mongodump/mongorestore approach below can be revived once Kubernetes 1.30 is installed on client *and* server, so kubectl exec is finally reliable
259+
260+
// console.warn(`About to try fetching ${dbname} database from remote, will retry until success`);
261+
// let done = false;
262+
// while (!done) {
263+
// try {
264+
// console.warn(`Fetching ${dbname} database...`);
265+
// execSync(
266+
// `kubectl --context="${context}" --namespace=languageforge exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`,
267+
// );
268+
// console.warn(`Uploading to local ${dbname} database...`);
269+
// execSync(`docker exec -i lf-db mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`);
270+
// console.warn(`Successfully uploaded ${dbname} database`);
271+
// done = true;
272+
// } catch (err) {
273+
// console.warn("mongodump failed, retrying...");
274+
// }
275+
// }
276+
277+
console.warn("Getting name of remote app pod...");
// The items[*] jsonpath returns EVERY matching pod name separated by spaces;
// if more than one app pod is Running, keep only the first so the
// exec/cp commands below still target a single well-formed pod name.
remotePodname = run(
  `kubectl --context="${context}" --namespace=languageforge get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
).split(" ")[0];
281+
282+
console.warn("Checking that remote assets really exist...");
// Probe each assets subfolder; broken symlinks count as absent so tar
// doesn't fail on them later
const assetsRoot = `/var/www/html/assets/lexicon/${dbname}`;
const includeAudio = reallyExists(`${assetsRoot}/audio`);
const includePictures = reallyExists(`${assetsRoot}/pictures`);
console.log(`Copy audio? ${includeAudio ? "yes" : "no"}`);
console.log(`Copy pictures? ${includePictures ? "yes" : "no"}`);

const filesNeeded = [];
if (includeAudio) filesNeeded.push("audio");
if (includePictures) filesNeeded.push("pictures");

if (filesNeeded.length === 0) {
  console.warn("Project has no assets. Copy complete.");
  process.exit(0);
}
300+
301+
const tarTargets = filesNeeded.join(" ");

console.warn("Creating assets tarball in remote...");
remoteTarball = `/tmp/assets-${dbname}.tar`;
// tar c=create, h=follow symlinks (assets dirs are often symlinks), f=file
execSync(
  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- tar chf ${remoteTarball} --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
);
// Record the remote file size so a truncated download can be detected later
const sizeStr = run(
  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "ls -l ${remoteTarball} | cut -d' ' -f5"`,
);
const correctSize = +sizeStr;
// Typo fix: message previously read "Asserts tarball size"
console.warn(`Assets tarball size is ${sizeStr}`);
313+
console.warn("Trying to fetch assets tarball with kubectl cp...");
// kubectl cp is fast but can get cut off partway through (see commit notes),
// so both an exception and a short file count as failure and trigger the
// rsync fallback below.
let failed = false;
try {
  execSync(
    `kubectl --context="${context}" --namespace=languageforge cp ${remotePodname}:${remoteTarball} ${tempdir}/assets-${dbname}.tar`,
  );
} catch (_) {
  console.warn("kubectl cp failed. Will try to continue with rsync...");
  failed = true;
}
if (!failed) {
  // Compare with the size measured on the remote side to detect truncation
  const localSize = statSync(`${tempdir}/assets-${dbname}.tar`).size;
  if (localSize < correctSize) {
    console.warn(`Got only ${localSize} bytes instead of ${correctSize}. Will try to continue with rsync...`);
    failed = true;
  }
}
330+
if (failed) {
  // rsync fallback: slower, but --partial makes transfers resumable and the
  // loop below retries until the whole tarball arrives.
  console.warn("Ensuring rsync exists in target container...");
  execSync(
    `kubectl exec --context="${context}" -c app pod/${remotePodname} -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
  );
  console.warn("\n===== IMPORTANT NOTE =====");
  console.warn(
    "The rsync transfer may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
  );
  console.warn("===== IMPORTANT NOTE =====\n");
  let done = false;
  while (!done) {
    try {
      // Tunnel rsync over kubectl exec by using it as the remote shell (--rsh)
      execSync(
        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} --namespace=languageforge exec -i -c app pod/${remotePodname} -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
        { stdio: "inherit" }, // Allows us to see rsync progress
      );
      done = true;
    } catch (err) {
      console.warn(`Rsync failed with error: ${err}. Retrying...`);
    }
  }
}
353+
console.warn("Uploading assets tarball to local...");
// Ensure the target directory exists in the local app container and is owned
// by www-data; "|| true" keeps a chown failure from aborting the upload
execSync(
  `docker exec lf-app mkdir -p "/var/www/html/assets/lexicon/${dbname}" ; docker exec lf-app chown www-data:www-data "/var/www/html/assets/lexicon/${dbname}" || true`,
);
// docker cp with "-" reads a tar stream from stdin and unpacks it in place
execSync(`docker cp - lf-app:/var/www/html/assets/lexicon/${dbname}/ < ${tempdir}/assets-${dbname}.tar`);
console.warn("Assets successfully uploaded");

// Explicit exit fires the "exit" handler, which runs cleanup()
process.exit(0);

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
"jasmine-spec-reporter": "^4.1.1",
8383
"lint-staged": "^13.0.3",
8484
"mini-css-extract-plugin": "^1.3.9",
85+
"mongodb": "^6.6.2",
8586
"ng-annotate-loader": "^0.7.0",
8687
"ngtemplate-loader": "^2.1.0",
8788
"npm-run-all": "^4.1.5",

0 commit comments

Comments
 (0)