From c80db4140821ee70f560a24214ee21e7c0aa0475 Mon Sep 17 00:00:00 2001 From: Marjan Kalanaki Date: Fri, 17 Jan 2025 13:47:47 +0000 Subject: [PATCH] zip implementation in progress --- package-lock.json | 595 ++++++++++++++++++++++++++- packages/backend-common/package.json | 8 +- packages/backend-common/src/sqs.ts | 4 +- packages/backend-common/src/zip.ts | 31 ++ packages/common/src/types.ts | 4 +- packages/worker/src/index.ts | 8 +- packages/worker/src/transcribe.ts | 16 +- packages/worker/src/util.ts | 16 +- 8 files changed, 666 insertions(+), 16 deletions(-) create mode 100644 packages/backend-common/src/zip.ts diff --git a/package-lock.json b/package-lock.json index 539a0673..2a8d97ab 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4701,6 +4701,14 @@ "devOptional": true, "license": "MIT" }, + "node_modules/@types/archiver": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/@types/archiver/-/archiver-6.0.3.tgz", + "integrity": "sha512-a6wUll6k3zX6qs5KlxIggs1P1JcYJaTCx2gnlr+f0S1yd2DoaEwoIK10HmBaLnZwWneBz+JBm0dwcZu0zECBcQ==", + "dependencies": { + "@types/readdir-glob": "*" + } + }, "node_modules/@types/aws-lambda": { "version": "8.10.145", "dev": true, @@ -4867,7 +4875,6 @@ }, "node_modules/@types/node": { "version": "20.16.10", - "devOptional": true, "license": "MIT", "dependencies": { "undici-types": "~6.19.2" @@ -4950,6 +4957,14 @@ "@types/react": "*" } }, + "node_modules/@types/readdir-glob": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@types/readdir-glob/-/readdir-glob-1.1.5.tgz", + "integrity": "sha512-raiuEPUYqXu+nvtY2Pe8s8FEmZ3x5yAH4VkLdihcPdalvsHltomrRC9BzuStrJ9yk06470hS0Crw0f1pXqD+Hg==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/resolve": { "version": "1.20.2", "license": "MIT" @@ -5326,6 +5341,203 @@ "resolved": "packages/api", "link": true }, + "node_modules/archiver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/archiver/-/archiver-7.0.1.tgz", + "integrity": "sha512-ZcbTaIqJOfCc03QwD468Unz/5Ir8ATtvAHsK+FdXbDIbGfihqh9mrvdcYunQzqn4HrvWWaFyaxJhGZagaJJpPQ==", + "dependencies": { + "archiver-utils": "^5.0.2", + "async": "^3.2.4", + "buffer-crc32": "^1.0.0", + "readable-stream": "^4.0.0", + "readdir-glob": "^1.1.2", + "tar-stream": "^3.0.0", + "zip-stream": "^6.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/archiver-utils": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/archiver-utils/-/archiver-utils-5.0.2.tgz", + "integrity": "sha512-wuLJMmIBQYCsGZgYLTy5FIB2pF6Lfb6cXMSF8Qywwk3t20zWnAi7zLcQFdKQmIB8wyZpY5ER38x08GbwtR2cLA==", + "dependencies": { + "glob": "^10.0.0", + "graceful-fs": "^4.2.0", + "is-stream": "^2.0.1", + "lazystream": "^1.0.0", + "lodash": "^4.17.15", + "normalize-path": "^3.0.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/archiver-utils/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/archiver-utils/node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/archiver-utils/node_modules/glob": { + "version": "10.4.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", + "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/archiver-utils/node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/archiver-utils/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/archiver-utils/node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, + "node_modules/archiver/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/archiver/node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/archiver/node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/archiver/node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, "node_modules/arg": { "version": "4.1.3", "devOptional": true, @@ -5976,6 +6188,11 @@ "proxy-from-env": "^1.1.0" } }, + "node_modules/b4a": { + "version": "1.6.7", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.7.tgz", + "integrity": "sha512-OnAYlL5b7LEkALw87fUVafQw5rVR9RjwGd4KUwNQ6DrrNmaVaUCgLipfVlzrPQ4tWOR9P0IXGNOx50jYCCdSJg==" + }, "node_modules/babel-jest": { "version": "29.7.0", "dev": true, @@ -6092,6 +6309,12 @@ "version": "1.0.2", "license": "MIT" }, + "node_modules/bare-events": { + "version": "2.5.4", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.5.4.tgz", + "integrity": "sha512-+gFfDkR8pj4/TrWCGUGWmJIkBwuxPS5F+a5yWjOHQt2hHvNZd5YLzadjmDUtFmMM4y429bnKLa8bYBMHcYdnQA==", + "optional": true + }, "node_modules/base64-js": { "version": "1.5.1", "funding": [ @@ -6247,6 +6470,14 @@ "isarray": "^1.0.0" } }, + "node_modules/buffer-crc32": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-1.0.0.tgz", + "integrity": "sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/buffer-equal-constant-time": { "version": "1.0.1", "license": "BSD-3-Clause" @@ -6696,6 +6927,86 @@ "node": ">=18" } }, + "node_modules/compress-commons": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/compress-commons/-/compress-commons-6.0.2.tgz", + "integrity": "sha512-6FqVXeETqWPoGcfzrXb37E50NP0LXT8kAMu5ooZayhWWdgEY4lBEEcbQNXtkuKQsGduxiIcI4gOTsxTmuq/bSg==", + "dependencies": { + "crc-32": "^1.2.0", + "crc32-stream": "^6.0.0", + "is-stream": "^2.0.1", + "normalize-path": "^3.0.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/compress-commons/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/compress-commons/node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/compress-commons/node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/compress-commons/node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, "node_modules/concat-map": { "version": "0.0.1", "dev": true, @@ -6741,6 +7052,99 @@ "version": "1.0.6", "license": "MIT" }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" + }, + "node_modules/crc-32": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", + "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", + "bin": { + "crc32": "bin/crc32.njs" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/crc32-stream": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/crc32-stream/-/crc32-stream-6.0.0.tgz", + "integrity": "sha512-piICUB6ei4IlTv1+653yq5+KoqfBYmj9bw6LqXoOneTMDXk5nM1qt12mFW1caG3LlJXEKW1Bp0WggEmIfQB34g==", + "dependencies": { + "crc-32": "^1.2.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/crc32-stream/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/crc32-stream/node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/crc32-stream/node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/crc32-stream/node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, "node_modules/create-jest": { "version": "29.7.0", "dev": true, @@ -7768,6 +8172,11 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" + }, "node_modules/fast-glob": { "version": "3.3.2", "license": "MIT", @@ -9875,6 +10284,44 @@ "version": "2.0.0", "license": "MIT" }, + "node_modules/lazystream": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/lazystream/-/lazystream-1.0.1.tgz", + "integrity": "sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==", + "dependencies": { + "readable-stream": "^2.0.5" + }, + "engines": { + "node": ">= 0.6.3" + } + }, + "node_modules/lazystream/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/lazystream/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/lazystream/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/leven": { "version": "3.1.0", "dev": true, @@ -10173,6 +10620,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, "node_modules/lodash.camelcase": { "version": "4.3.0", "dev": true, @@ -11498,6 +11950,11 @@ "node": ">= 0.6.0" } }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, "node_modules/prompts": { "version": "2.4.2", "dev": true, @@ -11596,6 +12053,11 @@ ], "license": "MIT" }, + "node_modules/queue-tick": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", + "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==" + }, "node_modules/range-parser": { "version": "1.2.1", "license": "MIT", @@ -11762,6 +12224,25 @@ "node": ">= 6" } }, + "node_modules/readdir-glob": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz", + "integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==", + "dependencies": { + "minimatch": "^5.1.0" + } + }, + "node_modules/readdir-glob/node_modules/minimatch": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz", + "integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/readdirp": { "version": "3.6.0", "license": "MIT", @@ -12321,6 +12802,19 @@ "node": ">=10.0.0" } }, + "node_modules/streamx": { + "version": "2.21.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.21.1.tgz", + "integrity": "sha512-PhP9wUnFLa+91CPy3N6tiQsK+gnYyUNuk15S3YG/zjYE7RuPeCjJngqnzpC31ow0lzBHQ+QGO4cNJnd0djYUsw==", + "dependencies": { + "fast-fifo": "^1.3.2", + "queue-tick": "^1.0.1", + "text-decoder": "^1.1.0" + }, + "optionalDependencies": { + "bare-events": "^2.2.0" + } + }, "node_modules/string_decoder": { "version": "1.3.0", "license": "MIT", @@ -12666,6 +13160,16 @@ "node": ">=6" } }, + "node_modules/tar-stream": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "dependencies": { + "b4a": "^1.6.4", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, "node_modules/test-exclude": { "version": "6.0.0", "dev": true, @@ -12699,6 +13203,14 @@ "node": "*" } }, + "node_modules/text-decoder": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz", + "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==", + "dependencies": { + "b4a": "^1.6.4" + } + }, "node_modules/text-hex": { "version": "1.0.0", "license": "MIT" @@ -13059,7 +13571,6 @@ }, "node_modules/undici-types": { "version": "6.19.8", - "devOptional": true, "license": "MIT" }, "node_modules/universalify": { @@ -13528,6 +14039,84 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zip-stream": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-6.0.1.tgz", + "integrity": "sha512-zK7YHHz4ZXpW89AHXUPbQVGKI7uvkd3hzusTdotCg1UxyaVtg0zFJSTfW/Dq5f7OBBVnq6cZIaC8Ti4hb6dtCA==", + "dependencies": { + "archiver-utils": "^5.0.0", + "compress-commons": "^6.0.2", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/zip-stream/node_modules/buffer": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", + "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/zip-stream/node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/zip-stream/node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/zip-stream/node_modules/readable-stream": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.7.0.tgz", + "integrity": "sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, "node_modules/zod": { "version": "3.22.4", "license": "MIT", @@ -13589,6 +14178,8 @@ "@aws-sdk/client-sqs": "^3.624.0", "@aws-sdk/client-ssm": "^3.624.0", "@aws-sdk/lib-dynamodb": "3.624.0", + "@types/archiver": "^6.0.3", + "archiver": "^7.0.1", "axios": "^1.7.4", "winston": "^3.11.0" } diff --git a/packages/backend-common/package.json b/packages/backend-common/package.json index 2126c78a..4b595ffe 100644 --- a/packages/backend-common/package.json +++ b/packages/backend-common/package.json @@ -2,14 +2,16 @@ "name": "@guardian/transcription-service-backend-common", "version": "1.0.0", "dependencies": { + "@aws-sdk/client-auto-scaling": "^3.624.0", + "@aws-sdk/client-cloudwatch": "^3.624.0", "@aws-sdk/client-dynamodb": "3.624.0", "@aws-sdk/client-s3": "^3.624.0", + "@aws-sdk/client-secrets-manager": "^3.624.0", "@aws-sdk/client-sqs": "^3.624.0", "@aws-sdk/client-ssm": "^3.624.0", "@aws-sdk/lib-dynamodb": "3.624.0", - "@aws-sdk/client-secrets-manager": "^3.624.0", - "@aws-sdk/client-cloudwatch": "^3.624.0", - "@aws-sdk/client-auto-scaling": "^3.624.0", + "@types/archiver": "^6.0.3", + "archiver": "^7.0.1", "axios": "^1.7.4", "winston": "^3.11.0" }, diff --git a/packages/backend-common/src/sqs.ts b/packages/backend-common/src/sqs.ts index bef4c6e3..09d14e61 100644 --- a/packages/backend-common/src/sqs.ts +++ b/packages/backend-common/src/sqs.ts @@ -307,7 +307,7 @@ const generateOutputSignedUrls = async ( const fileName = `${id}${translate ? '-translation' : ''}`; const expiresIn = expiresInDays * 24 * 60 * 60; const srtKey = `srt/${fileName}.srt`; - const jsonKey = `json/${fileName}.json`; + const jsonKey = `zip/${fileName}.zip`; const textKey = `text/${fileName}.txt`; const srtSignedS3Url = await getSignedUploadUrl( region, @@ -337,6 +337,6 @@ const generateOutputSignedUrls = async ( return { srt: { url: srtSignedS3Url, key: srtKey }, text: { url: textSignedS3Url, key: textKey }, - json: { url: jsonSignedS3Url, key: jsonKey }, + zip: { url: jsonSignedS3Url, key: jsonKey }, }; }; diff --git a/packages/backend-common/src/zip.ts b/packages/backend-common/src/zip.ts new file mode 100644 index 00000000..138a4885 --- /dev/null +++ b/packages/backend-common/src/zip.ts @@ -0,0 +1,31 @@ +import * as stream from 'stream'; +import archiver from 'archiver'; +import { Transcripts } from '../../worker/src/transcribe'; +import { promisify } from 'util'; + +export const getZipBlob = async (files: Transcripts) => { + // Create an archive stream and buffer + const archive = archiver('zip', { zlib: { level: 9 } }); + const bufferStream = new stream.PassThrough(); + const chunks: Uint8Array[] = []; + + // Listen for 'data' events to collect chunks of the zip file + bufferStream.on('data', (chunk) => chunks.push(chunk)); + + // Pipe the archive data to the buffer stream + archive.pipe(bufferStream); + + // Add files to the archive + archive.append(files.srt, { name: 'transcript.srt' }); + archive.append(files.text, { name: 'transcript.txt' }); + archive.append(files.json, { name: 'transcript.json' }); + + // Finalize the archive (ensures all files are added) + archive.finalize(); + + // Wait for the archive to complete and concatenate chunks into a Blob + await promisify(stream.finished)(bufferStream); // Ensure the stream finishes + const zipBlob = new Blob(chunks, { type: 'application/zip' }); + + return zipBlob; +}; diff --git a/packages/common/src/types.ts b/packages/common/src/types.ts index 37781b23..d7670967 100644 --- a/packages/common/src/types.ts +++ b/packages/common/src/types.ts @@ -24,7 +24,7 @@ export type SignedUrl = z.infer; const OutputBucketUrls = z.object({ srt: SignedUrl, text: SignedUrl, - json: SignedUrl, + zip: SignedUrl, }); export type OutputBucketUrls = z.infer; @@ -32,7 +32,7 @@ export type OutputBucketUrls = z.infer; const OutputBucketKeys = z.object({ srt: z.string(), text: z.string(), - json: z.string(), + zip: z.string(), }); export type OutputBucketKeys = z.infer; diff --git a/packages/worker/src/index.ts b/packages/worker/src/index.ts index 041add01..c0c08157 100644 --- a/packages/worker/src/index.ts +++ b/packages/worker/src/index.ts @@ -262,6 +262,10 @@ const pollTranscriptionQueue = async ( file: fileToTranscribe, numberOfThreads, model: config.app.stage === 'PROD' ? 'medium' : 'tiny', + subtitleFormat: + job.transcriptDestinationService === DestinationService.Giant + ? 'vtt' + : 'srt', }; const transcriptResult = await getTranscriptionText( @@ -301,7 +305,7 @@ const pollTranscriptionQueue = async ( const outputBucketKeys: OutputBucketKeys = { srt: outputBucketUrls.srt.key, - json: outputBucketUrls.json.key, + zip: outputBucketUrls.zip.key, text: outputBucketUrls.text.key, }; @@ -318,7 +322,7 @@ const pollTranscriptionQueue = async ( translationOutputBucketKeys: job.translationOutputBucketUrls && transcriptResult.transcriptTranslations && { srt: job.translationOutputBucketUrls.srt.key, - json: job.translationOutputBucketUrls.json.key, + zip: job.translationOutputBucketUrls.zip.key, text: job.translationOutputBucketUrls.text.key, }, isTranslation: job.translate, diff --git a/packages/worker/src/transcribe.ts b/packages/worker/src/transcribe.ts index 78727895..b5cac28e 100644 --- a/packages/worker/src/transcribe.ts +++ b/packages/worker/src/transcribe.ts @@ -7,6 +7,8 @@ import { } from '@guardian/transcription-service-common'; import { runSpawnCommand } from '@guardian/transcription-service-backend-common/src/process'; +type SubtitleFormat = 'srt' | 'vtt'; + interface FfmpegResult { wavPath: string; duration?: number; @@ -38,6 +40,7 @@ export type WhisperBaseParams = { file: string; numberOfThreads: number; model: WhisperModel; + subtitleFormat: SubtitleFormat; }; const CONTAINER_FOLDER = '/input'; @@ -139,6 +142,7 @@ const runTranscription = async ( const params = whisperParams( false, whisperBaseParams.wavPath, + whisperBaseParams.subtitleFormat, languageCode, translate, ); @@ -146,7 +150,7 @@ const runTranscription = async ( const srtPath = path.resolve( path.parse(whisperBaseParams.file).dir, - `${fileName}.srt`, + `${fileName}.vtt`, ); const textPath = path.resolve( path.parse(whisperBaseParams.file).dir, @@ -176,7 +180,11 @@ const transcribeAndTranslate = async ( whisperBaseParams: WhisperBaseParams, ): Promise => { try { - const dlParams = whisperParams(true, whisperBaseParams.wavPath); + const dlParams = whisperParams( + true, + whisperBaseParams.wavPath, + whisperBaseParams.subtitleFormat, + ); const { metadata } = await runWhisper(whisperBaseParams, dlParams); const languageCode = languageCodes.find((c) => c === metadata.detectedLanguageCode) || 'auto'; @@ -245,6 +253,7 @@ const extractWhisperStderrData = (stderr: string): TranscriptionMetadata => { const whisperParams = ( detectLanguageOnly: boolean, file: string, + subtitleFormat: SubtitleFormat = 'srt', languageCode: LanguageCode = 'auto', translate: boolean = false, ) => { @@ -255,8 +264,9 @@ const whisperParams = ( const containerOutputFilePath = path.resolve(CONTAINER_FOLDER, fileName); logger.info(`Transcription output file path: ${containerOutputFilePath}`); const translateParam: string[] = translate ? ['--translate'] : []; + logger.warn(`subtitleFormat is ${subtitleFormat}`); return [ - '--output-srt', + subtitleFormat == 'vtt' ? '--output-vtt' : '--output-srt', '--output-txt', '--output-json', '--output-file', diff --git a/packages/worker/src/util.ts b/packages/worker/src/util.ts index 31e3bdb0..d52a2013 100644 --- a/packages/worker/src/util.ts +++ b/packages/worker/src/util.ts @@ -4,6 +4,7 @@ import { uploadToS3, type OutputBucketUrls, } from '@guardian/transcription-service-common'; +import { getZipBlob } from '@guardian/transcription-service-backend-common/src/zip'; export const uploadAllTranscriptsToS3 = async ( destinationBucketUrls: OutputBucketUrls, @@ -12,13 +13,24 @@ export const uploadAllTranscriptsToS3 = async ( const getBlob = (file: string) => new Blob([file as BlobPart]); const blobs: [string, string, Blob][] = [ ['srt', destinationBucketUrls.srt.url, getBlob(files.srt)], - ['json', destinationBucketUrls.json.url, getBlob(files.json)], + ['json', destinationBucketUrls.zip.url, getBlob(files.json)], ['text', destinationBucketUrls.text.url, getBlob(files.text)], ]; + const zipBlob = await getZipBlob(files); + + console.log(`zipBlob.type: ${zipBlob.type}`); + for (const blobDetail of blobs) { const [fileFormat, url, blob] = blobDetail; - const response = await uploadToS3(url, blob); + + const blobTest = blobDetail[0] === 'json' ? zipBlob : blob; + + if (blobDetail[0] === 'json') { + console.log(`s3 url is: ${url}`); + } + const response = await uploadToS3(url, blobTest); + if (!response.isSuccess) { throw new Error( `Could not upload file format: ${fileFormat} to S3! ${response.errorMsg}`,