diff --git a/package.json b/package.json
index a53c49ca..b111704b 100644
--- a/package.json
+++ b/package.json
@@ -69,6 +69,7 @@
         "@types/minimist": "^1.2.2",
         "@types/node": "^18.15.3",
         "@types/react": "18.0.9",
+        "@types/yauzl": "^2.10.0",
         "concurrently": "^7.6.0",
         "copyfiles": "^2.4.1",
         "husky": "^4.3.8",
@@ -94,6 +95,7 @@
         "react-markdown": "^5.0.3",
         "rfc4648": "^1.5.2",
         "tsafe": "^1.6.0",
+        "yauzl": "^2.10.0",
         "zod": "^3.17.10"
     }
 }
diff --git a/src/bin/tools/unzip.ts b/src/bin/tools/unzip.ts
index 29d5d558..cf09f3fc 100644
--- a/src/bin/tools/unzip.ts
+++ b/src/bin/tools/unzip.ts
@@ -1,184 +1,89 @@
-import { createReadStream, createWriteStream } from "fs";
-import { mkdir, stat, unlink } from "fs/promises";
-import { dirname as pathDirname, join as pathJoin, relative as pathRelative } from "path";
-import { type Readable } from "stream";
-import { createInflateRaw } from "zlib";
-import { partitionPromiseSettledResults } from "./partitionPromiseSettledResults";
+import fsp from "node:fs/promises";
+import fs from "node:fs";
+import path from "node:path";
+import yauzl from "yauzl";
+import stream from "node:stream";
+import { promisify } from "node:util";
 
-export type MultiError = Error & { cause: Error[] };
+const pipeline = promisify(stream.pipeline);
 
-/**
- * Extract the archive `zipFile` into the directory `dir`. If `archiveDir` is given,
- * only that directory will be extracted, stripping the given path components.
- *
- * If dir does not exist, it will be created.
- *
- * If any archive file exists, it will be overwritten.
- *
- * Will unzip using all available nodejs worker threads.
- *
- * Will try to clean up extracted files on failure.
- *
- * If unpacking fails, will either throw a regular error, or
- * possibly a `MultiError`, which contains a `cause` field with
- * a number of root cause errors.
- *
- * Warning: this method is not optimized for continuous reading of the zip
- * archive, but is a trade-off between simplicity and allowing extraction
- * of a single directory from the archive.
- *
- * @param zipFilePath the file to unzip
- * @param extractDirPath the target directory
- * @param pathOfDirToExtractInArchive if given, unpack only files from this archive directory
- * @throws {MultiError} error
- * @returns Promise for a list of full file paths pointing to actually extracted files
- */
-export async function unzip(zipFilePath: string, extractDirPath: string, pathOfDirToExtractInArchive?: string): Promise<string[]> {
-    const dirsCreated: (string | undefined)[] = [];
-    dirsCreated.push(await mkdir(extractDirPath, { recursive: true }));
-    const promises: Promise<string>[] = [];
-
-    // Iterate over all files in the zip, skip files which are not in archiveDir,
-    // if given.
-    for await (const record of iterateZipArchive(zipFilePath)) {
-        const { path: recordPath, createReadStream: createRecordReadStream } = record;
-        if (pathOfDirToExtractInArchive && !recordPath.startsWith(pathOfDirToExtractInArchive)) {
-            continue;
+async function pathExists(filePath: string) {
+    try {
+        await fsp.stat(filePath);
+        return true;
+    } catch (error) {
+        if ((error as { code: string }).code === "ENOENT") {
+            return false;
         }
-        const relativePath = pathOfDirToExtractInArchive ? pathRelative(pathOfDirToExtractInArchive, recordPath) : recordPath;
-        const filePath = pathJoin(extractDirPath, relativePath);
-        const parent = pathDirname(filePath);
-        promises.push(
-            new Promise<string>(async (resolve, reject) => {
-                if (!dirsCreated.includes(parent)) dirsCreated.push(await mkdir(parent, { recursive: true }));
+        throw error;
+    }
+}
 
-                // Pull the file out of the archive, write it to the target directory
-                const output = createWriteStream(filePath);
-                output.on("error", e => reject(Object.assign(e, { filePath })));
-                output.on("finish", () => resolve(filePath));
-                createRecordReadStream().pipe(output);
-            })
-        );
+export async function unzip(file: string, targetFolder: string, unzipSubPath?: string) {
+    // Add a trailing slash to unzipSubPath and targetFolder if missing
+    if (unzipSubPath && !unzipSubPath.endsWith("/") && !unzipSubPath.endsWith("\\")) {
+        unzipSubPath += "/";
     }
 
-    // Wait until _all_ files are either extracted or failed
-    const [success, failure] = (await Promise.allSettled(promises)).reduce(...partitionPromiseSettledResults<string>());
-
-    // If any extraction failed, try to clean up, then throw a MultiError,
-    // which has a `cause` field, containing a list of root cause errors.
-    if (failure.length) {
-        await Promise.all([
-            ...success.map(path => unlink(path).catch(_unused => undefined)),
-            ...failure.map(e => e && e.path && unlink(e.path as string).catch(_unused => undefined))
-        ]);
-        await Promise.all(dirsCreated.filter(Boolean).sort(sortByFolderDepth("desc")));
-        const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";"));
-        (e as any).cause = failure;
-        throw e;
+    if (!targetFolder.endsWith("/") && !targetFolder.endsWith("\\")) {
+        targetFolder += "/";
     }
 
-    return success;
-}
+    return new Promise<void>((resolve, reject) => {
+        yauzl.open(file, { lazyEntries: true }, async (err, zipfile) => {
+            if (err) {
+                reject(err);
+                return;
+            }
 
-function depth(dir: string) {
-    return dir.match(/\//g)?.length ?? 0;
-}
+            zipfile.readEntry();
 
-function sortByFolderDepth(order: "asc" | "desc") {
-    const ord = order === "asc" ? 1 : -1;
-    return (a: string | undefined, b: string | undefined) => ord * depth(a ?? "") + -ord * depth(b ?? "");
-}
+            zipfile.on("entry", async entry => {
+                if (unzipSubPath) {
+                    // Skip files outside of the unzipSubPath
+                    if (!entry.fileName.startsWith(unzipSubPath)) {
+                        zipfile.readEntry();
+                        return;
+                    }
 
-/**
- *
- * @param file file to read
- * @param start first byte to read
- * @param end last byte to read
- * @returns Promise of a buffer of read bytes
- */
-async function readFileChunk(file: string, start: number, end: number): Promise<Buffer> {
-    const chunks: Buffer[] = [];
-    return new Promise((resolve, reject) => {
-        const stream = createReadStream(file, { start, end });
-        stream.setMaxListeners(Infinity);
-        stream.on("error", e => reject(e));
-        stream.on("end", () => resolve(Buffer.concat(chunks)));
-        stream.on("data", chunk => chunks.push(chunk as Buffer));
+                    // Remove the unzipSubPath from the file name
+                    entry.fileName = entry.fileName.substring(unzipSubPath.length);
+                }
+
+                const target = path.join(targetFolder, entry.fileName);
+
+                // Directory file names end with '/'.
+                // Note that entries for directories themselves are optional.
+                // An entry's fileName implicitly requires its parent directories to exist.
+                if (/[\/\\]$/.test(target)) {
+                    await fsp.mkdir(target, { recursive: true });
+
+                    zipfile.readEntry();
+                    return;
+                }
+
+                // Skip existing files
+                if (await pathExists(target)) {
+                    zipfile.readEntry();
+                    return;
+                }
+
+                zipfile.openReadStream(entry, async (err, readStream) => {
+                    if (err) {
+                        reject(err);
+                        return;
+                    }
+
+                    // Reject on stream errors rather than leaving an unhandled rejection
+                    try {
+                        await pipeline(readStream, fs.createWriteStream(target));
+                    } catch (error) {
+                        reject(error);
+                        return;
+                    }
+
+                    zipfile.readEntry();
+                });
+            });
+
+            zipfile.once("end", function () {
+                zipfile.close();
+                resolve();
+            });
+        });
     });
 }
-
-type ZipRecord = {
-    path: string;
-    createReadStream: () => Readable;
-    compressionMethod: "deflate" | undefined;
-};
-
-type ZipRecordGenerator = AsyncGenerator<ZipRecord, void, unknown>;
-
-/**
- * Iterate over all records of a zipfile, and yield a ZipRecord.
- * Use `record.createReadStream()` to actually read the file.
- *
- * Warning: this method will only work with single-disk zip files.
- * Warning: this method may fail if the zip archive has a crazy amount
- * of files and the central directory is not fully contained within the
- * last 65k bytes of the zip file.
- *
- * @param zipFile
- * @returns AsyncGenerator which will yield ZipRecords
- */
-async function* iterateZipArchive(zipFile: string): ZipRecordGenerator {
-    // Need to know zip file size before we can do anything else
-    const { size } = await stat(zipFile);
-    const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle
-    // Read last ~65k bytes. Zip files have a comment up to 65_535 bytes at the very end,
-    // before that comes the zip central directory end header.
-    let chunk = await readFileChunk(zipFile, size - chunkSize, size);
-    const unread = size - chunk.length;
-    let i = chunk.length - 4;
-    let found = false;
-    // Find central directory end header, reading backwards from the end
-    while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true;
-    if (!found) throw new Error("Not a zip file");
-    // This method will fail on a multi-disk zip, so bail early.
-    if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported");
-    let nFiles = chunk.readUint16LE(i + 10);
-    // Get the position of the central directory
-    const directorySize = chunk.readUint32LE(i + 12);
-    const directoryOffset = chunk.readUint32LE(i + 16);
-    if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported");
-    if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`);
-    i = directoryOffset - unread;
-    // If i < 0, it means that the central directory is not contained within `chunk`
-    if (i < 0) {
-        chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize);
-        i = 0;
-    }
-    // Now iterate the central directory records, yield a `ZipRecord` for every entry
-    while (nFiles-- > 0) {
-        // Check for marker bytes
-        if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i));
-        const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)];
-        const compressedFileSize = chunk.readUint32LE(i + 20);
-        const filenameLength = chunk.readUint16LE(i + 28);
-        const extraLength = chunk.readUint16LE(i + 30);
-        const commentLength = chunk.readUint16LE(i + 32);
-        // Start of the actual content byte stream is after the 'local' record header,
-        // which is 30 bytes long plus filename and extra field
-        const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength;
-        const end = start + compressedFileSize;
-        const filename = chunk.slice(i + 46, i + 46 + filenameLength).toString("utf-8");
-        const createRecordReadStream = () => {
-            const input = createReadStream(zipFile, { start, end });
-            if (compressionMethod === "deflate") {
-                const inflate = createInflateRaw();
-                input.pipe(inflate);
-                return inflate;
-            }
-            return input;
-        };
-        if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod };
-        // advance pointer to next central directory entry
-        i += 46 + filenameLength + extraLength + commentLength;
-    }
-}
diff --git a/yarn.lock b/yarn.lock
index 1aa0dc72..476c6d97 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -480,6 +480,13 @@
   resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.6.tgz#250a7b16c3b91f672a24552ec64678eeb1d3a08d"
   integrity sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==
 
+"@types/yauzl@^2.10.0":
+  version "2.10.0"
+  resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599"
+  integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw==
+  dependencies:
+    "@types/node" "*"
+
 acorn-walk@^8.1.1:
   version "8.2.0"
   resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.2.0.tgz#741210f2e2426454508853a2f44d0ab83b7f69c1"
@@ -635,6 +642,11 @@ browserslist@^4.21.3:
     node-releases "^2.0.8"
     update-browserslist-db "^1.0.10"
 
+buffer-crc32@~0.2.3:
+  version "0.2.13"
+  resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
+  integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==
+
 cacache@^17.0.0:
   version "17.0.4"
   resolved "https://registry.yarnpkg.com/cacache/-/cacache-17.0.4.tgz#5023ed892ba8843e3b7361c26d0ada37e146290c"
@@ -1103,6 +1115,13 @@ fastq@^1.6.0:
   dependencies:
     reusify "^1.0.4"
 
+fd-slicer@~1.1.0:
+  version "1.1.0"
"https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e" + integrity sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g== + dependencies: + pend "~1.2.0" + fill-range@^7.0.1: version "7.0.1" resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40" @@ -1924,6 +1943,11 @@ path-type@^4.0.0: resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b" integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw== +pend@~1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" + integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg== + picocolors@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" @@ -2642,6 +2666,14 @@ yargs@^17.3.1: y18n "^5.0.5" yargs-parser "^21.1.1" +yauzl@^2.10.0: + version "2.10.0" + resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" + integrity sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g== + dependencies: + buffer-crc32 "~0.2.3" + fd-slicer "~1.1.0" + yn@3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"