From 3a6fe1b374bfd281f98004d8944f70e20e8b4874 Mon Sep 17 00:00:00 2001
From: Waldemar Reusch
Date: Wed, 29 Mar 2023 09:54:29 +0200
Subject: [PATCH] fix(cache): fix download caches

* also fix npm config running 4 times in the worst case
* factor out unzip methods
* factor out and enhance trimIndent
* factor out more utils
* restore windows build, which failed because generate-i18n-messages did not
  write any files
---
 scripts/generate-i18n-messages.ts           |  14 +-
 src/bin/download-builtin-keycloak-theme.ts  |  34 +--
 src/bin/tools/downloadAndUnzip.ts           | 286 ++++--------------
 src/bin/tools/jar.ts                        |  38 +--
 .../tools/partitionPromiseSettledResults.ts |  11 +
 src/bin/tools/trimIndent.ts                 |  51 ++++
 src/bin/tools/unzip.ts                      | 184 +++++++++++
 7 files changed, 339 insertions(+), 279 deletions(-)
 create mode 100644 src/bin/tools/partitionPromiseSettledResults.ts
 create mode 100644 src/bin/tools/trimIndent.ts
 create mode 100644 src/bin/tools/unzip.ts

diff --git a/scripts/generate-i18n-messages.ts b/scripts/generate-i18n-messages.ts
index 879fd3d3..4cebc755 100644
--- a/scripts/generate-i18n-messages.ts
+++ b/scripts/generate-i18n-messages.ts
@@ -7,7 +7,7 @@ import { getProjectRoot } from "../src/bin/tools/getProjectRoot";
 import { getCliOptions } from "../src/bin/tools/cliOptions";
 import { getLogger } from "../src/bin/tools/logger";
 
-//NOTE: To run without argument when we want to generate src/i18n/generated_kcMessages files,
+// NOTE: To run without argument when we want to generate src/i18n/generated_kcMessages files,
 // update the version array for generating for newer version.
 
 //@ts-ignore
@@ -16,7 +16,7 @@ const propertiesParser = require("properties-parser");
 const { isSilent } = getCliOptions(process.argv.slice(2));
 const logger = getLogger({ isSilent });
 
-(async () => {
+async function main() {
     const keycloakVersion = "21.0.1";
 
     const tmpDirPath = pathJoin(getProjectRoot(), "tmp_xImOef9dOd44");
@@ -37,7 +37,9 @@ const logger = getLogger({ isSilent });
     const baseThemeDirPath = pathJoin(tmpDirPath, "base");
 
     crawl(baseThemeDirPath).forEach(filePath => {
-        const match = filePath.match(/^([^/]+)\/messages\/messages_([^.]+)\.properties$/);
+        const match =
+            filePath.match(/^([^/]+)\/messages\/messages_([^.]+)\.properties$/) ||
+            filePath.match(/^([^\\]+)\\messages\\messages_([^.]+)\.properties$/);
 
         if (match === null) {
             return;
@@ -114,4 +116,8 @@ const logger = getLogger({ isSilent });
             )
         );
     });
-})();
+}
+
+if (require.main === module) {
+    main().catch(e => console.error(e));
+}
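
The two patterns above exist because crawl() yields paths with the platform's separator, so the POSIX-only regex never matched on Windows, which is why generate-i18n-messages wrote no files there. An equivalent single-pattern approach, sketched here with a hypothetical matchMessagesPath helper, is to normalize the separator before matching:

    import { sep as pathSep } from "path";

    // Hypothetical helper: fold the platform separator to "/" so one
    // POSIX-style pattern covers both Unix and Windows paths.
    function matchMessagesPath(filePath: string) {
        return filePath
            .split(pathSep)
            .join("/")
            .match(/^([^/]+)\/messages\/messages_([^.]+)\.properties$/);
    }
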
diff --git a/src/bin/download-builtin-keycloak-theme.ts b/src/bin/download-builtin-keycloak-theme.ts
index 62ea29bc..d09188b3 100644
--- a/src/bin/download-builtin-keycloak-theme.ts
+++ b/src/bin/download-builtin-keycloak-theme.ts
@@ -8,35 +8,35 @@ import { getCliOptions } from "./tools/cliOptions";
 import { getLogger } from "./tools/logger";
 
 export async function downloadBuiltinKeycloakTheme(params: { keycloakVersion: string; destDirPath: string; isSilent: boolean }) {
-    const { keycloakVersion, destDirPath, isSilent } = params;
+    const { keycloakVersion, destDirPath } = params;
 
     await Promise.all(
         ["", "-community"].map(ext =>
             downloadAndUnzip({
                 "destDirPath": destDirPath,
                 "url": `https://github.com/keycloak/keycloak/archive/refs/tags/${keycloakVersion}.zip`,
-                "pathOfDirToExtractInArchive": `keycloak-${keycloakVersion}/themes/src/main/resources${ext}/theme`,
-                "cacheDirPath": pathJoin(keycloakThemeBuildingDirPath, ".cache"),
-                isSilent
+                "pathOfDirToExtractInArchive": `keycloak-${keycloakVersion}/themes/src/main/resources${ext}/theme`
             })
         )
     );
 }
 
-if (require.main === module) {
-    (async () => {
-        const { isSilent } = getCliOptions(process.argv.slice(2));
-        const logger = getLogger({ isSilent });
-        const { keycloakVersion } = await promptKeycloakVersion();
+async function main() {
+    const { isSilent } = getCliOptions(process.argv.slice(2));
+    const logger = getLogger({ isSilent });
+    const { keycloakVersion } = await promptKeycloakVersion();
 
-        const destDirPath = pathJoin(keycloakThemeBuildingDirPath, "src", "main", "resources", "theme");
+    const destDirPath = pathJoin(keycloakThemeBuildingDirPath, "src", "main", "resources", "theme");
 
-        logger.log(`Downloading builtins theme of Keycloak ${keycloakVersion} here ${destDirPath}`);
+    logger.log(`Downloading builtins theme of Keycloak ${keycloakVersion} here ${destDirPath}`);
 
-        await downloadBuiltinKeycloakTheme({
-            keycloakVersion,
-            destDirPath,
-            isSilent
-        });
-    })();
+    await downloadBuiltinKeycloakTheme({
+        keycloakVersion,
+        destDirPath,
+        isSilent
+    });
+}
+
+if (require.main === module) {
+    main().catch(e => console.error(e));
 }
diff --git a/src/bin/tools/downloadAndUnzip.ts b/src/bin/tools/downloadAndUnzip.ts
index a71391f4..2cb22cb5 100644
--- a/src/bin/tools/downloadAndUnzip.ts
+++ b/src/bin/tools/downloadAndUnzip.ts
@@ -1,15 +1,13 @@
-import { dirname as pathDirname, basename as pathBasename, join as pathJoin, join } from "path";
-import { createReadStream, createWriteStream } from "fs";
-import { stat, mkdir, unlink, writeFile } from "fs/promises";
-import { transformCodebase } from "./transformCodebase";
-import { createHash } from "crypto";
-import fetch from "make-fetch-happen";
-import { createInflateRaw } from "zlib";
-import type { Readable } from "stream";
-import { homedir } from "os";
-import { FetchOptions } from "make-fetch-happen";
 import { exec as execCallback } from "child_process";
+import { createHash } from "crypto";
+import { mkdir, stat, writeFile } from "fs/promises";
+import fetch, { type FetchOptions } from "make-fetch-happen";
+import { dirname as pathDirname, join as pathJoin } from "path";
+import { assert } from "tsafe";
+import { promisify } from "util";
+import { getProjectRoot } from "./getProjectRoot";
+import { transformCodebase } from "./transformCodebase";
+import { unzip } from "./unzip";
 
 const exec = promisify(execCallback);
 
@@ -17,25 +15,27 @@ function hash(s: string) {
     return createHash("sha256").update(s).digest("hex");
 }
 
-async function maybeStat(path: string) {
+async function exists(path: string) {
     try {
-        return await stat(path);
+        await stat(path);
+        return true;
     } catch (error) {
-        if ((error as Error & { code: string }).code === "ENOENT") return undefined;
+        if ((error as Error & { code: string }).code === "ENOENT") return false;
         throw error;
     }
 }
 
 /**
- * Get an npm configuration value as string, undefined if not set.
- *
- * @param key
- * @returns string or undefined
+ * Get npm configuration as a map
  */
-async function getNmpConfig(key: string): Promise<string | undefined> {
-    const { stdout } = await exec(`npm config get ${key}`);
-    const value = stdout.trim();
-    return value && value !== "null" ? value : undefined;
+async function getNmpConfig(): Promise<Record<string, string>> {
+    const { stdout } = await exec("npm config get", { encoding: "utf8" });
+    return stdout
+        .split("\n")
+        .filter(line => !line.startsWith(";"))
+        .map(line => line.trim())
+        .map(line => line.split("=", 2))
+        .reduce((cfg, [key, value]) => ({ ...cfg, [key]: value }), {});
 }
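
Running `npm config get` once and parsing the whole table is what removes the up-to-four child processes mentioned in the commit message: npm prints one `key = value` pair per line, with `;` starting comment lines. If quoted values or values containing `=` ever matter, a slightly more defensive parse could look like this (a sketch, not part of the patch):

    function parseNpmConfig(stdout: string): Record<string, string> {
        return stdout
            .split("\n")
            .map(line => line.trim())
            .filter(line => line !== "" && !line.startsWith(";"))
            .reduce((cfg, line) => {
                const i = line.indexOf("=");
                if (i === -1) return cfg;
                const key = line.slice(0, i).trim();
                // strip surrounding quotes, keep any "=" inside the value
                const value = line.slice(i + 1).trim().replace(/^"(.*)"$/, "$1");
                return { ...cfg, [key]: value };
            }, {} as Record<string, string>);
    }
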
 
 /**
@@ -45,233 +45,49 @@
  * @returns proxy configuration
  */
 async function getNpmProxyConfig(): Promise<Pick<FetchOptions, "proxy" | "noProxy">> {
-    const proxy = (await getNmpConfig("https-proxy")) ?? (await getNmpConfig("proxy"));
-    const noProxy = (await getNmpConfig("noproxy")) ?? (await getNmpConfig("no-proxy"));
+    const cfg = await getNmpConfig();
+
+    const proxy = cfg["https-proxy"] ?? cfg["proxy"];
+    const noProxy = cfg["noproxy"] ?? cfg["no-proxy"];
 
     return { proxy, noProxy };
 }
 
-/**
- * Download a file from `url` to `dir`. Will try to avoid downloading existing
- * files by using the cache directory ~/.keycloakify/cache
- *
- * If the target directory does not exist, it will be created.
- *
- * If the target file exists, it will be overwritten.
- *
- * We use make-fetch-happen's internal file cache here, so we don't need to
- * worry about redownloading the same file over and over. Unfortunately, that
- * cache does not have a single file per entry, but bundles and indexes them,
- * so we still need to write the contents to the target directory (possibly
- * over and over), cause the current unzip implementation wants random access.
- *
- * @param url download url
- * @param dir target directory
- * @param filename target filename
- * @returns promise for the full path of the downloaded file
- */
-async function download(url: string, dir: string, filename: string): Promise<string> {
-    const proxyOpts = await getNpmProxyConfig();
-    const cacheRoot = process.env.XDG_CACHE_HOME ?? homedir();
-    const cachePath = join(cacheRoot, ".keycloakify/cache");
-    const opts: FetchOptions = { cachePath, cache: 'force-cache', ...proxyOpts };
-    const response = await fetch(url, opts);
-    const filepath = pathJoin(dir, filename);
-    await mkdir(dir, { recursive: true });
-    await writeFile(filepath, response.body);
-    return filepath;
-}
-
-/**
- * @typedef
- * @type MultiError = Error & { cause: Error[] }
- */
-
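
The deleted download() leaned on make-fetch-happen's cacache-backed store: with cache: "force-cache", any previously stored response is served without revalidation, at the cost of an extra copy into the target directory on every call. A minimal sketch of that pattern (paths are illustrative):

    import fetch from "make-fetch-happen";

    const response = await fetch("https://example.com/archive.zip", {
        cachePath: "/home/user/.keycloakify/cache", // where cacache keeps its content-addressed store
        cache: "force-cache" // reuse a cached response without hitting the network
    });
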
-/**
- * Extract the archive `zipFile` into the directory `dir`. If `archiveDir` is given,
- * only that directory will be extracted, stripping the given path components.
- *
- * If dir does not exist, it will be created.
- *
- * If any archive file exists, it will be overwritten.
- *
- * Will unzip using all available nodejs worker threads.
- *
- * Will try to clean up extracted files on failure.
- *
- * If unpacking fails, will either throw an regular error, or
- * possibly an `MultiError`, which contains a `cause` field with
- * a number of root cause errors.
- *
- * Warning this method is not optimized for continuous reading of the zip
- * archive, but is a trade-off between simplicity and allowing extraction
- * of a single directory from the archive.
- *
- * @param zipFile the file to unzip
- * @param dir the target directory
- * @param archiveDir if given, unpack only files from this archive directory
- * @throws {MultiError} error
- * @returns Promise for a list of full file paths pointing to actually extracted files
- */
-async function unzip(zipFile: string, dir: string, archiveDir?: string): Promise<string[]> {
-    await mkdir(dir, { recursive: true });
-    const promises: Promise<string>[] = [];
-
-    // Iterate over all files in the zip, skip files which are not in archiveDir,
-    // if given.
-    for await (const record of iterateZipArchive(zipFile)) {
-        const { path: recordPath, createReadStream: createRecordReadStream } = record;
-        const filePath = pathJoin(dir, recordPath);
-        const parent = pathDirname(filePath);
-        if (archiveDir && !recordPath.startsWith(archiveDir)) continue;
-        promises.push(
-            new Promise<string>(async (resolve, reject) => {
-                await mkdir(parent, { recursive: true });
-                // Pull the file out of the archive, write it to the target directory
-                const input = createRecordReadStream();
-                const output = createWriteStream(filePath);
-                output.setMaxListeners(Infinity);
-                output.on("error", e => reject(Object.assign(e, { filePath })));
-                output.on("finish", () => resolve(filePath));
-                input.pipe(output);
-            })
-        );
-    }
-
-    // Wait until _all_ files are either extracted or failed
-    const results = await Promise.allSettled(promises);
-    const success = results.filter(r => r.status === "fulfilled").map(r => (r as PromiseFulfilledResult<string>).value);
-    const failure = results.filter(r => r.status === "rejected").map(r => (r as PromiseRejectedResult).reason);
-
-    // If any extraction failed, try to clean up, then throw a MultiError,
-    // which has a `cause` field, containing a list of root cause errors.
-    if (failure.length) {
-        await Promise.all(success.map(path => unlink(path)));
-        await Promise.all(failure.map(e => e && e.path && unlink(e.path as string)));
-        const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";"));
-        (e as any).cause = failure;
-        throw e;
-    }
-
-    return success;
-}
-
-/**
- *
- * @param file file to read
- * @param start first byte to read
- * @param end last byte to read
- * @returns Promise of a buffer of read bytes
- */
-async function readFileChunk(file: string, start: number, end: number): Promise<Buffer> {
-    const chunks: Buffer[] = [];
-    return new Promise((resolve, reject) => {
-        const stream = createReadStream(file, { start, end });
-        stream.setMaxListeners(Infinity);
-        stream.on("error", e => reject(e));
-        stream.on("end", () => resolve(Buffer.concat(chunks)));
-        stream.on("data", chunk => chunks.push(chunk as Buffer));
-    });
-}
-
-type ZipRecord = {
-    path: string;
-    createReadStream: () => Readable;
-    compressionMethod: "deflate" | undefined;
-};
-
-type ZipRecordGenerator = AsyncGenerator<ZipRecord, void, unknown>;
-
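
A detail readFileChunk depends on: fs.createReadStream treats the end offset as inclusive, so { start, end } yields end - start + 1 bytes.

    import { createReadStream } from "fs";

    // Reads exactly 10 bytes (offsets 0 through 9 inclusive).
    const firstTenBytes = createReadStream("archive.zip", { start: 0, end: 9 });
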
-/**
- * Iterate over all records of a zipfile, and yield a ZipRecord.
- * Use `record.createReadStream()` to actually read the file.
- *
- * Warning this method will only work with single-disk zip files.
- * Warning this method may fail if the zip archive has an crazy amount
- * of files and the central directory is not fully contained within the
- * last 65k bytes of the zip file.
- *
- * @param zipFile
- * @returns AsyncGenerator which will yield ZipRecords
- */
-async function* iterateZipArchive(zipFile: string): ZipRecordGenerator {
-    // Need to know zip file size before we can do anything else
-    const { size } = await stat(zipFile);
-    const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle
-    // Read last ~65k bytes. Zip files have an comment up to 65_535 bytes at the very end,
-    // before that comes the zip central directory end header.
-    let chunk = await readFileChunk(zipFile, size - chunkSize, size);
-    const unread = size - chunk.length;
-    let i = chunk.length - 4;
-    let found = false;
-    // Find central directory end header, reading backwards from the end
-    while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true;
-    if (!found) throw new Error("Not a zip file");
-    // This method will fail on a multi-disk zip, so bail early.
-    if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported");
-    let nFiles = chunk.readUint16LE(i + 10);
-    // Get the position of the central directory
-    const directorySize = chunk.readUint32LE(i + 12);
-    const directoryOffset = chunk.readUint32LE(i + 16);
-    if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported");
-    if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`);
-    i = directoryOffset - unread;
-    // If i < 0, it means that the central directory is not contained within `chunk`
-    if (i < 0) {
-        chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize);
-        i = 0;
-    }
-    // Now iterate the central directory records, yield an `ZipRecord` for every entry
-    while (nFiles-- > 0) {
-        // Check for marker bytes
-        if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i));
-        const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)];
-        const compressedFileSize = chunk.readUint32LE(i + 20);
-        const filenameLength = chunk.readUint16LE(i + 28);
-        const extraLength = chunk.readUint16LE(i + 30);
-        const commentLength = chunk.readUint16LE(i + 32);
-        // Start of the actual content byte stream is after the 'local' record header,
-        // which is 30 bytes long plus filename and extra field
-        const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength;
-        const end = start + compressedFileSize;
-        const filename = chunk.slice(i + 46, i + 46 + filenameLength).toString("utf-8");
-        const createRecordReadStream = () => {
-            const input = createReadStream(zipFile, { start, end });
-            if (compressionMethod === "deflate") {
-                const inflate = createInflateRaw();
-                input.pipe(inflate);
-                return inflate;
-            }
-            return input;
-        };
-        if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod };
-        // advance pointer to next central directory entry
-        i += 46 + filenameLength + extraLength + commentLength;
-    }
-}
-
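
The rewritten downloadAndUnzip below keys its cache entries on a short hash of the URL, so repeated builds reuse the same cache folders. An illustration (values hypothetical):

    import { createHash } from "crypto";

    const url = "https://github.com/keycloak/keycloak/archive/refs/tags/21.0.1.zip";
    // First 15 hex chars of a SHA-256: stable for a given URL, short enough for a directory name.
    const downloadHash = createHash("sha256").update(JSON.stringify({ url })).digest("hex").substring(0, 15);
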
 export async function downloadAndUnzip({
     url,
     destDirPath,
-    pathOfDirToExtractInArchive,
-    cacheDirPath
+    pathOfDirToExtractInArchive
 }: {
-    isSilent: boolean;
     url: string;
     destDirPath: string;
     pathOfDirToExtractInArchive?: string;
-    cacheDirPath: string;
 }) {
-    const downloadHash = hash(JSON.stringify({ url, pathOfDirToExtractInArchive })).substring(0, 15);
-    const extractDirPath = pathJoin(cacheDirPath, `_${downloadHash}`);
+    const downloadHash = hash(JSON.stringify({ url })).substring(0, 15);
+    const projectRoot = getProjectRoot();
+    const cacheRoot = process.env.XDG_CACHE_HOME ?? `${projectRoot}/node_modules/.cache`;
+    const zipFilePath = pathJoin(cacheRoot, "keycloakify", "zip", `_${downloadHash}.zip`);
+    const extractDirPath = pathJoin(cacheRoot, "keycloakify", "unzip", `_${downloadHash}`);
 
-    const filename = pathBasename(url);
-    const zipFilepath = await download(url, cacheDirPath, filename);
-    const zipMtime = (await stat(zipFilepath)).mtimeMs;
-    const unzipMtime = (await maybeStat(extractDirPath))?.mtimeMs;
+    if (!(await exists(zipFilePath))) {
+        const proxyOpts = await getNpmProxyConfig();
+        const response = await fetch(url, proxyOpts);
+        await mkdir(pathDirname(zipFilePath), { recursive: true });
+        /**
+         * The correct way to fix this is to upgrade node-fetch beyond 3.2.5
+         * (see https://github.com/node-fetch/node-fetch/issues/1295#issuecomment-1144061991).
+         * Unfortunately, octokit (a dependency of keycloakify) also uses node-fetch, and
+         * does not support node-fetch 3.x. So we stick around with this band-aid until
+         * octokit upgrades.
+         */
+        response.body?.setMaxListeners(Number.MAX_VALUE);
+        assert(typeof response.body !== "undefined" && response.body != null);
+        await writeFile(zipFilePath, response.body);
+    }
 
-    if (!unzipMtime || zipMtime > unzipMtime) await unzip(zipFilepath, extractDirPath, pathOfDirToExtractInArchive);
+    await unzip(zipFilePath, extractDirPath, pathOfDirToExtractInArchive);
 
-    const srcDirPath = pathOfDirToExtractInArchive === undefined ? extractDirPath : pathJoin(extractDirPath, pathOfDirToExtractInArchive);
-    transformCodebase({ srcDirPath, destDirPath });
+    transformCodebase({
+        "srcDirPath": extractDirPath,
+        "destDirPath": destDirPath
+    });
 }
diff --git a/src/bin/tools/jar.ts b/src/bin/tools/jar.ts
index 2dc60f0e..0bd91837 100644
--- a/src/bin/tools/jar.ts
+++ b/src/bin/tools/jar.ts
@@ -3,12 +3,9 @@ import { dirname, relative, sep } from "path";
 import { createWriteStream } from "fs";
 
 import walk from "./walk";
-import type { ZipSource } from "./zip";
-import zip from "./zip";
+import zip, { type ZipSource } from "./zip";
 import { mkdir } from "fs/promises";
-
-/** Trim leading whitespace from every line */
-const trimIndent = (s: string) => s.replace(/(\n)\s+/g, "$1");
+import trimIndent from "./trimIndent";
 
 type JarArgs = {
     rootPath: string;
@@ -26,28 +23,23 @@ type JarArgs = {
 export default async function jar({ groupId, artifactId, version, rootPath, targetPath }: JarArgs) {
     const manifest: ZipSource = {
         path: "META-INF/MANIFEST.MF",
-        data: Buffer.from(
-            trimIndent(
-                `Manifest-Version: 1.0
-                Archiver-Version: Plexus Archiver
-                Created-By: Keycloakify
-                Built-By: unknown
-                Build-Jdk: 19.0.0`
-            )
-        )
+        data: Buffer.from(trimIndent`
+            Manifest-Version: 1.0
+            Archiver-Version: Plexus Archiver
+            Created-By: Keycloakify
+            Built-By: unknown
+            Build-Jdk: 19.0.0
+        `)
     };
 
     const pomProps: ZipSource = {
         path: `META-INF/maven/${groupId}/${artifactId}/pom.properties`,
-        data: Buffer.from(
-            trimIndent(
-                `# Generated by keycloakify
-                # ${new Date()}
-                artifactId=${artifactId}
-                groupId=${groupId}
-                version=${version}`
-            )
-        )
+        data: Buffer.from(trimIndent`# Generated by keycloakify
+            # ${new Date()}
+            artifactId=${artifactId}
+            groupId=${groupId}
+            version=${version}
+        `)
     };
 
     /**
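
jar.ts now builds its manifest and pom.properties through the tagged-template form of trimIndent (defined below), which strips the common leading indentation and the surrounding newlines. A usage sketch with hypothetical values:

    const pomProperties = trimIndent`
        # Generated by keycloakify
        artifactId=my-theme
    `;
    // => "# Generated by keycloakify\nartifactId=my-theme"
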
diff --git a/src/bin/tools/partitionPromiseSettledResults.ts b/src/bin/tools/partitionPromiseSettledResults.ts
new file mode 100644
index 00000000..d52eb737
--- /dev/null
+++ b/src/bin/tools/partitionPromiseSettledResults.ts
@@ -0,0 +1,11 @@
+export type PromiseSettledAndPartitioned<T> = [T[], any[]];
+
+export function partitionPromiseSettledResults<T>() {
+    return [
+        ([successes, failures]: PromiseSettledAndPartitioned<T>, item: PromiseSettledResult<T>) =>
+            item.status === "rejected"
+                ? ([successes, [item.reason, ...failures]] as PromiseSettledAndPartitioned<T>)
+                : ([[item.value, ...successes], failures] as PromiseSettledAndPartitioned<T>),
+        [[], []] as PromiseSettledAndPartitioned<T>
+    ] as const;
+}
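
A usage sketch for partitionPromiseSettledResults: it returns a [reducer, seed] pair that is spread directly into reduce, splitting settled promises into values and rejection reasons (values here are hypothetical):

    const settled = await Promise.allSettled([Promise.resolve("ok"), Promise.reject(new Error("boom"))]);
    const [successes, failures] = settled.reduce(...partitionPromiseSettledResults<string>());
    // successes => ["ok"], failures => [Error("boom")]
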
diff --git a/src/bin/tools/trimIndent.ts b/src/bin/tools/trimIndent.ts
new file mode 100644
index 00000000..e68a80cb
--- /dev/null
+++ b/src/bin/tools/trimIndent.ts
@@ -0,0 +1,51 @@
+/**
+ * Concatenate the string fragments and interpolated values
+ * to get a single string.
+ */
+function populateTemplate(strings: TemplateStringsArray, ...args: any[]) {
+    const chunks = [];
+    for (let i = 0; i < strings.length; i++) {
+        let lastStringLineLength = 0;
+        if (strings[i]) {
+            chunks.push(strings[i]);
+            // remember last indent of the string portion
+            lastStringLineLength = strings[i].split("\n").at(-1)?.length ?? 0;
+        }
+        if (args[i]) {
+            // if the interpolation value has newlines, indent the interpolation values
+            // using the last known string indent
+            chunks.push(args[i].replace(/(\r?\n)/g, "$1" + " ".repeat(lastStringLineLength)));
+        }
+    }
+    return chunks.join("");
+}
+
+function trimIndentPrivate(removeEmptyLeadingAndTrailingLines: boolean, strings: TemplateStringsArray, ...args: any[]) {
+    // Remove initial and final newlines
+    let string = populateTemplate(strings, ...args);
+    if (removeEmptyLeadingAndTrailingLines) string = string.replace(/^[\r\n]/, "").replace(/[^\S\r\n]*[\r\n]$/, "");
+    const dents = string.match(/^([ \t])+/gm)?.map(s => s.length) ?? [];
+    // No dents? no change required
+    if (!dents || dents.length == 0) return string;
+    const minDent = Math.min(...dents);
+    // The min indentation is 0, no change needed
+    if (!minDent) return string;
+    const dedented = string.replace(new RegExp(`^${" ".repeat(minDent)}`, "gm"), "");
+    return dedented;
+}
+
+/**
+ * Shift all lines left by the *smallest* indentation level,
+ * and remove initial newline and all trailing spaces.
+ */
+export default function trimIndent(strings: TemplateStringsArray, ...args: any[]) {
+    return trimIndentPrivate(true, strings, ...args);
+}
+
+/**
+ * Shift all lines left by the *smallest* indentation level,
+ * and _keep_ initial newline and all trailing spaces.
+ */
+trimIndent.keepLeadingAndTrailingNewlines = function (strings: TemplateStringsArray, ...args: any[]) {
+    return trimIndentPrivate(false, strings, ...args);
+};
diff --git a/src/bin/tools/unzip.ts b/src/bin/tools/unzip.ts
new file mode 100644
index 00000000..29d5d558
--- /dev/null
+++ b/src/bin/tools/unzip.ts
@@ -0,0 +1,184 @@
+import { createReadStream, createWriteStream } from "fs";
+import { mkdir, rmdir, stat, unlink } from "fs/promises";
+import { dirname as pathDirname, join as pathJoin, relative as pathRelative } from "path";
+import { type Readable } from "stream";
+import { createInflateRaw } from "zlib";
+import { partitionPromiseSettledResults } from "./partitionPromiseSettledResults";
+
+export type MultiError = Error & { cause: Error[] };
+
+/**
+ * Extract the archive `zipFilePath` into the directory `extractDirPath`. If
+ * `pathOfDirToExtractInArchive` is given, only that directory will be extracted,
+ * stripping the given path components.
+ *
+ * If `extractDirPath` does not exist, it will be created.
+ *
+ * If any archive file exists, it will be overwritten.
+ *
+ * Will unzip using all available nodejs worker threads.
+ *
+ * Will try to clean up extracted files on failure.
+ *
+ * If unpacking fails, will either throw a regular error, or
+ * possibly a `MultiError`, which contains a `cause` field with
+ * a number of root cause errors.
+ *
+ * Warning: this method is not optimized for continuous reading of the zip
+ * archive, but is a trade-off between simplicity and allowing extraction
+ * of a single directory from the archive.
+ *
+ * @param zipFilePath the file to unzip
+ * @param extractDirPath the target directory
+ * @param pathOfDirToExtractInArchive if given, unpack only files from this archive directory
+ * @throws {MultiError} error
+ * @returns Promise for a list of full file paths pointing to actually extracted files
+ */
+export async function unzip(zipFilePath: string, extractDirPath: string, pathOfDirToExtractInArchive?: string): Promise<string[]> {
+    const dirsCreated: (string | undefined)[] = [];
+    dirsCreated.push(await mkdir(extractDirPath, { recursive: true }));
+    const promises: Promise<string>[] = [];
+
+    // Iterate over all files in the zip, skip files which are not in
+    // pathOfDirToExtractInArchive, if given.
+    for await (const record of iterateZipArchive(zipFilePath)) {
+        const { path: recordPath, createReadStream: createRecordReadStream } = record;
+        if (pathOfDirToExtractInArchive && !recordPath.startsWith(pathOfDirToExtractInArchive)) {
+            continue;
+        }
+        const relativePath = pathOfDirToExtractInArchive ? pathRelative(pathOfDirToExtractInArchive, recordPath) : recordPath;
+        const filePath = pathJoin(extractDirPath, relativePath);
+        const parent = pathDirname(filePath);
+        promises.push(
+            new Promise<string>(async (resolve, reject) => {
+                if (!dirsCreated.includes(parent)) dirsCreated.push(await mkdir(parent, { recursive: true }));
+
+                // Pull the file out of the archive, write it to the target directory
+                const output = createWriteStream(filePath);
+                output.on("error", e => reject(Object.assign(e, { filePath })));
+                output.on("finish", () => resolve(filePath));
+                createRecordReadStream().pipe(output);
+            })
+        );
+    }
+
+    // Wait until _all_ files are either extracted or failed
+    const [success, failure] = (await Promise.allSettled(promises)).reduce(...partitionPromiseSettledResults<string>());
+
+    // If any extraction failed, try to clean up, then throw a MultiError,
+    // which has a `cause` field, containing a list of root cause errors.
+    if (failure.length) {
+        await Promise.all([
+            ...success.map(path => unlink(path).catch(_unused => undefined)),
+            ...failure.map(e => e && e.path && unlink(e.path as string).catch(_unused => undefined))
+        ]);
+        // Best-effort removal of the directories we created, deepest first,
+        // since rmdir only deletes empty directories.
+        for (const dir of dirsCreated.filter((d): d is string => Boolean(d)).sort(sortByFolderDepth("desc"))) {
+            await rmdir(dir).catch(_unused => undefined);
+        }
+        const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";"));
+        (e as any).cause = failure;
+        throw e;
+    }
+
+    return success;
+}
+
+function depth(dir: string) {
+    return dir.match(/\//g)?.length ?? 0;
+}
+
+function sortByFolderDepth(order: "asc" | "desc") {
+    const ord = order === "asc" ? 1 : -1;
+    return (a: string | undefined, b: string | undefined) => ord * depth(a ?? "") + -ord * depth(b ?? "");
+}
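
sortByFolderDepth orders by the number of path separators, so cleanup can delete children before their parent directories. For example:

    // Deepest-first ("desc") ordering: children sort before their parents.
    ["a", "a/b/c", "a/b"].sort(sortByFolderDepth("desc"));
    // => ["a/b/c", "a/b", "a"]
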
""); +} + +/** + * + * @param file file to read + * @param start first byte to read + * @param end last byte to read + * @returns Promise of a buffer of read bytes + */ +async function readFileChunk(file: string, start: number, end: number): Promise { + const chunks: Buffer[] = []; + return new Promise((resolve, reject) => { + const stream = createReadStream(file, { start, end }); + stream.setMaxListeners(Infinity); + stream.on("error", e => reject(e)); + stream.on("end", () => resolve(Buffer.concat(chunks))); + stream.on("data", chunk => chunks.push(chunk as Buffer)); + }); +} + +type ZipRecord = { + path: string; + createReadStream: () => Readable; + compressionMethod: "deflate" | undefined; +}; + +type ZipRecordGenerator = AsyncGenerator; + +/** + * Iterate over all records of a zipfile, and yield a ZipRecord. + * Use `record.createReadStream()` to actually read the file. + * + * Warning this method will only work with single-disk zip files. + * Warning this method may fail if the zip archive has an crazy amount + * of files and the central directory is not fully contained within the + * last 65k bytes of the zip file. + * + * @param zipFile + * @returns AsyncGenerator which will yield ZipRecords + */ +async function* iterateZipArchive(zipFile: string): ZipRecordGenerator { + // Need to know zip file size before we can do anything else + const { size } = await stat(zipFile); + const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle + // Read last ~65k bytes. Zip files have an comment up to 65_535 bytes at the very end, + // before that comes the zip central directory end header. + let chunk = await readFileChunk(zipFile, size - chunkSize, size); + const unread = size - chunk.length; + let i = chunk.length - 4; + let found = false; + // Find central directory end header, reading backwards from the end + while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true; + if (!found) throw new Error("Not a zip file"); + // This method will fail on a multi-disk zip, so bail early. 
+
+/**
+ * Iterate over all records of a zipfile, and yield a ZipRecord.
+ * Use `record.createReadStream()` to actually read the file.
+ *
+ * Warning: this method will only work with single-disk zip files.
+ * Warning: this method may fail if the zip archive has a crazy amount
+ * of files and the central directory is not fully contained within the
+ * last 65k bytes of the zip file.
+ *
+ * @param zipFile
+ * @returns AsyncGenerator which will yield ZipRecords
+ */
+async function* iterateZipArchive(zipFile: string): ZipRecordGenerator {
+    // Need to know zip file size before we can do anything else
+    const { size } = await stat(zipFile);
+    const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle
+    // Read last ~65k bytes. Zip files have a comment of up to 65_535 bytes at the very end,
+    // before that comes the zip central directory end header.
+    let chunk = await readFileChunk(zipFile, size - chunkSize, size);
+    const unread = size - chunk.length;
+    let i = chunk.length - 4;
+    let found = false;
+    // Find central directory end header, reading backwards from the end
+    while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true;
+    if (!found) throw new Error("Not a zip file");
+    // This method will fail on a multi-disk zip, so bail early.
+    if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported");
+    let nFiles = chunk.readUint16LE(i + 10);
+    // Get the position of the central directory
+    const directorySize = chunk.readUint32LE(i + 12);
+    const directoryOffset = chunk.readUint32LE(i + 16);
+    if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported");
+    if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`);
+    i = directoryOffset - unread;
+    // If i < 0, it means that the central directory is not contained within `chunk`
+    if (i < 0) {
+        chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize);
+        i = 0;
+    }
+    // Now iterate the central directory records, yield a `ZipRecord` for every entry
+    while (nFiles-- > 0) {
+        // Check for marker bytes
+        if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i));
+        const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)];
+        const compressedFileSize = chunk.readUint32LE(i + 20);
+        const filenameLength = chunk.readUint16LE(i + 28);
+        const extraLength = chunk.readUint16LE(i + 30);
+        const commentLength = chunk.readUint16LE(i + 32);
+        // Start of the actual content byte stream is after the 'local' record header,
+        // which is 30 bytes long plus filename and extra field
+        const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength;
+        const end = start + compressedFileSize;
+        const filename = chunk.slice(i + 46, i + 46 + filenameLength).toString("utf-8");
+        const createRecordReadStream = () => {
+            const input = createReadStream(zipFile, { start, end });
+            if (compressionMethod === "deflate") {
+                const inflate = createInflateRaw();
+                input.pipe(inflate);
+                return inflate;
+            }
+            return input;
+        };
+        if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod };
+        // advance pointer to next central directory entry
+        i += 46 + filenameLength + extraLength + commentLength;
+    }
+}
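
Taken together, the new flow is: downloadAndUnzip fetches the archive once into the cache, unzip streams the requested directory out of it, and transformCodebase copies the result into place. A usage sketch with illustrative values:

    await downloadAndUnzip({
        url: "https://github.com/keycloak/keycloak/archive/refs/tags/21.0.1.zip",
        destDirPath: "build_keycloak/src/main/resources/theme",
        pathOfDirToExtractInArchive: "keycloak-21.0.1/themes/src/main/resources/theme"
    });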