refactor: use yauzl for unzipping
parent b0db8caf65
commit 08c7e38587
@@ -69,6 +69,7 @@
         "@types/minimist": "^1.2.2",
         "@types/node": "^18.15.3",
         "@types/react": "18.0.9",
+        "@types/yauzl": "^2.10.0",
         "concurrently": "^7.6.0",
         "copyfiles": "^2.4.1",
         "husky": "^4.3.8",
@@ -94,6 +95,7 @@
         "react-markdown": "^5.0.3",
         "rfc4648": "^1.5.2",
         "tsafe": "^1.6.0",
+        "yauzl": "^2.10.0",
         "zod": "^3.17.10"
     }
 }

@@ -1,184 +1,89 @@
-import { createReadStream, createWriteStream } from "fs";
-import { mkdir, stat, unlink } from "fs/promises";
-import { dirname as pathDirname, join as pathJoin, relative as pathRelative } from "path";
-import { type Readable } from "stream";
-import { createInflateRaw } from "zlib";
-import { partitionPromiseSettledResults } from "./partitionPromiseSettledResults";
-
-export type MultiError = Error & { cause: Error[] };
-
-/**
- * Extract the archive `zipFile` into the directory `dir`. If `archiveDir` is given,
- * only that directory will be extracted, stripping the given path components.
- *
- * If dir does not exist, it will be created.
- *
- * If any archive file exists, it will be overwritten.
- *
- * Will unzip using all available nodejs worker threads.
- *
- * Will try to clean up extracted files on failure.
- *
- * If unpacking fails, will either throw an regular error, or
- * possibly an `MultiError`, which contains a `cause` field with
- * a number of root cause errors.
- *
- * Warning this method is not optimized for continuous reading of the zip
- * archive, but is a trade-off between simplicity and allowing extraction
- * of a single directory from the archive.
- *
- * @param zipFilePath the file to unzip
- * @param extractDirPath the target directory
- * @param pathOfDirToExtractInArchive if given, unpack only files from this archive directory
- * @throws {MultiError} error
- * @returns Promise for a list of full file paths pointing to actually extracted files
- */
-export async function unzip(zipFilePath: string, extractDirPath: string, pathOfDirToExtractInArchive?: string): Promise<string[]> {
-    const dirsCreated: (string | undefined)[] = [];
-    dirsCreated.push(await mkdir(extractDirPath, { recursive: true }));
-    const promises: Promise<string>[] = [];
-
-    // Iterate over all files in the zip, skip files which are not in archiveDir,
-    // if given.
-    for await (const record of iterateZipArchive(zipFilePath)) {
-        const { path: recordPath, createReadStream: createRecordReadStream } = record;
-        if (pathOfDirToExtractInArchive && !recordPath.startsWith(pathOfDirToExtractInArchive)) {
-            continue;
-        }
-        const relativePath = pathOfDirToExtractInArchive ? pathRelative(pathOfDirToExtractInArchive, recordPath) : recordPath;
-        const filePath = pathJoin(extractDirPath, relativePath);
-        const parent = pathDirname(filePath);
-        promises.push(
-            new Promise<string>(async (resolve, reject) => {
-                if (!dirsCreated.includes(parent)) dirsCreated.push(await mkdir(parent, { recursive: true }));
-
-                // Pull the file out of the archive, write it to the target directory
-                const output = createWriteStream(filePath);
-                output.on("error", e => reject(Object.assign(e, { filePath })));
-                output.on("finish", () => resolve(filePath));
-                createRecordReadStream().pipe(output);
-            })
-        );
-    }
-
-    // Wait until _all_ files are either extracted or failed
-    const [success, failure] = (await Promise.allSettled(promises)).reduce(...partitionPromiseSettledResults<string>());
-
-    // If any extraction failed, try to clean up, then throw a MultiError,
-    // which has a `cause` field, containing a list of root cause errors.
-    if (failure.length) {
-        await Promise.all([
-            ...success.map(path => unlink(path).catch(_unused => undefined)),
-            ...failure.map(e => e && e.path && unlink(e.path as string).catch(_unused => undefined))
-        ]);
-        await Promise.all(dirsCreated.filter(Boolean).sort(sortByFolderDepth("desc")));
-        const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";"));
-        (e as any).cause = failure;
-        throw e;
-    }
-
-    return success;
-}
-
-function depth(dir: string) {
-    return dir.match(/\//g)?.length ?? 0;
-}
-
-function sortByFolderDepth(order: "asc" | "desc") {
-    const ord = order === "asc" ? 1 : -1;
-    return (a: string | undefined, b: string | undefined) => ord * depth(a ?? "") + -ord * depth(b ?? "");
-}
-
-/**
- *
- * @param file file to read
- * @param start first byte to read
- * @param end last byte to read
- * @returns Promise of a buffer of read bytes
- */
-async function readFileChunk(file: string, start: number, end: number): Promise<Buffer> {
-    const chunks: Buffer[] = [];
-    return new Promise((resolve, reject) => {
-        const stream = createReadStream(file, { start, end });
-        stream.setMaxListeners(Infinity);
-        stream.on("error", e => reject(e));
-        stream.on("end", () => resolve(Buffer.concat(chunks)));
-        stream.on("data", chunk => chunks.push(chunk as Buffer));
-    });
-}
-
-type ZipRecord = {
-    path: string;
-    createReadStream: () => Readable;
-    compressionMethod: "deflate" | undefined;
-};
-
-type ZipRecordGenerator = AsyncGenerator<ZipRecord, void, unknown>;
-
-/**
- * Iterate over all records of a zipfile, and yield a ZipRecord.
- * Use `record.createReadStream()` to actually read the file.
- *
- * Warning this method will only work with single-disk zip files.
- * Warning this method may fail if the zip archive has an crazy amount
- * of files and the central directory is not fully contained within the
- * last 65k bytes of the zip file.
- *
- * @param zipFile
- * @returns AsyncGenerator which will yield ZipRecords
- */
-async function* iterateZipArchive(zipFile: string): ZipRecordGenerator {
-    // Need to know zip file size before we can do anything else
-    const { size } = await stat(zipFile);
-    const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle
-    // Read last ~65k bytes. Zip files have an comment up to 65_535 bytes at the very end,
-    // before that comes the zip central directory end header.
-    let chunk = await readFileChunk(zipFile, size - chunkSize, size);
-    const unread = size - chunk.length;
-    let i = chunk.length - 4;
-    let found = false;
-    // Find central directory end header, reading backwards from the end
-    while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true;
-    if (!found) throw new Error("Not a zip file");
-    // This method will fail on a multi-disk zip, so bail early.
-    if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported");
-    let nFiles = chunk.readUint16LE(i + 10);
-    // Get the position of the central directory
-    const directorySize = chunk.readUint32LE(i + 12);
-    const directoryOffset = chunk.readUint32LE(i + 16);
-    if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported");
-    if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`);
-    i = directoryOffset - unread;
-    // If i < 0, it means that the central directory is not contained within `chunk`
-    if (i < 0) {
-        chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize);
-        i = 0;
-    }
-    // Now iterate the central directory records, yield an `ZipRecord` for every entry
-    while (nFiles-- > 0) {
-        // Check for marker bytes
-        if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i));
-        const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)];
-        const compressedFileSize = chunk.readUint32LE(i + 20);
-        const filenameLength = chunk.readUint16LE(i + 28);
-        const extraLength = chunk.readUint16LE(i + 30);
-        const commentLength = chunk.readUint16LE(i + 32);
-        // Start of the actual content byte stream is after the 'local' record header,
-        // which is 30 bytes long plus filename and extra field
-        const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength;
-        const end = start + compressedFileSize;
-        const filename = chunk.slice(i + 46, i + 46 + filenameLength).toString("utf-8");
-        const createRecordReadStream = () => {
-            const input = createReadStream(zipFile, { start, end });
-            if (compressionMethod === "deflate") {
-                const inflate = createInflateRaw();
-                input.pipe(inflate);
-                return inflate;
-            }
-            return input;
-        };
-        if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod };
-        // advance pointer to next central directory entry
-        i += 46 + filenameLength + extraLength + commentLength;
-    }
-}
+import fsp from "node:fs/promises";
+import fs from "fs";
+import path from "node:path";
+import yauzl from "yauzl";
+import stream from "node:stream";
+import { promisify } from "node:util";
+
+const pipeline = promisify(stream.pipeline);
+
+async function pathExists(path: string) {
+    try {
+        await fsp.stat(path);
+        return true;
+    } catch (error) {
+        if ((error as { code: string }).code === "ENOENT") {
+            return false;
+        }
+        throw error;
+    }
+}
+
+export async function unzip(file: string, targetFolder: string, unzipSubPath?: string) {
+    // add trailing slash to unzipSubPath and targetFolder
+    if (unzipSubPath && (!unzipSubPath.endsWith("/") || !unzipSubPath.endsWith("\\"))) {
+        unzipSubPath += "/";
+    }
+
+    if (!targetFolder.endsWith("/") || !targetFolder.endsWith("\\")) {
+        targetFolder += "/";
+    }
+
+    return new Promise<void>((resolve, reject) => {
+        yauzl.open(file, { lazyEntries: true }, async (err, zipfile) => {
+            if (err) {
+                reject(err);
+                return;
+            }
+
+            zipfile.readEntry();
+
+            zipfile.on("entry", async entry => {
+                if (unzipSubPath) {
+                    // Skip files outside of the unzipSubPath
+                    if (!entry.fileName.startsWith(unzipSubPath)) {
+                        zipfile.readEntry();
+                        return;
+                    }
+
+                    // Remove the unzipSubPath from the file name
+                    entry.fileName = entry.fileName.substring(unzipSubPath.length);
+                }
+
+                const target = path.join(targetFolder, entry.fileName);
+
+                // Directory file names end with '/'.
+                // Note that entries for directories themselves are optional.
+                // An entry's fileName implicitly requires its parent directories to exist.
+                if (/[\/\\]$/.test(target)) {
+                    await fsp.mkdir(target, { recursive: true });
+
+                    zipfile.readEntry();
+                    return;
+                }
+
+                // Skip existing files
+                if (await pathExists(target)) {
+                    zipfile.readEntry();
+                    return;
+                }
+
+                zipfile.openReadStream(entry, async (err, readStream) => {
+                    if (err) {
+                        reject(err);
+                        return;
+                    }
+
+                    await pipeline(readStream, fs.createWriteStream(target));
+
+                    zipfile.readEntry();
+                });
+            });
+
+            zipfile.once("end", function () {
+                zipfile.close();
+                resolve();
+            });
+        });
+    });
+}
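For context, a minimal sketch of how the new yauzl-based helper might be called from elsewhere in the code base; the import path and the example file paths are hypothetical and not part of this commit:

    // Hypothetical caller (illustrative only): extract the "build" folder of an archive into a cache directory.
    import { unzip } from "./unzip"; // assumed module location

    async function run() {
        // Extracts every entry under "build/" into ".cache/extracted/", skipping files that already exist on disk.
        await unzip("downloads/theme.zip", ".cache/extracted", "build");
    }

    run().catch(error => {
        console.error("Extraction failed:", error);
        process.exit(1);
    });

Note that, unlike the removed implementation, the new function resolves with void rather than a list of extracted file paths, and it skips entries that already exist instead of overwriting them.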
yarn.lock (32 additions)
@@ -480,6 +480,13 @@
   resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.6.tgz#250a7b16c3b91f672a24552ec64678eeb1d3a08d"
   integrity sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==
 
+"@types/yauzl@^2.10.0":
+  version "2.10.0"
+  resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599"
+  integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw==
+  dependencies:
+    "@types/node" "*"
+
 acorn-walk@^8.1.1:
   version "8.2.0"
   resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.2.0.tgz#741210f2e2426454508853a2f44d0ab83b7f69c1"
@@ -635,6 +642,11 @@ browserslist@^4.21.3:
     node-releases "^2.0.8"
     update-browserslist-db "^1.0.10"
 
+buffer-crc32@~0.2.3:
+  version "0.2.13"
+  resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
+  integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==
+
 cacache@^17.0.0:
   version "17.0.4"
   resolved "https://registry.yarnpkg.com/cacache/-/cacache-17.0.4.tgz#5023ed892ba8843e3b7361c26d0ada37e146290c"
@@ -1103,6 +1115,13 @@ fastq@^1.6.0:
   dependencies:
     reusify "^1.0.4"
 
+fd-slicer@~1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
+  integrity sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==
+  dependencies:
+    pend "~1.2.0"
+
 fill-range@^7.0.1:
   version "7.0.1"
   resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
@@ -1924,6 +1943,11 @@ path-type@^4.0.0:
   resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
   integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==
 
+pend@~1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50"
+  integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==
+
 picocolors@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c"
@@ -2642,6 +2666,14 @@ yargs@^17.3.1:
     y18n "^5.0.5"
     yargs-parser "^21.1.1"
 
+yauzl@^2.10.0:
+  version "2.10.0"
+  resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
+  integrity sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==
+  dependencies:
+    buffer-crc32 "~0.2.3"
+    fd-slicer "~1.1.0"
+
 yn@3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"