refactor: use yauzl for unzipping
parent b0db8caf65
commit 08c7e38587
package.json
@@ -69,6 +69,7 @@
         "@types/minimist": "^1.2.2",
         "@types/node": "^18.15.3",
         "@types/react": "18.0.9",
+        "@types/yauzl": "^2.10.0",
         "concurrently": "^7.6.0",
         "copyfiles": "^2.4.1",
         "husky": "^4.3.8",
@@ -94,6 +95,7 @@
         "react-markdown": "^5.0.3",
         "rfc4648": "^1.5.2",
         "tsafe": "^1.6.0",
+        "yauzl": "^2.10.0",
         "zod": "^3.17.10"
     }
 }
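These manifest entries add yauzl as a runtime dependency and its type definitions for development; with yarn (the lockfile below is yarn's), that corresponds roughly to running `yarn add yauzl@^2.10.0` and `yarn add --dev @types/yauzl@^2.10.0`.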
@@ -1,184 +1,89 @@
-import { createReadStream, createWriteStream } from "fs";
-import { mkdir, stat, unlink } from "fs/promises";
-import { dirname as pathDirname, join as pathJoin, relative as pathRelative } from "path";
-import { type Readable } from "stream";
-import { createInflateRaw } from "zlib";
-import { partitionPromiseSettledResults } from "./partitionPromiseSettledResults";
-
-export type MultiError = Error & { cause: Error[] };
-
-/**
- * Extract the archive `zipFile` into the directory `dir`. If `archiveDir` is given,
- * only that directory will be extracted, stripping the given path components.
- *
- * If dir does not exist, it will be created.
- *
- * If any archive file exists, it will be overwritten.
- *
- * Will unzip using all available nodejs worker threads.
- *
- * Will try to clean up extracted files on failure.
- *
- * If unpacking fails, will either throw an regular error, or
- * possibly an `MultiError`, which contains a `cause` field with
- * a number of root cause errors.
- *
- * Warning this method is not optimized for continuous reading of the zip
- * archive, but is a trade-off between simplicity and allowing extraction
- * of a single directory from the archive.
- *
- * @param zipFilePath the file to unzip
- * @param extractDirPath the target directory
- * @param pathOfDirToExtractInArchive if given, unpack only files from this archive directory
- * @throws {MultiError} error
- * @returns Promise for a list of full file paths pointing to actually extracted files
- */
-export async function unzip(zipFilePath: string, extractDirPath: string, pathOfDirToExtractInArchive?: string): Promise<string[]> {
-    const dirsCreated: (string | undefined)[] = [];
-    dirsCreated.push(await mkdir(extractDirPath, { recursive: true }));
-    const promises: Promise<string>[] = [];
-
-    // Iterate over all files in the zip, skip files which are not in archiveDir,
-    // if given.
-    for await (const record of iterateZipArchive(zipFilePath)) {
-        const { path: recordPath, createReadStream: createRecordReadStream } = record;
-        if (pathOfDirToExtractInArchive && !recordPath.startsWith(pathOfDirToExtractInArchive)) {
-            continue;
-        }
-        const relativePath = pathOfDirToExtractInArchive ? pathRelative(pathOfDirToExtractInArchive, recordPath) : recordPath;
-        const filePath = pathJoin(extractDirPath, relativePath);
-        const parent = pathDirname(filePath);
-        promises.push(
-            new Promise<string>(async (resolve, reject) => {
-                if (!dirsCreated.includes(parent)) dirsCreated.push(await mkdir(parent, { recursive: true }));
-
-                // Pull the file out of the archive, write it to the target directory
-                const output = createWriteStream(filePath);
-                output.on("error", e => reject(Object.assign(e, { filePath })));
-                output.on("finish", () => resolve(filePath));
-                createRecordReadStream().pipe(output);
-            })
-        );
-    }
-
-    // Wait until _all_ files are either extracted or failed
-    const [success, failure] = (await Promise.allSettled(promises)).reduce(...partitionPromiseSettledResults<string>());
-
-    // If any extraction failed, try to clean up, then throw a MultiError,
-    // which has a `cause` field, containing a list of root cause errors.
-    if (failure.length) {
-        await Promise.all([
-            ...success.map(path => unlink(path).catch(_unused => undefined)),
-            ...failure.map(e => e && e.path && unlink(e.path as string).catch(_unused => undefined))
-        ]);
-        await Promise.all(dirsCreated.filter(Boolean).sort(sortByFolderDepth("desc")));
-        const e = new Error("Failed to extract: " + failure.map(e => e.message).join(";"));
-        (e as any).cause = failure;
-        throw e;
-    }
-
-    return success;
-}
-
-function depth(dir: string) {
-    return dir.match(/\//g)?.length ?? 0;
-}
-
-function sortByFolderDepth(order: "asc" | "desc") {
-    const ord = order === "asc" ? 1 : -1;
-    return (a: string | undefined, b: string | undefined) => ord * depth(a ?? "") + -ord * depth(b ?? "");
-}
-
-/**
- *
- * @param file file to read
- * @param start first byte to read
- * @param end last byte to read
- * @returns Promise of a buffer of read bytes
- */
-async function readFileChunk(file: string, start: number, end: number): Promise<Buffer> {
-    const chunks: Buffer[] = [];
-    return new Promise((resolve, reject) => {
-        const stream = createReadStream(file, { start, end });
-        stream.setMaxListeners(Infinity);
-        stream.on("error", e => reject(e));
-        stream.on("end", () => resolve(Buffer.concat(chunks)));
-        stream.on("data", chunk => chunks.push(chunk as Buffer));
-    });
-}
-
-type ZipRecord = {
-    path: string;
-    createReadStream: () => Readable;
-    compressionMethod: "deflate" | undefined;
-};
-
-type ZipRecordGenerator = AsyncGenerator<ZipRecord, void, unknown>;
-
-/**
- * Iterate over all records of a zipfile, and yield a ZipRecord.
- * Use `record.createReadStream()` to actually read the file.
- *
- * Warning this method will only work with single-disk zip files.
- * Warning this method may fail if the zip archive has an crazy amount
- * of files and the central directory is not fully contained within the
- * last 65k bytes of the zip file.
- *
- * @param zipFile
- * @returns AsyncGenerator which will yield ZipRecords
- */
-async function* iterateZipArchive(zipFile: string): ZipRecordGenerator {
-    // Need to know zip file size before we can do anything else
-    const { size } = await stat(zipFile);
-    const chunkSize = 65_535 + 22 + 1; // max comment size + end header size + wiggle
-    // Read last ~65k bytes. Zip files have an comment up to 65_535 bytes at the very end,
-    // before that comes the zip central directory end header.
-    let chunk = await readFileChunk(zipFile, size - chunkSize, size);
-    const unread = size - chunk.length;
-    let i = chunk.length - 4;
-    let found = false;
-    // Find central directory end header, reading backwards from the end
-    while (!found && i-- > 0) if (chunk[i] === 0x50 && chunk.readUInt32LE(i) === 0x06054b50) found = true;
-    if (!found) throw new Error("Not a zip file");
-    // This method will fail on a multi-disk zip, so bail early.
-    if (chunk.readUInt16LE(i + 4) !== 0) throw new Error("Multi-disk zip not supported");
-    let nFiles = chunk.readUint16LE(i + 10);
-    // Get the position of the central directory
-    const directorySize = chunk.readUint32LE(i + 12);
-    const directoryOffset = chunk.readUint32LE(i + 16);
-    if (directoryOffset === 0xffff_ffff) throw new Error("zip64 not supported");
-    if (directoryOffset > size) throw new Error(`Central directory offset ${directoryOffset} is outside file`);
-    i = directoryOffset - unread;
-    // If i < 0, it means that the central directory is not contained within `chunk`
-    if (i < 0) {
-        chunk = await readFileChunk(zipFile, directoryOffset, directoryOffset + directorySize);
-        i = 0;
-    }
-    // Now iterate the central directory records, yield an `ZipRecord` for every entry
-    while (nFiles-- > 0) {
-        // Check for marker bytes
-        if (chunk.readUInt32LE(i) !== 0x02014b50) throw new Error("No central directory record at position " + (unread + i));
-        const compressionMethod = ({ 8: "deflate" } as const)[chunk.readUint16LE(i + 10)];
-        const compressedFileSize = chunk.readUint32LE(i + 20);
-        const filenameLength = chunk.readUint16LE(i + 28);
-        const extraLength = chunk.readUint16LE(i + 30);
-        const commentLength = chunk.readUint16LE(i + 32);
-        // Start of the actual content byte stream is after the 'local' record header,
-        // which is 30 bytes long plus filename and extra field
-        const start = chunk.readUint32LE(i + 42) + 30 + filenameLength + extraLength;
-        const end = start + compressedFileSize;
-        const filename = chunk.slice(i + 46, i + 46 + filenameLength).toString("utf-8");
-        const createRecordReadStream = () => {
-            const input = createReadStream(zipFile, { start, end });
-            if (compressionMethod === "deflate") {
-                const inflate = createInflateRaw();
-                input.pipe(inflate);
-                return inflate;
-            }
-            return input;
-        };
-        if (end > start) yield { path: filename, createReadStream: createRecordReadStream, compressionMethod };
-        // advance pointer to next central directory entry
-        i += 46 + filenameLength + extraLength + commentLength;
-    }
-}
+import fsp from "node:fs/promises";
+import fs from "fs";
+import path from "node:path";
+import yauzl from "yauzl";
+import stream from "node:stream";
+import { promisify } from "node:util";
+
+const pipeline = promisify(stream.pipeline);
+
+async function pathExists(path: string) {
+    try {
+        await fsp.stat(path);
+        return true;
+    } catch (error) {
+        if ((error as { code: string }).code === "ENOENT") {
+            return false;
+        }
+        throw error;
+    }
+}
+
+export async function unzip(file: string, targetFolder: string, unzipSubPath?: string) {
+    // add trailing slash to unzipSubPath and targetFolder
+    if (unzipSubPath && (!unzipSubPath.endsWith("/") || !unzipSubPath.endsWith("\\"))) {
+        unzipSubPath += "/";
+    }
+
+    if (!targetFolder.endsWith("/") || !targetFolder.endsWith("\\")) {
+        targetFolder += "/";
+    }
+
+    return new Promise<void>((resolve, reject) => {
+        yauzl.open(file, { lazyEntries: true }, async (err, zipfile) => {
+            if (err) {
+                reject(err);
+                return;
+            }
+
+            zipfile.readEntry();
+
+            zipfile.on("entry", async entry => {
+                if (unzipSubPath) {
+                    // Skip files outside of the unzipSubPath
+                    if (!entry.fileName.startsWith(unzipSubPath)) {
+                        zipfile.readEntry();
+                        return;
+                    }
+
+                    // Remove the unzipSubPath from the file name
+                    entry.fileName = entry.fileName.substring(unzipSubPath.length);
+                }
+
+                const target = path.join(targetFolder, entry.fileName);
+
+                // Directory file names end with '/'.
+                // Note that entries for directories themselves are optional.
+                // An entry's fileName implicitly requires its parent directories to exist.
+                if (/[\/\\]$/.test(target)) {
+                    await fsp.mkdir(target, { recursive: true });
+
+                    zipfile.readEntry();
+                    return;
+                }
+
+                // Skip existing files
+                if (await pathExists(target)) {
+                    zipfile.readEntry();
+                    return;
+                }
+
+                zipfile.openReadStream(entry, async (err, readStream) => {
+                    if (err) {
+                        reject(err);
+                        return;
+                    }
+
+                    await pipeline(readStream, fs.createWriteStream(target));
+
+                    zipfile.readEntry();
+                });
+            });
+
+            zipfile.once("end", function () {
+                zipfile.close();
+                resolve();
+            });
+        });
+    });
+}
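For orientation, a minimal sketch of how the new yauzl-based helper might be called; the archive path, target folder, sub-path, and the "./unzip" module path below are illustrative assumptions, not part of this commit:

// Hypothetical usage of the new unzip helper; paths are examples only.
import { unzip } from "./unzip";

(async () => {
    // Extract only entries under "theme/" from the archive into "./dist_keycloak",
    // creating target directories as needed and skipping files that already exist.
    await unzip("./archive.zip", "./dist_keycloak", "theme");
})().catch(console.error);

The third argument plays the same role as pathOfDirToExtractInArchive in the removed implementation: entries outside that directory are skipped and the prefix is stripped from the extracted paths.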
yarn.lock (32 additions)
@@ -480,6 +480,13 @@
   resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.6.tgz#250a7b16c3b91f672a24552ec64678eeb1d3a08d"
   integrity sha512-PBjIUxZHOuj0R15/xuwJYjFi+KZdNFrehocChv4g5hu6aFroHue8m0lBP0POdK2nKzbw0cgV1mws8+V/JAcEkQ==

+"@types/yauzl@^2.10.0":
+  version "2.10.0"
+  resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599"
+  integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw==
+  dependencies:
+    "@types/node" "*"
+
 acorn-walk@^8.1.1:
   version "8.2.0"
   resolved "https://registry.yarnpkg.com/acorn-walk/-/acorn-walk-8.2.0.tgz#741210f2e2426454508853a2f44d0ab83b7f69c1"
@@ -635,6 +642,11 @@ browserslist@^4.21.3:
     node-releases "^2.0.8"
     update-browserslist-db "^1.0.10"

+buffer-crc32@~0.2.3:
+  version "0.2.13"
+  resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
+  integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==
+
 cacache@^17.0.0:
   version "17.0.4"
   resolved "https://registry.yarnpkg.com/cacache/-/cacache-17.0.4.tgz#5023ed892ba8843e3b7361c26d0ada37e146290c"
@@ -1103,6 +1115,13 @@ fastq@^1.6.0:
   dependencies:
     reusify "^1.0.4"

+fd-slicer@~1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e"
+  integrity sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==
+  dependencies:
+    pend "~1.2.0"
+
 fill-range@^7.0.1:
   version "7.0.1"
   resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
@@ -1924,6 +1943,11 @@ path-type@^4.0.0:
   resolved "https://registry.yarnpkg.com/path-type/-/path-type-4.0.0.tgz#84ed01c0a7ba380afe09d90a8c180dcd9d03043b"
   integrity sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==

+pend@~1.2.0:
+  version "1.2.0"
+  resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50"
+  integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==
+
 picocolors@^1.0.0:
   version "1.0.0"
   resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c"
@@ -2642,6 +2666,14 @@ yargs@^17.3.1:
     y18n "^5.0.5"
     yargs-parser "^21.1.1"

+yauzl@^2.10.0:
+  version "2.10.0"
+  resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
+  integrity sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==
+  dependencies:
+    buffer-crc32 "~0.2.3"
+    fd-slicer "~1.1.0"
+
 yn@3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"