You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
65 lines
2.1 KiB
65 lines
2.1 KiB
import fs from 'node:fs/promises';
|
|
import path from 'node:path';
|
|
import { Buffer } from 'node:buffer';
|
|
import puppeteer from "puppeteer";
|
|
|
|
// Archive of saved PWM docs: technical drawings and files
|
|
// Wayback Machine URL listing for everything archived under the public-docs folder.
const TARGET_URL = "https://web.archive.org/web/*/https://www.paragonmachineworks.com/files/public-docs/*";
// Selector of the search input on the listing page that gets filled with the filter text.
const SEARCH_INPUT_LOCATOR = "input[type=search]";
// Directory the downloaded files are written to.
const OUT = './pdfs';

const browser = await puppeteer.launch();
const page = await browser.newPage();

await page.goto(TARGET_URL);

// PART ONE -- download single PDFs
await page.locator(SEARCH_INPUT_LOCATOR).fill('pdf');
// timeout: 0 disables Puppeteer's wait limit, so this blocks until a `td.url` cell appears.
await page.waitForSelector('td.url', { timeout: 0 });

// Collect the link of every table row whose "captures" and "uniques" cells both read 1.
// The callback is evaluated inside the page, so it must not reference anything
// declared in this module.
const rows = await page.$$eval("tr", (tableRows) => tableRows.map((row) => {
  const link = row.querySelector("a");
  const url = link?.href;

  const captures = row.getElementsByClassName("captures")[0]?.textContent;
  const uniques = row.getElementsByClassName("uniques")[0]?.textContent;

  // Rows without these cells (e.g. a header row) count as 0 and are dropped below.
  const capturesTime = captures ? Number.parseInt(captures, 10) : 0;
  const uniquesTime = uniques ? Number.parseInt(uniques, 10) : 0;

  // Logical && with strict equality; the original used bitwise & on loose comparisons.
  return capturesTime === 1 && uniquesTime === 1 ? url : null;
}).filter((url) => url));
|
|
|
|
/**
 * Fetches every URL in `urls` and writes each response body to a file in `outPath`.
 *
 * Before fetching, `id_` is appended to the first `/web/<digits>` segment of the URL
 * (presumably the Wayback Machine modifier for the raw, unrewritten capture — confirm
 * against archive.org documentation). The file name is the last path segment of the
 * original URL.
 *
 * Failures are isolated per file: a non-OK response, a rejected fetch, an unparsable
 * URL or a failed write is logged and the loop moves on to the next URL. Only a
 * failure to create the output directory stops the whole run.
 *
 * @param {Iterable<string>} urls - Archive URLs to download.
 * @param {string} outPath - Destination directory; created recursively if missing.
 * @returns {Promise<void>} Resolves when all URLs have been attempted; never rejects
 *   for the per-file and directory-creation failures described above.
 */
async function downloadFiles(urls, outPath) {
  try {
    // Create the output directory if it does not exist yet.
    await fs.mkdir(outPath, { recursive: true });
  } catch (error) {
    // Without a destination directory nothing can be saved, so give up here.
    console.error('Критическая ошибка:', error.message);
    return;
  }

  for (const url of urls) {
    const fullUrl = String(url).replace(/(\/web\/\d+)/, '$1id_');

    try {
      const response = await fetch(fullUrl);

      if (!response.ok) {
        console.error(`Ошибка при загрузке ${fullUrl}: ${response.statusText}`);
        continue;
      }

      // Derive the file name from the URL path.
      const fileName = path.basename(new URL(url).pathname);
      const destination = path.join(outPath, fileName);

      // Write the file to disk.
      const arrayBuffer = await response.arrayBuffer();
      await fs.writeFile(destination, Buffer.from(arrayBuffer));

      console.log(`✅ Сохранено: ${fileName}`);
    } catch (error) {
      // fetch rejects on network-level errors; treat that like a failed response
      // so one bad URL does not abort the remaining downloads.
      console.error(`Ошибка при загрузке ${fullUrl}: ${error.message}`);
    }
  }
}
|
|
|
|
// Download the collected URLs, then shut the browser down.
try {
  await downloadFiles(rows, OUT);
} finally {
  // close() returns a promise: await it so it is not left floating, and do it in
  // `finally` so the browser process is released even if the download step throws.
  await browser.close();
}
|
|
|