// Load environment variables: `.env.development` when NODE_ENV is
// "development", otherwise dotenv's default file.
process.env.NODE_ENV === "development"
  ? require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` })
  : require("dotenv").config();

require("./utils/logger")();
const express = require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
const path = require("path");
const fs = require("fs").promises; // promise-based fs API for async handlers
const { ACCEPTED_MIMES } = require("./utils/constants");
const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink, getLinkText } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const { processRawText } = require("./processRawText");
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");

const app = express();
// Maximum request body size accepted by every body parser below.
const FILE_LIMIT = "3GB";

app.use(cors({ origin: true }));
app.use(
  bodyParser.text({ limit: FILE_LIMIT }),
  bodyParser.json({ limit: FILE_LIMIT }),
  bodyParser.urlencoded({
    limit: FILE_LIMIT,
    extended: true,
  })
);

/**
 * Maps a client-supplied filename onto a path inside `./hotdir`.
 *
 * The filename comes from the request body, so it is untrusted: after joining
 * it to the hot directory the result is checked to still be located within
 * that directory. This blocks `../` traversal (and, on Windows, paths that
 * land on another drive), which would otherwise let a caller read any file
 * the process has access to.
 *
 * @param {unknown} filename - Value of `filename` from the request body.
 * @returns {string|null} Absolute path within `./hotdir`, or `null` when the
 *   value is not a string or points outside the hot directory.
 */
function resolveHotdirFile(filename) {
  if (typeof filename !== "string") return null;

  const hotdir = path.resolve("./hotdir");
  const candidate = path.join(hotdir, filename);
  const relative = path.relative(hotdir, candidate);
  const escapesHotdir =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  return escapesHotdir ? null : candidate;
}

/**
 * POST /process
 *
 * Body: `{ filename, options }`. Locates `filename` inside `./hotdir`, runs it
 * through `processSingleFile`, and responds with the processing result plus
 * the raw file content encoded as Base64.
 *
 * Responses:
 *  - 200: `{ filename, success, reason, documents, fileContent }`
 *  - 400: filename is invalid / outside `./hotdir`, or the path is not a file
 *  - 500: any other failure (including a missing file)
 */
app.post(
  "/process",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { filename, options = {} } = reqBody(request);
    console.log("文件名:", filename);

    try {
      const sourceFile = resolveHotdirFile(filename);
      if (sourceFile === null) {
        return response.status(400).json({
          filename: filename,
          success: false,
          reason: "Invalid filename.",
          documents: [],
        });
      }
      console.log("源文件路径:", sourceFile);

      // Make sure the path refers to a regular file.
      const stats = await fs.stat(sourceFile);
      if (!stats.isFile()) {
        return response.status(400).json({
          success: false,
          error: "提供的路径不是文件",
        });
      }

      // Read the file into a Buffer and encode it as a Base64 string.
      // NOTE(review): the whole file is held in memory and echoed back in the
      // response — confirm callers actually need `fileContent` for large files.
      const fileContent = await fs.readFile(sourceFile);
      const fileContentBase64 = fileContent.toString("base64");

      // Process the file and return the result.
      const {
        success,
        reason,
        documents = [],
      } = await processSingleFile(sourceFile, options);
      response.status(200).json({
        filename: sourceFile,
        success,
        reason,
        documents,
        fileContent: fileContentBase64, // file content as a Base64 string
      });
    } catch (e) {
      console.error(e);
      if (e.code === "EISDIR") {
        response.status(400).json({
          success: false,
          error: "提供的路径是目录,不是文件",
        });
      } else {
        response.status(500).json({
          filename: filename,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
    }
  }
);

/**
 * POST /process-link
 *
 * Body: `{ link }`. Passes the link to `processLink` and returns its result.
 * Always answers with HTTP 200; failures are reported via `success: false`.
 */
app.post(
  "/process-link",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { link } = reqBody(request);
    try {
      const { success, reason, documents = [] } = await processLink(link);
      response.status(200).json({ url: link, success, reason, documents });
    } catch (e) {
      console.error(e);
      response.status(200).json({
        url: link,
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      });
    }
    return;
  }
);

/**
 * POST /util/get-link
 *
 * Body: `{ link, captureAs }` (`captureAs` defaults to "text"). Returns the
 * content produced by `getLinkText`. Always answers with HTTP 200; failures
 * are reported via `success: false` and `content: null`.
 */
app.post(
  "/util/get-link",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { link, captureAs = "text" } = reqBody(request);
    try {
      const { success, content = null } = await getLinkText(link, captureAs);
      response.status(200).json({ url: link, success, content });
    } catch (e) {
      console.error(e);
      response.status(200).json({
        url: link,
        success: false,
        content: null,
      });
    }
    return;
  }
);

/**
 * POST /process-raw-text
 *
 * Body: `{ textContent, metadata }`. Passes both to `processRawText` and
 * returns its result, using `metadata.title` as the reported filename.
 * Always answers with HTTP 200; failures are reported via `success: false`.
 */
app.post(
  "/process-raw-text",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { textContent, metadata } = reqBody(request);
    try {
      const {
        success,
        reason,
        documents = [],
      } = await processRawText(textContent, metadata);
      response
        .status(200)
        .json({ filename: metadata.title, success, reason, documents });
    } catch (e) {
      console.error(e);
      response.status(200).json({
        // `metadata` may be missing here, so fall back to a placeholder name.
        filename: metadata?.title || "Unknown-doc.txt",
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      });
    }
    return;
  }
);

// Let the extensions module register its own routes on the app.
extensions(app);

// GET /accepts — responds with the ACCEPTED_MIMES constant.
app.get("/accepts", function (_, response) {
  response.status(200).json(ACCEPTED_MIMES);
});

// Any route not matched above answers with a bare 200.
app.all("*", function (_, response) {
  response.sendStatus(200);
});

app
  .listen(8888, async () => {
    await wipeCollectorStorage();
    console.log(`Document processor app listening on port 8888`);
  })
  // On a server "error" event, install handlers that re-send SIGUSR2 / SIGINT
  // to this process. The error object itself is not inspected.
  .on("error", function (_) {
    process.once("SIGUSR2", function () {
      process.kill(process.pid, "SIGUSR2");
    });
    process.on("SIGINT", function () {
      process.kill(process.pid, "SIGINT");
    });
  });