You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
215 lines
6.0 KiB
215 lines
6.0 KiB
// Load environment variables before anything else reads `process.env`.
// In development the env file is selected by NODE_ENV (`.env.development`);
// in every other case dotenv is invoked with its default options.
if (process.env.NODE_ENV === "development") {
  require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
} else {
  require("dotenv").config();
}
|
|
|
|
require("./utils/logger")();
|
|
const express = require("express");
|
|
const bodyParser = require("body-parser");
|
|
const cors = require("cors");
|
|
const path = require("path");
|
|
const { ACCEPTED_MIMES } = require("./utils/constants");
|
|
const { reqBody } = require("./utils/http");
|
|
const { processSingleFile } = require("./processSingleFile");
|
|
const { processLink, getLinkText } = require("./processLink");
|
|
const { wipeCollectorStorage } = require("./utils/files");
|
|
const extensions = require("./extensions");
|
|
const { processRawText } = require("./processRawText");
|
|
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
|
|
// Express application shared by every route registered below.
const app = express();

// Maximum request body size accepted by each body parser.
const FILE_LIMIT = "3GB";

// CORS: `origin: true` is passed to the cors middleware for every request.
app.use(cors({ origin: true }));

// Body parsers, registered in the same order as before: text, JSON, urlencoded.
app.use(bodyParser.text({ limit: FILE_LIMIT }));
app.use(bodyParser.json({ limit: FILE_LIMIT }));
app.use(bodyParser.urlencoded({ limit: FILE_LIMIT, extended: true }));
|
|
|
|
// app.post(
|
|
// "/process",
|
|
// [verifyPayloadIntegrity],
|
|
// async function (request, response) {
|
|
// const { filename, options = {} } = reqBody(request);
|
|
// try {
|
|
// const targetFilename = path
|
|
// .normalize(filename)
|
|
// .replace(/^(\.\.(\/|\\|$))+/, "");
|
|
// const {
|
|
// success,
|
|
// reason,
|
|
// documents = [],
|
|
// } = await processSingleFile(targetFilename, options);
|
|
// response
|
|
// .status(200)
|
|
// .json({ filename: targetFilename, success, reason, documents });
|
|
// } catch (e) {
|
|
// console.error(e);
|
|
// response.status(200).json({
|
|
// filename: filename,
|
|
// success: false,
|
|
// reason: "A processing error occurred.",
|
|
// documents: [],
|
|
// });
|
|
// }
|
|
// return;
|
|
// }
|
|
// );
|
|
|
|
const fs = require("fs").promises; // 使用 fs.promises 支持异步操作
|
|
// const path = require("path");
|
|
|
|
/**
 * POST /process
 *
 * Reads a file from the hotdir, runs it through `processSingleFile`, and
 * responds with the processing result together with the raw file bytes
 * encoded as Base64.
 *
 * Request body (extracted with `reqBody`):
 *   - filename {string} file path relative to the hotdir directory.
 *   - options  {object} forwarded unchanged to `processSingleFile` (default `{}`).
 *
 * Responses:
 *   - 200 `{ filename, success, reason, documents, fileContent }`
 *   - 400 `{ success: false, error }` when the filename is missing, resolves
 *         outside the hotdir, or does not point at a regular file.
 *   - 500 `{ filename, success: false, reason, documents: [] }` for any other
 *         failure (including a file that does not exist).
 */
app.post(
  "/process",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { filename, options = {} } = reqBody(request);

    // Reject unusable input up front instead of letting path.normalize throw.
    if (typeof filename !== "string" || filename.length === 0) {
      return response.status(400).json({
        success: false,
        error: "A filename is required.",
      });
    }

    try {
      // Collapse the path and strip any leading "../" segments.
      const targetFilename = path
        .normalize(filename)
        .replace(/^(\.\.(\/|\\|$))+/, "");

      const inputPath =
        process.env.NODE_ENV === "development"
          ? path.resolve("../server/storage/hotdir")
          : path.resolve("/app/server/storage/hotdir");

      // Build the path from the SANITISED name. Using the raw `filename` here
      // would let "../../etc/passwd" escape the hotdir and be returned to the
      // caller as Base64.
      const sourceFile = path.join(inputPath, targetFilename);

      // Defence in depth: refuse anything that still resolves outside hotdir.
      const relativeToHotdir = path.relative(inputPath, sourceFile);
      const escapesHotdir =
        relativeToHotdir === ".." ||
        relativeToHotdir.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToHotdir);
      if (escapesHotdir) {
        return response.status(400).json({
          success: false,
          error: "The requested file is outside the upload directory.",
        });
      }

      // Only regular files may be read (directories, sockets, etc. are rejected).
      const stats = await fs.stat(sourceFile);
      if (!stats.isFile()) {
        return response.status(400).json({
          success: false,
          error: "提供的路径不是文件",
        });
      }

      // Read the bytes BEFORE processing, preserving the original order of
      // operations. NOTE(review): presumably processSingleFile may move or
      // remove the source file; confirm before reordering.
      const fileContent = await fs.readFile(sourceFile);
      const fileContentBase64 = fileContent.toString("base64");

      const {
        success,
        reason,
        documents = [],
      } = await processSingleFile(targetFilename, options);

      response.status(200).json({
        filename: sourceFile,
        success,
        reason,
        documents,
        fileContent: fileContentBase64,
      });
    } catch (e) {
      console.error(e);
      if (e.code === "EISDIR") {
        // The path turned out to be a directory when it was read.
        response.status(400).json({
          success: false,
          error: "提供的路径是目录,不是文件",
        });
      } else {
        response.status(500).json({
          filename: filename,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
    }
  }
);
|
|
|
|
/**
 * POST /process-link
 *
 * Passes the `link` from the request body to `processLink` and relays its
 * `{ success, reason, documents }` result. Always answers with HTTP 200;
 * failures are reported through `success: false` in the JSON body.
 */
app.post("/process-link", [verifyPayloadIntegrity], async (request, response) => {
  const { link } = reqBody(request);

  try {
    const outcome = await processLink(link);
    const documents = outcome.documents === undefined ? [] : outcome.documents;
    response.status(200).json({
      url: link,
      success: outcome.success,
      reason: outcome.reason,
      documents,
    });
  } catch (e) {
    console.error(e);
    const failure = {
      url: link,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    };
    response.status(200).json(failure);
  }
});
|
|
|
|
/**
 * POST /util/get-link
 *
 * Calls `getLinkText(link, captureAs)` and returns its `success` flag and
 * `content` (null when absent). `captureAs` defaults to "text". Always
 * answers with HTTP 200; errors yield `success: false, content: null`.
 */
app.post("/util/get-link", [verifyPayloadIntegrity], async (request, response) => {
  const { link, captureAs = "text" } = reqBody(request);

  try {
    const outcome = await getLinkText(link, captureAs);
    const content = outcome.content === undefined ? null : outcome.content;
    response.status(200).json({ url: link, success: outcome.success, content });
  } catch (e) {
    console.error(e);
    const failure = { url: link, success: false, content: null };
    response.status(200).json(failure);
  }
});
|
|
|
|
/**
 * POST /process-raw-text
 *
 * Hands `textContent` and `metadata` from the request body to
 * `processRawText` and relays its `{ success, reason, documents }` result,
 * reporting `metadata.title` as the filename. Always answers with HTTP 200;
 * on error the filename falls back to "Unknown-doc.txt" when no title exists.
 */
app.post("/process-raw-text", [verifyPayloadIntegrity], async (request, response) => {
  const { textContent, metadata } = reqBody(request);

  try {
    const outcome = await processRawText(textContent, metadata);
    const documents = outcome.documents === undefined ? [] : outcome.documents;
    response.status(200).json({
      filename: metadata.title,
      success: outcome.success,
      reason: outcome.reason,
      documents,
    });
  } catch (e) {
    console.error(e);
    const failure = {
      filename: metadata?.title || "Unknown-doc.txt",
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    };
    response.status(200).json(failure);
  }
});
|
|
|
|
// Hand the Express app to the ./extensions module so it can attach to it.
extensions(app);
|
|
|
|
// GET /accepts: responds with the ACCEPTED_MIMES constant as JSON.
app.get("/accepts", (_request, res) => {
  res.status(200).json(ACCEPTED_MIMES);
});
|
|
|
|
// Catch-all: any method/path not matched above is answered with a bare 200.
app.all("*", (_request, res) => {
  res.sendStatus(200);
});
|
|
|
|
// Start the HTTP server on port 8888. Once listening, collector storage is
// wiped and a startup message is logged.
app
  .listen(8888, async () => {
    await wipeCollectorStorage();
    console.log(`Document processor app listening on port 8888`);
  })
  // If the server emits "error", install signal handlers that re-send the
  // received signal to this same process: SIGUSR2 once, SIGINT every time.
  // NOTE(review): presumably meant to support dev-reloader restarts; confirm.
  .on("error", (_) => {
    process.once("SIGUSR2", () => {
      process.kill(process.pid, "SIGUSR2");
    });
    process.on("SIGINT", () => {
      process.kill(process.pid, "SIGINT");
    });
  });
|