|
|
// Load environment variables: in development read the NODE_ENV-specific
// file (`.env.development`); otherwise fall back to dotenv's default lookup.
if (process.env.NODE_ENV === "development") {
  require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
} else {
  require("dotenv").config();
}
// The logger module exports a function; call it before the rest of the
// modules are loaded.
require("./utils/logger")();

// Third-party and Node built-in modules.
const express = require("express");
const bodyParser = require("body-parser");
const cors = require("cors");
const path = require("path");

// Project-local modules.
const { ACCEPTED_MIMES } = require("./utils/constants");
const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink, getLinkText } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const { processRawText } = require("./processRawText");
const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");

// The Express application that every route below is attached to.
const app = express();

// Upper bound on request body size, shared by all body parsers.
const FILE_LIMIT = "3GB";
// Global middleware, registered in the same order as before:
// CORS (reflecting the request origin), then the text, JSON and
// URL-encoded body parsers, each capped at FILE_LIMIT.
app.use(cors({ origin: true }));
app.use(bodyParser.text({ limit: FILE_LIMIT }));
app.use(bodyParser.json({ limit: FILE_LIMIT }));
app.use(
  bodyParser.urlencoded({
    limit: FILE_LIMIT,
    extended: true,
  })
);
/**
 * POST /process
 *
 * Reads `filename` and `options` (default `{}`) from the request body,
 * normalizes the filename and strips any leading `../` segments, then hands
 * it to `processSingleFile`. Always answers with HTTP 200; the `success`
 * field of the JSON body tells the caller whether processing worked.
 */
app.post("/process", [verifyPayloadIntegrity], async (req, res) => {
  const { filename, options = {} } = reqBody(req);

  try {
    // Drop leading parent-directory references after normalization so the
    // name cannot start by climbing out of its directory.
    const normalized = path.normalize(filename);
    const targetFilename = normalized.replace(/^(\.\.(\/|\\|$))+/, "");

    const result = await processSingleFile(targetFilename, options);
    const { success, reason, documents = [] } = result;

    res.status(200).json({
      filename: targetFilename,
      success,
      reason,
      documents,
    });
  } catch (e) {
    console.error(e);
    res.status(200).json({
      filename: filename,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    });
  }
});
/**
 * POST /process-link
 *
 * Reads `link` from the request body and passes it to `processLink`.
 * Always answers with HTTP 200; failures are reported through
 * `success: false` and a generic `reason`.
 */
app.post("/process-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link } = reqBody(req);

  try {
    const outcome = await processLink(link);
    const { success, reason, documents = [] } = outcome;
    res.status(200).json({ url: link, success, reason, documents });
  } catch (e) {
    console.error(e);
    res.status(200).json({
      url: link,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    });
  }
});
/**
 * POST /util/get-link
 *
 * Reads `link` and `captureAs` (default "text") from the request body and
 * returns whatever `getLinkText` produces as `content`. Always answers with
 * HTTP 200; on error `success` is false and `content` is null.
 */
app.post("/util/get-link", [verifyPayloadIntegrity], async (req, res) => {
  const { link, captureAs = "text" } = reqBody(req);

  try {
    const fetched = await getLinkText(link, captureAs);
    const { success, content = null } = fetched;
    res.status(200).json({ url: link, success, content });
  } catch (e) {
    console.error(e);
    res.status(200).json({
      url: link,
      success: false,
      content: null,
    });
  }
});
/**
 * POST /process-raw-text
 *
 * Reads `textContent` and `metadata` from the request body and passes them
 * to `processRawText`. Always answers with HTTP 200; the `success` field of
 * the JSON body tells the caller whether processing worked.
 *
 * Response body: `{ filename, success, reason, documents }`, where
 * `filename` is `metadata.title` when metadata was supplied.
 */
app.post(
  "/process-raw-text",
  [verifyPayloadIntegrity],
  async function (request, response) {
    const { textContent, metadata } = reqBody(request);
    try {
      const {
        success,
        reason,
        documents = [],
      } = await processRawText(textContent, metadata);
      // Guard the title lookup the same way the error path does: without
      // it, a missing `metadata` throws here and a successful run would be
      // reported to the caller as a processing error.
      response
        .status(200)
        .json({ filename: metadata?.title, success, reason, documents });
    } catch (e) {
      console.error(e);
      response.status(200).json({
        filename: metadata?.title || "Unknown-doc.txt",
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      });
    }
    return;
  }
);
// Hand the Express app to the "./extensions" module. This call sits before
// the catch-all `app.all("*")` handler registered further down, so anything
// it attaches to `app` is registered ahead of that fallback.
// NOTE(review): presumably this mounts additional routes — confirm in ./extensions.
extensions(app);
/**
 * GET /accepts
 *
 * Responds with HTTP 200 and the ACCEPTED_MIMES constant as JSON.
 */
app.get("/accepts", (_req, res) => {
  res.status(200).json(ACCEPTED_MIMES);
});
// Fallback for every method and path not matched by an earlier route:
// reply with a bare HTTP 200.
app.all("*", (_req, res) => {
  res.sendStatus(200);
});
/**
 * Start the HTTP server on port 8888.
 *
 * Once listening, awaits `wipeCollectorStorage()` and logs a startup line.
 * If the server emits "error" (for example, the port cannot be bound), the
 * error is logged and one-shot SIGUSR2 / SIGINT handlers are installed that
 * re-send the received signal to this process.
 */
app
  .listen(8888, async () => {
    await wipeCollectorStorage();
    console.log(`Document processor app listening on port 8888`);
  })
  .on("error", function (error) {
    // Surface the failure instead of discarding it; otherwise a failed
    // bind leaves no trace of why the server never came up.
    console.error(error);

    process.once("SIGUSR2", function () {
      process.kill(process.pid, "SIGUSR2");
    });

    // `once`, not `on`: the listener must already be removed when the signal
    // is re-sent, so the default action (exit) applies. A persistent
    // listener would catch its own re-sent SIGINT and loop forever.
    process.once("SIGINT", function () {
      process.kill(process.pid, "SIGINT");
    });
  });
|