You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

151 lines
3.9 KiB

11 months ago
  // Load environment variables before the remaining modules are required.
  // In development the file `.env.<NODE_ENV>` is used; otherwise
  // `config()` is called with no arguments.
  if (process.env.NODE_ENV === "development") {
    require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
  } else {
    require("dotenv").config();
  }

  // The logger module exports a function; invoke it once at startup.
  require("./utils/logger")();
  5. const express = require("express");
  6. const bodyParser = require("body-parser");
  7. const cors = require("cors");
  8. const path = require("path");
  9. const { ACCEPTED_MIMES } = require("./utils/constants");
  10. const { reqBody } = require("./utils/http");
  11. const { processSingleFile } = require("./processSingleFile");
  12. const { processLink, getLinkText } = require("./processLink");
  13. const { wipeCollectorStorage } = require("./utils/files");
  14. const extensions = require("./extensions");
  15. const { processRawText } = require("./processRawText");
  16. const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
  const app = express();

  // Size limit handed to every body parser registered below.
  const FILE_LIMIT = "3GB";

  // CORS middleware configured with `origin: true`.
  app.use(cors({ origin: true }));

  // Body parsers, registered in the order text -> JSON -> urlencoded,
  // all sharing the same size limit.
  app.use(bodyParser.text({ limit: FILE_LIMIT }));
  app.use(bodyParser.json({ limit: FILE_LIMIT }));
  app.use(bodyParser.urlencoded({ limit: FILE_LIMIT, extended: true }));
  /**
   * POST /process
   *
   * Reads `filename` and optional `options` (default `{}`) from the request
   * body, normalizes the filename, strips any leading parent-directory
   * segments from it, and passes the result to `processSingleFile`.
   *
   * Both the success and the error path answer with HTTP 200; a failure is
   * signalled through `success: false` in the JSON body.
   */
  app.post("/process", [verifyPayloadIntegrity], async (request, response) => {
    const { filename, options = {} } = reqBody(request);

    try {
      // Normalize first, then drop every leading "../" or "..\" segment.
      const normalized = path.normalize(filename);
      const safeFilename = normalized.replace(/^(\.\.(\/|\\|$))+/, "");

      const result = await processSingleFile(safeFilename, options);
      const { success, reason, documents = [] } = result;

      response.status(200).json({
        filename: safeFilename,
        success,
        reason,
        documents,
      });
    } catch (error) {
      console.error(error);
      // The error response echoes the filename exactly as it was received.
      response.status(200).json({
        filename: filename,
        success: false,
        reason: "A processing error occurred.",
        documents: [],
      });
    }
  });
  /**
   * POST /process-link
   *
   * Reads `link` from the request body and passes it to `processLink`.
   * The JSON response echoes the link as `url` together with `success`,
   * `reason` and `documents` (defaulting to an empty array).
   *
   * Both the success and the error path answer with HTTP 200.
   */
  app.post(
    "/process-link",
    [verifyPayloadIntegrity],
    async (request, response) => {
      const { link } = reqBody(request);

      try {
        const result = await processLink(link);
        const { success, reason, documents = [] } = result;
        response.status(200).json({ url: link, success, reason, documents });
      } catch (error) {
        console.error(error);
        response.status(200).json({
          url: link,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
    }
  );
  /**
   * POST /util/get-link
   *
   * Reads `link` and `captureAs` (default "text") from the request body and
   * passes both to `getLinkText`. The JSON response echoes the link as
   * `url` together with `success` and `content` (defaulting to null).
   *
   * Both the success and the error path answer with HTTP 200.
   */
  app.post(
    "/util/get-link",
    [verifyPayloadIntegrity],
    async (request, response) => {
      const { link, captureAs = "text" } = reqBody(request);

      try {
        const result = await getLinkText(link, captureAs);
        const { success, content = null } = result;
        response.status(200).json({ url: link, success, content });
      } catch (error) {
        console.error(error);
        response.status(200).json({
          url: link,
          success: false,
          content: null,
        });
      }
    }
  );
  /**
   * POST /process-raw-text
   *
   * Reads `textContent` and `metadata` from the request body and passes
   * both to `processRawText`. The JSON response reports `metadata.title`
   * as `filename` together with `success`, `reason` and `documents`
   * (defaulting to an empty array).
   *
   * Both the success and the error path answer with HTTP 200; a failure is
   * signalled through `success: false` in the JSON body.
   */
  app.post(
    "/process-raw-text",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { textContent, metadata } = reqBody(request);
      try {
        const {
          success,
          reason,
          documents = [],
        } = await processRawText(textContent, metadata);
        // `metadata` may be absent from the body. Guard the lookup so a run
        // that succeeded is not turned into an error response by a
        // TypeError thrown while building the reply.
        response
          .status(200)
          .json({ filename: metadata?.title, success, reason, documents });
      } catch (e) {
        console.error(e);
        response.status(200).json({
          filename: metadata?.title || "Unknown-doc.txt",
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
      return;
    }
  );
  // Hand the app to the extensions module. This happens before the
  // catch-all route below is registered.
  extensions(app);

  // GET /accepts: respond with the ACCEPTED_MIMES constant as JSON.
  app.get("/accepts", (_request, response) => {
    response.status(200).json(ACCEPTED_MIMES);
  });

  // Any other method/path combination gets a bare 200 status.
  app.all("*", (_request, response) => {
    response.sendStatus(200);
  });
  // Start the HTTP server on port 8888. Once it is listening, the collector
  // storage is wiped and a startup message is logged.
  app
    .listen(8888, async () => {
      await wipeCollectorStorage();
      console.log(`Document processor app listening on port 8888`);
    })
    .on("error", function (error) {
      // Surface the server error instead of discarding it silently.
      console.error(error);

      // Each handler re-sends the signal it received to this process. They
      // are registered with `once` so the handler is already removed when
      // the signal is re-sent; a persistent `on` listener would catch its
      // own re-sent signal and loop forever instead of terminating.
      process.once("SIGUSR2", function () {
        process.kill(process.pid, "SIGUSR2");
      });
      process.once("SIGINT", function () {
        process.kill(process.pid, "SIGINT");
      });
    });