You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

211 lines
5.8 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  // Load environment variables before anything else reads process.env.
  // In development the variables come from `.env.development`; in every other
  // case dotenv is called with no arguments and uses its own default.
  if (process.env.NODE_ENV === "development") {
    require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
  } else {
    require("dotenv").config();
  }

  // Invoke the function exported by ./utils/logger.
  // NOTE(review): presumably this installs the logging setup — confirm in that module.
  const initLogger = require("./utils/logger");
  initLogger();
  5. const express = require("express");
  6. const bodyParser = require("body-parser");
  7. const cors = require("cors");
  8. const path = require("path");
  9. const { ACCEPTED_MIMES } = require("./utils/constants");
  10. const { reqBody } = require("./utils/http");
  11. const { processSingleFile } = require("./processSingleFile");
  12. const { processLink, getLinkText } = require("./processLink");
  13. const { wipeCollectorStorage } = require("./utils/files");
  14. const extensions = require("./extensions");
  15. const { processRawText } = require("./processRawText");
  16. const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
  // Express application that every middleware and route below is attached to.
  const app = express();

  // Maximum request body size, shared by all three body parsers.
  const FILE_LIMIT = "3GB";

  // CORS is enabled with `origin: true`.
  // NOTE(review): per the cors package this reflects the request origin — confirm this is intended.
  const corsOptions = { origin: true };
  app.use(cors(corsOptions));

  // Body parsers, registered in this order: plain text, JSON, URL-encoded forms.
  const bodyParsers = [
    bodyParser.text({ limit: FILE_LIMIT }),
    bodyParser.json({ limit: FILE_LIMIT }),
    bodyParser.urlencoded({ limit: FILE_LIMIT, extended: true }),
  ];
  app.use(...bodyParsers);
  28. // app.post(
  29. // "/process",
  30. // [verifyPayloadIntegrity],
  31. // async function (request, response) {
  32. // const { filename, options = {} } = reqBody(request);
  33. // try {
  34. // const targetFilename = path
  35. // .normalize(filename)
  36. // .replace(/^(\.\.(\/|\\|$))+/, "");
  37. // const {
  38. // success,
  39. // reason,
  40. // documents = [],
  41. // } = await processSingleFile(targetFilename, options);
  42. // response
  43. // .status(200)
  44. // .json({ filename: targetFilename, success, reason, documents });
  45. // } catch (e) {
  46. // console.error(e);
  47. // response.status(200).json({
  48. // filename: filename,
  49. // success: false,
  50. // reason: "A processing error occurred.",
  51. // documents: [],
  52. // });
  53. // }
  54. // return;
  55. // }
  56. // );
  57. const fs = require("fs").promises; // 使用 fs.promises 支持异步操作
  58. // const path = require("path");
  /**
   * POST /process
   *
   * Reads a file from the local `./hotdir` directory, runs it through
   * `processSingleFile`, and responds with both the processing result and the
   * raw file content encoded as Base64.
   *
   * Request body (parsed by `reqBody`):
   *   - filename {string} Name of the file, relative to `./hotdir`.
   *   - options  {object} Passed through to `processSingleFile` (default `{}`).
   *
   * Responses:
   *   - 200 `{ filename, success, reason, documents, fileContent }`
   *   - 400 `{ success: false, error }` when the path is not a regular file
   *         or would resolve outside `./hotdir`.
   *   - 500 `{ filename, success: false, reason, documents: [] }` on any other error.
   */
  app.post(
    "/process",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { filename, options = {} } = reqBody(request);
      console.log("文件名:", filename);
      try {
        // Normalize the client-supplied name and strip any leading "../" runs.
        const targetFilename = path
          .normalize(filename)
          .replace(/^(\.\.(\/|\\|$))+/, "");
        const inputPath = path.resolve("./hotdir");

        // Build the on-disk path from the SANITIZED name. Using the raw
        // `filename` here would let "../" sequences escape the hot directory
        // and expose arbitrary files through the Base64 `fileContent` field.
        const sourceFile = path.join(inputPath, targetFilename);
        console.log("源文件路径:", sourceFile);

        // Defense in depth: refuse anything that still resolves outside hotdir.
        const relativePath = path.relative(inputPath, sourceFile);
        const escapesHotdir =
          relativePath === ".." ||
          relativePath.startsWith(`..${path.sep}`) ||
          path.isAbsolute(relativePath);
        if (escapesHotdir) {
          return response.status(400).json({
            success: false,
            error: "提供的路径不是文件",
          });
        }

        // Make sure the path points at a regular file.
        const stats = await fs.stat(sourceFile);
        if (!stats.isFile()) {
          return response.status(400).json({
            success: false,
            error: "提供的路径不是文件",
          });
        }

        // Read the file into a Buffer and encode it as Base64 before processing.
        // NOTE(review): presumably read first because processSingleFile may move
        // or delete the source file — confirm against its implementation.
        const fileContent = await fs.readFile(sourceFile);
        const fileContentBase64 = fileContent.toString("base64");

        // Process the file and return the result together with its content.
        const {
          success,
          reason,
          documents = [],
        } = await processSingleFile(targetFilename, options);
        response.status(200).json({
          filename: sourceFile,
          success,
          reason,
          documents,
          fileContent: fileContentBase64,
        });
      } catch (e) {
        console.error(e);
        if (e.code === "EISDIR") {
          // The path turned out to be a directory when it was read.
          response.status(400).json({
            success: false,
            error: "提供的路径是目录,不是文件",
          });
        } else {
          response.status(500).json({
            filename: filename,
            success: false,
            reason: "A processing error occurred.",
            documents: [],
          });
        }
      }
    }
  );
  /**
   * POST /process-link
   *
   * Passes the `link` from the request body to `processLink` and reports the
   * outcome. The HTTP status is always 200; failures are signalled through
   * `success: false` in the JSON body.
   *
   * Response: `{ url, success, reason, documents }`
   */
  app.post(
    "/process-link",
    [verifyPayloadIntegrity],
    async (req, res) => {
      const { link: url } = reqBody(req);
      try {
        const {
          success,
          reason,
          documents: docs = [],
        } = await processLink(url);
        res.status(200).json({ url, success, reason, documents: docs });
      } catch (err) {
        console.error(err);
        const failure = {
          url,
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        };
        res.status(200).json(failure);
      }
    }
  );
  /**
   * POST /util/get-link
   *
   * Calls `getLinkText` with the `link` and `captureAs` values from the request
   * body (`captureAs` defaults to "text"). The HTTP status is always 200;
   * failures are signalled through `success: false` and `content: null`.
   *
   * Response: `{ url, success, content }`
   */
  app.post(
    "/util/get-link",
    [verifyPayloadIntegrity],
    async (req, res) => {
      const { link: url, captureAs: mode = "text" } = reqBody(req);
      try {
        const { success, content: text = null } = await getLinkText(url, mode);
        res.status(200).json({ url, success, content: text });
      } catch (err) {
        console.error(err);
        const failure = { url, success: false, content: null };
        res.status(200).json(failure);
      }
    }
  );
  /**
   * POST /process-raw-text
   *
   * Passes `textContent` and `metadata` from the request body to
   * `processRawText` and reports the outcome. The HTTP status is always 200;
   * failures are signalled through `success: false` in the JSON body.
   *
   * Response: `{ filename, success, reason, documents }`, where `filename` is
   * `metadata.title` on the normal path and falls back to "Unknown-doc.txt"
   * when an error is caught and no title is available.
   */
  app.post(
    "/process-raw-text",
    [verifyPayloadIntegrity],
    async function (request, response) {
      const { textContent, metadata } = reqBody(request);
      try {
        const {
          success,
          reason,
          documents = [],
        } = await processRawText(textContent, metadata);
        // Optional chaining keeps a missing `metadata` from throwing here, which
        // would turn an already-completed processing result into a reported
        // failure in the catch branch below.
        response
          .status(200)
          .json({ filename: metadata?.title, success, reason, documents });
      } catch (e) {
        console.error(e);
        response.status(200).json({
          filename: metadata?.title || "Unknown-doc.txt",
          success: false,
          reason: "A processing error occurred.",
          documents: [],
        });
      }
      return;
    }
  );
  // Hand the app to the extensions module so it can attach to it.
  extensions(app);

  // GET /accepts — respond with the ACCEPTED_MIMES constant as JSON.
  app.get("/accepts", (_request, res) => {
    res.status(200).json(ACCEPTED_MIMES);
  });

  // Catch-all: any method and path not matched above answers with a bare 200.
  app.all("*", (_request, res) => {
    res.sendStatus(200);
  });
  // Start the HTTP server on port 8888. Once it is listening, the collector
  // storage is wiped and a startup message is logged.
  app
    .listen(8888, async () => {
      await wipeCollectorStorage();
      console.log(`Document processor app listening on port 8888`);
    })
    .on("error", function (error) {
      // Report why the server failed (e.g. the port is already in use) instead
      // of discarding the error silently.
      console.error("Document processor server error:", error);

      // Re-raise the signal against this process after our listener has run.
      // `once` removes the listener before it fires, so the re-sent signal
      // falls through to the default action rather than re-entering the handler.
      process.once("SIGUSR2", function () {
        process.kill(process.pid, "SIGUSR2");
      });
      // With `process.on` here, the handler would catch its own re-sent SIGINT
      // again and again and the process would never exit.
      process.once("SIGINT", function () {
        process.kill(process.pid, "SIGINT");
      });
    });