You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
6.0 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. process.env.NODE_ENV === "development"
  2. ? require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` })
  3. : require("dotenv").config();
  4. require("./utils/logger")();
  5. const express = require("express");
  6. const bodyParser = require("body-parser");
  7. const cors = require("cors");
  8. const path = require("path");
  9. const { ACCEPTED_MIMES } = require("./utils/constants");
  10. const { reqBody } = require("./utils/http");
  11. const { processSingleFile } = require("./processSingleFile");
  12. const { processLink, getLinkText } = require("./processLink");
  13. const { wipeCollectorStorage } = require("./utils/files");
  14. const extensions = require("./extensions");
  15. const { processRawText } = require("./processRawText");
  16. const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity");
  17. const app = express();
  18. const FILE_LIMIT = "3GB";
  19. app.use(cors({ origin: true }));
  20. app.use(
  21. bodyParser.text({ limit: FILE_LIMIT }),
  22. bodyParser.json({ limit: FILE_LIMIT }),
  23. bodyParser.urlencoded({
  24. limit: FILE_LIMIT,
  25. extended: true,
  26. })
  27. );
  28. // app.post(
  29. // "/process",
  30. // [verifyPayloadIntegrity],
  31. // async function (request, response) {
  32. // const { filename, options = {} } = reqBody(request);
  33. // try {
  34. // const targetFilename = path
  35. // .normalize(filename)
  36. // .replace(/^(\.\.(\/|\\|$))+/, "");
  37. // const {
  38. // success,
  39. // reason,
  40. // documents = [],
  41. // } = await processSingleFile(targetFilename, options);
  42. // response
  43. // .status(200)
  44. // .json({ filename: targetFilename, success, reason, documents });
  45. // } catch (e) {
  46. // console.error(e);
  47. // response.status(200).json({
  48. // filename: filename,
  49. // success: false,
  50. // reason: "A processing error occurred.",
  51. // documents: [],
  52. // });
  53. // }
  54. // return;
  55. // }
  56. // );
  57. const fs = require("fs").promises; // 使用 fs.promises 支持异步操作
  58. // const path = require("path");
  59. app.post(
  60. "/process",
  61. [verifyPayloadIntegrity],
  62. async function (request, response) {
  63. const { filename, options = {} } = reqBody(request);
  64. console.log("文件名:", filename);
  65. try {
  66. const targetFilename = path
  67. .normalize(filename)
  68. .replace(/^(\.\.(\/|\\|$))+/, "");
  69. // const inputPath = path.resolve("./hotdir");
  70. const inputPath = process.env.NODE_ENV === "development"
  71. ? path.resolve("../server/storage/hotdir")
  72. : path.resolve("/app/server/storage/hotdir");
  73. console.log("输入路径:(((((((((((((((((((((((((((((((((((((((((:", inputPath,filename);
  74. const sourceFile = path.join(inputPath, filename); // 拼接文件路径
  75. console.log("源文件路径:", sourceFile);
  76. // 检查路径是否是文件
  77. const stats = await fs.stat(sourceFile);
  78. if (!stats.isFile()) {
  79. return response.status(400).json({
  80. success: false,
  81. error: "提供的路径不是文件",
  82. });
  83. }
  84. // 读取文件内容
  85. const fileContent = await fs.readFile(sourceFile); // 读取文件为 Buffer
  86. const fileContentBase64 = fileContent.toString("base64"); // 将文件内容转换为 Base64 字符串
  87. // 处理文件并返回结果
  88. const { success, reason, documents = [] } = await processSingleFile(targetFilename, options);
  89. response.status(200).json({
  90. filename: sourceFile,
  91. success,
  92. reason,
  93. documents,
  94. fileContent: fileContentBase64, // 将文件内容作为 Base64 字符串返回
  95. });
  96. } catch (e) {
  97. console.error(e);
  98. if (e.code === "EISDIR") {
  99. response.status(400).json({
  100. success: false,
  101. error: "提供的路径是目录,不是文件",
  102. });
  103. } else {
  104. response.status(500).json({
  105. filename: filename,
  106. success: false,
  107. reason: "A processing error occurred.",
  108. documents: [],
  109. });
  110. }
  111. }
  112. }
  113. );
  114. app.post(
  115. "/process-link",
  116. [verifyPayloadIntegrity],
  117. async function (request, response) {
  118. const { link } = reqBody(request);
  119. try {
  120. const { success, reason, documents = [] } = await processLink(link);
  121. response.status(200).json({ url: link, success, reason, documents });
  122. } catch (e) {
  123. console.error(e);
  124. response.status(200).json({
  125. url: link,
  126. success: false,
  127. reason: "A processing error occurred.",
  128. documents: [],
  129. });
  130. }
  131. return;
  132. }
  133. );
  134. app.post(
  135. "/util/get-link",
  136. [verifyPayloadIntegrity],
  137. async function (request, response) {
  138. const { link, captureAs = "text" } = reqBody(request);
  139. try {
  140. const { success, content = null } = await getLinkText(link, captureAs);
  141. response.status(200).json({ url: link, success, content });
  142. } catch (e) {
  143. console.error(e);
  144. response.status(200).json({
  145. url: link,
  146. success: false,
  147. content: null,
  148. });
  149. }
  150. return;
  151. }
  152. );
  153. app.post(
  154. "/process-raw-text",
  155. [verifyPayloadIntegrity],
  156. async function (request, response) {
  157. const { textContent, metadata } = reqBody(request);
  158. try {
  159. const {
  160. success,
  161. reason,
  162. documents = [],
  163. } = await processRawText(textContent, metadata);
  164. response
  165. .status(200)
  166. .json({ filename: metadata.title, success, reason, documents });
  167. } catch (e) {
  168. console.error(e);
  169. response.status(200).json({
  170. filename: metadata?.title || "Unknown-doc.txt",
  171. success: false,
  172. reason: "A processing error occurred.",
  173. documents: [],
  174. });
  175. }
  176. return;
  177. }
  178. );
  179. extensions(app);
  180. app.get("/accepts", function (_, response) {
  181. response.status(200).json(ACCEPTED_MIMES);
  182. });
  183. app.all("*", function (_, response) {
  184. response.sendStatus(200);
  185. });
  186. app
  187. .listen(8888, async () => {
  188. await wipeCollectorStorage();
  189. console.log(`Document processor app listening on port 8888`);
  190. })
  191. .on("error", function (_) {
  192. process.once("SIGUSR2", function () {
  193. process.kill(process.pid, "SIGUSR2");
  194. });
  195. process.on("SIGINT", function () {
  196. process.kill(process.pid, "SIGINT");
  197. });
  198. });