You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

732 lines
22 KiB

11 months ago
  1. const { Telemetry } = require("../../../models/telemetry");
  2. const { validApiKey } = require("../../../utils/middleware/validApiKey");
  3. const { handleAPIFileUpload } = require("../../../utils/files/multer");
  4. const {
  5. viewLocalFiles,
  6. findDocumentInDocuments,
  7. normalizePath,
  8. isWithin,
  9. } = require("../../../utils/files");
  10. const { reqBody } = require("../../../utils/http");
  11. const { EventLogs } = require("../../../models/eventLogs");
  12. const { CollectorApi } = require("../../../utils/collectorApi");
  13. const fs = require("fs");
  14. const path = require("path");
  15. const { Document } = require("../../../models/documents");
  // Absolute path of the documents storage directory used by the folder and
  // file-move routes. In development it is resolved relative to this file;
  // otherwise it is resolved from the STORAGE_DIR environment variable.
  const documentsPath = (() => {
    if (process.env.NODE_ENV === "development") {
      return path.resolve(__dirname, "../../../storage/documents");
    }
    return path.resolve(process.env.STORAGE_DIR, "documents");
  })();
  /**
   * Registers the developer-API document routes on an Express-style app:
   * file upload, link upload, raw-text upload, document listing, accepted file
   * types, metadata schema, single-document lookup, folder creation and file
   * moves. Every route is guarded by the `validApiKey` middleware.
   *
   * The block comments that start with `#swagger` inside each handler are API
   * documentation annotations; keep them as the first thing in the handler body.
   *
   * @param {object} app - Application object exposing `get` and `post`
   *   route-registration methods. When falsy, nothing is registered.
   * @returns {void}
   */
  function apiDocumentEndpoints(app) {
    if (!app) return;

    app.post(
      "/v1/document/upload",
      [validApiKey, handleAPIFileUpload],
      async (request, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.'
        #swagger.requestBody = {
          description: 'File to be uploaded.',
          required: true,
          content: {
            "multipart/form-data": {
              schema: {
                type: 'object',
                properties: {
                  file: {
                    type: 'string',
                    format: 'binary',
                  }
                }
              }
            }
          }
        }
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  success: true,
                  error: null,
                  documents: [
                    {
                      "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
                      "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json",
                      "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt",
                      "title": "anythingllm.txt",
                      "docAuthor": "Unknown",
                      "description": "Unknown",
                      "docSource": "a text file uploaded by the user.",
                      "chunkSource": "anythingllm.txt",
                      "published": "1/16/2024, 3:07:00 PM",
                      "wordCount": 93,
                      "token_count_estimate": 115,
                    }
                  ]
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          // Without this guard a request that carries no file would throw while
          // destructuring `request.file` and surface as an unexplained 500.
          if (!request.file) {
            response.status(422).json({
              success: false,
              error: "No file was uploaded with this request.",
            });
            return;
          }

          const { originalname } = request.file;
          const Collector = new CollectorApi();
          const processingOnline = await Collector.online();
          if (!processingOnline) {
            response.status(500).json({
              success: false,
              error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`,
            });
            return;
          }

          const { success, reason, documents } =
            await Collector.processDocument(originalname);
          if (!success) {
            response
              .status(500)
              .json({ success: false, error: reason, documents });
            return;
          }

          Collector.log(
            `Document ${originalname} uploaded processed and successfully. It is now available in documents.`
          );
          await Telemetry.sendTelemetry("document_uploaded");
          await EventLogs.logEvent("api_document_uploaded", {
            documentName: originalname,
          });
          response.status(200).json({ success: true, error: null, documents });
        } catch (e) {
          console.error(e.message, e);
          response.sendStatus(500);
        }
      }
    );

    app.post(
      "/v1/document/upload-link",
      [validApiKey],
      async (request, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
        #swagger.requestBody = {
          description: 'Link of web address to be scraped.',
          required: true,
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "link": "https://anythingllm.com"
                }
              }
            }
          }
        }
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  success: true,
                  error: null,
                  documents: [
                    {
                      "id": "c530dbe6-bff1-4b9e-b87f-710d539d20bc",
                      "url": "file://useanything_com.html",
                      "title": "useanything_com.html",
                      "docAuthor": "no author found",
                      "description": "No description found.",
                      "docSource": "URL link uploaded by the user.",
                      "chunkSource": "https:anythingllm.com.html",
                      "published": "1/16/2024, 3:46:33 PM",
                      "wordCount": 252,
                      "pageContent": "AnythingLLM is the best....",
                      "token_count_estimate": 447,
                      "location": "custom-documents/url-useanything_com-c530dbe6-bff1-4b9e-b87f-710d539d20bc.json"
                    }
                  ]
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          const Collector = new CollectorApi();
          const { link } = reqBody(request);
          const processingOnline = await Collector.online();
          if (!processingOnline) {
            response.status(500).json({
              success: false,
              error: `Document processing API is not online. Link ${link} will not be processed automatically.`,
            });
            return;
          }

          const { success, reason, documents } =
            await Collector.processLink(link);
          if (!success) {
            response
              .status(500)
              .json({ success: false, error: reason, documents });
            return;
          }

          Collector.log(
            `Link ${link} uploaded processed and successfully. It is now available in documents.`
          );
          await Telemetry.sendTelemetry("link_uploaded");
          await EventLogs.logEvent("api_link_uploaded", {
            link,
          });
          response.status(200).json({ success: true, error: null, documents });
        } catch (e) {
          console.error(e.message, e);
          response.sendStatus(500);
        }
      }
    );

    app.post(
      "/v1/document/raw-text",
      [validApiKey],
      async (request, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Upload a file by specifying its raw text content and metadata values without having to upload a file.'
        #swagger.requestBody = {
          description: 'Text content and metadata of the file to be saved to the system. Use metadata-schema endpoint to get the possible metadata keys',
          required: true,
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
                  "metadata": {
                    "title": "This key is required. See in /server/endpoints/api/document/index.js:287",
                    keyOne: "valueOne",
                    keyTwo: "valueTwo",
                    etc: "etc"
                  }
                }
              }
            }
          }
        }
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  success: true,
                  error: null,
                  documents: [
                    {
                      "id": "c530dbe6-bff1-4b9e-b87f-710d539d20bc",
                      "url": "file://my-document.txt",
                      "title": "hello-world.txt",
                      "docAuthor": "no author found",
                      "description": "No description found.",
                      "docSource": "My custom description set during upload",
                      "chunkSource": "no chunk source specified",
                      "published": "1/16/2024, 3:46:33 PM",
                      "wordCount": 252,
                      "pageContent": "AnythingLLM is the best....",
                      "token_count_estimate": 447,
                      "location": "custom-documents/raw-my-doc-text-c530dbe6-bff1-4b9e-b87f-710d539d20bc.json"
                    }
                  ]
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          const Collector = new CollectorApi();
          // Metadata keys that must be present with a truthy value.
          const requiredMetadata = ["title"];
          const { textContent, metadata = {} } = reqBody(request);
          const processingOnline = await Collector.online();
          if (!processingOnline) {
            response.status(500).json({
              success: false,
              error: `Document processing API is not online. Request will not be processed.`,
            });
            return;
          }

          const hasRequiredMetadata = requiredMetadata.every(
            (reqKey) =>
              Object.keys(metadata).includes(reqKey) && !!metadata[reqKey]
          );
          if (!hasRequiredMetadata) {
            response.status(422).json({
              success: false,
              error: `You are missing required metadata key:value pairs in your request. Required metadata key:values are ${requiredMetadata
                .map((v) => `'${v}'`)
                .join(", ")}`,
            });
            return;
          }

          if (!textContent || textContent?.length === 0) {
            response.status(422).json({
              success: false,
              error: `The 'textContent' key cannot have an empty value.`,
            });
            return;
          }

          const { success, reason, documents } = await Collector.processRawText(
            textContent,
            metadata
          );
          if (!success) {
            response
              .status(500)
              .json({ success: false, error: reason, documents });
            return;
          }

          Collector.log(
            `Document created successfully. It is now available in documents.`
          );
          await Telemetry.sendTelemetry("raw_document_uploaded");
          await EventLogs.logEvent("api_raw_document_uploaded");
          response.status(200).json({ success: true, error: null, documents });
        } catch (e) {
          console.error(e.message, e);
          response.sendStatus(500);
        }
      }
    );

    app.get("/v1/documents", [validApiKey], async (_, response) => {
      /*
      #swagger.tags = ['Documents']
      #swagger.description = 'List of all locally-stored documents in instance'
      #swagger.responses[200] = {
        content: {
          "application/json": {
            schema: {
              type: 'object',
              example: {
                "localFiles": {
                  "name": "documents",
                  "type": "folder",
                  items: [
                    {
                      "name": "my-stored-document.json",
                      "type": "file",
                      "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
                      "url": "file://my-stored-document.txt",
                      "title": "my-stored-document.txt",
                      "cached": false
                    },
                  ]
                }
              }
            }
          }
        }
      }
      #swagger.responses[403] = {
        schema: {
          "$ref": "#/definitions/InvalidAPIKey"
        }
      }
      */
      try {
        const localFiles = await viewLocalFiles();
        response.status(200).json({ localFiles });
      } catch (e) {
        console.error(e.message, e);
        response.sendStatus(500);
      }
    });

    app.get(
      "/v1/document/accepted-file-types",
      [validApiKey],
      async (_, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Check available filetypes and MIMEs that can be uploaded.'
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "types": {
                    "application/mbox": [
                      ".mbox"
                    ],
                    "application/pdf": [
                      ".pdf"
                    ],
                    "application/vnd.oasis.opendocument.text": [
                      ".odt"
                    ],
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": [
                      ".docx"
                    ],
                    "text/plain": [
                      ".txt",
                      ".md"
                    ]
                  }
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          const types = await new CollectorApi().acceptedFileTypes();
          if (!types) {
            response.sendStatus(404);
            return;
          }
          response.status(200).json({ types });
        } catch (e) {
          console.error(e.message, e);
          response.sendStatus(500);
        }
      }
    );

    app.get(
      "/v1/document/metadata-schema",
      [validApiKey],
      async (_, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Get the known available metadata schema for when doing a raw-text upload and the acceptable type of value for each key.'
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "schema": {
                    "keyOne": "string | number | nullable",
                    "keyTwo": "string | number | nullable",
                    "specialKey": "number",
                    "title": "string",
                  }
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          response.status(200).json({
            schema: {
              // If you are updating this be sure to update the collector METADATA_KEYS constant in /processRawText.
              url: "string | nullable",
              title: "string",
              docAuthor: "string | nullable",
              description: "string | nullable",
              docSource: "string | nullable",
              chunkSource: "string | nullable",
              published: "epoch timestamp in ms | nullable",
            },
          });
        } catch (e) {
          console.error(e.message, e);
          response.sendStatus(500);
        }
      }
    );

    // Be careful and place as last route to prevent override of the other /document/ GET
    // endpoints!
    app.get("/v1/document/:docName", [validApiKey], async (request, response) => {
      /*
      #swagger.tags = ['Documents']
      #swagger.description = 'Get a single document by its unique AnythingLLM document name'
      #swagger.parameters['docName'] = {
        in: 'path',
        description: 'Unique document name to find (name in /documents)',
        required: true,
        type: 'string'
      }
      #swagger.responses[200] = {
        content: {
          "application/json": {
            schema: {
              type: 'object',
              example: {
                "localFiles": {
                  "name": "documents",
                  "type": "folder",
                  items: [
                    {
                      "name": "my-stored-document.txt-uuid1234.json",
                      "type": "file",
                      "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
                      "url": "file://my-stored-document.txt",
                      "title": "my-stored-document.txt",
                      "cached": false
                    },
                  ]
                }
              }
            }
          }
        }
      }
      #swagger.responses[403] = {
        schema: {
          "$ref": "#/definitions/InvalidAPIKey"
        }
      }
      */
      try {
        const { docName } = request.params;
        const document = await findDocumentInDocuments(docName);
        if (!document) {
          response.sendStatus(404);
          return;
        }
        // NOTE(review): the swagger example above shows a `localFiles` key, but
        // the payload sent here is keyed `document` - confirm and align the docs.
        response.status(200).json({ document });
      } catch (e) {
        console.error(e.message, e);
        response.sendStatus(500);
      }
    });

    app.post(
      "/v1/document/create-folder",
      [validApiKey],
      async (request, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Create a new folder inside the documents storage directory.'
        #swagger.requestBody = {
          description: 'Name of the folder to create.',
          required: true,
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "name": "new-folder"
                }
              }
            }
          }
        }
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  success: true,
                  message: null
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          const { name } = reqBody(request);
          const storagePath = path.join(documentsPath, normalizePath(name));
          // Refuse any name that would resolve outside the documents directory.
          if (!isWithin(path.resolve(documentsPath), path.resolve(storagePath)))
            throw new Error("Invalid path name");

          if (fs.existsSync(storagePath)) {
            response.status(500).json({
              success: false,
              message: "Folder by that name already exists",
            });
            return;
          }

          fs.mkdirSync(storagePath, { recursive: true });
          response.status(200).json({ success: true, message: null });
        } catch (e) {
          console.error(e);
          response.status(500).json({
            success: false,
            message: `Failed to create folder: ${e.message}`,
          });
        }
      }
    );

    app.post(
      "/v1/document/move-files",
      [validApiKey],
      async (request, response) => {
        /*
        #swagger.tags = ['Documents']
        #swagger.description = 'Move files within the documents storage directory.'
        #swagger.requestBody = {
          description: 'Array of objects containing source and destination paths of files to move.',
          required: true,
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  "files": [
                    {
                      "from": "custom-documents/file.txt-fc4beeeb-e436-454d-8bb4-e5b8979cb48f.json",
                      "to": "folder/file.txt-fc4beeeb-e436-454d-8bb4-e5b8979cb48f.json"
                    }
                  ]
                }
              }
            }
          }
        }
        #swagger.responses[200] = {
          content: {
            "application/json": {
              schema: {
                type: 'object',
                example: {
                  success: true,
                  message: null
                }
              }
            }
          }
        }
        #swagger.responses[403] = {
          schema: {
            "$ref": "#/definitions/InvalidAPIKey"
          }
        }
        */
        try {
          const { files } = reqBody(request);
          const docpaths = files.map(({ from }) => from);
          // Files whose `from` matches a Document record are skipped, and the
          // response tells the caller to unembed them first.
          const documents = await Document.where({ docpath: { in: docpaths } });
          const embeddedDocpaths = new Set(documents.map((doc) => doc.docpath));
          const moveableFiles = files.filter(
            ({ from }) => !embeddedDocpaths.has(from)
          );
          const plannedMoves = moveableFiles.map(({ from, to }) => ({
            from,
            to,
            sourcePath: path.join(documentsPath, normalizePath(from)),
            destinationPath: path.join(documentsPath, normalizePath(to)),
          }));

          try {
            // Validate the whole batch before touching the disk so that one bad
            // path cannot leave the request half-applied.
            for (const { sourcePath, destinationPath } of plannedMoves) {
              if (
                !isWithin(documentsPath, sourcePath) ||
                !isWithin(documentsPath, destinationPath)
              )
                throw new Error("Invalid file location");
            }

            await Promise.all(
              plannedMoves.map(
                async ({ from, to, sourcePath, destinationPath }) => {
                  try {
                    await fs.promises.rename(sourcePath, destinationPath);
                  } catch (err) {
                    console.error(`Error moving file ${from} to ${to}:`, err);
                    throw err;
                  }
                }
              )
            );
          } catch (err) {
            console.error("Error moving files:", err);
            response
              .status(500)
              .json({ success: false, message: "Failed to move some files." });
            return;
          }

          const unmovableCount = files.length - moveableFiles.length;
          response.status(200).json({
            success: true,
            message:
              unmovableCount > 0
                ? `${unmovableCount}/${files.length} files not moved. Unembed them from all workspaces.`
                : null,
          });
        } catch (e) {
          console.error(e);
          response
            .status(500)
            .json({ success: false, message: "Failed to move files." });
        }
      }
    );
  }
  706. module.exports = { apiDocumentEndpoints };