You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

53 lines
1.5 KiB

11 months ago
  1. const { v4 } = require("uuid");
  2. const fs = require("fs");
  3. const { tokenizeString } = require("../../utils/tokenizer");
  4. const {
  5. createdDate,
  6. trashFile,
  7. writeToServerDocuments,
  8. } = require("../../utils/files");
  9. const { default: slugify } = require("slugify");
  /**
   * Converts an uploaded plain-text file into a server document ready for embedding.
   *
   * Reads the file as UTF-8, builds the document metadata, hands it to
   * `writeToServerDocuments`, and passes the source file to `trashFile` on both
   * the success path and the empty/unreadable path.
   *
   * @param {Object} params
   * @param {string} [params.fullFilePath=""] - Path of the text file to read.
   * @param {string} [params.filename=""] - Name used for the document title,
   *   the log messages, and the slugified output name.
   * @returns {Promise<{success: boolean, reason: (string|null), documents: Array}>}
   *   `{ success: false, reason, documents: [] }` when the file could not be read
   *   or is empty; otherwise `{ success: true, reason: null, documents: [doc] }`
   *   where `doc` is whatever `writeToServerDocuments` returned.
   */
  async function asTxt({ fullFilePath = "", filename = "" }) {
    let content = "";
    try {
      content = fs.readFileSync(fullFilePath, "utf8");
    } catch (err) {
      // Best-effort read: the failure is logged and `content` stays empty, so
      // the empty-content branch below reports it to the caller.
      console.error("Could not read file!", err);
    }

    if (!content?.length) {
      console.error(`Resulting text content was empty for ${filename}.`);
      trashFile(fullFilePath);
      return {
        success: false,
        reason: `No text content found in ${filename}.`,
        documents: [],
      };
    }

    console.log(`-- Working ${filename} --`);

    // Split on any run of whitespace and drop empty tokens. Splitting on a
    // single space ignored newlines/tabs as separators and counted the gaps
    // between repeated spaces as words.
    const wordCount = content.split(/\s+/).filter(Boolean).length;

    const data = {
      id: v4(),
      url: `file://${fullFilePath}`,
      title: filename,
      docAuthor: "Unknown", // TODO: Find a better author
      description: "Unknown", // TODO: Find a better description
      docSource: "a text file uploaded by the user.",
      chunkSource: "",
      published: createdDate(fullFilePath),
      wordCount,
      pageContent: content,
      token_count_estimate: tokenizeString(content),
    };

    // The generated id is appended so the output name is distinct per upload.
    const document = writeToServerDocuments(
      data,
      `${slugify(filename)}-${data.id}`
    );
    trashFile(fullFilePath);
    console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
    return { success: true, reason: null, documents: [document] };
  }
  // CommonJS export: the module's sole export is the asTxt converter function.
  48. module.exports = asTxt;