You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
1.9 KiB

11 months ago
  1. class GeminiEmbedder {
  2. constructor() {
  3. if (!process.env.GEMINI_EMBEDDING_API_KEY)
  4. throw new Error("No Gemini API key was set.");
  5. const { GoogleGenerativeAI } = require("@google/generative-ai");
  6. const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
  7. this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
  8. this.gemini = genAI.getGenerativeModel({ model: this.model });
  9. // This property is disabled as it causes issues when sending multiple chunks at once
  10. // since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
  11. // instead of 4 embeddings - no idea why this is the case, but it is not how the results are
  12. // expected to be returned.
  13. // this.maxConcurrentChunks = 1;
  14. // https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
  15. this.embeddingMaxChunkLength = 2_048;
  16. this.log(`Initialized with ${this.model}`);
  17. }
  18. log(text, ...args) {
  19. console.log(`\x1b[36m[GeminiEmbedder]\x1b[0m ${text}`, ...args);
  20. }
  21. /**
  22. * Embeds a single text input
  23. * @param {string} textInput - The text to embed
  24. * @returns {Promise<Array<number>>} The embedding values
  25. */
  26. async embedTextInput(textInput) {
  27. const result = await this.gemini.embedContent(textInput);
  28. return result.embedding.values || [];
  29. }
  30. /**
  31. * Embeds a list of text inputs
  32. * @param {Array<string>} textInputs - The list of text to embed
  33. * @returns {Promise<Array<Array<number>>>} The embedding values
  34. */
  35. async embedChunks(textChunks = []) {
  36. let embeddings = [];
  37. for (const chunk of textChunks) {
  38. const results = await this.gemini.embedContent(chunk);
  39. if (!results.embedding || !results.embedding.values)
  40. throw new Error("No embedding values returned from gemini");
  41. embeddings.push(results.embedding.values);
  42. }
  43. return embeddings;
  44. }
  45. }
  46. module.exports = {
  47. GeminiEmbedder,
  48. };