const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const {
  handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class GroqLLM {
  constructor(embedder = null, modelPreference = null) {
    const { OpenAI: OpenAIApi } = require("openai");
    if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
    this.openai = new OpenAIApi({
      baseURL: "https://api.groq.com/openai/v1",
      apiKey: process.env.GROQ_API_KEY,
    });
    this.model =
      modelPreference || process.env.GROQ_MODEL_PREF || "llama-3.1-8b-instant";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  #log(text, ...args) {
    console.log(`\x1b[32m[GroqAi]\x1b[0m ${text}`, ...args);
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(modelName) {
    return MODEL_MAP.groq[modelName] ?? 8192;
  }

  promptWindowLimit() {
    return MODEL_MAP.groq[this.model] ?? 8192;
  }

  async isValidChatCompletionModel(modelName = "") {
    return !!modelName; // name just needs to exist
  }
  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) return userPrompt;

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
        },
      });
    }
    return content.flat();
  }
  /**
   * Last Updated: October 21, 2024
   * According to https://console.groq.com/docs/vision, the supported vision models
   * each handle prompting differently and poorly depending on the model.
   * Currently the llama3.2 vision models are only in preview and subject to change,
   * and the llava model is deprecated, so we will not support attachments for it at all.
   *
   * Since we can only explicitly support the current models, this is a temporary solution.
   * If the attachments are empty or the model is not a vision model, we return the default
   * prompt structure, which works for all models.
   * If attachments are present and the model is a vision model, we return only the user
   * prompt with attachments - see the comment at the end of this function for more.
   *
   * Historical attachments are also omitted from the prompt chat history for the reasons above. (TDC: Dec 30, 2024)
   */
  #conditionalPromptStruct({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [], // This is the specific attachment for only this prompt
  }) {
    const VISION_MODELS = [
      "llama-3.2-90b-vision-preview",
      "llama-3.2-11b-vision-preview",
    ];
    const DEFAULT_PROMPT_STRUCT = [
      {
        role: "system",
        content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
      },
      ...chatHistory,
      { role: "user", content: userPrompt },
    ];

    // If there are no attachments or the model is not a vision model, return the default
    // prompt structure, as there is nothing to attach and no model limitations to consider.
    if (!attachments.length) return DEFAULT_PROMPT_STRUCT;
    if (!VISION_MODELS.includes(this.model)) {
      this.#log(
        `${this.model} is not an explicitly supported vision model! Will omit attachments.`
      );
      return DEFAULT_PROMPT_STRUCT;
    }

    return [
      // Why are the system prompt and history commented out?
      // The current vision models for Groq perform VERY poorly with ANY history or text prior to the image.
      // In order to not get LLM refusals for every single message, we will not include the "system prompt" or even the chat history.
      // This is a temporary solution until Groq fixes their vision models to be more coherent and also handle context prior to the image.
      // Note for the future:
      // Groq vision models also do not support system prompts - which is why you see the user/assistant emulation used instead of "system".
      // This means any vision call is assessed independently of the chat context prior to the image.
      /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
      // {
      //   role: "user",
      //   content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
      // },
      // {
      //   role: "assistant",
      //   content: "OK",
      // },
      // ...chatHistory,
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }

  /**
   * Construct the user prompt for this model.
   * @param {{attachments: import("../../helpers").Attachment[]}} param0
   * @returns
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [], // This is the specific attachment for only this prompt
  }) {
    // NOTICE: SEE GroqLLM.#conditionalPromptStruct for more information on how attachments are handled with Groq.
    return this.#conditionalPromptStruct({
      systemPrompt,
      contextTexts,
      chatHistory,
      userPrompt,
      attachments,
    });
  }
  /**
   * Sends the prepared messages to Groq and returns the completion text plus usage metrics.
   */
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `GroqAI:chatCompletion: ${this.model} is not valid for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
        .create({
          model: this.model,
          messages,
          temperature,
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage.prompt_tokens || 0,
        completion_tokens: result.output.usage.completion_tokens || 0,
        total_tokens: result.output.usage.total_tokens || 0,
        outputTps:
          result.output.usage.completion_tokens /
          result.output.usage.completion_time,
        duration: result.output.usage.total_time,
      },
    };
  }

  /**
   * Same as getChatCompletion, but requests a streamed response and wraps it
   * in the performance monitor so token throughput can be measured.
   */
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!(await this.isValidChatCompletionModel(this.model)))
      throw new Error(
        `GroqAI:streamChatCompletion: ${this.model} is not valid for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages,
      false
    );
    return measuredStreamRequest;
  }
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  GroqLLM,
};
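
// Usage sketch (illustrative comment only, not executed): a minimal example of how a
// caller might drive this class, assuming GROQ_API_KEY is set in the environment.
// The model name and prompt values below are placeholders, not part of this module.
//
// const llm = new GroqLLM(null, "llama-3.1-8b-instant"); // hypothetical model choice
// const messages = llm.constructPrompt({
//   systemPrompt: "You are a helpful assistant.",
//   contextTexts: [],
//   chatHistory: [],
//   userPrompt: "Hello!",
//   attachments: [],
// });
// const completion = await llm.getChatCompletion(messages, {
//   temperature: llm.defaultTemp,
// });
// console.log(completion?.textResponse, completion?.metrics);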