const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const {
  handleDefaultStreamResponseV2,
  formatChatHistory,
} = require("../../helpers/chat/responses");

class NvidiaNimLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
      throw new Error("No NVIDIA NIM API Base Path was set.");

    const { OpenAI: OpenAIApi } = require("openai");
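    // The NIM container serves an OpenAI-compatible API, so the OpenAI SDK client
    // is reused and pointed at the normalized container base path.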
    this.nvidiaNim = new OpenAIApi({
      baseURL: parseNvidiaNimBasePath(process.env.NVIDIA_NIM_LLM_BASE_PATH),
      apiKey: null,
    });
    this.model = modelPreference || process.env.NVIDIA_NIM_LLM_MODEL_PREF;
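
    // Reserve roughly 15% of the context window for chat history, 15% for the
    // system prompt, and 70% for the user prompt when compressing messages.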
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.#log(
      `Loaded with model: ${this.model} with context window: ${this.promptWindowLimit()}`
    );
  }

  #log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  /**
   * Set the model token limit `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT` for the given model ID
   * @param {string} modelId
   * @param {string} basePath
   * @returns {Promise<void>}
   */
  static async setModelTokenLimit(modelId, basePath = null) {
    if (!modelId) return;
    const { OpenAI: OpenAIApi } = require("openai");
    const openai = new OpenAIApi({
      baseURL: parseNvidiaNimBasePath(
        basePath || process.env.NVIDIA_NIM_LLM_BASE_PATH
      ),
      apiKey: null,
    });
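
    // List the models exposed by the NIM /v1/models endpoint and, if the requested
    // model is found, persist its reported max_model_len (falling back to 4096).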
    const models = await openai.models
      .list()
      .then((results) => results.data)
      .catch(() => {
        return [];
      });

    if (!models.length) return;
    const modelInfo = models.find((model) => model.id === modelId);
    if (!modelInfo) return;
    process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT = Number(
      modelInfo.max_model_len || 4096
    );
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(_modelName) {
    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No NVIDIA NIM token context limit was set.");
    return Number(limit);
  }

  // Ensure the user set a value for the token limit
  // and if undefined - assume 4096 window.
  promptWindowLimit() {
    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No NVIDIA NIM token context limit was set.");
    return Number(limit);
  }

  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
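    // Append each attachment as an OpenAI-style image_url content part so the
    // model receives images alongside the text prompt.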
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
          detail: "auto",
        },
      });
    }
    return content.flat();
  }

  /**
   * Construct the user prompt for this model.
   * @param {{attachments: import("../../helpers").Attachment[]}} param0
   * @returns
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }

  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `NVIDIA NIM chat: ${this.model} is not a valid or defined model for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.nvidiaNim.chat.completions
        .create({
          model: this.model,
          messages,
          temperature,
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;
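
    // Return token usage and throughput (completion tokens / measured duration)
    // alongside the text response so callers can record performance metrics.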
    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage.prompt_tokens || 0,
        completion_tokens: result.output.usage.completion_tokens || 0,
        total_tokens: result.output.usage.total_tokens || 0,
        outputTps: result.output.usage.completion_tokens / result.duration,
        duration: result.duration,
      },
    };
  }

  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `NVIDIA NIM chat: ${this.model} is not a valid or defined model for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.nvidiaNim.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages
    );
    return measuredStreamRequest;
  }

  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

/**
 * Parse the base path for the NVIDIA NIM container API. The base path must end in /v1
 * and cannot have a trailing slash, but users often paste it in an inconsistent format,
 * so we normalize whatever was provided into the expected form.
 * @param {string} basePath
 * @returns {string}
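 * @example
 * // Illustrative inputs (not from the original source): any parseable URL is
 * // reduced to `${origin}/v1`; unparseable strings are returned unchanged.
 * parseNvidiaNimBasePath("http://localhost:8000/v1/chat/completions"); // "http://localhost:8000/v1"
 * parseNvidiaNimBasePath("not-a-valid-url"); // "not-a-valid-url"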
 */
function parseNvidiaNimBasePath(providedBasePath = "") {
  try {
    const baseURL = new URL(providedBasePath);
    const basePath = `${baseURL.origin}/v1`;
    return basePath;
  } catch (e) {
    return providedBasePath;
  }
}

module.exports = {
  NvidiaNimLLM,
  parseNvidiaNimBasePath,
};