const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
  handleDefaultStreamResponseV2,
  formatChatHistory,
} = require("../../helpers/chat/responses");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");

// hybrid of openAi LLM chat completion for LMStudio
class LMStudioLLM {
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.LMSTUDIO_BASE_PATH)
      throw new Error("No LMStudio API Base Path was set.");

    const { OpenAI: OpenAIApi } = require("openai");
    this.lmstudio = new OpenAIApi({
      baseURL: parseLMStudioBasePath(process.env.LMSTUDIO_BASE_PATH), // here is the URL to your LMStudio instance
      apiKey: null,
    });

    // Prior to LMStudio 0.2.17 the `model` param was not required and you could pass anything
    // into that field and it would work. On 0.2.17 LMStudio introduced multi-model chat
    // which now has a bug that reports the server model id as "Loaded from Chat UI"
    // and any other value will crash inferencing. So until this is patched we will
    // try to fetch the `/models` and have the user set it, or just fallback to "Loaded from Chat UI"
    // which will not impact users with <v0.2.17 and should work as well once the bug is fixed.
    this.model =
      modelPreference ||
      process.env.LMSTUDIO_MODEL_PREF ||
      "Loaded from Chat UI";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
  }
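  // Illustrative: with the default 4096-token window, the 15% / 15% / 70% split above
  // works out to roughly history ≈ 614, system ≈ 614, and user ≈ 2867 tokens.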
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }
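  // Example (illustrative): #appendContext(["foo", "bar"]) returns
  // "\nContext:\n[CONTEXT 0]:\nfoo\n[END CONTEXT 0]\n\n[CONTEXT 1]:\nbar\n[END CONTEXT 1]\n\n"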
  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  static promptWindowLimit(_modelName) {
    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No LMStudio token context limit was set.");
    return Number(limit);
  }

  // Ensure the user set a value for the token limit
  // and if undefined - assume a 4096 window.
  promptWindowLimit() {
    const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No LMStudio token context limit was set.");
    return Number(limit);
  }

  async isValidChatCompletionModel(_ = "") {
    // LMStudio may be anything. The user must do it correctly.
    // See comment about this.model declaration in constructor
    return true;
  }

  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt: string, attachments: import("../../helpers").Attachment[]}} param0
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
          detail: "auto",
        },
      });
    }
    return content.flat();
  }
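  // Illustrative example: with one image attachment this returns a content array like
  //   [
  //     { type: "text", text: "Describe this image" },
  //     { type: "image_url", image_url: { url: "data:image/png;base64,...", detail: "auto" } },
  //   ]
  // whereas with no attachments the plain userPrompt string is returned instead.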
  /**
   * Construct the user prompt for this model.
   * @param {{attachments: import("../../helpers").Attachment[]}} param0
   * @returns
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
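  // Illustrative shape of the returned array (OpenAI-style chat messages):
  //   [
  //     { role: "system", content: "<systemPrompt + appended context>" },
  //     ...formatted chat history entries...,
  //     { role: "user", content: "<string or content array from #generateContent>" },
  //   ]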
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `LMStudio chat: ${this.model} is not a valid or defined model for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.lmstudio.chat.completions.create({
        model: this.model,
        messages,
        temperature,
      })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage?.prompt_tokens || 0,
        completion_tokens: result.output.usage?.completion_tokens || 0,
        total_tokens: result.output.usage?.total_tokens || 0,
        outputTps: result.output.usage?.completion_tokens / result.duration,
        duration: result.duration,
      },
    };
  }
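  // Note: outputTps is completion tokens per second, e.g. 120 completion tokens over a
  // 3 second duration yields outputTps = 40 (illustrative numbers).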
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `LMStudio chat: ${this.model} is not a valid or defined model for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.lmstudio.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages
    );
    return measuredStreamRequest;
  }
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}
/**
 * Parse the base path for the LMStudio API. The base path must end in /v1 with no
 * trailing slash, but users can set it to almost anything (often incorrectly, due to
 * pasting behaviors), so we normalize it into the correct format here.
 * @param {string} basePath
 * @returns {string}
 */
function parseLMStudioBasePath(providedBasePath = "") {
  try {
    const baseURL = new URL(providedBasePath);
    const basePath = `${baseURL.origin}/v1`;
    return basePath;
  } catch (e) {
    return providedBasePath;
  }
}
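// Illustrative behavior (localhost:1234 is LMStudio's common default port):
//   parseLMStudioBasePath("http://localhost:1234")     -> "http://localhost:1234/v1"
//   parseLMStudioBasePath("http://localhost:1234/v1/") -> "http://localhost:1234/v1"
//   parseLMStudioBasePath("not-a-url")                 -> "not-a-url" (returned untouched)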
module.exports = {
  LMStudioLLM,
  parseLMStudioBasePath,
};
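
// Usage sketch (hypothetical caller inside an async function; the require path, model
// name, and prompts are placeholders, and LMSTUDIO_BASE_PATH is assumed to be set in
// the environment):
//
//   const { LMStudioLLM } = require("./index");
//   const llm = new LMStudioLLM(null, "my-local-model");
//   const messages = llm.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     contextTexts: [],
//     chatHistory: [],
//     userPrompt: "Hello!",
//   });
//   const { textResponse, metrics } = await llm.getChatCompletion(messages, {
//     temperature: llm.defaultTemp,
//   });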