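// Azure OpenAI chat provider. Wraps the @azure/openai SDK behind the shared
// provider interface: prompt construction, (streaming) chat completions, and
// embedding passthrough to the configured embedder.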
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { v4: uuidv4 } = require("uuid");
const {
  LLMPerformanceMonitor,
} = require("../../helpers/chat/LLMPerformanceMonitor");
const {
  writeResponseChunk,
  clientAbortedHandler,
  formatChatHistory,
} = require("../../helpers/chat/responses");

class AzureOpenAiLLM {
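  /**
   * @param {object|null} embedder - embedding engine to use; falls back to NativeEmbedder.
   * @param {string|null} modelPreference - Azure deployment name; falls back to OPEN_MODEL_PREF.
   */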
  constructor(embedder = null, modelPreference = null) {
    const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
    if (!process.env.AZURE_OPENAI_ENDPOINT)
      throw new Error("No Azure API endpoint was set.");
    if (!process.env.AZURE_OPENAI_KEY)
      throw new Error("No Azure API key was set.");

    this.apiVersion = "2024-12-01-preview";
    this.openai = new OpenAIClient(
      process.env.AZURE_OPENAI_ENDPOINT,
      new AzureKeyCredential(process.env.AZURE_OPENAI_KEY),
      { apiVersion: this.apiVersion }
    );
    this.model = modelPreference ?? process.env.OPEN_MODEL_PREF;
    this.isOTypeModel = process.env.AZURE_OPENAI_MODEL_TYPE === "reasoning";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.#log(
      `Initialized. Model "${this.model}" @ ${this.promptWindowLimit()} tokens.\nAPI-Version: ${this.apiVersion}.\nModel Type: ${this.isOTypeModel ? "reasoning" : "default"}`
    );
  }
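  // Prefixed console logger for this provider.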
  #log(text, ...args) {
    console.log(`\x1b[32m[AzureOpenAi]\x1b[0m ${text}`, ...args);
  }
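  // Formats retrieved context texts into a block appended to the system prompt.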
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }
  streamingEnabled() {
    // Streaming of reasoning models is not supported.
    if (this.isOTypeModel) {
      this.#log(
        "Streaming will be disabled. AZURE_OPENAI_MODEL_TYPE is set to 'reasoning'."
      );
      return false;
    }
    return "streamGetChatCompletion" in this;
  }
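  // Static variant so the prompt window can be read without instantiating the class.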
  static promptWindowLimit(_modelName) {
    return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
      ? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
      : 4096;
  }

  // Ensure the user selected a proper value for the token limit.
  // It could be any of these: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
  // If undefined, assume the lowest end (4096).
  promptWindowLimit() {
    return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
      ? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
      : 4096;
  }
  isValidChatCompletionModel(_modelName = "") {
    // Azure users name their "models" as deployments, and a deployment can have
    // any name, so we rely on the user to supply the correct deployment since
    // only they would know it.
    return true;
  }
  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt: string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        imageUrl: {
          url: attachment.contentString,
        },
      });
    }
    return content.flat();
  }
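  /**
   * Builds the full message array for a chat: the system prompt (sent with the
   * "user" role for reasoning models), formatted chat history, and the current
   * user prompt with any attachments.
   */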
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [], // This is the specific attachment for only this prompt
  }) {
    const prompt = {
      role: this.isOTypeModel ? "user" : "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };

    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }
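  /**
   * Runs a non-streaming chat completion and returns the text plus timing
   * metrics. Temperature is omitted for reasoning models.
   */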
  async getChatCompletion(messages = [], { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        "No OPEN_MODEL_PREF ENV defined. This must be the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.getChatCompletions(this.model, messages, {
        ...(this.isOTypeModel ? {} : { temperature }),
      })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: result.output.usage.promptTokens || 0,
        completion_tokens: result.output.usage.completionTokens || 0,
        total_tokens: result.output.usage.totalTokens || 0,
        outputTps: result.output.usage.completionTokens / result.duration,
        duration: result.duration,
      },
    };
  }
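  /**
   * Starts a streaming chat completion wrapped with performance tracking.
   * Returns a MonitoredStream to be consumed by handleStream().
   */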
  async streamGetChatCompletion(messages = [], { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        "No OPEN_MODEL_PREF ENV defined. This must be the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      await this.openai.streamChatCompletions(this.model, messages, {
        ...(this.isOTypeModel ? {} : { temperature }),
        n: 1,
      }),
      messages
    );
    return measuredStreamRequest;
  }
  /**
   * Handles the stream response from the AzureOpenAI API.
   * Azure does not return usage metrics in the stream response, but since each
   * streamed message is roughly one token we can estimate the completion tokens
   * by counting the number of messages.
   * @param {Object} response - the response object
   * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream - the stream response from the AzureOpenAI API w/tracking
   * @param {Object} responseProps - the response properties
   * @returns {Promise<string>}
   */
  handleStream(response, stream, responseProps) {
    const { uuid = uuidv4(), sources = [] } = responseProps;

    return new Promise(async (resolve) => {
      let fullText = "";
      let usage = {
        completion_tokens: 0,
      };

      // Establish listener to early-abort a streaming response
      // in case things go sideways or the user does not like the response.
      // We preserve the generated text but continue as if chat was completed
      // to preserve previously generated content.
      const handleAbort = () => {
        stream?.endMeasurement(usage);
        clientAbortedHandler(resolve, fullText);
      };
      response.on("close", handleAbort);

      for await (const event of stream) {
        for (const choice of event.choices) {
          const delta = choice.delta?.content;
          if (!delta) continue;
          fullText += delta;
          usage.completion_tokens++;

          writeResponseChunk(response, {
            uuid,
            sources: [],
            type: "textResponseChunk",
            textResponse: delta,
            close: false,
            error: false,
          });
        }
      }

      writeResponseChunk(response, {
        uuid,
        sources,
        type: "textResponseChunk",
        textResponse: "",
        close: true,
        error: false,
      });
      response.removeListener("close", handleAbort);
      stream?.endMeasurement(usage);
      resolve(fullText);
    });
  }
  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }
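  // Compresses the constructed message array against this.limits so long chat
  // histories still fit inside the model's prompt window.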
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}
module.exports = {
  AzureOpenAiLLM,
};
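// Example usage (illustrative sketch only; the require path is assumed, and
// AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_KEY, and OPEN_MODEL_PREF must be set to a
// valid endpoint, key, and deployment name):
//
//   const { AzureOpenAiLLM } = require("./index");
//   const llm = new AzureOpenAiLLM();
//   const messages = llm.constructPrompt({
//     systemPrompt: "You are a helpful assistant.",
//     contextTexts: [],
//     chatHistory: [],
//     userPrompt: "Say hello in one sentence.",
//   });
//   llm
//     .getChatCompletion(messages, { temperature: llm.defaultTemp })
//     .then((res) => console.log(res?.textResponse, res?.metrics));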