const { TokenManager } = require("../tiktoken");

/**
 * @typedef {import("openai/streaming").Stream<import("openai").OpenAI.ChatCompletionChunk>} OpenAICompatibleStream
 * @typedef {(reportedUsage: {[key: string]: number, completion_tokens?: number, prompt_tokens?: number}) => StreamMetrics} EndMeasurementFunction
 * @typedef {Array<{content: string}>} Messages
 */

/**
 * @typedef {Object} StreamMetrics
 * @property {number} prompt_tokens - the number of tokens in the prompt
 * @property {number} completion_tokens - the number of tokens in the completion
 * @property {number} total_tokens - the total number of tokens
 * @property {number} outputTps - the output tokens generated per second
 * @property {number} duration - the duration of the stream (in seconds)
 */

/**
 * @typedef {Object} MonitoredStream
 * @property {number} start - the start time of the stream (ms since epoch)
 * @property {number} duration - the duration of the stream (in seconds)
 * @property {StreamMetrics} metrics - the metrics of the stream
 * @property {EndMeasurementFunction} endMeasurement - the method to end the stream and calculate the metrics
 */
class LLMPerformanceMonitor {
  static tokenManager = new TokenManager();

  /**
   * Counts the tokens in the messages.
   * @param {Array<{content: string}>} messages - the messages sent to the LLM so we can estimate the prompt tokens, since most providers do not return this on stream
   * @returns {number}
   */
  static countTokens(messages = []) {
    try {
      return this.tokenManager.statsFrom(messages);
    } catch (e) {
      // If token counting fails, fall back to 0 rather than breaking the stream.
      return 0;
    }
  }
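
  // Illustrative call (not part of the module):
  //   LLMPerformanceMonitor.countTokens([{ content: "Hello world" }])
  // returns the TokenManager's token estimate for the messages, or 0 on failure.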

  /**
   * Awaits an already-invoked async call (a promise) and measures the duration
   * (in seconds) until it resolves.
   * @param {Promise<any>} func - the promise to await
   * @returns {Promise<{output: any, duration: number}>}
   */
  static measureAsyncFunction(func) {
    return (async () => {
      const start = Date.now();
      const output = await func; // is a promise
      const end = Date.now();
      return { output, duration: (end - start) / 1000 };
    })();
  }
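
  // Usage sketch (illustrative; `client.chat.completions.create` stands in for
  // any promise-returning LLM call and is not part of this module):
  //   const { output, duration } = await LLMPerformanceMonitor.measureAsyncFunction(
  //     client.chat.completions.create({ model: "gpt-4o", messages })
  //   );
  //   // `output` is the resolved value; `duration` is the elapsed seconds.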

  /**
   * Wraps a completion stream and attaches a start time and duration property to the stream.
   * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics.
   * @param {Promise<OpenAICompatibleStream>} func
   * @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
   * @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
   * @returns {Promise<MonitoredStream>}
   */
  static async measureStream(
    func,
    messages = [],
    runPromptTokenCalculation = true
  ) {
    const stream = await func;
    stream.start = Date.now();
    stream.duration = 0;
    stream.metrics = {
      completion_tokens: 0,
      prompt_tokens: runPromptTokenCalculation ? this.countTokens(messages) : 0,
      total_tokens: 0,
      outputTps: 0,
      duration: 0,
    };
    stream.endMeasurement = (reportedUsage = {}) => {
      const end = Date.now();
      const duration = (end - stream.start) / 1000;
      // Merge any provider-reported usage over the local estimates so the
      // derived totals below use the most accurate numbers available.
      stream.metrics = {
        ...stream.metrics,
        ...reportedUsage,
      };
      stream.metrics.total_tokens =
        stream.metrics.prompt_tokens + (stream.metrics.completion_tokens || 0);
      stream.metrics.outputTps = stream.metrics.completion_tokens / duration;
      stream.metrics.duration = duration;
      return stream.metrics;
    };
    return stream;
  }
}

module.exports = {
  LLMPerformanceMonitor,
};
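
/*
 * End-to-end usage sketch (illustrative; `client` is assumed to be an
 * OpenAI-compatible SDK instance and is not part of this module):
 *
 *   const monitored = await LLMPerformanceMonitor.measureStream(
 *     client.chat.completions.create({ model: "gpt-4o", messages, stream: true }),
 *     messages
 *   );
 *   let text = "";
 *   for await (const chunk of monitored) {
 *     text += chunk.choices?.[0]?.delta?.content ?? "";
 *   }
 *   // Pass provider-reported usage (if any) so it overrides the local estimates.
 *   const metrics = monitored.endMeasurement({ completion_tokens: 123 });
 *   // metrics: { prompt_tokens, completion_tokens, total_tokens, outputTps, duration }
 */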