You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

660 lines
26 KiB

11 months ago
  1. const { SystemSettings } = require("../../../../models/systemSettings");
  2. const { TokenManager } = require("../../../helpers/tiktoken");
  3. const tiktoken = new TokenManager();
  4. const webBrowsing = {
  5. name: "web-browsing",
  6. startupConfig: {
  7. params: {},
  8. },
  9. plugin: function () {
  10. return {
  11. name: this.name,
  12. setup(aibitat) {
  13. aibitat.function({
  14. super: aibitat,
  15. name: this.name,
  16. countTokens: (string) =>
  17. tiktoken
  18. .countFromString(string)
  19. .toString()
  20. .replace(/\B(?=(\d{3})+(?!\d))/g, ","),
  21. description:
  22. "Searches for a given query using a search engine to get better results for the user query.",
  23. examples: [
  24. {
  25. prompt: "Who won the world series today?",
  26. call: JSON.stringify({ query: "Winner of today's world series" }),
  27. },
  28. {
  29. prompt: "What is AnythingLLM?",
  30. call: JSON.stringify({ query: "AnythingLLM" }),
  31. },
  32. {
  33. prompt: "Current AAPL stock price",
  34. call: JSON.stringify({ query: "AAPL stock price today" }),
  35. },
  36. ],
  37. parameters: {
  38. $schema: "http://json-schema.org/draft-07/schema#",
  39. type: "object",
  40. properties: {
  41. query: {
  42. type: "string",
  43. description: "A search query.",
  44. },
  45. },
  46. additionalProperties: false,
  47. },
  48. handler: async function ({ query }) {
  49. try {
  50. if (query) return await this.search(query);
  51. return "There is nothing we can do. This function call returns no information.";
  52. } catch (error) {
  53. return `There was an error while calling the function. No data or response was found. Let the user know this was the error: ${error.message}`;
  54. }
  55. },
  56. /**
  57. * Use Google Custom Search Engines
  58. * Free to set up, easy to use, 100 calls/day!
  59. * https://programmablesearchengine.google.com/controlpanel/create
  60. */
  61. search: async function (query) {
  62. const provider =
  63. (await SystemSettings.get({ label: "agent_search_provider" }))
  64. ?.value ?? "unknown";
  65. let engine;
  66. switch (provider) {
  67. case "google-search-engine":
  68. engine = "_googleSearchEngine";
  69. break;
  70. case "searchapi":
  71. engine = "_searchApi";
  72. break;
  73. case "serper-dot-dev":
  74. engine = "_serperDotDev";
  75. break;
  76. case "bing-search":
  77. engine = "_bingWebSearch";
  78. break;
  79. case "serply-engine":
  80. engine = "_serplyEngine";
  81. break;
  82. case "searxng-engine":
  83. engine = "_searXNGEngine";
  84. break;
  85. case "tavily-search":
  86. engine = "_tavilySearch";
  87. break;
  88. case "duckduckgo-engine":
  89. engine = "_duckDuckGoEngine";
  90. break;
  91. default:
  92. engine = "_googleSearchEngine";
  93. }
  94. return await this[engine](query);
  95. },
  96. /**
  97. * Utility function to truncate a string to a given length for debugging
  98. * calls to the API while keeping the actual values mostly intact
  99. * @param {string} str - The string to truncate
  100. * @param {number} length - The length to truncate the string to
  101. * @returns {string} The truncated string
  102. */
  103. middleTruncate(str, length = 5) {
  104. if (str.length <= length) return str;
  105. return `${str.slice(0, length)}...${str.slice(-length)}`;
  106. },
  107. /**
  108. * Use Google Custom Search Engines
  109. * Free to set up, easy to use, 100 calls/day
  110. * https://programmablesearchengine.google.com/controlpanel/create
  111. */
  112. _googleSearchEngine: async function (query) {
  113. if (!process.env.AGENT_GSE_CTX || !process.env.AGENT_GSE_KEY) {
  114. this.super.introspect(
  115. `${this.caller}: I can't use Google searching because the user has not defined the required API keys.\nVisit: https://programmablesearchengine.google.com/controlpanel/create to create the API keys.`
  116. );
  117. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  118. }
  119. const searchURL = new URL(
  120. "https://www.googleapis.com/customsearch/v1"
  121. );
  122. searchURL.searchParams.append("key", process.env.AGENT_GSE_KEY);
  123. searchURL.searchParams.append("cx", process.env.AGENT_GSE_CTX);
  124. searchURL.searchParams.append("q", query);
  125. this.super.introspect(
  126. `${this.caller}: Searching on Google for "${
  127. query.length > 100 ? `${query.slice(0, 100)}...` : query
  128. }"`
  129. );
  130. const data = await fetch(searchURL)
  131. .then((res) => {
  132. if (res.ok) return res.json();
  133. throw new Error(
  134. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ key: this.middleTruncate(process.env.AGENT_GSE_KEY, 5), cx: this.middleTruncate(process.env.AGENT_GSE_CTX, 5), q: query })}`
  135. );
  136. })
  137. .then((searchResult) => searchResult?.items || [])
  138. .then((items) => {
  139. return items.map((item) => {
  140. return {
  141. title: item.title,
  142. link: item.link,
  143. snippet: item.snippet,
  144. };
  145. });
  146. })
  147. .catch((e) => {
  148. this.super.handlerProps.log(
  149. `${this.name}: Google Search Error: ${e.message}`
  150. );
  151. return [];
  152. });
  153. if (data.length === 0)
  154. return `No information was found online for the search query.`;
  155. const result = JSON.stringify(data);
  156. this.super.introspect(
  157. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  158. );
  159. return result;
  160. },
  161. /**
  162. * Use SearchApi
  163. * SearchApi supports multiple search engines like Google Search, Bing Search, Baidu Search, Google News, YouTube, and many more.
  164. * https://www.searchapi.io/
  165. */
  166. _searchApi: async function (query) {
  167. if (!process.env.AGENT_SEARCHAPI_API_KEY) {
  168. this.super.introspect(
  169. `${this.caller}: I can't use SearchApi searching because the user has not defined the required API key.\nVisit: https://www.searchapi.io/ to create the API key for free.`
  170. );
  171. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  172. }
  173. this.super.introspect(
  174. `${this.caller}: Using SearchApi to search for "${
  175. query.length > 100 ? `${query.slice(0, 100)}...` : query
  176. }"`
  177. );
  178. const engine = process.env.AGENT_SEARCHAPI_ENGINE;
  179. const params = new URLSearchParams({
  180. engine: engine,
  181. q: query,
  182. });
  183. const url = `https://www.searchapi.io/api/v1/search?${params.toString()}`;
  184. const { response, error } = await fetch(url, {
  185. method: "GET",
  186. headers: {
  187. Authorization: `Bearer ${process.env.AGENT_SEARCHAPI_API_KEY}`,
  188. "Content-Type": "application/json",
  189. "X-SearchApi-Source": "AnythingLLM",
  190. },
  191. })
  192. .then((res) => {
  193. if (res.ok) return res.json();
  194. throw new Error(
  195. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SEARCHAPI_API_KEY, 5), q: query })}`
  196. );
  197. })
  198. .then((data) => {
  199. return { response: data, error: null };
  200. })
  201. .catch((e) => {
  202. this.super.handlerProps.log(`SearchApi Error: ${e.message}`);
  203. return { response: null, error: e.message };
  204. });
  205. if (error)
  206. return `There was an error searching for content. ${error}`;
  207. const data = [];
  208. if (response.hasOwnProperty("knowledge_graph"))
  209. data.push(response.knowledge_graph?.description);
  210. if (response.hasOwnProperty("answer_box"))
  211. data.push(response.answer_box?.answer);
  212. response.organic_results?.forEach((searchResult) => {
  213. const { title, link, snippet } = searchResult;
  214. data.push({
  215. title,
  216. link,
  217. snippet,
  218. });
  219. });
  220. if (data.length === 0)
  221. return `No information was found online for the search query.`;
  222. const result = JSON.stringify(data);
  223. this.super.introspect(
  224. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  225. );
  226. return result;
  227. },
  228. /**
  229. * Use Serper.dev
  230. * Free to set up, easy to use, 2,500 calls for free one-time
  231. * https://serper.dev
  232. */
  233. _serperDotDev: async function (query) {
  234. if (!process.env.AGENT_SERPER_DEV_KEY) {
  235. this.super.introspect(
  236. `${this.caller}: I can't use Serper.dev searching because the user has not defined the required API key.\nVisit: https://serper.dev to create the API key for free.`
  237. );
  238. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  239. }
  240. this.super.introspect(
  241. `${this.caller}: Using Serper.dev to search for "${
  242. query.length > 100 ? `${query.slice(0, 100)}...` : query
  243. }"`
  244. );
  245. const { response, error } = await fetch(
  246. "https://google.serper.dev/search",
  247. {
  248. method: "POST",
  249. headers: {
  250. "X-API-KEY": process.env.AGENT_SERPER_DEV_KEY,
  251. "Content-Type": "application/json",
  252. },
  253. body: JSON.stringify({ q: query }),
  254. redirect: "follow",
  255. }
  256. )
  257. .then((res) => {
  258. if (res.ok) return res.json();
  259. throw new Error(
  260. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SERPER_DEV_KEY, 5), q: query })}`
  261. );
  262. })
  263. .then((data) => {
  264. return { response: data, error: null };
  265. })
  266. .catch((e) => {
  267. this.super.handlerProps.log(`Serper.dev Error: ${e.message}`);
  268. return { response: null, error: e.message };
  269. });
  270. if (error)
  271. return `There was an error searching for content. ${error}`;
  272. const data = [];
  273. if (response.hasOwnProperty("knowledgeGraph"))
  274. data.push(response.knowledgeGraph);
  275. response.organic?.forEach((searchResult) => {
  276. const { title, link, snippet } = searchResult;
  277. data.push({
  278. title,
  279. link,
  280. snippet,
  281. });
  282. });
  283. if (data.length === 0)
  284. return `No information was found online for the search query.`;
  285. const result = JSON.stringify(data);
  286. this.super.introspect(
  287. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  288. );
  289. return result;
  290. },
  291. _bingWebSearch: async function (query) {
  292. if (!process.env.AGENT_BING_SEARCH_API_KEY) {
  293. this.super.introspect(
  294. `${this.caller}: I can't use Bing Web Search because the user has not defined the required API key.\nVisit: https://portal.azure.com/ to create the API key.`
  295. );
  296. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  297. }
  298. const searchURL = new URL(
  299. "https://api.bing.microsoft.com/v7.0/search"
  300. );
  301. searchURL.searchParams.append("q", query);
  302. this.super.introspect(
  303. `${this.caller}: Using Bing Web Search to search for "${
  304. query.length > 100 ? `${query.slice(0, 100)}...` : query
  305. }"`
  306. );
  307. const searchResponse = await fetch(searchURL, {
  308. headers: {
  309. "Ocp-Apim-Subscription-Key":
  310. process.env.AGENT_BING_SEARCH_API_KEY,
  311. },
  312. })
  313. .then((res) => {
  314. if (res.ok) return res.json();
  315. throw new Error(
  316. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_BING_SEARCH_API_KEY, 5), q: query })}`
  317. );
  318. })
  319. .then((data) => {
  320. const searchResults = data.webPages?.value || [];
  321. return searchResults.map((result) => ({
  322. title: result.name,
  323. link: result.url,
  324. snippet: result.snippet,
  325. }));
  326. })
  327. .catch((e) => {
  328. this.super.handlerProps.log(
  329. `Bing Web Search Error: ${e.message}`
  330. );
  331. return [];
  332. });
  333. if (searchResponse.length === 0)
  334. return `No information was found online for the search query.`;
  335. const result = JSON.stringify(searchResponse);
  336. this.super.introspect(
  337. `${this.caller}: I found ${searchResponse.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  338. );
  339. return result;
  340. },
  341. _serplyEngine: async function (
  342. query,
  343. language = "en",
  344. hl = "us",
  345. limit = 100,
  346. device_type = "desktop",
  347. proxy_location = "US"
  348. ) {
  349. // query (str): The query to search for
  350. // hl (str): Host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages)
  351. // limit (int): The maximum number of results to return [10-100, defaults to 100]
  352. // device_type: get results based on desktop/mobile (defaults to desktop)
  353. if (!process.env.AGENT_SERPLY_API_KEY) {
  354. this.super.introspect(
  355. `${this.caller}: I can't use Serply.io searching because the user has not defined the required API key.\nVisit: https://serply.io to create the API key for free.`
  356. );
  357. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  358. }
  359. this.super.introspect(
  360. `${this.caller}: Using Serply to search for "${
  361. query.length > 100 ? `${query.slice(0, 100)}...` : query
  362. }"`
  363. );
  364. const params = new URLSearchParams({
  365. q: query,
  366. language: language,
  367. hl,
  368. gl: proxy_location.toUpperCase(),
  369. });
  370. const url = `https://api.serply.io/v1/search/${params.toString()}`;
  371. const { response, error } = await fetch(url, {
  372. method: "GET",
  373. headers: {
  374. "X-API-KEY": process.env.AGENT_SERPLY_API_KEY,
  375. "Content-Type": "application/json",
  376. "User-Agent": "anything-llm",
  377. "X-Proxy-Location": proxy_location,
  378. "X-User-Agent": device_type,
  379. },
  380. })
  381. .then((res) => {
  382. if (res.ok) return res.json();
  383. throw new Error(
  384. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_SERPLY_API_KEY, 5), q: query })}`
  385. );
  386. })
  387. .then((data) => {
  388. if (data?.message === "Unauthorized")
  389. throw new Error(
  390. "Unauthorized. Please double check your AGENT_SERPLY_API_KEY"
  391. );
  392. return { response: data, error: null };
  393. })
  394. .catch((e) => {
  395. this.super.handlerProps.log(`Serply Error: ${e.message}`);
  396. return { response: null, error: e.message };
  397. });
  398. if (error)
  399. return `There was an error searching for content. ${error}`;
  400. const data = [];
  401. response.results?.forEach((searchResult) => {
  402. const { title, link, description } = searchResult;
  403. data.push({
  404. title,
  405. link,
  406. snippet: description,
  407. });
  408. });
  409. if (data.length === 0)
  410. return `No information was found online for the search query.`;
  411. const result = JSON.stringify(data);
  412. this.super.introspect(
  413. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  414. );
  415. return result;
  416. },
  417. _searXNGEngine: async function (query) {
  418. let searchURL;
  419. if (!process.env.AGENT_SEARXNG_API_URL) {
  420. this.super.introspect(
  421. `${this.caller}: I can't use SearXNG searching because the user has not defined the required base URL.\nPlease set this value in the agent skill settings.`
  422. );
  423. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  424. }
  425. try {
  426. searchURL = new URL(process.env.AGENT_SEARXNG_API_URL);
  427. searchURL.searchParams.append("q", encodeURIComponent(query));
  428. searchURL.searchParams.append("format", "json");
  429. } catch (e) {
  430. this.super.handlerProps.log(`SearXNG Search: ${e.message}`);
  431. this.super.introspect(
  432. `${this.caller}: I can't use SearXNG searching because the url provided is not a valid URL.`
  433. );
  434. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  435. }
  436. this.super.introspect(
  437. `${this.caller}: Using SearXNG to search for "${
  438. query.length > 100 ? `${query.slice(0, 100)}...` : query
  439. }"`
  440. );
  441. const { response, error } = await fetch(searchURL.toString(), {
  442. method: "GET",
  443. headers: {
  444. "Content-Type": "application/json",
  445. "User-Agent": "anything-llm",
  446. },
  447. })
  448. .then((res) => {
  449. if (res.ok) return res.json();
  450. throw new Error(
  451. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ url: searchURL.toString() })}`
  452. );
  453. })
  454. .then((data) => {
  455. return { response: data, error: null };
  456. })
  457. .catch((e) => {
  458. this.super.handlerProps.log(
  459. `SearXNG Search Error: ${e.message}`
  460. );
  461. return { response: null, error: e.message };
  462. });
  463. if (error)
  464. return `There was an error searching for content. ${error}`;
  465. const data = [];
  466. response.results?.forEach((searchResult) => {
  467. const { url, title, content, publishedDate } = searchResult;
  468. data.push({
  469. title,
  470. link: url,
  471. snippet: content,
  472. publishedDate,
  473. });
  474. });
  475. if (data.length === 0)
  476. return `No information was found online for the search query.`;
  477. const result = JSON.stringify(data);
  478. this.super.introspect(
  479. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  480. );
  481. return result;
  482. },
  483. _tavilySearch: async function (query) {
  484. if (!process.env.AGENT_TAVILY_API_KEY) {
  485. this.super.introspect(
  486. `${this.caller}: I can't use Tavily searching because the user has not defined the required API key.\nVisit: https://tavily.com/ to create the API key.`
  487. );
  488. return `Search is disabled and no content was found. This functionality is disabled because the user has not set it up yet.`;
  489. }
  490. this.super.introspect(
  491. `${this.caller}: Using Tavily to search for "${
  492. query.length > 100 ? `${query.slice(0, 100)}...` : query
  493. }"`
  494. );
  495. const url = "https://api.tavily.com/search";
  496. const { response, error } = await fetch(url, {
  497. method: "POST",
  498. headers: {
  499. "Content-Type": "application/json",
  500. },
  501. body: JSON.stringify({
  502. api_key: process.env.AGENT_TAVILY_API_KEY,
  503. query: query,
  504. }),
  505. })
  506. .then((res) => {
  507. if (res.ok) return res.json();
  508. throw new Error(
  509. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ auth: this.middleTruncate(process.env.AGENT_TAVILY_API_KEY, 5), q: query })}`
  510. );
  511. })
  512. .then((data) => {
  513. return { response: data, error: null };
  514. })
  515. .catch((e) => {
  516. this.super.handlerProps.log(
  517. `Tavily Search Error: ${e.message}`
  518. );
  519. return { response: null, error: e.message };
  520. });
  521. if (error)
  522. return `There was an error searching for content. ${error}`;
  523. const data = [];
  524. response.results?.forEach((searchResult) => {
  525. const { title, url, content } = searchResult;
  526. data.push({
  527. title,
  528. link: url,
  529. snippet: content,
  530. });
  531. });
  532. if (data.length === 0)
  533. return `No information was found online for the search query.`;
  534. const result = JSON.stringify(data);
  535. this.super.introspect(
  536. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  537. );
  538. return result;
  539. },
  540. _duckDuckGoEngine: async function (query) {
  541. this.super.introspect(
  542. `${this.caller}: Using DuckDuckGo to search for "${
  543. query.length > 100 ? `${query.slice(0, 100)}...` : query
  544. }"`
  545. );
  546. const searchURL = new URL("https://html.duckduckgo.com/html");
  547. searchURL.searchParams.append("q", query);
  548. const response = await fetch(searchURL.toString())
  549. .then((res) => {
  550. if (res.ok) return res.text();
  551. throw new Error(
  552. `${res.status} - ${res.statusText}. params: ${JSON.stringify({ url: searchURL.toString() })}`
  553. );
  554. })
  555. .catch((e) => {
  556. this.super.handlerProps.log(
  557. `DuckDuckGo Search Error: ${e.message}`
  558. );
  559. return null;
  560. });
  561. if (!response) return `There was an error searching DuckDuckGo.`;
  562. const html = response;
  563. const data = [];
  564. const results = html.split('<div class="result results_links');
  565. // Skip first element since it's before the first result
  566. for (let i = 1; i < results.length; i++) {
  567. const result = results[i];
  568. // Extract title
  569. const titleMatch = result.match(
  570. /<a[^>]*class="result__a"[^>]*>(.*?)<\/a>/
  571. );
  572. const title = titleMatch ? titleMatch[1].trim() : "";
  573. // Extract URL
  574. const urlMatch = result.match(
  575. /<a[^>]*class="result__a"[^>]*href="([^"]*)">/
  576. );
  577. const link = urlMatch ? urlMatch[1] : "";
  578. // Extract snippet
  579. const snippetMatch = result.match(
  580. /<a[^>]*class="result__snippet"[^>]*>(.*?)<\/a>/
  581. );
  582. const snippet = snippetMatch
  583. ? snippetMatch[1].replace(/<\/?b>/g, "").trim()
  584. : "";
  585. if (title && link && snippet) {
  586. data.push({ title, link, snippet });
  587. }
  588. }
  589. if (data.length === 0) {
  590. return `No information was found online for the search query.`;
  591. }
  592. const result = JSON.stringify(data);
  593. this.super.introspect(
  594. `${this.caller}: I found ${data.length} results - reviewing the results now. (~${this.countTokens(result)} tokens)`
  595. );
  596. return result;
  597. },
  598. });
  599. },
  600. };
  601. },
  602. };
  603. module.exports = {
  604. webBrowsing,
  605. };