feat: Add WebSearchTool and URLFetcherTool for web content access
- Add WebSearchTool with DuckDuckGo integration for web search
- Provides titles, snippets, and links for current information
- Add URLFetcherTool for AI-powered web content analysis
- Fetches and converts HTML content to markdown
- Processes content using AI with user-provided prompts
- Includes 15-minute caching for efficiency
- Uses queryQuick for fast content analysis
- Register both tools in the tools registry
- Update documentation to reflect new web capabilities
This commit is contained in:
parent
3d963fb4a6
commit
e3d903e7bc
@ -92,6 +92,7 @@ Standardized tool interface enabling:
|
||||
- File operations (read, write, edit)
|
||||
- Shell command execution
|
||||
- Code searching and analysis
|
||||
- Web search and content analysis
|
||||
- Task management and planning
|
||||
- External tool integration via MCP
|
||||
|
||||
@ -151,6 +152,7 @@ Permission checks are mandatory for potentially dangerous operations, with clear
|
||||
- **File Manipulation**: Read, write, and edit files with validation
|
||||
- **Command Execution**: Run shell commands with output capture
|
||||
- **Search & Analysis**: Find code patterns and dependencies
|
||||
- **Web Search & Content**: Search the web and analyze web content with AI
|
||||
- **Task Management**: Plan and track development tasks
|
||||
|
||||
### AI-Enhanced Features
|
||||
|
||||
@ -120,6 +120,39 @@ export abstract class Tool {
|
||||
- Size and permission information
|
||||
- Pattern filtering
|
||||
|
||||
#### WebSearchTool
|
||||
- **Purpose**: Search the web for current information
|
||||
- **Key Features**:
|
||||
- DuckDuckGo integration for web search
|
||||
- Returns all available results (no artificial limits)
|
||||
- Provides titles, snippets, and links
|
||||
- Fast search results for current events and data
|
||||
- **Use Cases**: Finding recent news, current documentation, product updates
|
||||
- **Implementation**: HTML parsing of DuckDuckGo search results
|
||||
|
||||
#### URLFetcherTool
|
||||
- **Purpose**: Fetch and analyze web content using AI
|
||||
- **Key Features**:
|
||||
- Fetches content from any URL
|
||||
- Converts HTML to clean markdown
|
||||
- AI-powered content analysis based on user prompts
|
||||
- 15-minute cache of fetched page content (stored as converted markdown); the AI analysis itself is always run fresh
|
||||
- Smart content truncation for large pages
|
||||
- Uses quick model for fast analysis
|
||||
- **Input Schema**:
|
||||
```typescript
|
||||
{
|
||||
url: string (URI format, required)
|
||||
prompt: string (analysis instruction, required)
|
||||
}
|
||||
```
|
||||
- **Use Cases**:
|
||||
- Summarizing articles or documentation
|
||||
- Extracting specific information from web pages
|
||||
- Analyzing pricing, features, or technical requirements
|
||||
- Content research and analysis
|
||||
- **Implementation**: Combines web fetching with AI analysis using queryQuick
|
||||
|
||||
### 3. System Execution Tools
|
||||
|
||||
#### BashTool
|
||||
@ -173,14 +206,6 @@ export abstract class Tool {
|
||||
- Error propagation
|
||||
- **Implementation**: JSON-RPC over stdio/SSE
|
||||
|
||||
#### WebFetchTool
|
||||
- **Purpose**: Fetch and process web content
|
||||
- **Key Features**:
|
||||
- HTML to markdown conversion
|
||||
- Content extraction
|
||||
- Caching support
|
||||
- Redirect handling
|
||||
|
||||
## Tool Implementation Guide
|
||||
|
||||
### Creating a New Tool
|
||||
|
||||
@ -76,12 +76,15 @@
|
||||
"lru-cache": "^11.1.0",
|
||||
"marked": "^15.0.12",
|
||||
"nanoid": "^5.1.5",
|
||||
"node-fetch": "^3.3.2",
|
||||
"node-html-parser": "^7.0.1",
|
||||
"openai": "^4.104.0",
|
||||
"react": "18.3.1",
|
||||
"semver": "^7.7.2",
|
||||
"shell-quote": "^1.8.3",
|
||||
"spawn-rx": "^5.1.2",
|
||||
"tsx": "^4.20.3",
|
||||
"turndown": "^7.2.1",
|
||||
"undici": "^7.11.0",
|
||||
"wrap-ansi": "^9.0.0",
|
||||
"zod": "^3.25.76",
|
||||
|
||||
@ -16,6 +16,8 @@ import { NotebookEditTool } from './tools/NotebookEditTool/NotebookEditTool'
|
||||
import { NotebookReadTool } from './tools/NotebookReadTool/NotebookReadTool'
|
||||
import { ThinkTool } from './tools/ThinkTool/ThinkTool'
|
||||
import { TodoWriteTool } from './tools/TodoWriteTool/TodoWriteTool'
|
||||
import { WebSearchTool } from './tools/WebSearchTool/WebSearchTool'
|
||||
import { URLFetcherTool } from './tools/URLFetcherTool/URLFetcherTool'
|
||||
import { getMCPTools } from './services/mcpClient'
|
||||
import { memoize } from 'lodash-es'
|
||||
|
||||
@ -38,6 +40,8 @@ export const getAllTools = (): Tool[] => {
|
||||
NotebookEditTool as unknown as Tool,
|
||||
ThinkTool as unknown as Tool,
|
||||
TodoWriteTool as unknown as Tool,
|
||||
WebSearchTool as unknown as Tool,
|
||||
URLFetcherTool as unknown as Tool,
|
||||
...ANT_ONLY_TOOLS,
|
||||
]
|
||||
}
|
||||
|
||||
178
src/tools/URLFetcherTool/URLFetcherTool.tsx
Normal file
178
src/tools/URLFetcherTool/URLFetcherTool.tsx
Normal file
@ -0,0 +1,178 @@
|
||||
import { Box, Text } from 'ink'
|
||||
import React from 'react'
|
||||
import { z } from 'zod'
|
||||
import fetch from 'node-fetch'
|
||||
import { Cost } from '../../components/Cost'
|
||||
import { FallbackToolUseRejectedMessage } from '../../components/FallbackToolUseRejectedMessage'
|
||||
import { Tool, ToolUseContext } from '../../Tool'
|
||||
import { DESCRIPTION, TOOL_NAME_FOR_PROMPT } from './prompt'
|
||||
import { convertHtmlToMarkdown } from './htmlToMarkdown'
|
||||
import { urlCache } from './cache'
|
||||
import { queryQuick } from '../../services/claude'
|
||||
|
||||
// Field schemas are named separately so each can be read (and reused) on its own.
const urlField = z.string().url().describe('The URL to fetch content from')
const promptField = z.string().describe('The prompt to run on the fetched content')

/** Strict input contract for the tool: exactly `url` and `prompt`, no extra keys. */
const inputSchema = z.strictObject({
  url: urlField,
  prompt: promptField,
})

/** Validated tool input, derived from the schema so the two cannot drift apart. */
type Input = z.infer<typeof inputSchema>

/** Result handed to the renderers after a fetch-and-analyze run. */
type Output = {
  /** The URL that was actually requested. */
  url: string
  /** True when the page content was served from the in-memory cache. */
  fromCache: boolean
  /** Text produced by the model; empty when processing failed. */
  aiAnalysis: string
}
|
||||
|
||||
/**
 * Upgrades a plain-HTTP URL to HTTPS; any other URL is returned unchanged.
 *
 * URL schemes are case-insensitive, so the match ignores case
 * (`HTTP://host` is upgraded just like `http://host`). Only a scheme at the
 * very start of the string is rewritten; `http://` appearing later (for
 * example inside a query string) is left alone.
 *
 * @param url - The URL supplied by the caller.
 * @returns The same URL with its leading `http://` replaced by `https://`.
 */
function normalizeUrl(url: string): string {
  return url.replace(/^http:\/\//i, 'https://')
}
|
||||
|
||||
/** Milliseconds allowed for the whole download (headers and body) before it is aborted. */
const FETCH_TIMEOUT_MS = 30000

/** Maximum number of characters of page content forwarded to the model (~15k tokens approximately). */
const MAX_CONTENT_LENGTH = 50000

/**
 * Downloads `targetUrl` and returns its body converted to markdown.
 *
 * The abort timer stays armed until the body has been fully read, and is
 * always cleared in `finally`, so neither a stalled body nor a failed request
 * leaves a pending timer behind.
 *
 * @param targetUrl - Absolute URL to request.
 * @returns The response body converted to markdown.
 * @throws Error when the response status is not OK, the content type is
 *   neither `text/*` nor `application/*`, the request is aborted by the
 *   timeout, or the conversion fails.
 */
async function fetchPageAsMarkdown(targetUrl: string): Promise<string> {
  const abortController = new AbortController()
  const timeout = setTimeout(() => abortController.abort(), FETCH_TIMEOUT_MS)

  try {
    const response = await fetch(targetUrl, {
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; URLFetcher/1.0)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
      },
      signal: abortController.signal,
      redirect: 'follow',
    })

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`)
    }

    const contentType = response.headers.get('content-type') || ''
    if (!contentType.includes('text/') && !contentType.includes('application/')) {
      throw new Error(`Unsupported content type: ${contentType}`)
    }

    // Reading the body is still covered by the abort signal above.
    const html = await response.text()
    return convertHtmlToMarkdown(html)
  } finally {
    clearTimeout(timeout)
  }
}

/**
 * Tool that fetches a web page, converts it to markdown and asks the quick
 * model to answer the caller's prompt about it.
 *
 * Page content is cached per normalized URL; the model analysis is performed
 * on every call, even when the content came from the cache.
 */
export const URLFetcherTool = {
  name: TOOL_NAME_FOR_PROMPT,
  async description() {
    return DESCRIPTION
  },
  userFacingName: () => 'URL Fetcher',
  inputSchema,
  isReadOnly: () => true,
  isConcurrencySafe: () => true,
  async isEnabled() {
    return true
  },
  needsPermissions() {
    return false
  },
  async prompt() {
    return DESCRIPTION
  },
  renderToolUseMessage({ url, prompt }: Input) {
    return `Fetching content from ${url} and analyzing with prompt: "${prompt}"`
  },
  renderToolUseRejectedMessage() {
    return <FallbackToolUseRejectedMessage />
  },
  renderToolResultMessage(output: Output) {
    const statusText = output.fromCache ? 'from cache' : 'fetched'

    return (
      <Box justifyContent="space-between" width="100%">
        <Box flexDirection="row">
          <Text> ⎿ Content </Text>
          <Text bold>{statusText} </Text>
          <Text>and analyzed</Text>
        </Box>
        <Cost costUSD={0} durationMs={0} debug={false} />
      </Box>
    )
  },
  renderResultForAssistant(output: Output) {
    if (!output.aiAnalysis.trim()) {
      return `No content could be analyzed from URL: ${output.url}`
    }

    return output.aiAnalysis
  },
  /**
   * Fetches (or reuses cached) content for `url`, runs `prompt` against it and
   * yields a single `result` event. Failures are reported through
   * `resultForAssistant` rather than thrown.
   */
  async *call({ url, prompt }: Input, _context: ToolUseContext) {
    const normalizedUrl = normalizeUrl(url)

    try {
      let content: string
      let fromCache = false

      // Check cache first
      const cachedContent = urlCache.get(normalizedUrl)
      if (cachedContent) {
        content = cachedContent
        fromCache = true
      } else {
        content = await fetchPageAsMarkdown(normalizedUrl)
        urlCache.set(normalizedUrl, content)
      }

      // Truncate content if too large (keep within reasonable token limits)
      const truncatedContent = content.length > MAX_CONTENT_LENGTH
        ? content.substring(0, MAX_CONTENT_LENGTH) + '\n\n[Content truncated due to length]'
        : content

      // AI Analysis - always performed fresh, even with cached content
      const systemPrompt = [
        'You are analyzing web content based on a user\'s specific request.',
        'The content has been extracted from a webpage and converted to markdown.',
        'Provide a focused response that directly addresses the user\'s prompt.',
      ]

      const userPrompt = `Here is the content from ${normalizedUrl}:

${truncatedContent}

User request: ${prompt}`

      const aiResponse = await queryQuick({
        systemPrompt,
        userPrompt,
        enablePromptCaching: false,
      })

      const output: Output = {
        url: normalizedUrl,
        fromCache,
        aiAnalysis: aiResponse.message.content[0]?.text || 'Unable to analyze content',
      }

      yield {
        type: 'result' as const,
        resultForAssistant: this.renderResultForAssistant(output),
        data: output,
      }
    } catch (error: unknown) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const reason = error instanceof Error ? error.message : String(error)
      const output: Output = {
        url: normalizedUrl,
        fromCache: false,
        aiAnalysis: '',
      }

      yield {
        type: 'result' as const,
        resultForAssistant: `Error processing URL ${normalizedUrl}: ${reason}`,
        data: output,
      }
    }
  },
} satisfies Tool<typeof inputSchema, Output>
|
||||
55
src/tools/URLFetcherTool/cache.ts
Normal file
55
src/tools/URLFetcherTool/cache.ts
Normal file
@ -0,0 +1,55 @@
|
||||
/** A cached page body together with the time it was stored. */
interface CacheEntry {
  content: string
  /** `Date.now()` value at insertion time; used to decide expiry. */
  timestamp: number
}

/**
 * In-memory cache of fetched content keyed by URL.
 *
 * Entries expire 15 minutes after they are stored. Expired entries are dropped
 * lazily on lookup and also swept by a background timer every 5 minutes.
 */
class URLCache {
  private cache = new Map<string, CacheEntry>()
  private readonly CACHE_DURATION = 15 * 60 * 1000 // 15 minutes in milliseconds

  constructor() {
    // Auto-clean expired entries every 5 minutes
    const sweepTimer = setInterval(() => {
      this.cleanExpired()
    }, 5 * 60 * 1000)
    // The sweep is housekeeping only: it must not keep the process alive once
    // everything else has finished.
    sweepTimer.unref()
  }

  /** Stores `content` for `url`, replacing any previous entry and restarting its lifetime. */
  set(url: string, content: string): void {
    this.cache.set(url, {
      content,
      timestamp: Date.now(),
    })
  }

  /**
   * Looks up the cached content for `url`.
   *
   * @returns The cached content, or `null` when nothing is stored or the entry has expired.
   */
  get(url: string): string | null {
    const entry = this.cache.get(url)
    if (!entry) {
      return null
    }

    // Check if entry has expired
    if (Date.now() - entry.timestamp > this.CACHE_DURATION) {
      this.cache.delete(url)
      return null
    }

    return entry.content
  }

  /** Removes every entry. */
  clear(): void {
    this.cache.clear()
  }

  /** Deletes all entries older than the cache duration. */
  private cleanExpired(): void {
    const now = Date.now()
    for (const [url, entry] of this.cache.entries()) {
      if (now - entry.timestamp > this.CACHE_DURATION) {
        this.cache.delete(url)
      }
    }
  }
}

// Export singleton instance
export const urlCache = new URLCache()
|
||||
55
src/tools/URLFetcherTool/htmlToMarkdown.ts
Normal file
55
src/tools/URLFetcherTool/htmlToMarkdown.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import TurndownService from 'turndown'
|
||||
|
||||
/** Replacement that drops a node entirely from the markdown output. */
const discardNode = (): string => ''

/**
 * Shared HTML-to-markdown converter: ATX headings, fenced code blocks, `-`
 * bullets, `_` emphasis and `**` strong.
 */
const turndownService = new TurndownService({
  headingStyle: 'atx',
  hr: '---',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
  fence: '```',
  emDelimiter: '_',
  strongDelimiter: '**'
})

// Configure rules to handle common HTML elements
turndownService
  // Non-content elements produce no output.
  .addRule('removeScripts', {
    filter: ['script', 'style', 'noscript'],
    replacement: discardNode
  })
  // Node type 8 is a comment node.
  .addRule('removeComments', {
    filter: (candidate) => candidate.nodeType === 8,
    replacement: discardNode
  })
  // Links without a usable target (missing href, javascript:, in-page anchor)
  // are reduced to their text; everything else becomes an inline link.
  .addRule('cleanLinks', {
    filter: 'a',
    replacement: (text, anchor) => {
      const target = anchor.getAttribute('href')
      if (target && !target.startsWith('javascript:') && !target.startsWith('#')) {
        return `[${text}](${target})`
      }
      return text
    }
  })
|
||||
|
||||
/**
 * Converts an HTML document (or fragment) to markdown.
 *
 * Script blocks, style blocks and HTML comments are stripped before the
 * conversion. Whitespace inside the HTML is deliberately left alone so that
 * the converter can apply its own collapsing and preformatted content keeps
 * its line breaks and indentation.
 *
 * The resulting markdown is tidied by removing trailing spaces/tabs from each
 * line and collapsing runs of three or more newlines to a single blank line.
 * Only horizontal whitespace is trimmed: a `\s`-based per-line trim would also
 * match the newlines themselves, deleting blank lines and gluing paragraphs
 * together.
 *
 * @param html - Raw HTML text.
 * @returns The markdown rendering of `html`.
 * @throws Error with a "Failed to convert HTML to markdown" prefix when the conversion fails.
 */
export function convertHtmlToMarkdown(html: string): string {
  try {
    // Clean up the HTML before conversion
    const cleanHtml = html
      .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') // Remove script tags
      .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '') // Remove style tags
      .replace(/<!--[\s\S]*?-->/g, '') // Remove HTML comments
      .trim()

    const markdown = turndownService.turndown(cleanHtml)

    // Clean up the resulting markdown
    return markdown
      .replace(/[ \t]+$/gm, '') // Remove trailing spaces/tabs on each line (never newlines)
      .replace(/\n{3,}/g, '\n\n') // Remove excessive line breaks
      .trim()
  } catch (error) {
    throw new Error(`Failed to convert HTML to markdown: ${error instanceof Error ? error.message : String(error)}`)
  }
}
|
||||
17
src/tools/URLFetcherTool/prompt.ts
Normal file
17
src/tools/URLFetcherTool/prompt.ts
Normal file
@ -0,0 +1,17 @@
|
||||
/** Name under which the tool is exposed to the model. */
export const TOOL_NAME_FOR_PROMPT = 'URLFetcher'

/**
 * Model-facing description of the tool and its usage notes.
 *
 * The redirect note matches the implementation, which follows redirects itself
 * instead of reporting them back for a second request.
 */
export const DESCRIPTION = `- Fetches content from a specified URL and processes it using an AI model
- Takes a URL and a prompt as input
- Fetches the URL content, converts HTML to markdown
- Processes the content with the prompt using a small, fast model
- Returns the model's response about the content
- Use this tool when you need to retrieve and analyze web content

Usage notes:
- IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. All MCP-provided tools start with "mcp__".
- The URL must be a fully-formed valid URL (e.g., https://example.com)
- HTTP URLs will be automatically upgraded to HTTPS
- The prompt should describe what information you want to extract from the page
- This tool is read-only and does not modify any files
- Results may be summarized if the content is very large
- Includes a self-cleaning 15-minute cache for faster responses when repeatedly accessing the same URL
- Redirects are followed automatically; the content of the final page is what gets analyzed, so no follow-up request is needed.`
|
||||
103
src/tools/WebSearchTool/WebSearchTool.tsx
Normal file
103
src/tools/WebSearchTool/WebSearchTool.tsx
Normal file
@ -0,0 +1,103 @@
|
||||
import { Box, Text } from 'ink'
|
||||
import React from 'react'
|
||||
import { z } from 'zod'
|
||||
import { Cost } from '../../components/Cost'
|
||||
import { FallbackToolUseRejectedMessage } from '../../components/FallbackToolUseRejectedMessage'
|
||||
import { Tool, ToolUseContext } from '../../Tool'
|
||||
import { DESCRIPTION, TOOL_NAME_FOR_PROMPT } from './prompt'
|
||||
import { SearchResult, searchProviders } from './searchProviders'
|
||||
|
||||
// The single field is named separately so its description sits next to its type.
const queryField = z.string().describe('The search query')

/** Strict input contract for the tool: exactly one `query` string, no extra keys. */
const inputSchema = z.strictObject({
  query: queryField,
})

/** Validated tool input, derived from the schema. */
type Input = z.infer<typeof inputSchema>

/** Result handed to the renderers after a search. */
type Output = {
  /** Elapsed time of the search call in milliseconds. */
  durationMs: number
  /** Parsed results; empty when nothing was found or the search failed. */
  results: SearchResult[]
}
|
||||
|
||||
|
||||
/**
 * Tool that runs a web search through the DuckDuckGo provider and returns the
 * results (title, snippet, link) as numbered text for the assistant.
 */
export const WebSearchTool = {
  name: TOOL_NAME_FOR_PROMPT,
  async description() {
    return DESCRIPTION
  },
  userFacingName: () => 'Web Search',
  inputSchema,
  isReadOnly: () => true,
  isConcurrencySafe: () => true,
  async isEnabled() {
    return true
  },
  needsPermissions() {
    return false
  },
  async prompt() {
    return DESCRIPTION
  },
  renderToolUseMessage({ query }: Input) {
    return `Searching for: "${query}" using DuckDuckGo`
  },
  renderToolUseRejectedMessage() {
    return <FallbackToolUseRejectedMessage />
  },
  renderToolResultMessage(output: Output) {
    return (
      <Box justifyContent="space-between" width="100%">
        <Box flexDirection="row">
          <Text> ⎿ Found </Text>
          <Text bold>{output.results.length} </Text>
          <Text>
            {output.results.length === 1 ? 'result' : 'results'} using DuckDuckGo
          </Text>
        </Box>
        <Cost costUSD={0} durationMs={output.durationMs} debug={false} />
      </Box>
    )
  },
  /** Formats the results as a numbered list, or a fixed notice when there are none. */
  renderResultForAssistant(output: Output) {
    if (output.results.length === 0) {
      return `No results found using DuckDuckGo.`
    }

    const listing = output.results
      .map(
        (item, index) =>
          `${index + 1}. **${item.title}**\n` +
          ` ${item.snippet}\n` +
          ` Link: ${item.link}\n\n`,
      )
      .join('')

    return (
      `Found ${output.results.length} search results using DuckDuckGo:\n\n` +
      listing +
      `You can reference these results to provide current, accurate information to the user.`
    )
  },
  /**
   * Runs the search and yields a single `result` event. Failures are reported
   * through `resultForAssistant` with an empty result list rather than thrown.
   */
  async *call({ query }: Input, _context: ToolUseContext) {
    const start = Date.now()

    try {
      const searchResults = await searchProviders.duckduckgo.search(query)

      const output: Output = {
        results: searchResults,
        durationMs: Date.now() - start,
      }

      yield {
        type: 'result' as const,
        resultForAssistant: this.renderResultForAssistant(output),
        data: output,
      }
    } catch (error: unknown) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const reason = error instanceof Error ? error.message : String(error)
      const output: Output = {
        results: [],
        durationMs: Date.now() - start,
      }

      yield {
        type: 'result' as const,
        resultForAssistant: `An error occurred during web search with DuckDuckGo: ${reason}`,
        data: output,
      }
    }
  },
} satisfies Tool<typeof inputSchema, Output>
|
||||
13
src/tools/WebSearchTool/prompt.ts
Normal file
13
src/tools/WebSearchTool/prompt.ts
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
/** Name under which the tool is exposed to the model. */
export const TOOL_NAME_FOR_PROMPT = 'WebSearch'

/** Model-facing description of the tool, followed by its usage notes. */
export const DESCRIPTION = [
  '- Allows Kode to search the web and use the results to inform responses',
  '- Provides up-to-date information for current events and recent data',
  '- Returns search result information formatted as search result blocks',
  "- Use this tool for accessing information beyond the Kode's knowledge cutoff",
  '- Searches are performed automatically within a single API call using DuckDuckGo',
  '',
  'Usage notes:',
  '- Use when you need current information not in training data',
  '- Effective for recent news, current events, product updates, or real-time data',
  '- Search queries should be specific and well-targeted for best results',
  '- Results include both title and snippet content for context',
].join('\n')
|
||||
66
src/tools/WebSearchTool/searchProviders.ts
Normal file
66
src/tools/WebSearchTool/searchProviders.ts
Normal file
@ -0,0 +1,66 @@
|
||||
import fetch from 'node-fetch'
|
||||
import { parse } from 'node-html-parser'
|
||||
|
||||
/** One hit returned by a search provider. */
export interface SearchResult {
  /** Title text of the result. */
  title: string
  /** Short excerpt shown with the result. */
  snippet: string
  /** URL the result points to. */
  link: string
}

/** Contract implemented by every search backend. */
export interface SearchProvider {
  /** Runs `query` and resolves with the parsed results. */
  search: (query: string, apiKey?: string) => Promise<SearchResult[]>
  /** Reports whether the provider can be used (optionally given an API key). */
  isEnabled: (apiKey?: string) => boolean
}
|
||||
|
||||
|
||||
/** Base used to resolve relative and protocol-relative hrefs found in result pages. */
const DUCKDUCKGO_ORIGIN = 'https://duckduckgo.com';

/**
 * Unwraps a DuckDuckGo redirect link (`…duckduckgo.com/l/?uddg=<target>`) to
 * the target URL it points at.
 *
 * Handles absolute and protocol-relative hrefs and does not depend on `uddg`
 * being the first query parameter. Links that are not DuckDuckGo redirects, or
 * that cannot be parsed, are returned unchanged.
 *
 * @param href - The raw `href` attribute of a result title link.
 * @returns The decoded target URL, or `href` itself when there is nothing to unwrap.
 */
function resolveDuckDuckGoLink(href: string): string {
  try {
    const parsed = new URL(href, DUCKDUCKGO_ORIGIN);
    const onDuckDuckGo =
      parsed.hostname === 'duckduckgo.com' || parsed.hostname.endsWith('.duckduckgo.com');
    if (!onDuckDuckGo || parsed.pathname !== '/l/') {
      return href;
    }
    // searchParams.get() already percent-decodes the embedded target URL.
    return parsed.searchParams.get('uddg') || parsed.href;
  } catch {
    return href;
  }
}

/**
 * Search provider backed by DuckDuckGo's HTML endpoint.
 *
 * `search` requests the HTML results page and extracts every result that has
 * a title, a link and a snippet. It rejects when the HTTP status is not OK.
 */
const duckDuckGoSearchProvider: SearchProvider = {
  isEnabled: () => true,
  search: async (query: string): Promise<SearchResult[]> => {
    const response = await fetch(`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
      }
    });

    if (!response.ok) {
      throw new Error(`DuckDuckGo search failed with status: ${response.status}`);
    }

    const html = await response.text();
    const root = parse(html);
    const results: SearchResult[] = [];

    for (const node of root.querySelectorAll('.result.web-result')) {
      const titleNode = node.querySelector('.result__a');
      const snippetNode = node.querySelector('.result__snippet');
      if (!titleNode || !snippetNode) {
        continue;
      }

      const title = titleNode.text;
      const link = titleNode.getAttribute('href');
      const snippet = snippetNode.text;
      // Incomplete entries are skipped rather than emitted with blanks.
      if (!title || !link || !snippet) {
        continue;
      }

      results.push({
        title: title.trim(),
        snippet: snippet.trim(),
        link: resolveDuckDuckGoLink(link),
      });
    }

    return results;
  },
}
|
||||
|
||||
/** Registry of the available search backends, keyed by provider name. */
export const searchProviders = { duckduckgo: duckDuckGoSearchProvider }
|
||||
Loading…
x
Reference in New Issue
Block a user