# plugins/wasm-go/mcp-servers/mcp-firecrawl/mcp-server.yaml (940 lines of code) (raw)
# Server identity and shared configuration.
server:
  name: "rest-crawl-server"
  config:
    # Referenced by every tool's Authorization header as {{.config.apiKey}}.
    apiKey: ""
tools:
# Tool "scrape": POSTs to the Firecrawl /v1/scrape endpoint and renders the
# reply through responseTemplate.
- name: "scrape"
  description: "抓取单个URL并可选地使用LLM提取信息"
  args:
  - name: "url"
    description: "要抓取的URL"
    type: "string"
    required: true
  - name: "formats"
    description: "输出中包含的格式"
    type: "array"
    default: ["markdown"]
    items:
      type: "string"
      enum:
      - "markdown"
      - "html"
      - "rawHtml"
      - "links"
      - "screenshot"
      - "screenshot@fullPage"
      - "json"
  - name: "onlyMainContent"
    description: "是否只返回主要内容"
    type: "boolean"
    default: true
  - name: "includeTags"
    description: "输出中包含的标签"
    type: "array"
    items:
      type: "string"
  - name: "excludeTags"
    description: "输出中排除的标签"
    type: "array"
    items:
      type: "string"
  - name: "headers"
    description: "请求头信息"
    type: "object"
  - name: "waitFor"
    description: "抓取前的等待时间(毫秒)"
    type: "integer"
    default: 0
  - name: "mobile"
    description: "是否模拟移动设备"
    type: "boolean"
    default: false
  - name: "skipTlsVerification"
    description: "是否跳过TLS验证"
    type: "boolean"
    default: false
  - name: "timeout"
    description: "请求超时时间(毫秒)"
    type: "integer"
    default: 30000
  - name: "jsonOptions"
    description: "JSON提取选项"
    type: "object"
    properties:
      prompt:
        description: "提取提示"
        type: "string"
      schema:
        description: "提取使用的schema"
        type: "object"
      systemPrompt:
        description: "系统提示"
        type: "string"
  - name: "actions"
    description: "抓取前执行的操作"
    type: "array"
    items:
      # Each action is one of the variants below; every variant pins its
      # "type" property to a single enum value.
      oneOf:
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["wait"]
          milliseconds:
            type: "integer"
            minimum: 1
          selector:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["screenshot"]
          fullPage:
            type: "boolean"
            default: false
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["click"]
          selector:
            type: "string"
          all:
            type: "boolean"
            default: false
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["write"]
          text:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["press"]
          key:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["scroll"]
          direction:
            type: "string"
            default: "down"
            enum: ["up", "down"]
          selector:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["scrape"]
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["executeJavascript"]
          script:
            type: "string"
  - name: "location"
    description: "位置设置"
    type: "object"
    properties:
      country:
        type: "string"
        default: "US"
        pattern: "^[A-Z]{2}$"
      languages:
        type: "array"
        items:
          type: "string"
  - name: "removeBase64Images"
    description: "是否移除base64图片"
    type: "boolean"
  - name: "blockAds"
    description: "是否启用广告拦截"
    type: "boolean"
    default: true
  - name: "proxy"
    description: "使用的代理类型"
    type: "string"
    enum: ["basic", "stealth"]
  requestTemplate:
    method: "POST"
    url: "https://api.firecrawl.dev/v1/scrape"
    # NOTE(review): presumably forwards the tool arguments as the JSON request
    # body - confirm against the Higress MCP server documentation.
    argsToJsonBody: true
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag of the reply. The "JSON" line
    # surfaces the structured result for the "json" format / jsonOptions
    # (field name per the Firecrawl v1 scrape reference), so it is no longer
    # dropped from the rendered output.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      数据:
      Markdown: {{ .data.markdown }}
      HTML: {{ .data.html }}
      Raw HTML: {{ .data.rawHtml }}
      链接: {{ .data.links }}
      截图: {{ .data.screenshot }}
      JSON: {{ .data.json }}
      元数据:
      标题: {{ .data.metadata.title }}
      描述: {{ .data.metadata.description }}
      语言: {{ .data.metadata.language }}
      源URL: {{ .data.metadata.sourceURL }}
      状态码: {{ .data.metadata.statusCode }}
      错误: {{ .data.metadata.error }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "batch_scrape": POSTs to the Firecrawl /v1/batch/scrape endpoint and
# reports the task id, URL and invalid-URL list from the reply.
- name: "batch_scrape"
  description: "批量抓取多个URL并可选地使用LLM提取信息"
  args:
  - name: "urls"
    description: "要抓取的URL列表"
    type: "array"
    required: true
    items:
      type: "string"
      format: "uri"
  - name: "webhook"
    description: "Webhook配置"
    type: "object"
    properties:
      url:
        description: "Webhook URL"
        type: "string"
      headers:
        description: "Webhook请求头"
        type: "object"
      metadata:
        description: "自定义元数据"
        type: "object"
      events:
        description: "触发Webhook的事件类型"
        type: "array"
        items:
          type: "string"
          enum:
          - "completed"
          - "page"
          - "failed"
          - "started"
  - name: "ignoreInvalidURLs"
    description: "是否忽略无效URL"
    type: "boolean"
    default: false
  - name: "formats"
    description: "输出中包含的格式"
    type: "array"
    default: ["markdown"]
    items:
      type: "string"
      enum:
      - "markdown"
      - "html"
      - "rawHtml"
      - "links"
      - "screenshot"
      - "screenshot@fullPage"
      - "json"
  - name: "onlyMainContent"
    description: "是否只返回主要内容"
    type: "boolean"
    default: true
  - name: "includeTags"
    description: "输出中包含的标签"
    type: "array"
    items:
      type: "string"
  - name: "excludeTags"
    description: "输出中排除的标签"
    type: "array"
    items:
      type: "string"
  - name: "headers"
    description: "请求头信息"
    type: "object"
  - name: "waitFor"
    description: "抓取前的等待时间(毫秒)"
    type: "integer"
    default: 0
  - name: "mobile"
    description: "是否模拟移动设备"
    type: "boolean"
    default: false
  - name: "skipTlsVerification"
    description: "是否跳过TLS验证"
    type: "boolean"
    default: false
  - name: "timeout"
    description: "请求超时时间(毫秒)"
    type: "integer"
    default: 30000
  - name: "jsonOptions"
    description: "JSON提取选项"
    type: "object"
    properties:
      prompt:
        description: "提取提示"
        type: "string"
      schema:
        description: "提取使用的schema"
        type: "object"
      systemPrompt:
        description: "系统提示"
        type: "string"
  - name: "actions"
    description: "抓取前执行的操作"
    type: "array"
    items:
      # Each action is one of the variants below; every variant pins its
      # "type" property to a single enum value.
      oneOf:
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["wait"]
          milliseconds:
            type: "integer"
            minimum: 1
          selector:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["screenshot"]
          fullPage:
            type: "boolean"
            default: false
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["click"]
          selector:
            type: "string"
          all:
            type: "boolean"
            default: false
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["write"]
          text:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["press"]
          key:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["scroll"]
          direction:
            type: "string"
            default: "down"
            enum: ["up", "down"]
          selector:
            type: "string"
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["scrape"]
      - type: "object"
        properties:
          type:
            type: "string"
            enum: ["executeJavascript"]
          script:
            type: "string"
  - name: "location"
    description: "位置设置"
    type: "object"
    properties:
      country:
        type: "string"
        default: "US"
        pattern: "^[A-Z]{2}$"
      languages:
        type: "array"
        items:
          type: "string"
  - name: "removeBase64Images"
    description: "是否移除base64图片"
    type: "boolean"
  - name: "blockAds"
    description: "是否启用广告拦截"
    type: "boolean"
    default: true
  - name: "proxy"
    description: "使用的代理类型"
    type: "string"
    enum: ["basic", "stealth"]
  requestTemplate:
    method: "POST"
    url: "https://api.firecrawl.dev/v1/batch/scrape"
    argsToJsonBody: true
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag of the reply.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      任务ID: {{ .id }}
      URL: {{ .url }}
      无效URL: {{ .invalidURLs }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "map": POSTs to the Firecrawl /v1/map endpoint and prints the
# returned link list.
- name: "map"
  description: "根据选项映射多个URL"
  args:
  - name: "url"
    description: "基础URL"
    type: "string"
    format: "uri"
    required: true
  - name: "search"
    description: "搜索查询"
    type: "string"
  - name: "ignoreSitemap"
    description: "是否忽略网站地图"
    type: "boolean"
    default: true
  - name: "sitemapOnly"
    description: "是否只返回网站地图中的链接"
    type: "boolean"
    default: false
  - name: "includeSubdomains"
    description: "是否包含子域名"
    type: "boolean"
    default: false
  - name: "limit"
    description: "最大返回链接数"
    type: "integer"
    default: 5000
    maximum: 5000
  - name: "timeout"
    description: "超时时间(毫秒)"
    type: "integer"
  requestTemplate:
    method: "POST"
    url: "https://api.firecrawl.dev/v1/map"
    argsToJsonBody: true
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag of the reply.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      链接: {{ .links }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "extract": POSTs to the Firecrawl /v1/extract endpoint and reports the
# id of the created extraction task.
- name: "extract"
  description: "使用LLM从页面中提取结构化数据"
  args:
  - name: "urls"
    description: "要提取数据的URL"
    type: "array"
    required: true
    items:
      type: "string"
      format: "uri"
  - name: "prompt"
    description: "指导提取过程的提示"
    type: "string"
  # Free-form JSON Schema object supplied by the caller; it intentionally
  # declares no fixed properties so clients are not steered toward
  # placeholder field names.
  - name: "schema"
    description: "定义提取数据结构的schema"
    type: "object"
  - name: "enableWebSearch"
    description: "是否启用网络搜索"
    type: "boolean"
    default: false
  - name: "ignoreSitemap"
    description: "是否忽略网站地图"
    type: "boolean"
    default: false
  - name: "includeSubdomains"
    description: "是否包含子域名"
    type: "boolean"
    default: true
  - name: "showSources"
    description: "是否显示数据来源"
    type: "boolean"
    default: false
  - name: "scrapeOptions"
    description: "抓取选项"
    type: "object"
    properties:
      formats:
        description: "输出中包含的格式"
        type: "array"
        default: ["markdown"]
        items:
          type: "string"
          enum:
          - "markdown"
          - "html"
          - "rawHtml"
          - "links"
          - "screenshot"
          - "screenshot@fullPage"
          - "json"
      onlyMainContent:
        description: "是否只返回主要内容"
        type: "boolean"
        default: true
      includeTags:
        description: "输出中包含的标签"
        type: "array"
        items:
          type: "string"
      excludeTags:
        description: "输出中排除的标签"
        type: "array"
        items:
          type: "string"
      headers:
        description: "请求头信息"
        type: "object"
      waitFor:
        description: "抓取前的等待时间(毫秒)"
        type: "integer"
        default: 0
      mobile:
        description: "是否模拟移动设备"
        type: "boolean"
        default: false
      skipTlsVerification:
        description: "是否跳过TLS验证"
        type: "boolean"
        default: false
      timeout:
        description: "请求超时时间(毫秒)"
        type: "integer"
        default: 30000
      jsonOptions:
        description: "JSON提取选项"
        type: "object"
        properties:
          prompt:
            description: "提取提示"
            type: "string"
          schema:
            description: "提取使用的schema"
            type: "object"
          systemPrompt:
            description: "系统提示"
            type: "string"
      actions:
        description: "抓取前执行的操作"
        type: "array"
        items:
          # Each action is one of the variants below; every variant pins its
          # "type" property to a single enum value.
          oneOf:
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["wait"]
              milliseconds:
                type: "integer"
                minimum: 1
              selector:
                type: "string"
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["screenshot"]
              fullPage:
                type: "boolean"
                default: false
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["click"]
              selector:
                type: "string"
              all:
                type: "boolean"
                default: false
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["write"]
              text:
                type: "string"
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["press"]
              key:
                type: "string"
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["scroll"]
              direction:
                type: "string"
                default: "down"
                enum: ["up", "down"]
              selector:
                type: "string"
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["scrape"]
          - type: "object"
            properties:
              type:
                type: "string"
                enum: ["executeJavascript"]
              script:
                type: "string"
      location:
        description: "位置设置"
        type: "object"
        properties:
          country:
            type: "string"
            default: "US"
            pattern: "^[A-Z]{2}$"
          languages:
            type: "array"
            items:
              type: "string"
      removeBase64Images:
        description: "是否移除base64图片"
        type: "boolean"
      blockAds:
        description: "是否启用广告拦截"
        type: "boolean"
        default: true
      proxy:
        description: "使用的代理类型"
        type: "string"
        enum: ["basic", "stealth"]
  requestTemplate:
    method: "POST"
    url: "https://api.firecrawl.dev/v1/extract"
    # NOTE(review): presumably forwards the tool arguments as the JSON request
    # body - confirm against the Higress MCP server documentation.
    argsToJsonBody: true
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag of the reply.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      任务ID: {{ .id }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "search": POSTs to the Firecrawl /v1/search endpoint and lists every
# entry of the returned "data" array, followed by the top-level warning.
- name: "search"
  description: "搜索并可选地抓取搜索结果"
  args:
  - name: "query"
    description: "搜索查询"
    type: "string"
    required: true
  - name: "limit"
    description: "最大返回结果数"
    type: "integer"
    default: 5
    minimum: 1
    maximum: 10
  - name: "tbs"
    description: "基于时间的搜索参数"
    type: "string"
  - name: "lang"
    description: "搜索结果的语言代码"
    type: "string"
    default: "en"
  - name: "country"
    description: "搜索结果的国家代码"
    type: "string"
    default: "us"
  - name: "location"
    description: "搜索结果的location参数"
    type: "string"
  - name: "timeout"
    description: "超时时间(毫秒)"
    type: "integer"
    default: 60000
  - name: "scrapeOptions"
    description: "抓取搜索结果的选项"
    type: "object"
    default: {}
    properties:
      formats:
        description: "输出中包含的格式"
        type: "array"
        default: []
        items:
          type: "string"
          enum:
          - "markdown"
          - "html"
          - "rawHtml"
          - "links"
          - "screenshot"
          - "screenshot@fullPage"
          - "extract"
  requestTemplate:
    method: "POST"
    url: "https://api.firecrawl.dev/v1/search"
    argsToJsonBody: true
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag; inside "range .data" the dot
    # refers to the current result entry.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      数据:
      {{- range .data }}
      - 标题: {{ .title }}
      描述: {{ .description }}
      URL: {{ .url }}
      Markdown: {{ .markdown }}
      HTML: {{ .html }}
      Raw HTML: {{ .rawHtml }}
      链接: {{ .links }}
      截图: {{ .screenshot }}
      元数据:
      标题: {{ .metadata.title }}
      描述: {{ .metadata.description }}
      源URL: {{ .metadata.sourceURL }}
      状态码: {{ .metadata.statusCode }}
      错误: {{ .metadata.error }}
      {{- end }}
      警告: {{ .warning }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "get_batch_scrape_status": GETs /v1/batch/scrape/<id>; the task id is
# substituted into the URL path from the "id" argument.
- name: "get_batch_scrape_status"
  description: "获取批量抓取任务的状态"
  args:
  - name: "id"
    description: "批量抓取任务的ID"
    type: "string"
    required: true
  requestTemplate:
    method: "GET"
    url: "https://api.firecrawl.dev/v1/batch/scrape/{{.args.id}}"
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # A reply carrying a "status" field is rendered as a progress report;
    # anything else is rendered as an error.
    body: |
      {{- if .status }}
      状态: {{ .status }}
      总数: {{ .total }}
      已完成: {{ .completed }}
      使用信用: {{ .creditsUsed }}
      过期时间: {{ .expiresAt }}
      数据:
      {{- range .data }}
      - Markdown: {{ .markdown }}
      HTML: {{ .html }}
      Raw HTML: {{ .rawHtml }}
      链接: {{ .links }}
      截图: {{ .screenshot }}
      元数据:
      标题: {{ .metadata.title }}
      描述: {{ .metadata.description }}
      语言: {{ .metadata.language }}
      源URL: {{ .metadata.sourceURL }}
      状态码: {{ .metadata.statusCode }}
      错误: {{ .metadata.error }}
      {{- end }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "get_batch_scrape_errors": GETs /v1/batch/scrape/<id>/errors; the task
# id is substituted into the URL path from the "id" argument.
- name: "get_batch_scrape_errors"
  description: "获取批量抓取任务的错误信息"
  args:
  - name: "id"
    description: "批量抓取任务的ID"
    type: "string"
    required: true
  requestTemplate:
    method: "GET"
    url: "https://api.firecrawl.dev/v1/batch/scrape/{{.args.id}}/errors"
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branch on the top-level "error" field rather than on "errors": an empty
    # "errors" list is falsy in the template, which would otherwise send a
    # successful reply down the failure path and hide the robotsBlocked list.
    body: |
      {{- if .error }}
      错误: {{ .error }}
      {{- else }}
      错误:
      {{- range .errors }}
      - ID: {{ .id }}
      时间戳: {{ .timestamp }}
      URL: {{ .url }}
      错误信息: {{ .error }}
      {{- end }}
      被robots.txt阻止的URL:
      {{- range .robotsBlocked }}
      - {{ . }}
      {{- end }}
      {{- end }}
# Tool "get_crawl_status": GETs /v1/crawl/<id>; the task id is substituted
# into the URL path from the "id" argument.
- name: "get_crawl_status"
  description: "获取爬取任务的状态"
  args:
  - name: "id"
    description: "爬取任务的ID"
    type: "string"
    required: true
  requestTemplate:
    method: "GET"
    url: "https://api.firecrawl.dev/v1/crawl/{{.args.id}}"
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # A reply carrying a "status" field is rendered as a progress report;
    # anything else is rendered as an error.
    body: |
      {{- if .status }}
      状态: {{ .status }}
      总数: {{ .total }}
      已完成: {{ .completed }}
      使用信用: {{ .creditsUsed }}
      过期时间: {{ .expiresAt }}
      数据:
      {{- range .data }}
      - Markdown: {{ .markdown }}
      HTML: {{ .html }}
      Raw HTML: {{ .rawHtml }}
      链接: {{ .links }}
      截图: {{ .screenshot }}
      元数据:
      标题: {{ .metadata.title }}
      描述: {{ .metadata.description }}
      语言: {{ .metadata.language }}
      源URL: {{ .metadata.sourceURL }}
      状态码: {{ .metadata.statusCode }}
      错误: {{ .metadata.error }}
      {{- end }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}
# Tool "get_crawl_errors": GETs /v1/crawl/<id>/errors; the task id is
# substituted into the URL path from the "id" argument.
- name: "get_crawl_errors"
  description: "获取爬取任务的错误信息"
  args:
  - name: "id"
    description: "爬取任务的ID"
    type: "string"
    required: true
  requestTemplate:
    method: "GET"
    url: "https://api.firecrawl.dev/v1/crawl/{{.args.id}}/errors"
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branch on the top-level "error" field rather than on "errors": an empty
    # "errors" list is falsy in the template, which would otherwise send a
    # successful reply down the failure path and hide the robotsBlocked list.
    body: |
      {{- if .error }}
      错误: {{ .error }}
      {{- else }}
      错误:
      {{- range .errors }}
      - ID: {{ .id }}
      时间戳: {{ .timestamp }}
      URL: {{ .url }}
      错误信息: {{ .error }}
      {{- end }}
      被robots.txt阻止的URL:
      {{- range .robotsBlocked }}
      - {{ . }}
      {{- end }}
      {{- end }}
# Tool "get_extract_job_status": GETs /v1/extract/<id>; the task id is
# substituted into the URL path from the "id" argument.
- name: "get_extract_job_status"
  description: "获取提取任务的状态"
  args:
  - name: "id"
    description: "提取任务的ID"
    type: "string"
    required: true
  requestTemplate:
    method: "GET"
    url: "https://api.firecrawl.dev/v1/extract/{{.args.id}}"
    headers:
    - key: "Authorization"
      value: "Bearer {{.config.apiKey}}"
  responseTemplate:
    # Branches on the top-level "success" flag of the reply.
    body: |
      {{- if .success }}
      成功: {{ .success }}
      数据: {{ .data }}
      状态: {{ .status }}
      过期时间: {{ .expiresAt }}
      {{- else }}
      错误: {{ .error }}
      {{- end }}