安装
- Gradle (Kotlin DSL)
- Gradle (Groovy)
- Maven
repositories {
mavenCentral()
}
dependencies {
implementation("com.firecrawl:firecrawl-java:1.1.1")
}
repositories {
mavenCentral()
}
dependencies {
implementation 'com.firecrawl:firecrawl-java:1.1.1'
}
<dependency>
<groupId>com.firecrawl</groupId>
<artifactId>firecrawl-java</artifactId>
<version>1.1.1</version>
</dependency>
需要 Java 11 或更高版本。
使用方式
- 从 firecrawl.dev 获取 API 密钥
- 将 API 密钥设置为名为 `FIRECRAWL_API_KEY` 的环境变量,或通过 `FirecrawlClient.builder().apiKey(...)` 传入
import com.firecrawl.client.FirecrawlClient;
import com.firecrawl.models.CrawlJob;
import com.firecrawl.models.CrawlOptions;
import com.firecrawl.models.Document;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
/**
 * Minimal end-to-end sample: scrapes one page as markdown, then crawls the
 * same site and prints the scraped markdown plus the number of crawled pages.
 */
public class Example {
    /**
     * Runs the sample against https://firecrawl.dev with a client obtained
     * from {@code FirecrawlClient.fromEnv()}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FirecrawlClient client = FirecrawlClient.fromEnv();

        // Single-page scrape requesting the markdown format only.
        List<Object> formats = List.of("markdown");
        ScrapeOptions scrapeOptions = ScrapeOptions.builder().formats(formats).build();
        Document doc = client.scrape("https://firecrawl.dev", scrapeOptions);

        // Crawl of the same site with limit set to 5.
        CrawlOptions crawlOptions = CrawlOptions.builder().limit(5).build();
        CrawlJob crawl = client.crawl("https://firecrawl.dev", crawlOptions);

        System.out.println(doc.getMarkdown());

        // getData() may be null; report zero pages in that case.
        int pageCount = 0;
        if (crawl.getData() != null) {
            pageCount = crawl.getData().size();
        }
        System.out.println("已爬取页面数: " + pageCount);
    }
}
抓取 URL
使用 `scrape` 方法抓取单个 URL。
import com.firecrawl.models.Document;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
// Ask for both markdown and HTML output, restricted to the main content.
List<Object> formats = List.of("markdown", "html");
ScrapeOptions options = ScrapeOptions.builder()
        .formats(formats)
        .onlyMainContent(true)
        .waitFor(5000) // presumably milliseconds; confirm against the SDK
        .build();

Document doc = client.scrape("https://firecrawl.dev", options);

System.out.println(doc.getMarkdown());
// Metadata is accessed as a map; "title" is looked up by key.
Object title = doc.getMetadata().get("title");
System.out.println(title);
JSON 提取
使用 `JsonFormat` 通过 scrape 端点提取结构化 JSON:
import com.firecrawl.models.Document;
import com.firecrawl.models.JsonFormat;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
import java.util.Map;
// Schema for the object to extract: a string "name" and a numeric "price".
Map<String, Object> properties = Map.of(
        "name", Map.of("type", "string"),
        "price", Map.of("type", "number"));
Map<String, Object> schema = Map.of(
        "type", "object",
        "properties", properties);

JsonFormat jsonFmt = JsonFormat.builder()
        .prompt("Extract the product name and price")
        .schema(schema)
        .build();

// The JsonFormat object is passed as the only entry in the formats list.
List<Object> formats = List.of(jsonFmt);
ScrapeOptions options = ScrapeOptions.builder().formats(formats).build();
Document doc = client.scrape("https://example.com/product", options);

System.out.println(doc.getJson());
爬取网站
使用 `crawl` 方法爬取网站。
import com.firecrawl.models.CrawlJob;
import com.firecrawl.models.CrawlOptions;
import com.firecrawl.models.Document;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
// Scrape settings handed to the crawl: markdown output only.
List<Object> pageFormats = List.of("markdown");
ScrapeOptions pageOptions = ScrapeOptions.builder().formats(pageFormats).build();

CrawlOptions crawlOptions = CrawlOptions.builder()
        .limit(50)
        .maxDiscoveryDepth(3)
        .scrapeOptions(pageOptions)
        .build();

CrawlJob job = client.crawl("https://firecrawl.dev", crawlOptions);

System.out.println("Status: " + job.getStatus());
System.out.println("Progress: " + job.getCompleted() + "/" + job.getTotal());

// getData() may be null, so guard before iterating over the crawled pages.
if (job.getData() != null) {
    for (Document crawled : job.getData()) {
        Object sourceUrl = crawled.getMetadata().get("sourceURL");
        System.out.println(sourceUrl);
    }
}
开始爬取
使用 `startCrawl` 可直接启动任务,无需等待完成。
import com.firecrawl.models.CrawlOptions;
import com.firecrawl.models.CrawlResponse;
// Start the crawl and keep the response, whose id identifies the job.
CrawlOptions startOptions = CrawlOptions.builder().limit(100).build();
CrawlResponse start = client.startCrawl("https://firecrawl.dev", startOptions);

System.out.println("Job ID: " + start.getId());
检查爬取状态
使用 `getCrawlStatus` 检查爬取进度。
import com.firecrawl.models.CrawlJob;
// Look up the crawl job by the id returned from startCrawl, then print its status and completed/total counts.
CrawlJob status = client.getCrawlStatus(start.getId());
System.out.println("Status: " + status.getStatus());
System.out.println("Progress: " + status.getCompleted() + "/" + status.getTotal());
取消爬取任务
使用 `cancelCrawl` 取消正在运行的爬取任务。
import java.util.Map;
// Cancel the crawl job by id; the response comes back as a generic map and is printed as-is.
Map<String, Object> result = client.cancelCrawl(start.getId());
System.out.println(result);
网站映射
使用 `map` 发现网站上的链接。
import com.firecrawl.models.MapData;
import com.firecrawl.models.MapOptions;
import java.util.Map;
// Map the site with limit set to 100 and the search term "blog".
MapOptions mapOptions = MapOptions.builder().limit(100).search("blog").build();
MapData data = client.map("https://firecrawl.dev", mapOptions);

// getLinks() may be null; each link is a map read by its "url" and "title" keys.
if (data.getLinks() != null) {
    for (Map<String, Object> entry : data.getLinks()) {
        Object url = entry.get("url");
        Object title = entry.get("title");
        System.out.println(url + " - " + title);
    }
}
搜索网页
使用 `search` 进行搜索,并可选择配置搜索设置。
import com.firecrawl.models.SearchData;
import com.firecrawl.models.SearchOptions;
import java.util.Map;
// Run a search with limit set to 10.
SearchOptions searchOptions = SearchOptions.builder().limit(10).build();
SearchData results = client.search("firecrawl web scraping", searchOptions);

// getWeb() may be null; each result is a map read by its "title" and "url" keys.
if (results.getWeb() != null) {
    for (Map<String, Object> hit : results.getWeb()) {
        Object title = hit.get("title");
        Object url = hit.get("url");
        System.out.println(title + " - " + url);
    }
}
批量抓取
使用 `batchScrape` 并行抓取多个 URL。
import com.firecrawl.models.BatchScrapeJob;
import com.firecrawl.models.BatchScrapeOptions;
import com.firecrawl.models.Document;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
// URLs to scrape in one batch, all with the same markdown-only scrape options.
List<String> urls = List.of("https://firecrawl.dev", "https://firecrawl.dev/blog");
List<Object> formats = List.of("markdown");
ScrapeOptions sharedOptions = ScrapeOptions.builder().formats(formats).build();
BatchScrapeOptions batchOptions = BatchScrapeOptions.builder().options(sharedOptions).build();

BatchScrapeJob job = client.batchScrape(urls, batchOptions);

// getData() may be null, so guard before printing each document's markdown.
if (job.getData() != null) {
    for (Document scraped : job.getData()) {
        System.out.println(scraped.getMarkdown());
    }
}
代理
使用 `agent` 运行一个由 AI 驱动的代理。
import com.firecrawl.models.AgentOptions;
import com.firecrawl.models.AgentStatusResponse;
// Prompt-only agent run: no URLs and no schema are supplied.
AgentOptions agentOptions = AgentOptions.builder()
        .prompt("Find the pricing plans for Firecrawl and compare them")
        .build();

AgentStatusResponse result = client.agent(agentOptions);
System.out.println(result.getData());
import com.firecrawl.models.AgentOptions;
import com.firecrawl.models.AgentStatusResponse;
import java.util.List;
import java.util.Map;
// Schema built inside-out: each plan is an object with string "name" and "price",
// and the top-level object carries a "plans" array of those objects.
Map<String, Object> planProperties = Map.of(
        "name", Map.of("type", "string"),
        "price", Map.of("type", "string"));
Map<String, Object> planSchema = Map.of(
        "type", "object",
        "properties", planProperties);
Map<String, Object> plansArray = Map.of(
        "type", "array",
        "items", planSchema);
Map<String, Object> schema = Map.of(
        "type", "object",
        "properties", Map.of("plans", plansArray));

AgentOptions agentOptions = AgentOptions.builder()
        .prompt("Extract pricing plan details")
        .urls(List.of("https://firecrawl.dev"))
        .schema(schema)
        .build();

AgentStatusResponse result = client.agent(agentOptions);
System.out.println(result.getData());
用量与指标
import com.firecrawl.models.ConcurrencyCheck;
import com.firecrawl.models.CreditUsage;
// Current concurrency versus the maximum, printed as "current/max".
ConcurrencyCheck concurrency = client.getConcurrency();
String concurrencyLine =
        "Concurrency: " + concurrency.getConcurrency() + "/" + concurrency.getMaxConcurrency();
System.out.println(concurrencyLine);

// Remaining credits for the account.
CreditUsage credits = client.getCreditUsage();
String creditsLine = "Remaining credits: " + credits.getRemainingCredits();
System.out.println(creditsLine);
异步支持
异步方法(如 `scrapeAsync`)返回 `CompletableFuture`。
import com.firecrawl.models.Document;
import com.firecrawl.models.ScrapeOptions;
import java.util.List;
import java.util.concurrent.CompletableFuture;
// Start the scrape without blocking; the result arrives through the future.
CompletableFuture<Document> future = client.scrapeAsync(
        "https://example.com",
        ScrapeOptions.builder()
                .formats(List.of((Object) "markdown"))
                .build()
);

// Print the markdown on success. Without the exceptionally stage a failed
// scrape would be dropped silently, because nothing else observes the future
// returned by thenAccept.
future
        .thenAccept(doc -> System.out.println(doc.getMarkdown()))
        .exceptionally(error -> {
            System.err.println("Async scrape failed: " + error.getMessage());
            return null;
        });
浏览器
创建会话
import com.firecrawl.models.BrowserCreateResponse;
// Create a browser session and print its id, CDP URL and live-view URL.
// NOTE(review): the meaning of the three positional arguments (120, 60, true) is not visible here — confirm against the SDK's browser(...) signature.
BrowserCreateResponse session = client.browser(120, 60, true);
System.out.println(session.getId());
System.out.println(session.getCdpUrl());
System.out.println(session.getLiveViewUrl());
执行代码
import com.firecrawl.models.BrowserExecuteResponse;
// Run a code string in the existing browser session, then print its stdout and exit code.
// Arguments in order: session id, the code to run, "node", and 60.
// NOTE(review): "node" presumably selects the language/runtime and 60 a timeout in seconds — confirm against the SDK.
BrowserExecuteResponse run = client.browserExecute(
session.getId(),
"await page.goto(\"https://example.com\"); console.log(await page.title());",
"node",
60
);
System.out.println(run.getStdout());
System.out.println(run.getExitCode());
与抓取绑定的交互式会话
`interact(...)` 会在与抓取绑定的浏览器会话中运行代码(并在首次使用时初始化该会话)。`stopInteraction(...)` 会在你完成后显式停止交互式会话。
import com.firecrawl.models.BrowserDeleteResponse;
import com.firecrawl.models.BrowserExecuteResponse;
// Placeholder scrape job id that the interactive session is bound to.
String scrapeJobId = "550e8400-e29b-41d4-a716-446655440000";
// Run a code string in the session tied to that scrape job and print its stdout.
// NOTE(review): "node" and 60 are presumably the language and a timeout in seconds — confirm against the SDK.
BrowserExecuteResponse scrapeRun = client.interact(
scrapeJobId,
"console.log(page.url());",
"node",
60
);
System.out.println(scrapeRun.getStdout());
// Explicitly stop the interactive session once finished and report whether that succeeded.
BrowserDeleteResponse deleted = client.stopInteraction(scrapeJobId);
System.out.println("Deleted: " + deleted.isSuccess());
列出并关闭会话
import com.firecrawl.models.BrowserDeleteResponse;
import com.firecrawl.models.BrowserListResponse;
import com.firecrawl.models.BrowserSession;
// List browser sessions using the "active" filter value and print each session's id and status.
BrowserListResponse active = client.listBrowsers("active");
// getSessions() may be null, so guard before iterating.
if (active.getSessions() != null) {
for (BrowserSession s : active.getSessions()) {
System.out.println(s.getId() + " - " + s.getStatus());
}
}
// Close the session created earlier and report whether the deletion succeeded.
BrowserDeleteResponse closed = client.deleteBrowser(session.getId());
System.out.println("Closed: " + closed.isSuccess());
配置
FirecrawlClient.builder() 支持以下选项:
| 选项 | 类型 | 默认值 | 说明 |
|---|---|---|---|
| `apiKey` | `String` | `FIRECRAWL_API_KEY` 环境变量或 `firecrawl.apiKey` 系统属性 | Firecrawl API 密钥 |
| `apiUrl` | `String` | `https://api.firecrawl.dev` (或 `FIRECRAWL_API_URL`) | API 基础 URL |
| `timeoutMs` | `long` | `300000` | HTTP 请求超时时间 (毫秒) |
| `maxRetries` | `int` | `3` | 发生瞬时故障时自动重试的次数 |
| `backoffFactor` | `double` | `0.5` | 指数退避系数 (秒) |
| `asyncExecutor` | `Executor` | `ForkJoinPool.commonPool()` | 用于异步方法的自定义执行器 |
| `httpClient` | `OkHttpClient` | 基于 `timeoutMs` 构建 | 预配置的 `OkHttpClient` 实例 |
import com.firecrawl.client.FirecrawlClient;
// Build a client with every option set explicitly; the values below match the defaults listed in the options table.
FirecrawlClient client = FirecrawlClient.builder()
.apiKey("fc-your-api-key")
.apiUrl("https://api.firecrawl.dev")
// HTTP request timeout in milliseconds (300_000 ms = 5 minutes).
.timeoutMs(300_000)
// Number of automatic retries on transient failures.
.maxRetries(3)
// Exponential backoff factor, in seconds.
.backoffFactor(0.5)
.build();
自定义 HTTP 客户端
你可以传入预先配置的 `OkHttpClient`,以控制连接池、拦截器、SSL 配置、代理设置及其他任何 OkHttp 功能。提供该客户端后,`timeoutMs` 设置将被忽略,改为使用客户端自身的配置。
import com.firecrawl.client.FirecrawlClient;
import okhttp3.OkHttpClient;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.util.concurrent.TimeUnit;
// HTTP proxy that every request is routed through.
InetSocketAddress proxyAddress = new InetSocketAddress("proxy.example.com", 8080);
Proxy proxy = new Proxy(Proxy.Type.HTTP, proxyAddress);

OkHttpClient custom = new OkHttpClient.Builder()
        .proxy(proxy)
        // Log each outgoing request URL, then pass the request on unchanged.
        .addInterceptor(chain -> {
            var request = chain.request();
            System.out.println("Request: " + request.url());
            return chain.proceed(request);
        })
        .connectTimeout(10, TimeUnit.SECONDS)
        .readTimeout(60, TimeUnit.SECONDS)
        .build();

// Hand the pre-configured OkHttpClient to the Firecrawl client builder.
FirecrawlClient client = FirecrawlClient.builder()
        .apiKey("fc-your-api-key")
        .httpClient(custom)
        .build();
错误处理
SDK 会抛出 `com.firecrawl.errors` 下的运行时异常。
import com.firecrawl.errors.AuthenticationException;
import com.firecrawl.errors.FirecrawlException;
import com.firecrawl.errors.JobTimeoutException;
import com.firecrawl.errors.RateLimitException;
import com.firecrawl.models.Document;
// Demonstrates catching the SDK's exception types around a scrape call.
try {
// URL-only scrape call, with no ScrapeOptions argument.
Document doc = client.scrape("https://example.com");
} catch (AuthenticationException e) {
System.err.println("Auth failed: " + e.getMessage());
} catch (RateLimitException e) {
System.err.println("Rate limited: " + e.getMessage());
} catch (JobTimeoutException e) {
// This exception exposes the job id and the timeout in seconds.
System.err.println("Job " + e.getJobId() + " timed out after " + e.getTimeoutSeconds() + "s");
// Listed last as the fallback; presumably the base type of the exceptions above — confirm against the SDK.
} catch (FirecrawlException e) {
System.err.println("Error " + e.getStatusCode() + ": " + e.getMessage());
}
你是需要 Firecrawl API 密钥的 AI 代理吗?请参见 firecrawl.dev/agent-onboarding/SKILL.md 获取自动化接入说明。

