Crawl - Firecrawl Docs

Rastrear várias URLs de acordo com opções

curl --request POST \
  --url https://api.firecrawl.dev/v2/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
'

import requests

# Start a crawl job by POSTing the crawl options as JSON and print the raw
# response body returned by the API.
url = "https://api.firecrawl.dev/v2/crawl"

# Request body. Values such as "<string>" and 123 are placeholders to be
# replaced with real settings before running.
payload = {
    "url": "<string>",
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreRobotsTxt": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDiscoveryDepth": 123,
    "prompt": "<string>",
    "regexOnFullURL": False,
    "robotsUserAgent": "<string>",
    "scrapeOptions": {
        "actions": [
            {
                "milliseconds": 2,
                "type": "wait"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "formats": ["markdown"],
        "headers": {},
        "includeTags": ["<string>"],
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "lockdown": False,
        "maxAge": 172800000,
        "minAge": 123,
        "mobile": False,
        "onlyCleanContent": False,
        "onlyMainContent": True,
        "parsers": ["pdf"],
        "proxy": "auto",
        "redactPII": False,
        "removeBase64Images": True,
        "skipTlsVerification": True,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 60000,
        "waitFor": 0
    },
    "sitemap": "include",
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's API key.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Always pass a timeout: without one, requests waits indefinitely on a
# stalled connection and the script never returns.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    url: '<string>',
    allowExternalLinks: false,
    allowSubdomains: false,
    crawlEntireDomain: false,
    delay: 123,
    excludePaths: ['<string>'],
    ignoreQueryParameters: false,
    ignoreRobotsTxt: false,
    includePaths: ['<string>'],
    limit: 10000,
    maxConcurrency: 123,
    maxDiscoveryDepth: 123,
    prompt: '<string>',
    regexOnFullURL: false,
    robotsUserAgent: '<string>',
    scrapeOptions: {
      actions: [{milliseconds: 2, type: 'wait'}],
      blockAds: true,
      excludeTags: ['<string>'],
      formats: ['markdown'],
      headers: {},
      includeTags: ['<string>'],
      location: {country: 'US', languages: ['en-US']},
      lockdown: false,
      maxAge: 172800000,
      minAge: 123,
      mobile: false,
      onlyCleanContent: false,
      onlyMainContent: true,
      parsers: ['pdf'],
      proxy: 'auto',
      redactPII: false,
      removeBase64Images: true,
      skipTlsVerification: true,
      storeInCache: true,
      threatProtection: {
        blacklist: ['<string>'],
        blockedTlds: ['<string>'],
        riskScoreThreshold: 75,
        whitelist: ['<string>']
      },
      timeout: 60000,
      waitFor: 0
    },
    sitemap: 'include',
    zeroDataRetention: false
  })
};

fetch('https://api.firecrawl.dev/v2/crawl', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v2/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'allowExternalLinks' => false,
    'allowSubdomains' => false,
    'crawlEntireDomain' => false,
    'delay' => 123,
    'excludePaths' => [
        '<string>'
    ],
    'ignoreQueryParameters' => false,
    'ignoreRobotsTxt' => false,
    'includePaths' => [
        '<string>'
    ],
    'limit' => 10000,
    'maxConcurrency' => 123,
    'maxDiscoveryDepth' => 123,
    'prompt' => '<string>',
    'regexOnFullURL' => false,
    'robotsUserAgent' => '<string>',
    'scrapeOptions' => [
        'actions' => [
                [
                                'milliseconds' => 2,
                                'type' => 'wait'
                ]
        ],
        'blockAds' => true,
        'excludeTags' => [
                '<string>'
        ],
        'formats' => [
                'markdown'
        ],
        'headers' => [
                
        ],
        'includeTags' => [
                '<string>'
        ],
        'location' => [
                'country' => 'US',
                'languages' => [
                                'en-US'
                ]
        ],
        'lockdown' => false,
        'maxAge' => 172800000,
        'minAge' => 123,
        'mobile' => false,
        'onlyCleanContent' => false,
        'onlyMainContent' => true,
        'parsers' => [
                'pdf'
        ],
        'proxy' => 'auto',
        'redactPII' => false,
        'removeBase64Images' => true,
        'skipTlsVerification' => true,
        'storeInCache' => true,
        'threatProtection' => [
                'blacklist' => [
                                '<string>'
                ],
                'blockedTlds' => [
                                '<string>'
                ],
                'riskScoreThreshold' => 75,
                'whitelist' => [
                                '<string>'
                ]
        ],
        'timeout' => 60000,
        'waitFor' => 0
    ],
    'sitemap' => 'include',
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the crawl request to the Firecrawl API and prints the raw
// response body. Every step that can fail is checked, so a failure surfaces
// as the real error instead of a nil-pointer dereference further down.
func main() {

	url := "https://api.firecrawl.dev/v2/crawl"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		panic(err)
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	// On a transport error res is nil, so the error must be checked before
	// the deferred res.Body.Close() is registered.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v2/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.firecrawl.dev/v2/crawl")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}"

response = http.request(request)
puts response.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

crawl

Rastrear várias URLs de acordo com opções

curl --request POST \
  --url https://api.firecrawl.dev/v2/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
'

import requests

# Start a crawl job by POSTing the crawl options as JSON and print the raw
# response body returned by the API.
url = "https://api.firecrawl.dev/v2/crawl"

# Request body. Values such as "<string>" and 123 are placeholders to be
# replaced with real settings before running.
payload = {
    "url": "<string>",
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreRobotsTxt": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDiscoveryDepth": 123,
    "prompt": "<string>",
    "regexOnFullURL": False,
    "robotsUserAgent": "<string>",
    "scrapeOptions": {
        "actions": [
            {
                "milliseconds": 2,
                "type": "wait"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "formats": ["markdown"],
        "headers": {},
        "includeTags": ["<string>"],
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "lockdown": False,
        "maxAge": 172800000,
        "minAge": 123,
        "mobile": False,
        "onlyCleanContent": False,
        "onlyMainContent": True,
        "parsers": ["pdf"],
        "proxy": "auto",
        "redactPII": False,
        "removeBase64Images": True,
        "skipTlsVerification": True,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 60000,
        "waitFor": 0
    },
    "sitemap": "include",
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's API key.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Always pass a timeout: without one, requests waits indefinitely on a
# stalled connection and the script never returns.
response = requests.post(url, json=payload, headers=headers, timeout=30)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    url: '<string>',
    allowExternalLinks: false,
    allowSubdomains: false,
    crawlEntireDomain: false,
    delay: 123,
    excludePaths: ['<string>'],
    ignoreQueryParameters: false,
    ignoreRobotsTxt: false,
    includePaths: ['<string>'],
    limit: 10000,
    maxConcurrency: 123,
    maxDiscoveryDepth: 123,
    prompt: '<string>',
    regexOnFullURL: false,
    robotsUserAgent: '<string>',
    scrapeOptions: {
      actions: [{milliseconds: 2, type: 'wait'}],
      blockAds: true,
      excludeTags: ['<string>'],
      formats: ['markdown'],
      headers: {},
      includeTags: ['<string>'],
      location: {country: 'US', languages: ['en-US']},
      lockdown: false,
      maxAge: 172800000,
      minAge: 123,
      mobile: false,
      onlyCleanContent: false,
      onlyMainContent: true,
      parsers: ['pdf'],
      proxy: 'auto',
      redactPII: false,
      removeBase64Images: true,
      skipTlsVerification: true,
      storeInCache: true,
      threatProtection: {
        blacklist: ['<string>'],
        blockedTlds: ['<string>'],
        riskScoreThreshold: 75,
        whitelist: ['<string>']
      },
      timeout: 60000,
      waitFor: 0
    },
    sitemap: 'include',
    zeroDataRetention: false
  })
};

fetch('https://api.firecrawl.dev/v2/crawl', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v2/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'allowExternalLinks' => false,
    'allowSubdomains' => false,
    'crawlEntireDomain' => false,
    'delay' => 123,
    'excludePaths' => [
        '<string>'
    ],
    'ignoreQueryParameters' => false,
    'ignoreRobotsTxt' => false,
    'includePaths' => [
        '<string>'
    ],
    'limit' => 10000,
    'maxConcurrency' => 123,
    'maxDiscoveryDepth' => 123,
    'prompt' => '<string>',
    'regexOnFullURL' => false,
    'robotsUserAgent' => '<string>',
    'scrapeOptions' => [
        'actions' => [
                [
                                'milliseconds' => 2,
                                'type' => 'wait'
                ]
        ],
        'blockAds' => true,
        'excludeTags' => [
                '<string>'
        ],
        'formats' => [
                'markdown'
        ],
        'headers' => [
                
        ],
        'includeTags' => [
                '<string>'
        ],
        'location' => [
                'country' => 'US',
                'languages' => [
                                'en-US'
                ]
        ],
        'lockdown' => false,
        'maxAge' => 172800000,
        'minAge' => 123,
        'mobile' => false,
        'onlyCleanContent' => false,
        'onlyMainContent' => true,
        'parsers' => [
                'pdf'
        ],
        'proxy' => 'auto',
        'redactPII' => false,
        'removeBase64Images' => true,
        'skipTlsVerification' => true,
        'storeInCache' => true,
        'threatProtection' => [
                'blacklist' => [
                                '<string>'
                ],
                'blockedTlds' => [
                                '<string>'
                ],
                'riskScoreThreshold' => 75,
                'whitelist' => [
                                '<string>'
                ]
        ],
        'timeout' => 60000,
        'waitFor' => 0
    ],
    'sitemap' => 'include',
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the crawl request to the Firecrawl API and prints the raw
// response body. Every step that can fail is checked, so a failure surfaces
// as the real error instead of a nil-pointer dereference further down.
func main() {

	url := "https://api.firecrawl.dev/v2/crawl"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		panic(err)
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	// On a transport error res is nil, so the error must be checked before
	// the deferred res.Body.Close() is registered.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v2/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.firecrawl.dev/v2/crawl")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}"

response = http.request(request)
puts response.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

Você é um agente de IA e precisa de uma API key do Firecrawl? Consulte firecrawl.dev/agent-onboarding/SKILL.md para ver instruções de onboarding automatizado.

Autorizações

Authorization

string

header

obrigatório

Cabeçalho de autenticação Bearer no formato Bearer <token>, em que <token> é o seu token de autenticação.

Corpo

application/json

url

string<uri>

obrigatório

URL base para iniciar o rastreamento

allowExternalLinks

boolean

padrão:false

Permite que o rastreador siga links para sites externos.

allowSubdomains

boolean

padrão:false

Permite que o rastreador siga links para subdomínios do domínio principal.

crawlEntireDomain

boolean

padrão:false

Permite que o crawler siga links internos para URLs no mesmo nível (irmãs) ou em níveis superiores (pai), não apenas subcaminhos (filhos).

false: rastreia apenas URLs em níveis inferiores (filhas). Ex.: a partir de /features/feature-1, segue /features/feature-1/tips ✅, mas não segue /pricing nem / ❌

true: rastreia quaisquer links internos, incluindo URLs irmãs e pai. Ex.: a partir de /features/feature-1, segue /pricing, / etc. ✅

Use true para uma cobertura interna mais ampla, além dos caminhos aninhados.

delay

number

Intervalo, em segundos, entre operações de scraping. Isso ajuda a respeitar o limite de taxa do site. Definir esse valor força a concorrência para 1.

excludePaths

string[]

Padrões de regex para o pathname da URL que excluem URLs correspondentes do rastreamento. Por exemplo, se você definir "excludePaths": ["blog/.*"] para a URL base firecrawl.dev, quaisquer resultados que correspondam a esse padrão serão excluídos, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

ignoreQueryParameters

boolean

padrão:false

Evita fazer scraping novamente do mesmo caminho quando a URL difere apenas nos parâmetros de consulta (ou não tem parâmetros).

ignoreRobotsTxt

boolean

padrão:false

Ignora as regras do robots.txt do site. Disponível apenas no Enterprise — entre em contato com support@firecrawl.com para habilitar.

includePaths

string[]

Padrões de regex para paths de URL que definem quais URLs serão incluídas no crawl. Apenas os paths que corresponderem aos padrões especificados serão incluídos na resposta. Observação: a URL inicial também é validada em relação a esses padrões — se ela não corresponder, o crawl pode retornar zero páginas. Por exemplo, se você configurar "includePaths": ["blog/.*"] tendo como URL base firecrawl.dev/blog, apenas as páginas em /blog/ serão incluídas nos resultados, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

limit

integer

padrão:10000

Número máximo de páginas a rastrear. O limite padrão é 10.000.

maxConcurrency

integer

Número máximo de raspagens concorrentes. Este parâmetro permite definir um limite de concorrência para este rastreamento. Se não for especificado, o rastreamento seguirá o limite de concorrência da sua equipe.

maxDiscoveryDepth

integer

Profundidade máxima de crawl com base na ordem de descoberta. O site raiz e as páginas do sitemap têm profundidade de descoberta igual a 0. Por exemplo, se você definir o valor como 1 e usar sitemap: 'skip', o crawl cobrirá apenas a URL fornecida e as URLs vinculadas a partir dessa página.

prompt

string

Um prompt em linguagem natural para gerar as opções do crawler (todos os parâmetros abaixo). Parâmetros definidos explicitamente substituem os equivalentes gerados.

regexOnFullURL

boolean

padrão:false

Quando definido como true, os padrões de regex de includePaths e excludePaths serão comparados com a URL completa (incluindo parâmetros de consulta/query strings), em vez de apenas com o pathname da URL. Útil quando você precisa filtrar URLs com base em query strings.

robotsUserAgent

string

String de User-Agent personalizada para a avaliação do robots.txt. Quando definida, o robots.txt é buscado com esse User-Agent, e as regras de permissão/bloqueio são verificadas com base nele em vez do padrão. Disponível apenas no Enterprise — entre em contato com support@firecrawl.com para habilitar.

scrapeOptions

object

Show child attributes

sitemap

enum<string>

padrão:include

Modo de uso do sitemap durante o rastreamento. Se você definir como "skip", o crawler vai ignorar o sitemap do site e partir apenas da URL inicial, descobrindo novas páginas pelos links encontrados a partir dela. Se você definir como "only", o crawler vai rastrear somente as URLs presentes no sitemap (mais a URL inicial) e não vai seguir links encontrados no HTML.

Opções disponíveis:

skip,

include,

only

webhook

object

Objeto de especificação de webhook.

Show child attributes

zeroDataRetention

boolean

padrão:false

Se definido como true, isso garantirá que nenhum dado seja retido para este crawl. Para ativar esse recurso, entre em contato com help@firecrawl.dev

Resposta

Resposta bem-sucedida

id

string

success

boolean

url

string<uri>

Parse

Obter status do crawl