Crawl

Rastreia várias URLs de acordo com as opções informadas

# Send a POST request with a JSON body to the Firecrawl v1 crawl endpoint.
# <token> and the "<string>" values below are placeholders to be replaced.
curl --request POST \
  --url https://api.firecrawl.dev/v1/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowBackwardLinks": false,
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreSitemap": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDepth": 10,
  "maxDiscoveryDepth": 123,
  "regexOnFullURL": false,
  "scrapeOptions": {
    "actions": [
      {
        "type": "wait",
        "milliseconds": 2,
        "selector": "#my-element"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "jsonOptions": {
      "prompt": "<string>",
      "schema": {},
      "systemPrompt": "<string>"
    },
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "maxAge": 0,
    "mobile": false,
    "onlyMainContent": true,
    "parsePDF": true,
    "removeBase64Images": true,
    "skipTlsVerification": false,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
      "modes": [],
      "prompt": "<string>",
      "schema": {},
      "tag": null
    },
    "formats": [
      "markdown"
    ]
  },
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a new crawl job.
url = "https://api.firecrawl.dev/v1/crawl"

# JSON request body; the "<string>" values are placeholders to replace.
payload = {
    "url": "<string>",
    "allowBackwardLinks": False,
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreSitemap": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDepth": 10,
    "maxDiscoveryDepth": 123,
    "regexOnFullURL": False,
    "scrapeOptions": {
        "actions": [
            {
                "type": "wait",
                "milliseconds": 2,
                "selector": "#my-element"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "headers": {},
        "includeTags": ["<string>"],
        "jsonOptions": {
            "prompt": "<string>",
            "schema": {},
            "systemPrompt": "<string>"
        },
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "maxAge": 0,
        "mobile": False,
        "onlyMainContent": True,
        "parsePDF": True,
        "removeBase64Images": True,
        "skipTlsVerification": False,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 30000,
        "waitFor": 0,
        "changeTrackingOptions": {
            "modes": [],
            "prompt": "<string>",
            "schema": {},
            "tag": None
        },
        "formats": ["markdown"]
    },
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Without a timeout, requests waits indefinitely on an unresponsive server,
# so bound the wait (value is in seconds).
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body, whether it is the success or the error JSON.
print(response.text)

// Request body for the crawl job; key order is kept so the serialized JSON
// is the same as before.
const crawlRequest = {
  url: '<string>',
  allowBackwardLinks: false,
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreSitemap: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDepth: 10,
  maxDiscoveryDepth: 123,
  regexOnFullURL: false,
  scrapeOptions: {
    actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
    location: {country: 'US', languages: ['en-US']},
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
    formats: ['markdown']
  },
  zeroDataRetention: false
};

// fetch() settings: POST with bearer auth and the JSON-encoded body.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(crawlRequest)
};

// Submit the request, then log the parsed JSON reply or the failure.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/crawl', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Starts a crawl job by POSTing a JSON body to the Firecrawl v1 crawl endpoint,
// then echoes either the raw response or the cURL error message.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'allowBackwardLinks' => false,
    'allowExternalLinks' => false,
    'allowSubdomains' => false,
    'crawlEntireDomain' => false,
    'delay' => 123,
    'excludePaths' => [
      '<string>'
    ],
    'ignoreQueryParameters' => false,
    'ignoreSitemap' => false,
    'includePaths' => [
      '<string>'
    ],
    'limit' => 10000,
    'maxConcurrency' => 123,
    'maxDepth' => 10,
    'maxDiscoveryDepth' => 123,
    'regexOnFullURL' => false,
    'scrapeOptions' => [
      'actions' => [
        [
          'type' => 'wait',
          'milliseconds' => 2,
          'selector' => '#my-element'
        ]
      ],
      'blockAds' => true,
      'excludeTags' => [
        '<string>'
      ],
      // json_encode() turns an empty PHP array into [], but this field is a
      // JSON object ({}) in the request body, so use an empty stdClass.
      'headers' => new stdClass(),
      'includeTags' => [
        '<string>'
      ],
      'jsonOptions' => [
        'prompt' => '<string>',
        // Empty JSON object, not an empty list (see 'headers').
        'schema' => new stdClass(),
        'systemPrompt' => '<string>'
      ],
      'location' => [
        'country' => 'US',
        'languages' => [
          'en-US'
        ]
      ],
      'maxAge' => 0,
      'mobile' => false,
      'onlyMainContent' => true,
      'parsePDF' => true,
      'removeBase64Images' => true,
      'skipTlsVerification' => false,
      'storeInCache' => true,
      'threatProtection' => [
        'blacklist' => [
          '<string>'
        ],
        'blockedTlds' => [
          '<string>'
        ],
        'riskScoreThreshold' => 75,
        'whitelist' => [
          '<string>'
        ]
      ],
      'timeout' => 30000,
      'waitFor' => 0,
      'changeTrackingOptions' => [
        // 'modes' really is a list, so an empty PHP array ([]) is correct here.
        'modes' => [],
        'prompt' => '<string>',
        // Empty JSON object, not an empty list (see 'headers').
        'schema' => new stdClass(),
        'tag' => null
      ],
      'formats' => [
        'markdown'
      ]
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// Run the request; the error string is read before the handle is closed.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main POSTs a JSON crawl request to the Firecrawl v1 crawl endpoint and
// prints the raw response body. Each failure is reported and ends the program
// early instead of being discarded.
func main() {

	url := "https://api.firecrawl.dev/v1/crawl"

	// JSON request body; the "<string>" values are placeholders to replace.
	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so touching res.Body here would panic.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body for the crawl job, kept as one escaped string literal.
String requestBody = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}";

// POST the body to the v1 crawl endpoint and read the reply as a String.
HttpResponse<String> response = Unirest
  .post("https://api.firecrawl.dev/v1/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that accepts new crawl jobs.
endpoint = URI("https://api.firecrawl.dev/v1/crawl")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST with its auth and content-type headers supplied up front.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}"

# Send the request and print the response body.
response = client.request(post)
puts response.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

crawl

Rastrear várias URLs de acordo com opções

# Send a POST request with a JSON body to the Firecrawl v1 crawl endpoint.
# <token> and the "<string>" values below are placeholders to be replaced.
curl --request POST \
  --url https://api.firecrawl.dev/v1/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowBackwardLinks": false,
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreSitemap": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDepth": 10,
  "maxDiscoveryDepth": 123,
  "regexOnFullURL": false,
  "scrapeOptions": {
    "actions": [
      {
        "type": "wait",
        "milliseconds": 2,
        "selector": "#my-element"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "jsonOptions": {
      "prompt": "<string>",
      "schema": {},
      "systemPrompt": "<string>"
    },
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "maxAge": 0,
    "mobile": false,
    "onlyMainContent": true,
    "parsePDF": true,
    "removeBase64Images": true,
    "skipTlsVerification": false,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
      "modes": [],
      "prompt": "<string>",
      "schema": {},
      "tag": null
    },
    "formats": [
      "markdown"
    ]
  },
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a new crawl job.
url = "https://api.firecrawl.dev/v1/crawl"

# JSON request body; the "<string>" values are placeholders to replace.
payload = {
    "url": "<string>",
    "allowBackwardLinks": False,
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreSitemap": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDepth": 10,
    "maxDiscoveryDepth": 123,
    "regexOnFullURL": False,
    "scrapeOptions": {
        "actions": [
            {
                "type": "wait",
                "milliseconds": 2,
                "selector": "#my-element"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "headers": {},
        "includeTags": ["<string>"],
        "jsonOptions": {
            "prompt": "<string>",
            "schema": {},
            "systemPrompt": "<string>"
        },
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "maxAge": 0,
        "mobile": False,
        "onlyMainContent": True,
        "parsePDF": True,
        "removeBase64Images": True,
        "skipTlsVerification": False,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 30000,
        "waitFor": 0,
        "changeTrackingOptions": {
            "modes": [],
            "prompt": "<string>",
            "schema": {},
            "tag": None
        },
        "formats": ["markdown"]
    },
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Without a timeout, requests waits indefinitely on an unresponsive server,
# so bound the wait (value is in seconds).
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body, whether it is the success or the error JSON.
print(response.text)

// Request body for the crawl job; key order is kept so the serialized JSON
// is the same as before.
const crawlRequest = {
  url: '<string>',
  allowBackwardLinks: false,
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreSitemap: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDepth: 10,
  maxDiscoveryDepth: 123,
  regexOnFullURL: false,
  scrapeOptions: {
    actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
    location: {country: 'US', languages: ['en-US']},
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
    formats: ['markdown']
  },
  zeroDataRetention: false
};

// fetch() settings: POST with bearer auth and the JSON-encoded body.
const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(crawlRequest)
};

// Submit the request, then log the parsed JSON reply or the failure.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/crawl', options);
    console.log(await res.json());
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Starts a crawl job by POSTing a JSON body to the Firecrawl v1 crawl endpoint,
// then echoes either the raw response or the cURL error message.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'allowBackwardLinks' => false,
    'allowExternalLinks' => false,
    'allowSubdomains' => false,
    'crawlEntireDomain' => false,
    'delay' => 123,
    'excludePaths' => [
      '<string>'
    ],
    'ignoreQueryParameters' => false,
    'ignoreSitemap' => false,
    'includePaths' => [
      '<string>'
    ],
    'limit' => 10000,
    'maxConcurrency' => 123,
    'maxDepth' => 10,
    'maxDiscoveryDepth' => 123,
    'regexOnFullURL' => false,
    'scrapeOptions' => [
      'actions' => [
        [
          'type' => 'wait',
          'milliseconds' => 2,
          'selector' => '#my-element'
        ]
      ],
      'blockAds' => true,
      'excludeTags' => [
        '<string>'
      ],
      // json_encode() turns an empty PHP array into [], but this field is a
      // JSON object ({}) in the request body, so use an empty stdClass.
      'headers' => new stdClass(),
      'includeTags' => [
        '<string>'
      ],
      'jsonOptions' => [
        'prompt' => '<string>',
        // Empty JSON object, not an empty list (see 'headers').
        'schema' => new stdClass(),
        'systemPrompt' => '<string>'
      ],
      'location' => [
        'country' => 'US',
        'languages' => [
          'en-US'
        ]
      ],
      'maxAge' => 0,
      'mobile' => false,
      'onlyMainContent' => true,
      'parsePDF' => true,
      'removeBase64Images' => true,
      'skipTlsVerification' => false,
      'storeInCache' => true,
      'threatProtection' => [
        'blacklist' => [
          '<string>'
        ],
        'blockedTlds' => [
          '<string>'
        ],
        'riskScoreThreshold' => 75,
        'whitelist' => [
          '<string>'
        ]
      ],
      'timeout' => 30000,
      'waitFor' => 0,
      'changeTrackingOptions' => [
        // 'modes' really is a list, so an empty PHP array ([]) is correct here.
        'modes' => [],
        'prompt' => '<string>',
        // Empty JSON object, not an empty list (see 'headers').
        'schema' => new stdClass(),
        'tag' => null
      ],
      'formats' => [
        'markdown'
      ]
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// Run the request; the error string is read before the handle is closed.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main POSTs a JSON crawl request to the Firecrawl v1 crawl endpoint and
// prints the raw response body. Each failure is reported and ends the program
// early instead of being discarded.
func main() {

	url := "https://api.firecrawl.dev/v1/crawl"

	// JSON request body; the "<string>" values are placeholders to replace.
	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so touching res.Body here would panic.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

// JSON request body for the crawl job, kept as one escaped string literal.
String requestBody = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}";

// POST the body to the v1 crawl endpoint and read the reply as a String.
HttpResponse<String> response = Unirest
  .post("https://api.firecrawl.dev/v1/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();

require 'uri'
require 'net/http'

# Endpoint that accepts new crawl jobs.
endpoint = URI("https://api.firecrawl.dev/v1/crawl")

# HTTPS client bound to the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Build the POST with its auth and content-type headers supplied up front.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}"

# Send the request and print the response body.
response = client.request(post)
puts response.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

Observação: uma nova versão v2 desta API já está disponível com recursos e desempenho aprimorados.

Autorizações

Authorization

string

header

obrigatório

Cabeçalho de autenticação Bearer no formato Bearer <token>, em que <token> é o seu token de autenticação.

Corpo

application/json

url

string<uri>

obrigatório

A URL base de onde o rastreamento será iniciado

allowBackwardLinks

boolean

padrão:false

obsoleto

⚠️ DESCONTINUADO: Use "crawlEntireDomain" em vez disso. Permite que o rastreador siga links internos para URLs irmãs ou URLs pai, não apenas para caminhos filhos.

allowExternalLinks

boolean

padrão:false

Permite que o rastreador siga links para sites externos.

allowSubdomains

boolean

padrão:false

Permite que o crawler rastreie links que apontam para subdomínios do domínio principal.

crawlEntireDomain

boolean

padrão:false

Permite que o rastreador siga links internos para URLs no mesmo nível (irmãs) ou URLs pai, não apenas caminhos filhos.

false: Somente rastreia URLs mais profundas (filhas). → ex.: /features/feature-1 → /features/feature-1/tips ✅ → Não seguirá /pricing ou / ❌

true: Rastreia qualquer link interno, incluindo URLs no mesmo nível e URLs pai. → ex.: /features/feature-1 → /pricing, /, etc. ✅

Use true para obter uma cobertura interna mais ampla, além de caminhos aninhados.

delay

number

Intervalo, em segundos, entre as coletas. Isso ajuda a respeitar os limites de requisições dos sites.

excludePaths

string[]

Padrões de regex para o pathname da URL que excluem URLs correspondentes do crawl. Por exemplo, se você definir "excludePaths": ["blog/.*"] para a URL base firecrawl.dev, quaisquer resultados que corresponderem a esse padrão serão excluídos, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

ignoreQueryParameters

boolean

padrão:false

Não reextraia o mesmo path com parâmetros de consulta diferentes (ou sem nenhum)

ignoreSitemap

boolean

padrão:false

Ignorar o sitemap do site durante o rastreamento

includePaths

string[]

Padrões de regex para o pathname da URL que definem quais URLs serão incluídas no rastreamento. Somente os caminhos que corresponderem aos padrões especificados serão incluídos na resposta. Por exemplo, se você definir "includePaths": ["blog/.*"] para a URL base firecrawl.dev, apenas resultados que correspondam a esse padrão serão incluídos, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

limit

integer

padrão:10000

Número máximo de páginas a serem rastreadas. O limite padrão é 10.000.

maxConcurrency

integer

Número máximo de raspagens simultâneas. Esse parâmetro permite definir um limite de concorrência para este rastreamento. Se não for especificado, o rastreamento usará o limite de concorrência da sua equipe.

maxDepth

integer

padrão:10

Profundidade absoluta máxima de rastreamento a partir da base da URL informada. Basicamente, é o número máximo de barras (/) que o pathname de uma URL coletada pode conter.

maxDiscoveryDepth

integer

Profundidade máxima de rastreamento com base na ordem de descoberta. O site raiz e as páginas do sitemap têm profundidade de descoberta igual a 0. Por exemplo, se você definir como 1 e ativar ignoreSitemap, você só irá rastrear a URL informada e todas as URLs que estiverem linkadas nessa página.

regexOnFullURL

boolean

padrão:false

Quando configurado como true, os padrões de regex em includePaths e excludePaths são comparados com a URL completa (incluindo parâmetros de query), em vez de apenas com o caminho (pathname) da URL. Útil quando você precisa filtrar URLs com base em query strings.

scrapeOptions

object

Mostrar atributos filhos

webhook

object

Objeto de especificação de webhook.

Mostrar atributos filhos

zeroDataRetention

boolean

padrão:false

Se definido como true, não haverá retenção de dados para este crawl. Para habilitar esse recurso, entre em contato pelo e-mail help@firecrawl.dev.

Resposta

Resposta bem-sucedida

id

string

success

boolean

url

string<uri>

Obter erros de scraping em lote

Obter status do crawl

Usando a API

Endpoints de scraping

Endpoints de rastreamento

Endpoints de mapeamento

Endpoints de busca

Endpoints de extração

Endpoints de conta

Autorizações

Corpo

Resposta