Crawl

Rastrear varias URL en función de las opciones

# Start a crawl: POST the crawl options as a JSON document to the v2 crawl endpoint.
# <token> and every "<string>" value are placeholders to replace before running;
# the other values shown are illustrative settings.
curl --request POST \
  --url https://api.firecrawl.dev/v2/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a new crawl job.
url = "https://api.firecrawl.dev/v2/crawl"

# Crawl options sent as the JSON request body. "<string>" and 123 are
# placeholders: replace them with real values before running.
payload = {
    "url": "<string>",
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreRobotsTxt": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDiscoveryDepth": 123,
    "prompt": "<string>",
    "regexOnFullURL": False,
    "robotsUserAgent": "<string>",
    "scrapeOptions": {
        "actions": [
            {
                "milliseconds": 2,
                "type": "wait"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "formats": ["markdown"],
        "headers": {},
        "includeTags": ["<string>"],
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "lockdown": False,
        "maxAge": 172800000,
        "minAge": 123,
        "mobile": False,
        "onlyCleanContent": False,
        "onlyMainContent": True,
        "parsers": ["pdf"],
        "proxy": "auto",
        "redactPII": False,
        "removeBase64Images": True,
        "skipTlsVerification": True,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 60000,
        "waitFor": 0
    },
    "sitemap": "include",
    "zeroDataRetention": False
}

# <token> is a placeholder for the API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Always pass a timeout: without one, requests waits indefinitely when the
# server never answers. 30 s mirrors the timeout of the PHP sample on this page.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (success and error payloads alike).
print(response.text)

// Crawl options; serialized to JSON and sent as the request body.
// '<string>' and 123 are placeholders to replace with real values.
const crawlRequest = {
  url: '<string>',
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreRobotsTxt: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDiscoveryDepth: 123,
  prompt: '<string>',
  regexOnFullURL: false,
  robotsUserAgent: '<string>',
  scrapeOptions: {
    actions: [
      {milliseconds: 2, type: 'wait'}
    ],
    blockAds: true,
    excludeTags: ['<string>'],
    formats: ['markdown'],
    headers: {},
    includeTags: ['<string>'],
    location: {
      country: 'US',
      languages: ['en-US']
    },
    lockdown: false,
    maxAge: 172800000,
    minAge: 123,
    mobile: false,
    onlyCleanContent: false,
    onlyMainContent: true,
    parsers: ['pdf'],
    proxy: 'auto',
    redactPII: false,
    removeBase64Images: true,
    skipTlsVerification: true,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 60000,
    waitFor: 0
  },
  sitemap: 'include',
  zeroDataRetention: false
};

// fetch() settings: POST with bearer auth and a JSON body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(crawlRequest)
};

// Send the request and log the parsed JSON response; any failure along the
// way (network, JSON parsing) is logged with console.error instead.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v2/crawl', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Crawl options sent as the JSON request body. '<string>' and 123 are
// placeholders to replace with real values.
$payload = [
  'url' => '<string>',
  'allowExternalLinks' => false,
  'allowSubdomains' => false,
  'crawlEntireDomain' => false,
  'delay' => 123,
  'excludePaths' => ['<string>'],
  'ignoreQueryParameters' => false,
  'ignoreRobotsTxt' => false,
  'includePaths' => ['<string>'],
  'limit' => 10000,
  'maxConcurrency' => 123,
  'maxDiscoveryDepth' => 123,
  'prompt' => '<string>',
  'regexOnFullURL' => false,
  'robotsUserAgent' => '<string>',
  'scrapeOptions' => [
    'actions' => [
      ['milliseconds' => 2, 'type' => 'wait']
    ],
    'blockAds' => true,
    'excludeTags' => ['<string>'],
    'formats' => ['markdown'],
    // json_encode() turns an empty PHP array into the JSON array [], so use an
    // empty object to send "headers": {} like the other samples on this page.
    'headers' => new stdClass(),
    'includeTags' => ['<string>'],
    'location' => [
      'country' => 'US',
      'languages' => ['en-US']
    ],
    'lockdown' => false,
    'maxAge' => 172800000,
    'minAge' => 123,
    'mobile' => false,
    'onlyCleanContent' => false,
    'onlyMainContent' => true,
    'parsers' => ['pdf'],
    'proxy' => 'auto',
    'redactPII' => false,
    'removeBase64Images' => true,
    'skipTlsVerification' => true,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => ['<string>'],
      'blockedTlds' => ['<string>'],
      'riskScoreThreshold' => 75,
      'whitelist' => ['<string>']
    ],
    'timeout' => 60000,
    'waitFor' => 0
  ],
  'sitemap' => 'include',
  'zeroDataRetention' => false
];

$curl = curl_init();

// Configure a POST to the crawl endpoint that returns the response body as a
// string instead of echoing it directly.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v2/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
// Read the error before closing the handle.
$err = curl_error($curl);

curl_close($curl);

// Print either the transport error or the raw response body.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the crawl request and prints the raw response body.
// Each step's error is reported and stops the program instead of being
// discarded.
func main() {
	url := "https://api.firecrawl.dev/v2/crawl"

	// JSON request body as a raw string literal (same bytes as the escaped
	// form). "<string>" and 123 are placeholders to replace with real values.
	payload := strings.NewReader(`{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; touching res.Body would panic.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))
}

// Start a crawl with Unirest: POST the JSON options below to the v2 crawl
// endpoint and receive the response body as a String.
// <token> and the "<string>" values are placeholders to replace before running.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v2/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint that starts a new crawl job.
endpoint = URI("https://api.firecrawl.dev/v2/crawl")

# TLS-enabled connection to the API host.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# JSON request body, kept verbatim in a non-interpolating heredoc; chomp drops
# the trailing newline the heredoc adds. "<string>" and 123 are placeholders.
crawl_options = <<'JSON'.chomp
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
JSON

# Build the authenticated POST carrying the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = crawl_options

# Send the request and print the raw response body.
puts client.request(post).read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

crawl

Rastrear varias URL en función de las opciones

# Start a crawl: POST the crawl options as a JSON document to the v2 crawl endpoint.
# <token> and every "<string>" value are placeholders to replace before running;
# the other values shown are illustrative settings.
curl --request POST \
  --url https://api.firecrawl.dev/v2/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a new crawl job.
url = "https://api.firecrawl.dev/v2/crawl"

# Crawl options sent as the JSON request body. "<string>" and 123 are
# placeholders: replace them with real values before running.
payload = {
    "url": "<string>",
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreRobotsTxt": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDiscoveryDepth": 123,
    "prompt": "<string>",
    "regexOnFullURL": False,
    "robotsUserAgent": "<string>",
    "scrapeOptions": {
        "actions": [
            {
                "milliseconds": 2,
                "type": "wait"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "formats": ["markdown"],
        "headers": {},
        "includeTags": ["<string>"],
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "lockdown": False,
        "maxAge": 172800000,
        "minAge": 123,
        "mobile": False,
        "onlyCleanContent": False,
        "onlyMainContent": True,
        "parsers": ["pdf"],
        "proxy": "auto",
        "redactPII": False,
        "removeBase64Images": True,
        "skipTlsVerification": True,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 60000,
        "waitFor": 0
    },
    "sitemap": "include",
    "zeroDataRetention": False
}

# <token> is a placeholder for the API token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# Always pass a timeout: without one, requests waits indefinitely when the
# server never answers. 30 s mirrors the timeout of the PHP sample on this page.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body (success and error payloads alike).
print(response.text)

// Crawl options; serialized to JSON and sent as the request body.
// '<string>' and 123 are placeholders to replace with real values.
const crawlRequest = {
  url: '<string>',
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreRobotsTxt: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDiscoveryDepth: 123,
  prompt: '<string>',
  regexOnFullURL: false,
  robotsUserAgent: '<string>',
  scrapeOptions: {
    actions: [
      {milliseconds: 2, type: 'wait'}
    ],
    blockAds: true,
    excludeTags: ['<string>'],
    formats: ['markdown'],
    headers: {},
    includeTags: ['<string>'],
    location: {
      country: 'US',
      languages: ['en-US']
    },
    lockdown: false,
    maxAge: 172800000,
    minAge: 123,
    mobile: false,
    onlyCleanContent: false,
    onlyMainContent: true,
    parsers: ['pdf'],
    proxy: 'auto',
    redactPII: false,
    removeBase64Images: true,
    skipTlsVerification: true,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 60000,
    waitFor: 0
  },
  sitemap: 'include',
  zeroDataRetention: false
};

// fetch() settings: POST with bearer auth and a JSON body.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(crawlRequest)
};

// Send the request and log the parsed JSON response; any failure along the
// way (network, JSON parsing) is logged with console.error instead.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v2/crawl', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Crawl options sent as the JSON request body. '<string>' and 123 are
// placeholders to replace with real values.
$payload = [
  'url' => '<string>',
  'allowExternalLinks' => false,
  'allowSubdomains' => false,
  'crawlEntireDomain' => false,
  'delay' => 123,
  'excludePaths' => ['<string>'],
  'ignoreQueryParameters' => false,
  'ignoreRobotsTxt' => false,
  'includePaths' => ['<string>'],
  'limit' => 10000,
  'maxConcurrency' => 123,
  'maxDiscoveryDepth' => 123,
  'prompt' => '<string>',
  'regexOnFullURL' => false,
  'robotsUserAgent' => '<string>',
  'scrapeOptions' => [
    'actions' => [
      ['milliseconds' => 2, 'type' => 'wait']
    ],
    'blockAds' => true,
    'excludeTags' => ['<string>'],
    'formats' => ['markdown'],
    // json_encode() turns an empty PHP array into the JSON array [], so use an
    // empty object to send "headers": {} like the other samples on this page.
    'headers' => new stdClass(),
    'includeTags' => ['<string>'],
    'location' => [
      'country' => 'US',
      'languages' => ['en-US']
    ],
    'lockdown' => false,
    'maxAge' => 172800000,
    'minAge' => 123,
    'mobile' => false,
    'onlyCleanContent' => false,
    'onlyMainContent' => true,
    'parsers' => ['pdf'],
    'proxy' => 'auto',
    'redactPII' => false,
    'removeBase64Images' => true,
    'skipTlsVerification' => true,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => ['<string>'],
      'blockedTlds' => ['<string>'],
      'riskScoreThreshold' => 75,
      'whitelist' => ['<string>']
    ],
    'timeout' => 60000,
    'waitFor' => 0
  ],
  'sitemap' => 'include',
  'zeroDataRetention' => false
];

$curl = curl_init();

// Configure a POST to the crawl endpoint that returns the response body as a
// string instead of echoing it directly.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v2/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
// Read the error before closing the handle.
$err = curl_error($curl);

curl_close($curl);

// Print either the transport error or the raw response body.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the crawl request and prints the raw response body.
// Each step's error is reported and stops the program instead of being
// discarded.
func main() {
	url := "https://api.firecrawl.dev/v2/crawl"

	// JSON request body as a raw string literal (same bytes as the escaped
	// form). "<string>" and 123 are placeholders to replace with real values.
	payload := strings.NewReader(`{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; touching res.Body would panic.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(string(body))
}

// Start a crawl with Unirest: POST the JSON options below to the v2 crawl
// endpoint and receive the response body as a String.
// <token> and the "<string>" values are placeholders to replace before running.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v2/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreRobotsTxt\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDiscoveryDepth\": 123,\n  \"prompt\": \"<string>\",\n  \"regexOnFullURL\": false,\n  \"robotsUserAgent\": \"<string>\",\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"milliseconds\": 2,\n        \"type\": \"wait\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"formats\": [\n      \"markdown\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"lockdown\": false,\n    \"maxAge\": 172800000,\n    \"minAge\": 123,\n    \"mobile\": false,\n    \"onlyCleanContent\": false,\n    \"onlyMainContent\": true,\n    \"parsers\": [\n      \"pdf\"\n    ],\n    \"proxy\": \"auto\",\n    \"redactPII\": false,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": true,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 60000,\n    \"waitFor\": 0\n  },\n  \"sitemap\": \"include\",\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Endpoint that starts a new crawl job.
endpoint = URI("https://api.firecrawl.dev/v2/crawl")

# TLS-enabled connection to the API host.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# JSON request body, kept verbatim in a non-interpolating heredoc; chomp drops
# the trailing newline the heredoc adds. "<string>" and 123 are placeholders.
crawl_options = <<'JSON'.chomp
{
  "url": "<string>",
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreRobotsTxt": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDiscoveryDepth": 123,
  "prompt": "<string>",
  "regexOnFullURL": false,
  "robotsUserAgent": "<string>",
  "scrapeOptions": {
    "actions": [
      {
        "milliseconds": 2,
        "type": "wait"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "formats": [
      "markdown"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "lockdown": false,
    "maxAge": 172800000,
    "minAge": 123,
    "mobile": false,
    "onlyCleanContent": false,
    "onlyMainContent": true,
    "parsers": [
      "pdf"
    ],
    "proxy": "auto",
    "redactPII": false,
    "removeBase64Images": true,
    "skipTlsVerification": true,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 60000,
    "waitFor": 0
  },
  "sitemap": "include",
  "zeroDataRetention": false
}
JSON

# Build the authenticated POST carrying the JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = crawl_options

# Send the request and print the raw response body.
puts client.request(post).read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

¿Eres un agente de IA que necesita una clave de API de Firecrawl? Consulta firecrawl.dev/agent-onboarding/SKILL.md para obtener instrucciones para la incorporación automatizada.

Autorizaciones

Authorization

string

header

requerido

Encabezado de autenticación Bearer con el formato Bearer <token>, donde <token> es tu token de autenticación.

Cuerpo

application/json

url

string<uri>

requerido

La URL base desde la que se iniciará el rastreo

allowExternalLinks

boolean

predeterminado:false

Permite que el rastreador siga enlaces a sitios web externos.

allowSubdomains

boolean

predeterminado:false

Permite que el rastreador siga enlaces a subdominios del dominio principal.

crawlEntireDomain

boolean

predeterminado:false

Permite que el crawler siga enlaces internos a URLs hermanas o padre, no solo rutas hijas.

false: Solo rastrea URLs más profundas (hijas). → p. ej. /features/feature-1 → /features/feature-1/tips ✅ → No seguirá /pricing ni / ❌

true: Rastrea cualquier enlace interno, incluidos hermanos y padres. → p. ej. /features/feature-1 → /pricing, /, etc. ✅

Usa true para lograr una cobertura interna más amplia, más allá de las rutas anidadas.

delay

number

Retraso, en segundos, entre scrapes. Esto ayuda a respetar los límites de tasa del sitio web. Al configurar esto, la concurrencia se fuerza a 1.

excludePaths

string[]

Patrones de expresiones regulares para las rutas (pathname) de URL que excluyen del rastreo las URLs que coincidan con ellos. Por ejemplo, si configuras "excludePaths": ["blog/.*"] para la URL base firecrawl.dev, se excluirán todos los resultados que coincidan con ese patrón, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

ignoreQueryParameters

boolean

predeterminado:false

Evita volver a scrapear la misma ruta cuando solo cambian los parámetros de consulta (o cuando no los tiene).

ignoreRobotsTxt

boolean

predeterminado:false

Ignora las reglas de robots.txt del sitio web. Solo disponible para Enterprise; contacta con support@firecrawl.com para habilitarlo.

includePaths

string[]

Patrones regex de rutas (pathname) de URL que determinan qué URLs se incluyen en el rastreo. Solo las rutas que coincidan con los patrones especificados se incluirán en la respuesta. Nota: la URL inicial también se comprueba con estos patrones; si no coincide, el rastreo puede devolver 0 páginas. Por ejemplo, si configuras "includePaths": ["blog/.*"] para la URL base firecrawl.dev/blog, solo se incluirán en los resultados las páginas bajo /blog/, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

limit

integer

predeterminado:10000

Número máximo de páginas a rastrear. El límite por defecto es 10.000.

maxConcurrency

integer

Número máximo de scrapes simultáneos. Este parámetro te permite establecer un límite de concurrencia para este rastreo. Si no se especifica, el rastreo respeta el límite de concurrencia de tu equipo.

maxDiscoveryDepth

integer

Profundidad máxima de rastreo basada en el orden de descubrimiento. El sitio raíz y las páginas incluidas en el sitemap tienen una profundidad de descubrimiento de 0. Por ejemplo, si la estableces en 1 y configuras sitemap: 'skip', solo se rastreará la URL introducida y todas las URL que estén enlazadas en esa página.

prompt

string

Un prompt que se usa para generar las opciones del crawler (todos los parámetros que se indican a continuación) a partir de lenguaje natural. Los parámetros establecidos explícitamente tendrán prioridad sobre los equivalentes generados.

regexOnFullURL

boolean

predeterminado:false

Cuando su valor es true, los patrones de expresiones regulares (regex) de includePaths y excludePaths se comparan con la URL completa (incluidos los parámetros de consulta), en lugar de solo con el pathname de la URL. Resulta útil cuando necesitas filtrar URLs en función de los parámetros de consulta.

robotsUserAgent

string

Cadena User-Agent personalizada para evaluar robots.txt. Cuando se configura, robots.txt se obtiene con este User-Agent y las reglas de allow/disallow se aplican en función de él en lugar del predeterminado. Solo disponible para Enterprise; contacta con support@firecrawl.com para habilitarlo.

scrapeOptions

object

Mostrar atributos secundarios

sitemap

enum<string>

predeterminado:include

Modo de sitemap al rastrear. Si lo configuras en "skip", el crawler ignorará el sitemap del sitio web y solo rastreará la URL indicada y descubrirá páginas a partir de ahí. Si lo configuras en "only", el crawler solo rastreará las URLs del sitemap (más la URL inicial) y no descubrirá enlaces desde el HTML.

Opciones disponibles:

skip,

include,

only

webhook

object

Un objeto de especificación de webhook.

Mostrar atributos secundarios

zeroDataRetention

boolean

predeterminado:false

Si se establece en true, se desactivará la retención de datos para este rastreo. Para habilitar esta función, ponte en contacto con help@firecrawl.dev.

Respuesta

Respuesta correcta

id

string

success

boolean

url

string<uri>

Procesar

Obtener el estado del rastreo

Uso de la API

Endpoints de búsqueda

Endpoints de scraping

Endpoints de interacción

Endpoints de Research Index

Endpoints de mapeo

Endpoints de procesamiento

Endpoints de rastreo

Endpoints de supervisión

Endpoints de feedback

Endpoints de depuración con agentes

Endpoints de la cuenta

Cargas útiles de webhook

Integración de socios

Autorizaciones

Cuerpo

Respuesta