Crawl

Rastrear varias URL en función de opciones

# Start a crawl job: POST the example option set as JSON to the v1 crawl
# endpoint, authenticating with a bearer token. Replace <token> and the
# "<string>" placeholders with real values before running.
curl --request POST \
  --url https://api.firecrawl.dev/v1/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowBackwardLinks": false,
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreSitemap": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDepth": 10,
  "maxDiscoveryDepth": 123,
  "regexOnFullURL": false,
  "scrapeOptions": {
    "actions": [
      {
        "type": "wait",
        "milliseconds": 2,
        "selector": "#my-element"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "jsonOptions": {
      "prompt": "<string>",
      "schema": {},
      "systemPrompt": "<string>"
    },
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "maxAge": 0,
    "mobile": false,
    "onlyMainContent": true,
    "parsePDF": true,
    "removeBase64Images": true,
    "skipTlsVerification": false,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
      "modes": [],
      "prompt": "<string>",
      "schema": {},
      "tag": null
    },
    "formats": [
      "markdown"
    ]
  },
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a crawl job (v1 API).
url = "https://api.firecrawl.dev/v1/crawl"

# Example request body; every option is shown with a placeholder or sample value.
payload = {
    "url": "<string>",
    "allowBackwardLinks": False,
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreSitemap": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDepth": 10,
    "maxDiscoveryDepth": 123,
    "regexOnFullURL": False,
    "scrapeOptions": {
        "actions": [
            {
                "type": "wait",
                "milliseconds": 2,
                "selector": "#my-element"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "headers": {},
        "includeTags": ["<string>"],
        "jsonOptions": {
            "prompt": "<string>",
            "schema": {},
            "systemPrompt": "<string>"
        },
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "maxAge": 0,
        "mobile": False,
        "onlyMainContent": True,
        "parsePDF": True,
        "removeBase64Images": True,
        "skipTlsVerification": False,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 30000,
        "waitFor": 0,
        "changeTrackingOptions": {
            "modes": [],
            "prompt": "<string>",
            "schema": {},
            "tag": None
        },
        "formats": ["markdown"]
    },
    "zeroDataRetention": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so an unresponsive server would block
# this script forever; cap the wait at 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Example request body for the v1 crawl endpoint; every option is shown with a
// placeholder or sample value.
const crawlRequest = {
  url: '<string>',
  allowBackwardLinks: false,
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreSitemap: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDepth: 10,
  maxDiscoveryDepth: 123,
  regexOnFullURL: false,
  scrapeOptions: {
    actions: [
      {
        type: 'wait',
        milliseconds: 2,
        selector: '#my-element'
      }
    ],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {
      prompt: '<string>',
      schema: {},
      systemPrompt: '<string>'
    },
    location: {
      country: 'US',
      languages: ['en-US']
    },
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {
      modes: [],
      prompt: '<string>',
      schema: {},
      tag: null
    },
    formats: ['markdown']
  },
  zeroDataRetention: false
};

// fetch() settings: JSON POST authenticated with a bearer token.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(crawlRequest)
};

// Send the request and log the parsed JSON response; any failure along the way
// (network error, unparsable body) is logged to stderr instead.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/crawl', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Starts a crawl job by POSTing the example option set to the v1 crawl
// endpoint, then prints the raw response body (or the cURL error message).

$curl = curl_init();

// Example request body. Fields that are empty JSON objects in the API example
// ("headers" and both "schema" fields) use stdClass: json_encode() serializes
// an empty PHP array as [] rather than {}, which would send the wrong type.
$payload = [
  'url' => '<string>',
  'allowBackwardLinks' => false,
  'allowExternalLinks' => false,
  'allowSubdomains' => false,
  'crawlEntireDomain' => false,
  'delay' => 123,
  'excludePaths' => ['<string>'],
  'ignoreQueryParameters' => false,
  'ignoreSitemap' => false,
  'includePaths' => ['<string>'],
  'limit' => 10000,
  'maxConcurrency' => 123,
  'maxDepth' => 10,
  'maxDiscoveryDepth' => 123,
  'regexOnFullURL' => false,
  'scrapeOptions' => [
    'actions' => [
      [
        'type' => 'wait',
        'milliseconds' => 2,
        'selector' => '#my-element'
      ]
    ],
    'blockAds' => true,
    'excludeTags' => ['<string>'],
    'headers' => new stdClass(),
    'includeTags' => ['<string>'],
    'jsonOptions' => [
      'prompt' => '<string>',
      'schema' => new stdClass(),
      'systemPrompt' => '<string>'
    ],
    'location' => [
      'country' => 'US',
      'languages' => ['en-US']
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => ['<string>'],
      'blockedTlds' => ['<string>'],
      'riskScoreThreshold' => 75,
      'whitelist' => ['<string>']
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
      // "modes" is a JSON array in the API example, so an empty PHP array is correct here.
      'modes' => [],
      'prompt' => '<string>',
      'schema' => new stdClass(),
      'tag' => null
    ],
    'formats' => ['markdown']
  ],
  'zeroDataRetention' => false
];

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// curl_error() returns a non-empty message only when the transfer failed.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the example crawl request to the v1 crawl endpoint and prints the
// raw response body. Each step's error is checked: on failure the error is
// printed and the program returns, so a nil request or response is never used.
func main() {

	url := "https://api.firecrawl.dev/v1/crawl"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Starts a crawl job: POSTs the example JSON option set to the v1 crawl
// endpoint with a bearer token and receives the response body as a String.
// Replace <token> and the "<string>" placeholders with real values.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Starts a crawl job: POSTs the example JSON option set to the v1 crawl
# endpoint over HTTPS and prints the response body.
endpoint = URI("https://api.firecrawl.dev/v1/crawl")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Headers are supplied at construction time instead of being assigned one by one.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}"

reply = client.request(post)
puts reply.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

crawl

Rastrear varias URL en función de opciones

# Start a crawl job: POST the example option set as JSON to the v1 crawl
# endpoint, authenticating with a bearer token. Replace <token> and the
# "<string>" placeholders with real values before running.
curl --request POST \
  --url https://api.firecrawl.dev/v1/crawl \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "allowBackwardLinks": false,
  "allowExternalLinks": false,
  "allowSubdomains": false,
  "crawlEntireDomain": false,
  "delay": 123,
  "excludePaths": [
    "<string>"
  ],
  "ignoreQueryParameters": false,
  "ignoreSitemap": false,
  "includePaths": [
    "<string>"
  ],
  "limit": 10000,
  "maxConcurrency": 123,
  "maxDepth": 10,
  "maxDiscoveryDepth": 123,
  "regexOnFullURL": false,
  "scrapeOptions": {
    "actions": [
      {
        "type": "wait",
        "milliseconds": 2,
        "selector": "#my-element"
      }
    ],
    "blockAds": true,
    "excludeTags": [
      "<string>"
    ],
    "headers": {},
    "includeTags": [
      "<string>"
    ],
    "jsonOptions": {
      "prompt": "<string>",
      "schema": {},
      "systemPrompt": "<string>"
    },
    "location": {
      "country": "US",
      "languages": [
        "en-US"
      ]
    },
    "maxAge": 0,
    "mobile": false,
    "onlyMainContent": true,
    "parsePDF": true,
    "removeBase64Images": true,
    "skipTlsVerification": false,
    "storeInCache": true,
    "threatProtection": {
      "blacklist": [
        "<string>"
      ],
      "blockedTlds": [
        "<string>"
      ],
      "riskScoreThreshold": 75,
      "whitelist": [
        "<string>"
      ]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
      "modes": [],
      "prompt": "<string>",
      "schema": {},
      "tag": null
    },
    "formats": [
      "markdown"
    ]
  },
  "zeroDataRetention": false
}
'

import requests

# Endpoint that starts a crawl job (v1 API).
url = "https://api.firecrawl.dev/v1/crawl"

# Example request body; every option is shown with a placeholder or sample value.
payload = {
    "url": "<string>",
    "allowBackwardLinks": False,
    "allowExternalLinks": False,
    "allowSubdomains": False,
    "crawlEntireDomain": False,
    "delay": 123,
    "excludePaths": ["<string>"],
    "ignoreQueryParameters": False,
    "ignoreSitemap": False,
    "includePaths": ["<string>"],
    "limit": 10000,
    "maxConcurrency": 123,
    "maxDepth": 10,
    "maxDiscoveryDepth": 123,
    "regexOnFullURL": False,
    "scrapeOptions": {
        "actions": [
            {
                "type": "wait",
                "milliseconds": 2,
                "selector": "#my-element"
            }
        ],
        "blockAds": True,
        "excludeTags": ["<string>"],
        "headers": {},
        "includeTags": ["<string>"],
        "jsonOptions": {
            "prompt": "<string>",
            "schema": {},
            "systemPrompt": "<string>"
        },
        "location": {
            "country": "US",
            "languages": ["en-US"]
        },
        "maxAge": 0,
        "mobile": False,
        "onlyMainContent": True,
        "parsePDF": True,
        "removeBase64Images": True,
        "skipTlsVerification": False,
        "storeInCache": True,
        "threatProtection": {
            "blacklist": ["<string>"],
            "blockedTlds": ["<string>"],
            "riskScoreThreshold": 75,
            "whitelist": ["<string>"]
        },
        "timeout": 30000,
        "waitFor": 0,
        "changeTrackingOptions": {
            "modes": [],
            "prompt": "<string>",
            "schema": {},
            "tag": None
        },
        "formats": ["markdown"]
    },
    "zeroDataRetention": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so an unresponsive server would block
# this script forever; cap the wait at 30 seconds.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body.
print(response.text)

// Example request body for the v1 crawl endpoint; every option is shown with a
// placeholder or sample value.
const crawlRequest = {
  url: '<string>',
  allowBackwardLinks: false,
  allowExternalLinks: false,
  allowSubdomains: false,
  crawlEntireDomain: false,
  delay: 123,
  excludePaths: ['<string>'],
  ignoreQueryParameters: false,
  ignoreSitemap: false,
  includePaths: ['<string>'],
  limit: 10000,
  maxConcurrency: 123,
  maxDepth: 10,
  maxDiscoveryDepth: 123,
  regexOnFullURL: false,
  scrapeOptions: {
    actions: [
      {
        type: 'wait',
        milliseconds: 2,
        selector: '#my-element'
      }
    ],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {
      prompt: '<string>',
      schema: {},
      systemPrompt: '<string>'
    },
    location: {
      country: 'US',
      languages: ['en-US']
    },
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {
      modes: [],
      prompt: '<string>',
      schema: {},
      tag: null
    },
    formats: ['markdown']
  },
  zeroDataRetention: false
};

// fetch() settings: JSON POST authenticated with a bearer token.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(crawlRequest)
};

// Send the request and log the parsed JSON response; any failure along the way
// (network error, unparsable body) is logged to stderr instead.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/crawl', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Starts a crawl job by POSTing the example option set to the v1 crawl
// endpoint, then prints the raw response body (or the cURL error message).

$curl = curl_init();

// Example request body. Fields that are empty JSON objects in the API example
// ("headers" and both "schema" fields) use stdClass: json_encode() serializes
// an empty PHP array as [] rather than {}, which would send the wrong type.
$payload = [
  'url' => '<string>',
  'allowBackwardLinks' => false,
  'allowExternalLinks' => false,
  'allowSubdomains' => false,
  'crawlEntireDomain' => false,
  'delay' => 123,
  'excludePaths' => ['<string>'],
  'ignoreQueryParameters' => false,
  'ignoreSitemap' => false,
  'includePaths' => ['<string>'],
  'limit' => 10000,
  'maxConcurrency' => 123,
  'maxDepth' => 10,
  'maxDiscoveryDepth' => 123,
  'regexOnFullURL' => false,
  'scrapeOptions' => [
    'actions' => [
      [
        'type' => 'wait',
        'milliseconds' => 2,
        'selector' => '#my-element'
      ]
    ],
    'blockAds' => true,
    'excludeTags' => ['<string>'],
    'headers' => new stdClass(),
    'includeTags' => ['<string>'],
    'jsonOptions' => [
      'prompt' => '<string>',
      'schema' => new stdClass(),
      'systemPrompt' => '<string>'
    ],
    'location' => [
      'country' => 'US',
      'languages' => ['en-US']
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => ['<string>'],
      'blockedTlds' => ['<string>'],
      'riskScoreThreshold' => 75,
      'whitelist' => ['<string>']
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
      // "modes" is a JSON array in the API example, so an empty PHP array is correct here.
      'modes' => [],
      'prompt' => '<string>',
      'schema' => new stdClass(),
      'tag' => null
    ],
    'formats' => ['markdown']
  ],
  'zeroDataRetention' => false
];

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/crawl",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// curl_error() returns a non-empty message only when the transfer failed.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the example crawl request to the v1 crawl endpoint and prints the
// raw response body. Each step's error is checked: on failure the error is
// printed and the program returns, so a nil request or response is never used.
func main() {

	url := "https://api.firecrawl.dev/v1/crawl"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Starts a crawl job: POSTs the example JSON option set to the v1 crawl
// endpoint with a bearer token and receives the response body as a String.
// Replace <token> and the "<string>" placeholders with real values.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/crawl")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Starts a crawl job: POSTs the example JSON option set to the v1 crawl
# endpoint over HTTPS and prints the response body.
endpoint = URI("https://api.firecrawl.dev/v1/crawl")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Headers are supplied at construction time instead of being assigned one by one.
post = Net::HTTP::Post.new(
  endpoint,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
post.body = "{\n  \"url\": \"<string>\",\n  \"allowBackwardLinks\": false,\n  \"allowExternalLinks\": false,\n  \"allowSubdomains\": false,\n  \"crawlEntireDomain\": false,\n  \"delay\": 123,\n  \"excludePaths\": [\n    \"<string>\"\n  ],\n  \"ignoreQueryParameters\": false,\n  \"ignoreSitemap\": false,\n  \"includePaths\": [\n    \"<string>\"\n  ],\n  \"limit\": 10000,\n  \"maxConcurrency\": 123,\n  \"maxDepth\": 10,\n  \"maxDiscoveryDepth\": 123,\n  \"regexOnFullURL\": false,\n  \"scrapeOptions\": {\n    \"actions\": [\n      {\n        \"type\": \"wait\",\n        \"milliseconds\": 2,\n        \"selector\": \"#my-element\"\n      }\n    ],\n    \"blockAds\": true,\n    \"excludeTags\": [\n      \"<string>\"\n    ],\n    \"headers\": {},\n    \"includeTags\": [\n      \"<string>\"\n    ],\n    \"jsonOptions\": {\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"systemPrompt\": \"<string>\"\n    },\n    \"location\": {\n      \"country\": \"US\",\n      \"languages\": [\n        \"en-US\"\n      ]\n    },\n    \"maxAge\": 0,\n    \"mobile\": false,\n    \"onlyMainContent\": true,\n    \"parsePDF\": true,\n    \"removeBase64Images\": true,\n    \"skipTlsVerification\": false,\n    \"storeInCache\": true,\n    \"threatProtection\": {\n      \"blacklist\": [\n        \"<string>\"\n      ],\n      \"blockedTlds\": [\n        \"<string>\"\n      ],\n      \"riskScoreThreshold\": 75,\n      \"whitelist\": [\n        \"<string>\"\n      ]\n    },\n    \"timeout\": 30000,\n    \"waitFor\": 0,\n    \"changeTrackingOptions\": {\n      \"modes\": [],\n      \"prompt\": \"<string>\",\n      \"schema\": {},\n      \"tag\": null\n    },\n    \"formats\": [\n      \"markdown\"\n    ]\n  },\n  \"zeroDataRetention\": false\n}"

reply = client.request(post)
puts reply.read_body

{
  "id": "<string>",
  "success": true,
  "url": "<string>"
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

Nota: Una nueva versión v2 de esta API ya está disponible con funciones y rendimiento mejorados.

Autorizaciones

Authorization

string

header

requerido

Encabezado de autenticación Bearer con el formato Bearer <token>, donde <token> es tu token de autenticación.

Cuerpo

application/json

url

string<uri>

requerido

La URL base desde la que se iniciará el rastreo

allowBackwardLinks

boolean

predeterminado:false

obsoleto

⚠️ EN DESUSO: Usa 'crawlEntireDomain' en su lugar. Permite que el rastreador siga enlaces internos a URL hermanas o superiores, no solo a rutas hijas.

allowExternalLinks

boolean

predeterminado:false

Permite que el rastreador siga enlaces a sitios web externos.

allowSubdomains

boolean

predeterminado:false

Permite que el rastreador siga enlaces a subdominios del dominio principal.

crawlEntireDomain

boolean

predeterminado:false

Permite que el rastreador siga enlaces internos a URLs del mismo nivel o superiores, no solo rutas hijas.

false: Solo rastrea URLs más profundas (hijas). → p. ej. /features/feature-1 → /features/feature-1/tips ✅ → No seguirá /pricing ni / ❌

true: Rastrea cualquier enlace interno, incluyendo del mismo nivel y superiores. → p. ej. /features/feature-1 → /pricing, /, etc. ✅

Usa true para lograr una cobertura interna más amplia, más allá de rutas anidadas.

delay

number

Pausa en segundos entre scrapes. Esto ayuda a respetar los límites de tasa del sitio web.

excludePaths

string[]

Patrones de expresiones regulares para el pathname de la URL que excluyen del rastreo las URL que coincidan. Por ejemplo, si configuras "excludePaths": ["blog/.*"] para la URL base firecrawl.dev, se excluirán todos los resultados que coincidan con ese patrón, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

ignoreQueryParameters

boolean

predeterminado:false

Si es true, no se vuelve a hacer scraping de la misma ruta con distintos parámetros de consulta (o sin parámetros).

ignoreSitemap

boolean

predeterminado:false

Ignorar el sitemap del sitio web durante el rastreo

includePaths

string[]

Patrones regex de rutas de URL que determinan qué URLs se incluyen en el rastreo. Solo las rutas que coincidan con los patrones especificados se incluirán en la respuesta. Por ejemplo, si configuras "includePaths": ["blog/.*"] para la URL base firecrawl.dev, solo se incluirán los resultados que coincidan con ese patrón, como https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap.

limit

integer

predeterminado:10000

Número máximo de páginas a rastrear. El límite por defecto es 10.000.

maxConcurrency

integer

Número máximo de scrapes concurrentes. Este parámetro te permite establecer un límite de concurrencia para este rastreo. Si no se especifica, el rastreo se ajusta al límite de concurrencia de tu equipo.

maxDepth

integer

predeterminado:10

Profundidad absoluta máxima de rastreo desde la base de la URL introducida. Básicamente, es el número máximo de barras diagonales (/) que puede contener el pathname de una URL rastreada.

maxDiscoveryDepth

integer

Profundidad máxima de rastreo basada en el orden de descubrimiento. El sitio raíz y las páginas del mapa del sitio tienen una profundidad de descubrimiento de 0. Por ejemplo, si la configuras en 1 y habilitas ignoreSitemap, solo se rastreará la URL ingresada y todas las URL que estén enlazadas en esa página.

regexOnFullURL

boolean

predeterminado:false

Cuando es true, los patrones regex de includePaths y excludePaths se comparan con la URL completa (incluidos los parámetros de consulta), en lugar de solo con la ruta (pathname) de la URL. Es útil cuando necesitas filtrar URLs en función de las cadenas de consulta (query strings).

scrapeOptions

object

Mostrar atributos secundarios

webhook

object

Un objeto de especificación de un webhook.

Mostrar atributos secundarios

zeroDataRetention

boolean

predeterminado:false

Si se establece en true, no se conservarán datos de este rastreo. Para activar esta función, ponte en contacto con help@firecrawl.dev.

Respuesta

Respuesta exitosa

id

string

success

boolean

url

string<uri>

Obtener errores de Batch Scrape

Obtener el estado del rastreo

Uso de la API

Endpoints de Scrape

Endpoints de Crawl

Endpoints de Map

Endpoints de Search

Endpoints de Extract

Endpoints de la cuenta

Autorizaciones

Cuerpo

Respuesta