Scrapear

Realiza scraping de una única URL y, opcionalmente, extrae información usando un LLM

# Send a single-URL scrape request to the Firecrawl v1 API.
# <token> and "<string>" values are placeholders to replace before running.
curl -X POST 'https://api.firecrawl.dev/v1/scrape' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}
'

import requests

# Scrape a single URL through the Firecrawl v1 API and print the raw response body.
url = "https://api.firecrawl.dev/v1/scrape"

# JSON request body. "<string>" values are placeholders to replace before running.
payload = {
    "url": "<string>",
    "actions": [
        {
            "type": "wait",
            "milliseconds": 2,
            "selector": "#my-element"
        }
    ],
    "blockAds": True,
    "excludeTags": ["<string>"],
    "headers": {},
    "includeTags": ["<string>"],
    "jsonOptions": {
        "prompt": "<string>",
        "schema": {},
        "systemPrompt": "<string>"
    },
    "location": {
        "country": "US",
        "languages": ["en-US"]
    },
    "maxAge": 0,
    "mobile": False,
    "onlyMainContent": True,
    "parsePDF": True,
    "removeBase64Images": True,
    "skipTlsVerification": False,
    "storeInCache": True,
    "threatProtection": {
        "blacklist": ["<string>"],
        "blockedTlds": ["<string>"],
        "riskScoreThreshold": 75,
        "whitelist": ["<string>"]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
        "modes": [],
        "prompt": "<string>",
        "schema": {},
        "tag": None
    },
    "formats": ["markdown"],
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's auth token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so a stalled connection
# would block forever. The client limit (in seconds) sits slightly above the
# 30000 ms scrape timeout sent in the payload.
response = requests.post(url, json=payload, headers=headers, timeout=35)

print(response.text)

// Scrape a single URL via the Firecrawl v1 API and log the parsed JSON response.
// '<token>' and '<string>' values are placeholders to replace before running.
const requestBody = {
  url: '<string>',
  actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
  blockAds: true,
  excludeTags: ['<string>'],
  headers: {},
  includeTags: ['<string>'],
  jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
  location: {country: 'US', languages: ['en-US']},
  maxAge: 0,
  mobile: false,
  onlyMainContent: true,
  parsePDF: true,
  removeBase64Images: true,
  skipTlsVerification: false,
  storeInCache: true,
  threatProtection: {
    blacklist: ['<string>'],
    blockedTlds: ['<string>'],
    riskScoreThreshold: 75,
    whitelist: ['<string>']
  },
  timeout: 30000,
  waitFor: 0,
  changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
  formats: ['markdown'],
  zeroDataRetention: false
};

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(requestBody)
};

// Any failure (network error or a body that is not valid JSON) is logged via console.error.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/scrape', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Scrape a single URL via the Firecrawl v1 API and echo the raw response body.
// '<token>' and '<string>' values are placeholders to replace before running.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/scrape",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'actions' => [
        [
                'type' => 'wait',
                'milliseconds' => 2,
                'selector' => '#my-element'
        ]
    ],
    'blockAds' => true,
    'excludeTags' => [
        '<string>'
    ],
    // json_encode() serializes an empty PHP array as a JSON array ([]).
    // This field is a JSON object, so an empty stdClass is used to emit {}.
    'headers' => new stdClass(),
    'includeTags' => [
        '<string>'
    ],
    'jsonOptions' => [
        'prompt' => '<string>',
        // Empty object ({}), not an empty list; see the note on 'headers'.
        'schema' => new stdClass(),
        'systemPrompt' => '<string>'
    ],
    'location' => [
        'country' => 'US',
        'languages' => [
                'en-US'
        ]
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
        'blacklist' => [
                '<string>'
        ],
        'blockedTlds' => [
                '<string>'
        ],
        'riskScoreThreshold' => 75,
        'whitelist' => [
                '<string>'
        ]
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
        // 'modes' is a genuine list, so an empty PHP array ([]) is correct here.
        'modes' => [
                
        ],
        'prompt' => '<string>',
        // Empty object ({}), not an empty list; see the note on 'headers'.
        'schema' => new stdClass(),
        'tag' => null
    ],
    'formats' => [
        'markdown'
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// With CURLOPT_RETURNTRANSFER set, curl_exec() returns the body as a string,
// or false on failure, in which case curl_error() holds the message.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a single-URL scrape request to the Firecrawl v1 API and prints
// the raw response body. "<token>" and "<string>" values are placeholders to
// replace before running. Any failure is reported and the program returns
// instead of continuing with a nil request or response.
func main() {

	url := "https://api.firecrawl.dev/v1/scrape"

	// Raw string literal: yields the same bytes as the escaped one-line form
	// while keeping the JSON readable.
	payload := strings.NewReader(`{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Sends a POST request to the Firecrawl v1 scrape endpoint with a bearer-token
// Authorization header and a JSON body, and reads the response as a String.
// "<token>" and "<string>" values are placeholders to replace before running.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/scrape")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Scrape a single URL via the Firecrawl v1 API and print the response body.
# '<token>' and "<string>" values are placeholders to replace before running.
endpoint = URI("https://api.firecrawl.dev/v1/scrape")

# Pretty-printed JSON request body.
json_body = "{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}"

# Build the POST request: headers first, then the body.
post = Net::HTTP::Post.new(endpoint)
{ "Authorization" => 'Bearer <token>', "Content-Type" => 'application/json' }.each do |name, value|
  post[name] = value
end
post.body = json_body

# TLS-enabled client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

puts client.request(post).read_body

{
  "data": {
    "actions": {
      "javascriptReturns": [
        {
          "type": "<string>",
          "value": "<unknown>"
        }
      ],
      "pdfs": [
        "<string>"
      ],
      "scrapes": [
        {
          "html": "<string>",
          "url": "<string>"
        }
      ],
      "screenshots": [
        "<string>"
      ]
    },
    "changeTracking": {
      "diff": "<string>",
      "json": {},
      "previousScrapeAt": "2023-11-07T05:31:56Z"
    },
    "html": "<string>",
    "links": [
      "<string>"
    ],
    "llm_extraction": {},
    "markdown": "<string>",
    "metadata": {
      "<any other metadata> ": "<string>",
      "description": "<string>",
      "error": "<string>",
      "keywords": "<string>",
      "language": "<string>",
      "numPages": 123,
      "ogLocaleAlternate": [
        "<string>"
      ],
      "sourceURL": "<string>",
      "statusCode": 123,
      "title": "<string>",
      "totalPages": 123
    },
    "rawHtml": "<string>",
    "screenshot": "<string>",
    "warning": "<string>"
  },
  "success": true
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

scrape

Realiza scraping de una única URL y, opcionalmente, extrae información usando un LLM

# Send a single-URL scrape request to the Firecrawl v1 API.
# <token> and "<string>" values are placeholders to replace before running.
curl -X POST 'https://api.firecrawl.dev/v1/scrape' \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}
'

import requests

# Scrape a single URL through the Firecrawl v1 API and print the raw response body.
url = "https://api.firecrawl.dev/v1/scrape"

# JSON request body. "<string>" values are placeholders to replace before running.
payload = {
    "url": "<string>",
    "actions": [
        {
            "type": "wait",
            "milliseconds": 2,
            "selector": "#my-element"
        }
    ],
    "blockAds": True,
    "excludeTags": ["<string>"],
    "headers": {},
    "includeTags": ["<string>"],
    "jsonOptions": {
        "prompt": "<string>",
        "schema": {},
        "systemPrompt": "<string>"
    },
    "location": {
        "country": "US",
        "languages": ["en-US"]
    },
    "maxAge": 0,
    "mobile": False,
    "onlyMainContent": True,
    "parsePDF": True,
    "removeBase64Images": True,
    "skipTlsVerification": False,
    "storeInCache": True,
    "threatProtection": {
        "blacklist": ["<string>"],
        "blockedTlds": ["<string>"],
        "riskScoreThreshold": 75,
        "whitelist": ["<string>"]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
        "modes": [],
        "prompt": "<string>",
        "schema": {},
        "tag": None
    },
    "formats": ["markdown"],
    "zeroDataRetention": False
}
# "<token>" is a placeholder for the caller's auth token.
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so a stalled connection
# would block forever. The client limit (in seconds) sits slightly above the
# 30000 ms scrape timeout sent in the payload.
response = requests.post(url, json=payload, headers=headers, timeout=35)

print(response.text)

// Scrape a single URL via the Firecrawl v1 API and log the parsed JSON response.
// '<token>' and '<string>' values are placeholders to replace before running.
const requestBody = {
  url: '<string>',
  actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
  blockAds: true,
  excludeTags: ['<string>'],
  headers: {},
  includeTags: ['<string>'],
  jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
  location: {country: 'US', languages: ['en-US']},
  maxAge: 0,
  mobile: false,
  onlyMainContent: true,
  parsePDF: true,
  removeBase64Images: true,
  skipTlsVerification: false,
  storeInCache: true,
  threatProtection: {
    blacklist: ['<string>'],
    blockedTlds: ['<string>'],
    riskScoreThreshold: 75,
    whitelist: ['<string>']
  },
  timeout: 30000,
  waitFor: 0,
  changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
  formats: ['markdown'],
  zeroDataRetention: false
};

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(requestBody)
};

// Any failure (network error or a body that is not valid JSON) is logged via console.error.
(async () => {
  try {
    const res = await fetch('https://api.firecrawl.dev/v1/scrape', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Scrape a single URL via the Firecrawl v1 API and echo the raw response body.
// '<token>' and '<string>' values are placeholders to replace before running.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/scrape",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'actions' => [
        [
                'type' => 'wait',
                'milliseconds' => 2,
                'selector' => '#my-element'
        ]
    ],
    'blockAds' => true,
    'excludeTags' => [
        '<string>'
    ],
    // json_encode() serializes an empty PHP array as a JSON array ([]).
    // This field is a JSON object, so an empty stdClass is used to emit {}.
    'headers' => new stdClass(),
    'includeTags' => [
        '<string>'
    ],
    'jsonOptions' => [
        'prompt' => '<string>',
        // Empty object ({}), not an empty list; see the note on 'headers'.
        'schema' => new stdClass(),
        'systemPrompt' => '<string>'
    ],
    'location' => [
        'country' => 'US',
        'languages' => [
                'en-US'
        ]
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
        'blacklist' => [
                '<string>'
        ],
        'blockedTlds' => [
                '<string>'
        ],
        'riskScoreThreshold' => 75,
        'whitelist' => [
                '<string>'
        ]
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
        // 'modes' is a genuine list, so an empty PHP array ([]) is correct here.
        'modes' => [
                
        ],
        'prompt' => '<string>',
        // Empty object ({}), not an empty list; see the note on 'headers'.
        'schema' => new stdClass(),
        'tag' => null
    ],
    'formats' => [
        'markdown'
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

// With CURLOPT_RETURNTRANSFER set, curl_exec() returns the body as a string,
// or false on failure, in which case curl_error() holds the message.
$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a single-URL scrape request to the Firecrawl v1 API and prints
// the raw response body. "<token>" and "<string>" values are placeholders to
// replace before running. Any failure is reported and the program returns
// instead of continuing with a nil request or response.
func main() {

	url := "https://api.firecrawl.dev/v1/scrape"

	// Raw string literal: yields the same bytes as the escaped one-line form
	// while keeping the JSON readable.
	payload := strings.NewReader(`{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}`)

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so return before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))

}

// Sends a POST request to the Firecrawl v1 scrape endpoint with a bearer-token
// Authorization header and a JSON body, and reads the response as a String.
// "<token>" and "<string>" values are placeholders to replace before running.
HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/scrape")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

# Scrape a single URL via the Firecrawl v1 API and print the response body.
# '<token>' and "<string>" values are placeholders to replace before running.
endpoint = URI("https://api.firecrawl.dev/v1/scrape")

# Pretty-printed JSON request body.
json_body = "{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}"

# Build the POST request: headers first, then the body.
post = Net::HTTP::Post.new(endpoint)
{ "Authorization" => 'Bearer <token>', "Content-Type" => 'application/json' }.each do |name, value|
  post[name] = value
end
post.body = json_body

# TLS-enabled client for the endpoint's host and port.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

puts client.request(post).read_body

{
  "data": {
    "actions": {
      "javascriptReturns": [
        {
          "type": "<string>",
          "value": "<unknown>"
        }
      ],
      "pdfs": [
        "<string>"
      ],
      "scrapes": [
        {
          "html": "<string>",
          "url": "<string>"
        }
      ],
      "screenshots": [
        "<string>"
      ]
    },
    "changeTracking": {
      "diff": "<string>",
      "json": {},
      "previousScrapeAt": "2023-11-07T05:31:56Z"
    },
    "html": "<string>",
    "links": [
      "<string>"
    ],
    "llm_extraction": {},
    "markdown": "<string>",
    "metadata": {
      "<any other metadata> ": "<string>",
      "description": "<string>",
      "error": "<string>",
      "keywords": "<string>",
      "language": "<string>",
      "numPages": 123,
      "ogLocaleAlternate": [
        "<string>"
      ],
      "sourceURL": "<string>",
      "statusCode": 123,
      "title": "<string>",
      "totalPages": 123
    },
    "rawHtml": "<string>",
    "screenshot": "<string>",
    "warning": "<string>"
  },
  "success": true
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

Nota: Ya está disponible una nueva versión v2 de esta API con funciones y rendimiento mejorados.

Autorizaciones

Authorization

string

header

requerido

Encabezado de autenticación Bearer con el formato Bearer <token>, donde <token> es tu token de autenticación.

Cuerpo

application/json

url

string<uri>

requerido

La URL que se va a scrapear

actions

Acciones que se ejecutarán en la página antes de extraer el contenido

Mostrar atributos secundarios

blockAds

boolean

predeterminado:true

Habilita el bloqueo de anuncios y ventanas emergentes de cookies.

excludeTags

string[]

Etiquetas que se excluirán de la salida.

headers

object

Cabeceras que se enviarán con la solicitud. Pueden usarse para enviar cookies, user-agent, etc.

includeTags

string[]

Etiquetas que se deben incluir en la salida.

jsonOptions

object

Objeto de opciones JSON

Mostrar atributos secundarios

location

object

Configuración de ubicación de la solicitud. Cuando se especifique, usará un proxy adecuado si está disponible y emulará la configuración de idioma y zona horaria correspondientes. Si no se especifica, el valor predeterminado es 'US'.

Mostrar atributos secundarios

maxAge

integer

predeterminado:0

Devuelve una versión en caché de la página si su antigüedad es menor que este valor, en milisegundos. Si la versión en caché de la página es más antigua que este valor, la página se volverá a scrapear. Si no necesitas datos extremadamente recientes, activar esta opción puede acelerar tus procesos de scraping hasta un 500 %. El valor predeterminado es 0, lo que desactiva la caché.

mobile

boolean

predeterminado:false

Configúralo en true si quieres emular el scraping desde un dispositivo móvil. Es útil para probar páginas responsive y tomar capturas de pantalla en dispositivos móviles.

onlyMainContent

boolean

predeterminado:true

Devuelve únicamente el contenido principal de la página, excluyendo encabezados, elementos de navegación, pies de página, etc.

parsePDF

boolean

predeterminado:true

Controla cómo se procesan los archivos PDF durante el scraping. Cuando es true, el contenido del PDF se extrae y se convierte al formato Markdown, y la facturación se basa en el número de páginas (1 crédito por página). Cuando es false, el archivo PDF se devuelve codificado en base64 con una tarifa plana total de 1 crédito.

proxy

enum<string>

Especifica el tipo de proxy que se va a utilizar.

basic: Proxies para hacer scraping de sitios sin sistemas anti‑bots o con sistemas anti‑bots básicos. Es rápido y suele funcionar.
enhanced: Proxies mejorados para hacer scraping de sitios con sistemas anti‑bots avanzados. Es más lento, pero más fiable en ciertos sitios. Cuesta hasta 5 créditos por solicitud.
auto: Firecrawl reintentará automáticamente el scraping con proxies mejorados si el proxy básico falla. Si el reintento con enhanced tiene éxito, se cobrarán 5 créditos por la extracción. Si el primer intento con basic tiene éxito, solo se cobrará el coste estándar.

Si no especificas un proxy, Firecrawl usará basic por defecto.

Opciones disponibles:

basic,

enhanced,

auto

removeBase64Images

boolean

predeterminado:true

Elimina todas las imágenes en formato base64 de la salida, que pueden hacerla excesivamente larga. El texto alternativo de la imagen se conserva en la salida, pero la URL se reemplaza por un marcador de posición.

skipTlsVerification

boolean

predeterminado:false

Omitir la verificación del certificado TLS al realizar solicitudes

storeInCache

boolean

predeterminado:true

Si es true, la página se almacenará en el índice y la caché de Firecrawl. Establecerlo en false es útil si tu actividad de scraping puede implicar problemas de protección de datos. El uso de algunos parámetros asociados con scraping sensible (actions, headers) forzará que este parámetro sea false.

threatProtection

Threat Protection Override · object

Anulación por solicitud de Protección contra amenazas. Los campos que proporciones reemplazan los campos correspondientes de la política de tu organización solo para esta solicitud; los campos omitidos conservan sus valores a nivel de organización. Requiere que Protección contra amenazas esté habilitada para tu equipo (función Enterprise); de lo contrario, la solicitud se rechaza con un 403. Si tu organización ha deshabilitado las anulaciones por solicitud, cualquier solicitud que incluya este objeto se rechaza con un 403. Si Protección contra amenazas se aplica de forma obligatoria para tu equipo, mode no puede establecerse en off.

Mostrar atributos secundarios

timeout

integer

predeterminado:30000

Tiempo de espera de la solicitud en milisegundos

waitFor

integer

predeterminado:0

Especifica un retraso, en milisegundos, antes de obtener el contenido, permitiendo que la página tenga tiempo suficiente para cargarse.

changeTrackingOptions

object

Opciones de seguimiento de cambios (Beta). Solo aplicable cuando 'changeTracking' está incluido en los formatos. El formato 'markdown' también debe especificarse al usar el seguimiento de cambios.

Mostrar atributos secundarios

formats

enum<string>[]

Formatos que se incluirán en el resultado.

Opciones disponibles:

markdown,

html,

rawHtml,

links,

screenshot,

screenshot@fullPage,

json,

changeTracking

zeroDataRetention

boolean

predeterminado:false

Si se establece en true, se habilitará la retención cero de datos (zero data retention) para esta extracción. Para activar esta función, ponte en contacto con help@firecrawl.dev

Respuesta

Respuesta satisfactoria

data

object

Mostrar atributos secundarios

success

boolean

Introducción

Scraping por lotes

Uso de la API

Endpoints de Scrape

Endpoints de Crawl

Endpoints de Map

Endpoints de Search

Endpoints de Extract

Endpoints de la cuenta

Autorizaciones

Cuerpo

Respuesta