Scrape

Raspar uma única URL e, opcionalmente, extrair informações usando um LLM

curl --request POST \
  --url https://api.firecrawl.dev/v1/scrape \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}
'

import requests

url = "https://api.firecrawl.dev/v1/scrape"

payload = {
    "url": "<string>",
    "actions": [
        {
            "type": "wait",
            "milliseconds": 2,
            "selector": "#my-element"
        }
    ],
    "blockAds": True,
    "excludeTags": ["<string>"],
    "headers": {},
    "includeTags": ["<string>"],
    "jsonOptions": {
        "prompt": "<string>",
        "schema": {},
        "systemPrompt": "<string>"
    },
    "location": {
        "country": "US",
        "languages": ["en-US"]
    },
    "maxAge": 0,
    "mobile": False,
    "onlyMainContent": True,
    "parsePDF": True,
    "removeBase64Images": True,
    "skipTlsVerification": False,
    "storeInCache": True,
    "threatProtection": {
        "blacklist": ["<string>"],
        "blockedTlds": ["<string>"],
        "riskScoreThreshold": 75,
        "whitelist": ["<string>"]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
        "modes": [],
        "prompt": "<string>",
        "schema": {},
        "tag": None
    },
    "formats": ["markdown"],
    "zeroDataRetention": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    url: '<string>',
    actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
    location: {country: 'US', languages: ['en-US']},
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
    formats: ['markdown'],
    zeroDataRetention: false
  })
};

fetch('https://api.firecrawl.dev/v1/scrape', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Sends a POST request to the Firecrawl v1 /scrape endpoint and prints either
// the raw response body or the cURL error message.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/scrape",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'actions' => [
      [
        'type' => 'wait',
        'milliseconds' => 2,
        'selector' => '#my-element'
      ]
    ],
    'blockAds' => true,
    'excludeTags' => [
      '<string>'
    ],
    // Cast to object: json_encode() serializes an empty PHP array as `[]`,
    // but this field is a JSON object (`{}`) in the request body.
    'headers' => (object) [],
    'includeTags' => [
      '<string>'
    ],
    'jsonOptions' => [
      'prompt' => '<string>',
      // Empty schema must be sent as `{}`, not `[]`.
      'schema' => (object) [],
      'systemPrompt' => '<string>'
    ],
    'location' => [
      'country' => 'US',
      'languages' => [
        'en-US'
      ]
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => [
        '<string>'
      ],
      'blockedTlds' => [
        '<string>'
      ],
      'riskScoreThreshold' => 75,
      'whitelist' => [
        '<string>'
      ]
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
      // `modes` is a JSON array, so an empty PHP array is correct here.
      'modes' => [],
      'prompt' => '<string>',
      // Empty schema must be sent as `{}`, not `[]`.
      'schema' => (object) [],
      'tag' => null
    ],
    'formats' => [
      'markdown'
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// curl_error() returns an empty string when the transfer succeeded.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a POST request to the Firecrawl v1 /scrape endpoint and prints
// the raw response body. Any failure while building the request, sending it,
// or reading the response is reported and the program returns early instead
// of dereferencing a nil response.
func main() {

	url := "https://api.firecrawl.dev/v1/scrape"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/scrape")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.firecrawl.dev/v1/scrape")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}"

response = http.request(request)
puts response.read_body

{
  "data": {
    "actions": {
      "javascriptReturns": [
        {
          "type": "<string>",
          "value": "<unknown>"
        }
      ],
      "pdfs": [
        "<string>"
      ],
      "scrapes": [
        {
          "html": "<string>",
          "url": "<string>"
        }
      ],
      "screenshots": [
        "<string>"
      ]
    },
    "changeTracking": {
      "diff": "<string>",
      "json": {},
      "previousScrapeAt": "2023-11-07T05:31:56Z"
    },
    "html": "<string>",
    "links": [
      "<string>"
    ],
    "llm_extraction": {},
    "markdown": "<string>",
    "metadata": {
      "<any other metadata> ": "<string>",
      "description": "<string>",
      "error": "<string>",
      "keywords": "<string>",
      "language": "<string>",
      "numPages": 123,
      "ogLocaleAlternate": [
        "<string>"
      ],
      "sourceURL": "<string>",
      "statusCode": 123,
      "title": "<string>",
      "totalPages": 123
    },
    "rawHtml": "<string>",
    "screenshot": "<string>",
    "warning": "<string>"
  },
  "success": true
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

POST

scrape

Raspar uma única URL e, opcionalmente, extrair informações usando um LLM

curl --request POST \
  --url https://api.firecrawl.dev/v1/scrape \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "<string>",
  "actions": [
    {
      "type": "wait",
      "milliseconds": 2,
      "selector": "#my-element"
    }
  ],
  "blockAds": true,
  "excludeTags": [
    "<string>"
  ],
  "headers": {},
  "includeTags": [
    "<string>"
  ],
  "jsonOptions": {
    "prompt": "<string>",
    "schema": {},
    "systemPrompt": "<string>"
  },
  "location": {
    "country": "US",
    "languages": [
      "en-US"
    ]
  },
  "maxAge": 0,
  "mobile": false,
  "onlyMainContent": true,
  "parsePDF": true,
  "removeBase64Images": true,
  "skipTlsVerification": false,
  "storeInCache": true,
  "threatProtection": {
    "blacklist": [
      "<string>"
    ],
    "blockedTlds": [
      "<string>"
    ],
    "riskScoreThreshold": 75,
    "whitelist": [
      "<string>"
    ]
  },
  "timeout": 30000,
  "waitFor": 0,
  "changeTrackingOptions": {
    "modes": [],
    "prompt": "<string>",
    "schema": {},
    "tag": null
  },
  "formats": [
    "markdown"
  ],
  "zeroDataRetention": false
}
'

import requests

url = "https://api.firecrawl.dev/v1/scrape"

payload = {
    "url": "<string>",
    "actions": [
        {
            "type": "wait",
            "milliseconds": 2,
            "selector": "#my-element"
        }
    ],
    "blockAds": True,
    "excludeTags": ["<string>"],
    "headers": {},
    "includeTags": ["<string>"],
    "jsonOptions": {
        "prompt": "<string>",
        "schema": {},
        "systemPrompt": "<string>"
    },
    "location": {
        "country": "US",
        "languages": ["en-US"]
    },
    "maxAge": 0,
    "mobile": False,
    "onlyMainContent": True,
    "parsePDF": True,
    "removeBase64Images": True,
    "skipTlsVerification": False,
    "storeInCache": True,
    "threatProtection": {
        "blacklist": ["<string>"],
        "blockedTlds": ["<string>"],
        "riskScoreThreshold": 75,
        "whitelist": ["<string>"]
    },
    "timeout": 30000,
    "waitFor": 0,
    "changeTrackingOptions": {
        "modes": [],
        "prompt": "<string>",
        "schema": {},
        "tag": None
    },
    "formats": ["markdown"],
    "zeroDataRetention": False
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    url: '<string>',
    actions: [{type: 'wait', milliseconds: 2, selector: '#my-element'}],
    blockAds: true,
    excludeTags: ['<string>'],
    headers: {},
    includeTags: ['<string>'],
    jsonOptions: {prompt: '<string>', schema: {}, systemPrompt: '<string>'},
    location: {country: 'US', languages: ['en-US']},
    maxAge: 0,
    mobile: false,
    onlyMainContent: true,
    parsePDF: true,
    removeBase64Images: true,
    skipTlsVerification: false,
    storeInCache: true,
    threatProtection: {
      blacklist: ['<string>'],
      blockedTlds: ['<string>'],
      riskScoreThreshold: 75,
      whitelist: ['<string>']
    },
    timeout: 30000,
    waitFor: 0,
    changeTrackingOptions: {modes: [], prompt: '<string>', schema: {}, tag: null},
    formats: ['markdown'],
    zeroDataRetention: false
  })
};

fetch('https://api.firecrawl.dev/v1/scrape', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

// Sends a POST request to the Firecrawl v1 /scrape endpoint and prints either
// the raw response body or the cURL error message.

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.firecrawl.dev/v1/scrape",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'url' => '<string>',
    'actions' => [
      [
        'type' => 'wait',
        'milliseconds' => 2,
        'selector' => '#my-element'
      ]
    ],
    'blockAds' => true,
    'excludeTags' => [
      '<string>'
    ],
    // Cast to object: json_encode() serializes an empty PHP array as `[]`,
    // but this field is a JSON object (`{}`) in the request body.
    'headers' => (object) [],
    'includeTags' => [
      '<string>'
    ],
    'jsonOptions' => [
      'prompt' => '<string>',
      // Empty schema must be sent as `{}`, not `[]`.
      'schema' => (object) [],
      'systemPrompt' => '<string>'
    ],
    'location' => [
      'country' => 'US',
      'languages' => [
        'en-US'
      ]
    ],
    'maxAge' => 0,
    'mobile' => false,
    'onlyMainContent' => true,
    'parsePDF' => true,
    'removeBase64Images' => true,
    'skipTlsVerification' => false,
    'storeInCache' => true,
    'threatProtection' => [
      'blacklist' => [
        '<string>'
      ],
      'blockedTlds' => [
        '<string>'
      ],
      'riskScoreThreshold' => 75,
      'whitelist' => [
        '<string>'
      ]
    ],
    'timeout' => 30000,
    'waitFor' => 0,
    'changeTrackingOptions' => [
      // `modes` is a JSON array, so an empty PHP array is correct here.
      'modes' => [],
      'prompt' => '<string>',
      // Empty schema must be sent as `{}`, not `[]`.
      'schema' => (object) [],
      'tag' => null
    ],
    'formats' => [
      'markdown'
    ],
    'zeroDataRetention' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// curl_error() returns an empty string when the transfer succeeded.
if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a POST request to the Firecrawl v1 /scrape endpoint and prints
// the raw response body. Any failure while building the request, sending it,
// or reading the response is reported and the program returns early instead
// of dereferencing a nil response.
func main() {

	url := "https://api.firecrawl.dev/v1/scrape"

	payload := strings.NewReader("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched (no Body to close).
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.firecrawl.dev/v1/scrape")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.firecrawl.dev/v1/scrape")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"url\": \"<string>\",\n  \"actions\": [\n    {\n      \"type\": \"wait\",\n      \"milliseconds\": 2,\n      \"selector\": \"#my-element\"\n    }\n  ],\n  \"blockAds\": true,\n  \"excludeTags\": [\n    \"<string>\"\n  ],\n  \"headers\": {},\n  \"includeTags\": [\n    \"<string>\"\n  ],\n  \"jsonOptions\": {\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"systemPrompt\": \"<string>\"\n  },\n  \"location\": {\n    \"country\": \"US\",\n    \"languages\": [\n      \"en-US\"\n    ]\n  },\n  \"maxAge\": 0,\n  \"mobile\": false,\n  \"onlyMainContent\": true,\n  \"parsePDF\": true,\n  \"removeBase64Images\": true,\n  \"skipTlsVerification\": false,\n  \"storeInCache\": true,\n  \"threatProtection\": {\n    \"blacklist\": [\n      \"<string>\"\n    ],\n    \"blockedTlds\": [\n      \"<string>\"\n    ],\n    \"riskScoreThreshold\": 75,\n    \"whitelist\": [\n      \"<string>\"\n    ]\n  },\n  \"timeout\": 30000,\n  \"waitFor\": 0,\n  \"changeTrackingOptions\": {\n    \"modes\": [],\n    \"prompt\": \"<string>\",\n    \"schema\": {},\n    \"tag\": null\n  },\n  \"formats\": [\n    \"markdown\"\n  ],\n  \"zeroDataRetention\": false\n}"

response = http.request(request)
puts response.read_body

{
  "data": {
    "actions": {
      "javascriptReturns": [
        {
          "type": "<string>",
          "value": "<unknown>"
        }
      ],
      "pdfs": [
        "<string>"
      ],
      "scrapes": [
        {
          "html": "<string>",
          "url": "<string>"
        }
      ],
      "screenshots": [
        "<string>"
      ]
    },
    "changeTracking": {
      "diff": "<string>",
      "json": {},
      "previousScrapeAt": "2023-11-07T05:31:56Z"
    },
    "html": "<string>",
    "links": [
      "<string>"
    ],
    "llm_extraction": {},
    "markdown": "<string>",
    "metadata": {
      "<any other metadata> ": "<string>",
      "description": "<string>",
      "error": "<string>",
      "keywords": "<string>",
      "language": "<string>",
      "numPages": 123,
      "ogLocaleAlternate": [
        "<string>"
      ],
      "sourceURL": "<string>",
      "statusCode": 123,
      "title": "<string>",
      "totalPages": 123
    },
    "rawHtml": "<string>",
    "screenshot": "<string>",
    "warning": "<string>"
  },
  "success": true
}

{
  "error": "Payment required to access this resource."
}

{
  "error": "Request rate limit exceeded. Please wait and try again later."
}

{
  "error": "An unexpected error occurred on the server."
}

Observação: uma nova versão v2 desta API já está disponível, com recursos e desempenho aprimorados.

Autorizações

Authorization

string

header

obrigatório

Cabeçalho de autenticação Bearer no formato Bearer <token>, em que <token> é o seu token de autenticação.

Corpo

application/json

url

string<uri>

obrigatório

URL a ser raspada

actions

Ações a serem realizadas na página antes de extrair o conteúdo

Show child attributes

blockAds

boolean

padrão:true

Habilita o bloqueio de anúncios e de pop-ups de cookies.

excludeTags

string[]

Tags a serem excluídas da saída.

headers

object

Cabeçalhos a serem enviados com a requisição. Podem ser usados para enviar cookies, user-agent etc.

includeTags

string[]

Tags para incluir na saída.

jsonOptions

object

Objeto JSON de opções

Show child attributes

location

object

Configurações de localização para a requisição. Quando especificadas, será usado um proxy apropriado, se disponível, e serão emuladas as configurações correspondentes de idioma e fuso horário. O padrão é "US" se não for especificado.

Show child attributes

maxAge

integer

padrão:0

Retorna uma versão em cache da página se ela tiver menos que essa idade, em milissegundos. Se a versão em cache da página for mais antiga que esse valor, a página será raspada novamente. Se você não precisar de dados extremamente atualizados, ativar essa opção pode acelerar suas raspagens em até 500%. O padrão é 0, o que desativa o cache.

mobile

boolean

padrão:false

Defina como true para emular a raspagem de dados a partir de um dispositivo móvel. Útil para testar páginas responsivas e gerar capturas de tela da versão mobile.

onlyMainContent

boolean

padrão:true

Retorna apenas o conteúdo principal da página, excluindo cabeçalhos, áreas de navegação, rodapés etc.

parsePDF

boolean

padrão:true

Controla como os arquivos PDF são processados durante o scraping. Quando definido como true, o conteúdo do PDF é extraído e convertido para o formato Markdown, com cobrança baseada no número de páginas (1 crédito por página). Quando definido como false, o arquivo PDF é retornado codificado em base64, com uma tarifa fixa de 1 crédito no total.

proxy

enum<string>

Especifica o tipo de proxy a ser usado.

basic: Proxies para scraping de sites sem soluções anti-bot ou com soluções anti-bot básicas. Rápido e geralmente funciona.
enhanced: Proxies avançados para scraping de sites com soluções anti-bot mais sofisticadas. Mais lento, mas mais confiável em certos sites. Custa até 5 créditos por requisição.
auto: O Firecrawl tentará automaticamente fazer o scraping novamente com proxies enhanced se o proxy basic falhar. Se a nova tentativa com enhanced for bem-sucedida, 5 créditos serão cobrados pelo scraping. Se a primeira tentativa com basic for bem-sucedida, apenas o custo normal será cobrado.

Se você não especificar um proxy, o Firecrawl usará basic por padrão.

Opções disponíveis:

basic,

enhanced,

auto

removeBase64Images

boolean

padrão:true

Remove todas as imagens em base64 da saída, que podem ser excessivamente longas. O texto alternativo (alt) da imagem permanece na saída, mas a URL é substituída por um espaço reservado.

skipTlsVerification

boolean

padrão:false

Ignorar a verificação do certificado TLS ao fazer requisições

storeInCache

boolean

padrão:true

Se definido como true, a página será armazenada no índice e no cache do Firecrawl. Definir isso como false é útil se sua atividade de scraping puder levantar preocupações relacionadas à proteção de dados. O uso de alguns parâmetros associados a scraping sensível (actions, headers) fará com que esse parâmetro seja definido como false.

threatProtection

Threat Protection Override · object

Substituição por solicitação da Proteção contra ameaças. Os campos fornecidos substituem os campos correspondentes da política da sua organização somente para esta solicitação; os campos omitidos mantêm os valores definidos no nível da organização. Exige que a Proteção contra ameaças esteja ativada para sua equipe (recurso enterprise) — caso contrário, a solicitação será rejeitada com 403. Se sua organização tiver desativado as substituições por solicitação, qualquer solicitação que inclua este objeto será rejeitada com 403. Se a Proteção contra ameaças for obrigatória para sua equipe, mode não poderá ser definido como off.

Show child attributes

timeout

integer

padrão:30000

Tempo limite da requisição em milissegundos

waitFor

integer

padrão:0

Defina um atraso, em milissegundos, antes de buscar o conteúdo, permitindo que a página tenha tempo suficiente para carregar.

changeTrackingOptions

object

Opções de rastreio de mudanças (Beta). Aplicável somente quando 'changeTracking' estiver incluído em formats. O formato 'markdown' também deve ser especificado ao usar o rastreio de mudanças.

Show child attributes

formats

enum<string>[]

Formatos a serem incluídos no resultado.

Opções disponíveis:

markdown,

html,

rawHtml,

links,

screenshot,

screenshot@fullPage,

json,

changeTracking

zeroDataRetention

boolean

padrão:false

Se definido como true, isso ativará retenção zero de dados para este scrape. Para ativar esse recurso, entre em contato com help@firecrawl.dev

Resposta

Resposta bem-sucedida

data

object

Show child attributes

success

boolean

Introdução

Raspagem em Lote

Usando a API

Endpoints de scraping

Endpoints de rastreamento

Endpoints de mapeamento

Endpoints de busca

Endpoints de extração

Endpoints de conta

Autorizações

Corpo

Resposta