import FirecrawlApp from '@mendable/firecrawl-js';
import { ChatOpenAI } from '@langchain/openai';
import { StateGraph, Annotation, START, END } from '@langchain/langgraph';
// Credentials are read once from the environment; either may be undefined if the variable is not set.
const { FIRECRAWL_API_KEY, OPENAI_API_KEY } = process.env;

// Firecrawl client used by the scrape node.
const firecrawl = new FirecrawlApp({
  apiKey: FIRECRAWL_API_KEY,
});

// Chat model used by the summarize node.
const llm = new ChatOpenAI({
  model: "gpt-5-nano",
  apiKey: OPENAI_API_KEY,
});
/** One scraped page: the URL that was requested and the text captured for it. */
type ScrapedPage = { url: string; content: string };

/**
 * Custom state shared by every node in the workflow graph.
 *
 * - `urls`: the pages to scrape.
 * - `scrapedData`: one `{ url, content }` entry per scraped page.
 * - `summary`: the text summary produced from the scraped content.
 */
const WorkflowState = Annotation.Root({
  urls: Annotation<string[]>(),
  scrapedData: Annotation<ScrapedPage[]>(),
  summary: Annotation<string>(),
});
/**
 * Graph node: scrapes every URL in `state.urls` as markdown.
 *
 * The requests are independent of each other, so they are issued together
 * and awaited with `Promise.all`; the resulting array keeps the same order
 * as `state.urls`. If any single scrape rejects, the whole node rejects,
 * matching the fail-fast behaviour of awaiting them one by one.
 *
 * @param state Current workflow state; only `urls` is read.
 * @returns A state update containing one `{ url, content }` entry per URL.
 *          `content` is an empty string when the scrape result carries no markdown.
 */
async function scrapeMultiple(
  state: typeof WorkflowState.State,
): Promise<Pick<typeof WorkflowState.State, 'scrapedData'>> {
  // The `urls` channel has no default, so it can be missing at runtime if the
  // graph was invoked without it; treat that as "nothing to scrape".
  const urls = state.urls ?? [];

  const scrapedData = await Promise.all(
    urls.map(async (url) => {
      const result = await firecrawl.scrape(url, { formats: ['markdown'] });
      return { url, content: result.markdown ?? '' };
    }),
  );

  return { scrapedData };
}
/**
 * Flattens chat-message content into plain text.
 *
 * Message content may be a plain string or an array of content parts; only
 * string parts and `{ type: 'text', text: string }` parts contribute to the
 * result, everything else is skipped.
 */
function messageContentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  return content
    .map((part: unknown) => {
      if (typeof part === 'string') return part;
      if (
        typeof part === 'object' &&
        part !== null &&
        'type' in part &&
        part.type === 'text' &&
        'text' in part &&
        typeof part.text === 'string'
      ) {
        return part.text;
      }
      return '';
    })
    .join('');
}

/**
 * Graph node: summarizes all scraped content with a single LLM call.
 *
 * Each entry of `state.scrapedData` is rendered as a section headed by its
 * URL, the sections are joined with blank lines, and the combined text is
 * sent to the model as one user message.
 *
 * @param state Current workflow state; only `scrapedData` is read.
 * @returns A state update whose `summary` is the text of the model's reply.
 */
async function summarizeAll(
  state: typeof WorkflowState.State,
): Promise<Pick<typeof WorkflowState.State, 'summary'>> {
  const combinedContent = state.scrapedData
    .map((item) => `Contenido de ${item.url}:\n${item.content}`)
    .join('\n\n');

  const response = await llm.invoke([
    { role: "user", content: `Resume estos sitios web:\n${combinedContent}` },
  ]);

  // Narrow at runtime instead of asserting `as string`: the reply content is
  // not guaranteed to be a plain string.
  return { summary: messageContentToText(response.content) };
}
// Build the workflow graph: a linear pipeline START -> "scrape" -> "summarize" -> END.
// Nodes are registered first so the edges below can refer to them by name.
const workflow = new StateGraph(WorkflowState)
.addNode("scrape", scrapeMultiple)
.addNode("summarize", summarizeAll)
.addEdge(START, "scrape")
.addEdge("scrape", "summarize")
.addEdge("summarize", END);
// Compile the graph definition into the object that exposes `invoke`.
const app = workflow.compile();
// Run the workflow: seed the state with the pages to scrape, then print the summary.
const seedUrls = ["https://firecrawl.dev", "https://firecrawl.dev/pricing"];
const result = await app.invoke({ urls: seedUrls });
console.log(result.summary);