From ecac3dd1c99479deb2f5c5b7fbea2faad04c3e83 Mon Sep 17 00:00:00 2001
From: us <rahmetsaritekin@gmail.com>
Date: Mon, 15 Jun 2026 02:47:11 +0300
Subject: [PATCH] feat: add fastCRW tool

---
 agentstack/_tools/crw/__init__.py | 151 ++++++++++++++++++++++++++++++
 agentstack/_tools/crw/config.json |  23 +++++
 2 files changed, 174 insertions(+)
 create mode 100644 agentstack/_tools/crw/__init__.py
 create mode 100644 agentstack/_tools/crw/config.json

diff --git a/agentstack/_tools/crw/__init__.py b/agentstack/_tools/crw/__init__.py
new file mode 100644
index 0000000..498764c
--- /dev/null
+++ b/agentstack/_tools/crw/__init__.py
@@ -0,0 +1,151 @@
+import os
+from firecrawl import FirecrawlApp
+from typing import List, Dict, Any, Optional
+
+# fastCRW is a Firecrawl-compatible web data engine (single ~8MB Rust binary;
+# self-host or managed cloud). Because the API is Firecrawl-compatible, we reuse
+# the official Firecrawl client and just point it at the fastCRW base URL.
+# Defaults to the managed cloud; override CRW_API_URL to target a self-hosted server.
+app = FirecrawlApp(
+    api_key=os.getenv('CRW_API_KEY'),
+    api_url=os.getenv('CRW_API_URL', 'https://fastcrw.com/api'),
+)
+
+
+def web_scrape(url: str):
+    """
+    Scrape a url and return markdown. Use this to read a singular page and web_crawl only if you
+    need to read all other links as well.
+    """
+    scrape_result = app.scrape_url(url, params={'formats': ['markdown']})
+    return scrape_result
+
+
+def web_crawl(url: str):
+    """
+    Scrape a url and crawl through other links from that page, scraping their contents.
+    This tool returns a crawl_id that you will need to use after waiting for a period of time
+    to retrieve the final contents. You should attempt to accomplish another task while waiting
+    for the crawl to complete.
+
+    Crawl will ignore sublinks of a page if they aren’t children of the url you provide.
+    So, the website.com/other-parent/blog-1 wouldn’t be returned if you crawled website.com/blogs/.
+    """
+
+    crawl_status = app.crawl_url(
+        url, params={'limit': 100, 'scrapeOptions': {'formats': ['markdown']}}, poll_interval=30
+    )
+
+    return crawl_status
+
+
+def retrieve_web_crawl(crawl_id: str):
+    """
+    Retrieve the results of a previously started web crawl. Crawls take time to process
+    so be sure to only use this tool some time after initiating a crawl. The result
+    will tell you if the crawl is finished. If it is not, wait some more time then try again.
+    """
+    return app.check_crawl_status(crawl_id)
+
+
+def batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']):
+    """
+    Batch scrape multiple URLs simultaneously.
+
+    Args:
+        urls: List of URLs to scrape
+        formats: List of desired output formats (e.g., ['markdown', 'html'])
+
+    Returns:
+        Dictionary containing the batch scrape results
+    """
+    batch_result = app.batch_scrape_urls(urls, {'formats': formats})
+    return batch_result
+
+
+def async_batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']):
+    """
+    Asynchronously batch scrape multiple URLs.
+
+    Args:
+        urls: List of URLs to scrape
+        formats: List of desired output formats (e.g., ['markdown', 'html'])
+
+    Returns:
+        Dictionary containing the job ID and status URL
+    """
+    batch_job = app.async_batch_scrape_urls(urls, {'formats': formats})
+    return batch_job
+
+
+def check_batch_status(job_id: str):
+    """
+    Check the status of an asynchronous batch scrape job.
+
+    Args:
+        job_id: The ID of the batch scrape job
+
+    Returns:
+        Dictionary containing the current status and results if completed
+    """
+    return app.check_batch_scrape_status(job_id)
+
+
+def extract_data(urls: List[str], schema: Optional[Dict[str, Any]] = None, prompt: Optional[str] = None) -> Dict[
+    str, Any]:
+    """
+    Extract structured data from URLs using LLMs.
+
+    Args:
+        urls: List of URLs to extract data from
+        schema: Optional JSON schema defining the structure of data to extract
+        prompt: Optional natural language prompt describing the data to extract
+
+    Returns:
+        Dictionary containing the extracted structured data
+    """
+    params: Dict[str, Any] = {}
+
+    if prompt is not None:
+        params['prompt'] = prompt
+    elif schema is not None:
+        params['schema'] = schema
+
+    data = app.extract(urls, params)
+    return data
+
+
+def map_website(url: str, search: Optional[str] = None):
+    """
+    Map a website to get all URLs, with optional search functionality.
+
+    Args:
+        url: The base URL to map
+        search: Optional search term to filter URLs
+
+    Returns:
+        Dictionary containing the list of discovered URLs
+    """
+    params = {'search': search} if search else {}
+    map_result = app.map_url(url, params)
+    return map_result
+
+
+def batch_extract(urls: List[str], extract_params: Dict[str, Any]):
+    """
+    Batch extract structured data from multiple URLs.
+
+    Args:
+        urls: List of URLs to extract data from
+        extract_params: Dictionary containing extraction parameters including prompt or schema
+
+    Returns:
+        Dictionary containing the extracted data from all URLs
+    """
+    params = {
+        'formats': ['extract'],
+        'extract': extract_params
+    }
+
+    batch_result = app.batch_scrape_urls(urls, params)
+    return batch_result
diff --git a/agentstack/_tools/crw/config.json b/agentstack/_tools/crw/config.json
new file mode 100644
index 0000000..b8a1d42
--- /dev/null
+++ b/agentstack/_tools/crw/config.json
@@ -0,0 +1,23 @@
+{
+  "name": "crw",
+  "url": "https://fastcrw.com/",
+  "category": "browsing",
+  "env": {
+    "CRW_API_KEY": null,
+    "CRW_API_URL": "https://fastcrw.com/api"
+  },
+  "dependencies": [
+    "firecrawl-py>=1.6.4"
+  ],
+  "tools": [
+    "web_scrape",
+    "web_crawl",
+    "retrieve_web_crawl",
+    "batch_scrape",
+    "check_batch_status",
+    "extract_data",
+    "map_website",
+    "batch_extract"
+  ],
+  "cta": "Create an API key at https://fastcrw.com/"
+}