feat: add webhook_url parameter to crawler endpoint #71
The change threads an optional `webhook_url` parameter through both SDK clients and the `CrawlRequest` model. Async client (`async def crawl`):

```diff
@@ -864,6 +864,7 @@ async def crawl(
         stealth: bool = False,
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
+        webhook_url: Optional[str] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -887,6 +888,7 @@ async def crawl(
                 Supports wildcards: * matches any characters, ** matches any path segments
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
+            webhook_url: URL to receive webhook notifications when the crawl completes
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -916,6 +918,8 @@ async def crawl(
             logger.debug(f"✅ Include paths: {include_paths}")
         if exclude_paths:
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
+        if webhook_url:
+            logger.debug(f"🔔 Webhook URL: {webhook_url}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")
@@ -945,6 +949,8 @@ async def crawl(
             request_data["include_paths"] = include_paths
         if exclude_paths is not None:
             request_data["exclude_paths"] = exclude_paths
+        if webhook_url is not None:
+            request_data["webhook_url"] = webhook_url

         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
```
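With this change, a callback URL can be passed straight through the client call. A minimal sketch of the intended usage, assuming an async client class; the `AsyncClient` name, its constructor, and the `url` argument below are illustrative assumptions, not confirmed by this diff:

```python
import asyncio

from scrapegraph_py import AsyncClient  # hypothetical import path; adjust to the actual SDK


async def main() -> None:
    client = AsyncClient(api_key="your-api-key")  # constructor shape is an assumption
    result = await client.crawl(
        url="https://example.com",                  # illustrative required argument
        webhook_url="https://example.com/webhook",  # new in this PR: POST on completion
        exclude_paths=["/admin/*"],                 # existing parameter, shown for context
    )
    print(result)


asyncio.run(main())
```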
Sync client (`def crawl`), mirroring the async change:

```diff
@@ -874,6 +874,7 @@ def crawl(
         stealth: bool = False,
         include_paths: Optional[list[str]] = None,
         exclude_paths: Optional[list[str]] = None,
+        webhook_url: Optional[str] = None,
         return_toon: bool = False,
     ):
         """Send a crawl request with support for both AI extraction and
@@ -897,6 +898,7 @@ def crawl(
                 Supports wildcards: * matches any characters, ** matches any path segments
             exclude_paths: List of path patterns to exclude (e.g., ['/admin/*', '/api/*'])
                 Supports wildcards and takes precedence over include_paths
+            webhook_url: URL to receive webhook notifications when the crawl completes
             return_toon: If True, return response in TOON format (reduces token usage by 30-60%)
         """
         logger.info("🔍 Starting crawl request")
@@ -926,6 +928,8 @@ def crawl(
             logger.debug(f"✅ Include paths: {include_paths}")
         if exclude_paths:
             logger.debug(f"❌ Exclude paths: {exclude_paths}")
+        if webhook_url:
+            logger.debug(f"🔔 Webhook URL: {webhook_url}")
         if return_toon:
             logger.debug("🎨 TOON format output enabled")
@@ -955,6 +959,8 @@ def crawl(
             request_data["include_paths"] = include_paths
         if exclude_paths is not None:
             request_data["exclude_paths"] = exclude_paths
+        if webhook_url is not None:
+            request_data["webhook_url"] = webhook_url

         request = CrawlRequest(**request_data)
         logger.debug("✅ Request validation passed")
```
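As the field description in the next diff notes, the webhook receives a POST request with the crawl results, so a receiver can be as small as the following standard-library sketch. The JSON body shape is an assumption; the PR does not specify the payload schema:

```python
# Minimal webhook receiver sketch using only the standard library.
# Assumption: the crawl results arrive as a JSON request body.
import json
from http.server import BaseHTTPRequestHandler, HTTPServer


class CrawlWebhookHandler(BaseHTTPRequestHandler):
    def do_POST(self):
        length = int(self.headers.get("Content-Length", 0))
        raw = self.rfile.read(length)
        try:
            results = json.loads(raw)
        except json.JSONDecodeError:
            self.send_response(400)  # reject non-JSON bodies
            self.end_headers()
            return
        print("Crawl completed:", results)
        self.send_response(200)  # acknowledge so the sender doesn't retry
        self.end_headers()


if __name__ == "__main__":
    HTTPServer(("", 8000), CrawlWebhookHandler).serve_forever()
```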
Request model (`CrawlRequest`) — the new field plus a validator:

```diff
@@ -94,6 +94,12 @@ class CrawlRequest(BaseModel):
         "Takes precedence over include_paths.",
         example=["/admin/*", "/api/**"]
     )
+    webhook_url: Optional[str] = Field(
+        default=None,
+        description="URL to receive webhook notifications when the crawl job completes. "
+        "The webhook will receive a POST request with the crawl results.",
+        example="https://example.com/webhook"
+    )

     @model_validator(mode="after")
     def validate_url(self) -> "CrawlRequest":
@@ -169,6 +175,21 @@ def validate_path_patterns(self) -> "CrawlRequest":

         return self

+    @model_validator(mode="after")
+    def validate_webhook_url(self) -> "CrawlRequest":
+        """Validate webhook URL format if provided"""
+        if self.webhook_url is not None:
+            if not self.webhook_url.strip():
+                raise ValueError("Webhook URL cannot be empty")
+            if not (
+                self.webhook_url.startswith("http://")
+                or self.webhook_url.startswith("https://")
+            ):
+                raise ValueError(
+                    "Invalid webhook URL - must start with http:// or https://"
+                )
+        return self
+

 class GetCrawlRequest(BaseModel):
     """Request model for get_crawl endpoint"""
```
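Because the validator runs after model construction, a bad webhook URL fails fast on the client side before any request is sent. A self-contained demonstration of the same pattern, using a stripped-down stand-in model since `CrawlRequest`'s other required fields are not shown in this diff:

```python
from typing import Optional

from pydantic import BaseModel, ValidationError, model_validator


class WebhookDemo(BaseModel):
    """Stand-in for CrawlRequest carrying only the new field."""

    webhook_url: Optional[str] = None

    @model_validator(mode="after")
    def validate_webhook_url(self) -> "WebhookDemo":
        # Same logic as the PR's validate_webhook_url validator.
        if self.webhook_url is not None:
            if not self.webhook_url.strip():
                raise ValueError("Webhook URL cannot be empty")
            if not self.webhook_url.startswith(("http://", "https://")):
                raise ValueError(
                    "Invalid webhook URL - must start with http:// or https://"
                )
        return self


WebhookDemo(webhook_url="https://example.com/webhook")  # passes
WebhookDemo()                                           # passes: field is optional
try:
    WebhookDemo(webhook_url="ftp://example.com/hook")   # rejected scheme
except ValidationError as exc:
    print(exc)
```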
Review comment: `if (webhookUrl)` silently ignores explicitly provided empty-string values and also allows whitespace-only strings through (both are likely invalid URLs). Prefer treating "provided" as `webhookUrl != null`, validating that the value is a non-empty string after trimming and starts with `http://` or `https://`, and then setting `payload.webhook_url` (or throwing a clear error) to avoid surprising no-op behavior.
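The reviewer's snippet (`webhookUrl`, `payload.webhook_url`) appears to target a JavaScript/TypeScript counterpart not shown in this excerpt. The same idea transposed to Python might look like the sketch below; the function name and payload shape are illustrative:

```python
from typing import Any, Optional


def apply_webhook_url(payload: dict[str, Any], webhook_url: Optional[str]) -> None:
    """Distinguish 'not provided' from 'provided but invalid', per the review."""
    if webhook_url is None:
        return  # genuinely omitted: leave the payload untouched
    trimmed = webhook_url.strip()
    if not trimmed:
        raise ValueError("webhook_url was provided but is empty")
    if not trimmed.startswith(("http://", "https://")):
        raise ValueError("webhook_url must start with http:// or https://")
    payload["webhook_url"] = trimmed
```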