diff --git a/recon/helpers/resource_enum/__init__.py b/recon/helpers/resource_enum/__init__.py index b6a1f496..caa7115a 100644 --- a/recon/helpers/resource_enum/__init__.py +++ b/recon/helpers/resource_enum/__init__.py @@ -62,8 +62,11 @@ # jsluice helpers from .jsluice_helpers import ( + DEFAULT_JSLUICE_EXCLUDE_PATTERNS, + filter_jsluice_url, run_jsluice_analysis, merge_jsluice_into_by_base_url, + verify_jsluice_urls, ) # FFuf helpers @@ -123,8 +126,11 @@ "pull_hakrawler_docker_image", "merge_hakrawler_into_by_base_url", # jsluice + "DEFAULT_JSLUICE_EXCLUDE_PATTERNS", + "filter_jsluice_url", "run_jsluice_analysis", "merge_jsluice_into_by_base_url", + "verify_jsluice_urls", # FFuf "run_ffuf_discovery", "pull_ffuf_binary_check", diff --git a/recon/helpers/resource_enum/jsluice_helpers.py b/recon/helpers/resource_enum/jsluice_helpers.py index 345a1990..4bbcb95b 100644 --- a/recon/helpers/resource_enum/jsluice_helpers.py +++ b/recon/helpers/resource_enum/jsluice_helpers.py @@ -11,12 +11,182 @@ import ssl import subprocess import urllib.request +import uuid from pathlib import Path -from typing import Dict, List, Tuple +from typing import Dict, List, Set, Tuple from urllib.parse import urlparse, urljoin from concurrent.futures import ThreadPoolExecutor, as_completed +DEFAULT_JSLUICE_EXCLUDE_PATTERNS = [ + '/_next/image', '/_next/static', '/_next/data', '/__nextjs', + '/_nuxt/', '/__nuxt', + '/runtime.', '/polyfills.', '/vendor.', + '/webpack', '/chunk.', '.chunk.js', '.bundle.js', 'hot-update', + '/static/', '/public/', '/dist/', '/build/', '/lib/', '/vendor/', '/node_modules/', + '.js', '.mjs', '.map', '.css', '.scss', '.sass', '.less', + '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.webp', '.avif', + '.woff', '.woff2', '.ttf', '.eot', '.otf', + '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', + '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', + '.zip', '.rar', '.7z', '.tar', '.gz', + '/rxjs/', '/react/', '/angular/', '/lodash/', '/zone.js/', +] 
def _create_temp_dir(prefix: str = "jsluice_verify") -> Path:
    """
    Create a uniquely named scratch directory under /tmp/redamon.

    The directory is named ``.{prefix}_{8 random hex chars}`` and is created
    together with any missing parents. It lives under /tmp/redamon for
    Docker-in-Docker compatibility (the directory is bind-mounted into the
    httpx container by ``verify_jsluice_urls``).

    Returns:
        Path of the created directory.
    """
    temp_dir = Path(f"/tmp/redamon/.{prefix}_{uuid.uuid4().hex[:8]}")
    temp_dir.mkdir(parents=True, exist_ok=True)
    return temp_dir


def _cleanup_temp_dir(temp_dir: Path):
    """
    Remove a scratch directory on a best-effort basis.

    A directory that is already gone is ignored; any other filesystem error
    is reported but never raised, so cleanup cannot mask the caller's result.
    """
    try:
        shutil.rmtree(temp_dir)
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(f"[!][jsluice] Could not remove temp dir {temp_dir}: {exc}")


def _split_exclude_patterns(exclude_patterns) -> Tuple[Tuple[str, ...], Tuple[str, ...]]:
    """
    Lower-case the noise patterns and split them into (suffixes, substrings).

    A pattern made only of dot-separated alphanumeric parts with a leading dot
    (".js", ".chunk.js", ".7z") is a file extension and is returned as a path
    suffix. Everything else ("/rxjs/", "hot-update", "/runtime.") is returned
    as a plain substring. Empty and non-string patterns are dropped.
    """
    suffixes = []
    substrings = []
    for pattern in exclude_patterns or ():
        if not isinstance(pattern, str) or not pattern:
            continue
        lowered = pattern.lower()
        is_extension = lowered.startswith(".") and all(
            part.isalnum() for part in lowered[1:].split(".")
        )
        if is_extension:
            suffixes.append(lowered)
        else:
            substrings.append(lowered)
    return tuple(suffixes), tuple(substrings)


def _is_probe_candidate(url, suffixes: Tuple[str, ...], substrings: Tuple[str, ...]) -> bool:
    """Return True when *url* matches none of the pre-split noise patterns."""
    if not url or not isinstance(url, str):
        return False

    try:
        path_lower = (urlparse(url).path or "").lower()
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. an unterminated "[");
        # such a URL cannot be probed, so treat it as noise.
        return False

    # Extensions are compared against the end of the path only. A substring
    # test would reject "/api/users.json" because of ".js", or any URL whose
    # hostname merely contains ".map" / ".ico".
    if suffixes and path_lower.endswith(suffixes):
        return False

    url_lower = url.lower()
    return not any(fragment in url_lower for fragment in substrings)


def filter_jsluice_url(url: str, exclude_patterns: List[str]) -> bool:
    """
    Return True when a jsluice URL should be probed.

    jsluice often extracts library, bundle, sourcemap, and static asset paths
    from JavaScript source. These are filtered before HTTP validation to avoid
    spending probe budget on obvious non-application endpoints.

    Matching is case-insensitive:
      * extension patterns (".js", ".chunk.js", ".png") must be the suffix of
        the URL path, so query strings and hostnames never trigger them;
      * every other pattern is a substring match against the whole URL.

    Args:
        url: URL (absolute or relative) extracted by jsluice.
        exclude_patterns: Noise patterns; ``None`` or empty disables filtering.

    Returns:
        False for empty, non-string or unparsable URLs and for URLs matching
        a pattern; True otherwise.
    """
    suffixes, substrings = _split_exclude_patterns(exclude_patterns)
    return _is_probe_candidate(url, suffixes, substrings)


def _read_httpx_results(output_file: Path, accept_codes: Set[int], candidates: Set[str]) -> Set[str]:
    """
    Parse httpx JSON-lines output and return the URLs with an accepted status.

    Lines that are blank, not valid JSON, not JSON objects, or that lack a
    numeric status are skipped. When the entry's ``input`` field is one of the
    probed candidates that original string is returned, so the result stays a
    subset of what was submitted even if httpx normalised ``url``; otherwise
    the reported ``url`` is used.
    """
    verified = set()
    if not output_file.exists():
        return verified

    with open(output_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            try:
                entry = json.loads(line.strip())
            except json.JSONDecodeError:
                continue
            if not isinstance(entry, dict):
                continue

            # Newer httpx releases emit "status_code", older ones "status-code".
            status = entry.get('status_code') or entry.get('status-code')
            try:
                status = int(status)
            except (TypeError, ValueError):
                continue
            if status not in accept_codes:
                continue

            probed = entry.get('input')
            if isinstance(probed, str) and probed in candidates:
                verified.add(probed)
                continue

            url = entry.get('url', '')
            if url and isinstance(url, str):
                verified.add(url)

    return verified


def verify_jsluice_urls(
    urls: List[str],
    docker_image: str,
    threads: int,
    timeout: int,
    rate_limit: int,
    accept_status: List[int],
    exclude_patterns: List[str] = None,
    use_proxy: bool = False,
) -> Tuple[Set[str], Dict[str, int]]:
    """
    Verify jsluice-discovered URLs are live using httpx.

    URLs are de-duplicated, passed through the noise filter, written to a
    scratch directory and probed by the httpx Docker image. Only URLs whose
    HTTP status is in ``accept_status`` are returned.

    This verifier fails closed: if probing fails or times out, unverified
    jsluice URLs are not returned for graph publication.

    Args:
        urls: URLs extracted by jsluice (duplicates allowed).
        docker_image: httpx image to run.
        threads: httpx ``-t`` value.
        timeout: httpx per-request ``-timeout`` in seconds.
        rate_limit: httpx ``-rl`` value (requests per second).
        accept_status: HTTP status codes that count as "live".
        exclude_patterns: Noise patterns, see ``filter_jsluice_url``.
        use_proxy: Route probes through the SOCKS5 proxy on 127.0.0.1:9050.

    Returns:
        ``(verified_urls, stats)`` where stats holds ``jsluice_verify_total``,
        ``jsluice_verify_candidates``, ``jsluice_skipped_blacklist``,
        ``jsluice_verified`` and ``jsluice_skipped_unverified``.
    """
    stats = {
        "jsluice_verify_total": len(urls) if urls else 0,
        "jsluice_verify_candidates": 0,
        "jsluice_skipped_blacklist": 0,
        "jsluice_verified": 0,
        "jsluice_skipped_unverified": 0,
    }

    if not urls:
        return set(), stats

    # Split the patterns once instead of re-normalising them for every URL.
    suffixes, substrings = _split_exclude_patterns(exclude_patterns)

    candidates = []
    for url in sorted({u for u in urls if isinstance(u, str)}):
        if _is_probe_candidate(url, suffixes, substrings):
            candidates.append(url)
        else:
            stats["jsluice_skipped_blacklist"] += 1

    stats["jsluice_verify_candidates"] = len(candidates)
    if not candidates:
        print(f"[*][jsluice] Verification skipped: all {len(urls)} URLs matched noise filters")
        return set(), stats

    print(f"\n[*][jsluice] Verifying {len(candidates)} jsluice URLs...")
    if stats["jsluice_skipped_blacklist"]:
        print(f"[*][jsluice] Skipped {stats['jsluice_skipped_blacklist']} URLs via noise filters")

    temp_dir = _create_temp_dir("jsluice_verify")
    try:
        urls_file = temp_dir / "urls.txt"
        output_file = temp_dir / "verified.json"

        with open(urls_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{url}\n" for url in candidates)

        cmd = [
            "docker", "run", "--rm",
            "-v", f"{temp_dir}:/data",
            docker_image,
            "-l", "/data/urls.txt",
            "-o", "/data/verified.json",
            "-json",
            "-silent",
            "-nc",
            "-t", str(threads),
            "-timeout", str(timeout),
            "-rl", str(rate_limit),
        ]

        if use_proxy:
            # NOTE(review): without host networking, 127.0.0.1 inside the
            # container is the container's own loopback - confirm the proxy
            # is actually reachable from the httpx container.
            cmd.extend(["-proxy", "socks5://127.0.0.1:9050"])

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        except subprocess.TimeoutExpired:
            print("[!][jsluice] URL verification timeout; dropping unverified jsluice URLs")
            stats["jsluice_skipped_unverified"] = len(candidates)
            return set(), stats
        except Exception as e:
            # Boundary handler: any launch failure must fail closed, not crash the scan.
            print(f"[!][jsluice] URL verification error: {e}; dropping unverified jsluice URLs")
            stats["jsluice_skipped_unverified"] = len(candidates)
            return set(), stats

        if result.returncode != 0:
            # Surface tool failures so "0 verified" is not mistaken for "all dead".
            detail = str(result.stderr or "").strip()[:200]
            print(f"[!][jsluice] httpx exited with code {result.returncode}: {detail}")

        accept_codes = {int(code) for code in accept_status or ()}
        verified = _read_httpx_results(output_file, accept_codes, set(candidates))

        stats["jsluice_verified"] = len(verified)
        # Clamp: httpx may report URLs that are not byte-identical to a candidate.
        stats["jsluice_skipped_unverified"] = max(0, len(candidates) - len(verified))
        print(f"[+][jsluice] Verified: {len(verified)}/{len(candidates)} URLs are live")
        return verified, stats
    finally:
        _cleanup_temp_dir(temp_dir)
settings.get('JSLUICE_PARALLELISM', 3) + JSLUICE_VERIFY_URLS = settings.get('JSLUICE_VERIFY_URLS', True) + JSLUICE_VERIFY_DOCKER_IMAGE = settings.get('JSLUICE_VERIFY_DOCKER_IMAGE', 'projectdiscovery/httpx:latest') + JSLUICE_VERIFY_TIMEOUT = settings.get('JSLUICE_VERIFY_TIMEOUT', 5) + JSLUICE_VERIFY_RATE_LIMIT = settings.get('JSLUICE_VERIFY_RATE_LIMIT', 50) + JSLUICE_VERIFY_THREADS = settings.get('JSLUICE_VERIFY_THREADS', 50) + JSLUICE_VERIFY_ACCEPT_STATUS = settings.get( + 'JSLUICE_VERIFY_ACCEPT_STATUS', + [200, 201, 301, 302, 307, 308, 401, 403] + ) + JSLUICE_EXCLUDE_PATTERNS = list(settings.get( + 'JSLUICE_EXCLUDE_PATTERNS', + DEFAULT_JSLUICE_EXCLUDE_PATTERNS, + )) # FFuf settings FFUF_ENABLED = settings.get('FFUF_ENABLED', False) @@ -415,6 +433,12 @@ def run_resource_enum(recon_data: dict, output_file: Optional[Path] = None, sett print(f"[*][jsluice] Extract URLs: {JSLUICE_EXTRACT_URLS}") print(f"[*][jsluice] Extract secrets: {JSLUICE_EXTRACT_SECRETS}") print(f"[*][jsluice] Parallelism: {JSLUICE_PARALLELISM} concurrent base URLs") + print(f"[*][jsluice] URL verification: {JSLUICE_VERIFY_URLS}") + if JSLUICE_VERIFY_URLS: + print(f"[*][jsluice] Verify rate limit: {JSLUICE_VERIFY_RATE_LIMIT} req/s") + print(f"[*][jsluice] Verify threads: {JSLUICE_VERIFY_THREADS}") + print(f"[*][jsluice] Verify timeout: {JSLUICE_VERIFY_TIMEOUT}s") + print(f"[*][jsluice] Noise filter patterns: {len(JSLUICE_EXCLUDE_PATTERNS)}") # FFuf settings print(f"[*][FFuf] Enabled: {FFUF_ENABLED}") if FFUF_ENABLED: @@ -693,11 +717,17 @@ def _run_kr_wordlist(wordlist_name): "jsluice_parsed": 0, "jsluice_new": 0, "jsluice_overlap": 0, + "jsluice_verify_total": 0, + "jsluice_verify_candidates": 0, + "jsluice_skipped_blacklist": 0, + "jsluice_verified": 0, + "jsluice_skipped_unverified": 0, } if JSLUICE_ENABLED and (JSLUICE_EXTRACT_URLS or JSLUICE_EXTRACT_SECRETS): all_crawl_urls = list(set(katana_urls + hakrawler_urls)) if all_crawl_urls: + verify_stats = {} jsluice_result = run_jsluice_analysis( 
all_crawl_urls, JSLUICE_MAX_FILES, @@ -710,15 +740,41 @@ def _run_kr_wordlist(wordlist_name): use_proxy ) + if jsluice_result.get("urls"): + if JSLUICE_VERIFY_URLS: + verified_jsluice_urls, verify_stats = verify_jsluice_urls( + jsluice_result["urls"], + JSLUICE_VERIFY_DOCKER_IMAGE, + JSLUICE_VERIFY_THREADS, + JSLUICE_VERIFY_TIMEOUT, + JSLUICE_VERIFY_RATE_LIMIT, + JSLUICE_VERIFY_ACCEPT_STATUS, + JSLUICE_EXCLUDE_PATTERNS, + use_proxy, + ) + jsluice_result["urls"] = sorted(verified_jsluice_urls) + jsluice_stats.update(verify_stats) + else: + jsluice_stats["jsluice_verify_total"] = len(jsluice_result["urls"]) + jsluice_stats["jsluice_verify_candidates"] = len(jsluice_result["urls"]) + jsluice_stats["jsluice_verified"] = len(jsluice_result["urls"]) + if jsluice_result.get("urls"): print("\n[*][jsluice] Merging extracted URLs into results...") - organized_data['by_base_url'], jsluice_stats = merge_jsluice_into_by_base_url( + organized_data['by_base_url'], merge_stats = merge_jsluice_into_by_base_url( jsluice_result["urls"], organized_data['by_base_url'], ) + jsluice_stats.update(merge_stats) + jsluice_stats.update(verify_stats) print(f"[+][jsluice] Total URLs: {jsluice_stats['jsluice_total']}") print(f"[+][jsluice] New endpoints: {jsluice_stats['jsluice_new']}") print(f"[+][jsluice] Overlap: {jsluice_stats['jsluice_overlap']}") + if JSLUICE_VERIFY_URLS: + print(f"[+][jsluice] Skipped (blacklist): {jsluice_stats['jsluice_skipped_blacklist']}") + print(f"[+][jsluice] Skipped (unverified): {jsluice_stats['jsluice_skipped_unverified']}") + elif JSLUICE_VERIFY_URLS and jsluice_stats.get("jsluice_verify_total", 0) > 0: + print(f"[-][jsluice] No URLs survived validation ({jsluice_stats['jsluice_skipped_blacklist']} blacklisted, {jsluice_stats['jsluice_skipped_unverified']} unverified)") # FFuf directory fuzzing (runs after crawlers and jsluice, before GAU merge) ffuf_stats = { @@ -1069,6 +1125,7 @@ def _run_kr_wordlist(wordlist_name): # jsluice metadata 'jsluice_enabled': 
JSLUICE_ENABLED, 'jsluice_max_files': JSLUICE_MAX_FILES if JSLUICE_ENABLED else None, + 'jsluice_verify_enabled': JSLUICE_VERIFY_URLS if JSLUICE_ENABLED else False, 'jsluice_urls_found': len(jsluice_in_scope_urls), 'jsluice_secrets_found': len(jsluice_result.get("secrets", [])), 'jsluice_stats': jsluice_stats, diff --git a/recon/project_settings.py b/recon/project_settings.py index 526f2d25..ef125edf 100644 --- a/recon/project_settings.py +++ b/recon/project_settings.py @@ -337,6 +337,26 @@ 'JSLUICE_EXTRACT_SECRETS': True, 'JSLUICE_CONCURRENCY': 5, 'JSLUICE_PARALLELISM': 5, + 'JSLUICE_VERIFY_URLS': True, + 'JSLUICE_VERIFY_DOCKER_IMAGE': 'projectdiscovery/httpx:latest', + 'JSLUICE_VERIFY_TIMEOUT': 5, + 'JSLUICE_VERIFY_RATE_LIMIT': 50, + 'JSLUICE_VERIFY_THREADS': 50, + 'JSLUICE_VERIFY_ACCEPT_STATUS': [200, 201, 301, 302, 307, 308, 401, 403], + 'JSLUICE_EXCLUDE_PATTERNS': [ + '/_next/image', '/_next/static', '/_next/data', '/__nextjs', + '/_nuxt/', '/__nuxt', + '/runtime.', '/polyfills.', '/vendor.', + '/webpack', '/chunk.', '.chunk.js', '.bundle.js', 'hot-update', + '/static/', '/public/', '/dist/', '/build/', '/lib/', '/vendor/', '/node_modules/', + '.js', '.mjs', '.map', '.css', '.scss', '.sass', '.less', + '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.webp', '.avif', + '.woff', '.woff2', '.ttf', '.eot', '.otf', + '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', + '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', + '.zip', '.rar', '.7z', '.tar', '.gz', + '/rxjs/', '/react/', '/angular/', '/lodash/', '/zone.js/', + ], # ========== JS RECON SCANNER ========== 'JS_RECON_ENABLED': False, @@ -882,6 +902,13 @@ def fetch_project_settings(project_id: str, webapp_url: str) -> dict[str, Any]: settings['JSLUICE_EXTRACT_SECRETS'] = project.get('jsluiceExtractSecrets', DEFAULT_SETTINGS['JSLUICE_EXTRACT_SECRETS']) settings['JSLUICE_CONCURRENCY'] = project.get('jsluiceConcurrency', DEFAULT_SETTINGS['JSLUICE_CONCURRENCY']) settings['JSLUICE_PARALLELISM'] = 
project.get('jsluiceParallelism', DEFAULT_SETTINGS['JSLUICE_PARALLELISM']) + settings['JSLUICE_VERIFY_URLS'] = project.get('jsluiceVerifyUrls', DEFAULT_SETTINGS['JSLUICE_VERIFY_URLS']) + settings['JSLUICE_VERIFY_DOCKER_IMAGE'] = project.get('jsluiceVerifyDockerImage', DEFAULT_SETTINGS['JSLUICE_VERIFY_DOCKER_IMAGE']) + settings['JSLUICE_VERIFY_TIMEOUT'] = project.get('jsluiceVerifyTimeout', DEFAULT_SETTINGS['JSLUICE_VERIFY_TIMEOUT']) + settings['JSLUICE_VERIFY_RATE_LIMIT'] = project.get('jsluiceVerifyRateLimit', DEFAULT_SETTINGS['JSLUICE_VERIFY_RATE_LIMIT']) + settings['JSLUICE_VERIFY_THREADS'] = project.get('jsluiceVerifyThreads', DEFAULT_SETTINGS['JSLUICE_VERIFY_THREADS']) + settings['JSLUICE_VERIFY_ACCEPT_STATUS'] = project.get('jsluiceVerifyAcceptStatus', DEFAULT_SETTINGS['JSLUICE_VERIFY_ACCEPT_STATUS']) + settings['JSLUICE_EXCLUDE_PATTERNS'] = project.get('jsluiceExcludePatterns', DEFAULT_SETTINGS['JSLUICE_EXCLUDE_PATTERNS']) # JS Recon Scanner settings['JS_RECON_ENABLED'] = project.get('jsReconEnabled', DEFAULT_SETTINGS['JS_RECON_ENABLED']) diff --git a/recon/tests/test_hakrawler_jsluice.py b/recon/tests/test_hakrawler_jsluice.py index 14419e21..e1ff91b0 100644 --- a/recon/tests/test_hakrawler_jsluice.py +++ b/recon/tests/test_hakrawler_jsluice.py @@ -8,6 +8,7 @@ import sys import json import subprocess +import tempfile from pathlib import Path from unittest import mock @@ -473,6 +474,95 @@ def test_jsluice_run_filters_scope_and_cleans_up(): print("PASS: test_jsluice_run_filters_scope_and_cleans_up") +def test_jsluice_filter_url_rejects_common_static_library_noise(): + """filter_jsluice_url should skip obvious bundled library/static paths.""" + from recon.helpers.resource_enum.jsluice_helpers import filter_jsluice_url + + patterns = [ + "/rxjs/", + "/node_modules/", + "/webpack", + ".map", + ".chunk.js", + ] + + assert filter_jsluice_url("https://example.com/rxjs/static-5.10", patterns) is False + assert 
filter_jsluice_url("https://example.com/node_modules/lodash/index.js", patterns) is False + assert filter_jsluice_url("https://example.com/_next/static/chunks/app.chunk.js", patterns) is False + assert filter_jsluice_url("https://example.com/api/users", patterns) is True + assert filter_jsluice_url("https://example.com/dashboard/settings", patterns) is True + print("PASS: test_jsluice_filter_url_rejects_common_static_library_noise") + + +def test_verify_jsluice_urls_filters_noise_and_unverified(): + """verify_jsluice_urls should blacklist noise and keep only accepted HTTP statuses.""" + from recon.helpers.resource_enum.jsluice_helpers import verify_jsluice_urls + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + output_file = temp_path / "verified.json" + + def fake_run(cmd, **kwargs): + output_file.write_text( + "\n".join([ + json.dumps({"url": "https://example.com/api/live", "status_code": 200}), + json.dumps({"url": "https://example.com/api/missing", "status_code": 404}), + "", + ]) + ) + return mock.MagicMock(returncode=0, stdout="", stderr="") + + with mock.patch("recon.helpers.resource_enum.jsluice_helpers._create_temp_dir", return_value=temp_path), \ + mock.patch("recon.helpers.resource_enum.jsluice_helpers._cleanup_temp_dir"), \ + mock.patch("subprocess.run", side_effect=fake_run): + verified, stats = verify_jsluice_urls( + urls=[ + "https://example.com/api/live", + "https://example.com/api/missing", + "https://example.com/rxjs/static-5.10", + ], + docker_image="projectdiscovery/httpx:latest", + threads=10, + timeout=5, + rate_limit=50, + accept_status=[200, 201, 301, 302, 307, 308, 401, 403], + exclude_patterns=["/rxjs/"], + ) + + assert verified == {"https://example.com/api/live"} + assert stats["jsluice_verify_total"] == 3 + assert stats["jsluice_skipped_blacklist"] == 1 + assert stats["jsluice_verified"] == 1 + assert stats["jsluice_skipped_unverified"] == 1 + print("PASS: 
test_verify_jsluice_urls_filters_noise_and_unverified") + + +def test_verify_jsluice_urls_fails_closed_on_httpx_error(): + """If httpx verification fails, no jsluice URLs should be published.""" + from recon.helpers.resource_enum.jsluice_helpers import verify_jsluice_urls + + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + with mock.patch("recon.helpers.resource_enum.jsluice_helpers._create_temp_dir", return_value=temp_path), \ + mock.patch("recon.helpers.resource_enum.jsluice_helpers._cleanup_temp_dir"), \ + mock.patch("subprocess.run", side_effect=RuntimeError("docker failed")): + verified, stats = verify_jsluice_urls( + urls=["https://example.com/api/live"], + docker_image="projectdiscovery/httpx:latest", + threads=10, + timeout=5, + rate_limit=50, + accept_status=[200, 201, 301, 302, 307, 308, 401, 403], + exclude_patterns=[], + ) + + assert verified == set() + assert stats["jsluice_verify_total"] == 1 + assert stats["jsluice_verified"] == 0 + assert stats["jsluice_skipped_unverified"] == 1 + print("PASS: test_verify_jsluice_urls_fails_closed_on_httpx_error") + + # =========================================================================== # jsluice merge tests # ===========================================================================