Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions recon/helpers/resource_enum/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,11 @@

# jsluice helpers
from .jsluice_helpers import (
DEFAULT_JSLUICE_EXCLUDE_PATTERNS,
filter_jsluice_url,
run_jsluice_analysis,
merge_jsluice_into_by_base_url,
verify_jsluice_urls,
)

# FFuf helpers
Expand Down Expand Up @@ -123,8 +126,11 @@
"pull_hakrawler_docker_image",
"merge_hakrawler_into_by_base_url",
# jsluice
"DEFAULT_JSLUICE_EXCLUDE_PATTERNS",
"filter_jsluice_url",
"run_jsluice_analysis",
"merge_jsluice_into_by_base_url",
"verify_jsluice_urls",
# FFuf
"run_ffuf_discovery",
"pull_ffuf_binary_check",
Expand Down
172 changes: 171 additions & 1 deletion recon/helpers/resource_enum/jsluice_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,182 @@
import ssl
import subprocess
import urllib.request
import uuid
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Dict, List, Set, Tuple
from urllib.parse import urlparse, urljoin
from concurrent.futures import ThreadPoolExecutor, as_completed


# Default noise patterns handed to filter_jsluice_url(): any jsluice URL that
# matches one of these is skipped before HTTP verification. Matching is
# case-insensitive (both sides are lower-cased by the filter).
DEFAULT_JSLUICE_EXCLUDE_PATTERNS = [
    # Next.js / Nuxt framework-internal paths
    '/_next/image', '/_next/static', '/_next/data', '/__nextjs',
    '/_nuxt/', '/__nuxt',
    # Bundler runtime / vendor entry files
    '/runtime.', '/polyfills.', '/vendor.',
    # Webpack chunks, bundles and hot-reload artefacts
    '/webpack', '/chunk.', '.chunk.js', '.bundle.js', 'hot-update',
    # Static / build output / dependency directories
    '/static/', '/public/', '/dist/', '/build/', '/lib/', '/vendor/', '/node_modules/',
    # Scripts, source maps and stylesheets
    '.js', '.mjs', '.map', '.css', '.scss', '.sass', '.less',
    # Images
    '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.webp', '.avif',
    # Fonts
    '.woff', '.woff2', '.ttf', '.eot', '.otf',
    # Audio / video
    '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm',
    # Documents
    '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
    # Archives
    '.zip', '.rar', '.7z', '.tar', '.gz',
    # Paths of well-known front-end libraries
    '/rxjs/', '/react/', '/angular/', '/lodash/', '/zone.js/',
]


def _create_temp_dir(prefix: str = "jsluice_verify") -> Path:
"""Create a temp directory under /tmp/redamon for Docker-in-Docker compatibility."""
temp_dir = Path(f"/tmp/redamon/.{prefix}_{uuid.uuid4().hex[:8]}")
temp_dir.mkdir(parents=True, exist_ok=True)
return temp_dir


def _cleanup_temp_dir(temp_dir: Path):
"""Clean up a temp directory."""
try:
if temp_dir.exists():
shutil.rmtree(temp_dir)
except Exception:
pass


def _is_extension_pattern(pattern: str) -> bool:
    """Return True for file-extension style patterns such as '.js' or '.chunk.js'."""
    if not pattern.startswith("."):
        return False
    # Every dot-separated segment must be plain alphanumerics; this keeps
    # patterns like '.well-known' or '/zone.js/' on the substring path.
    return all(segment.isalnum() for segment in pattern[1:].split("."))


def filter_jsluice_url(url: str, exclude_patterns: List[str]) -> bool:
    """
    Return True when a jsluice URL should be probed.

    jsluice often extracts library, bundle, sourcemap, and static asset paths
    from JavaScript source. These are filtered before HTTP validation to avoid
    spending probe budget on obvious non-application endpoints.

    Matching is case-insensitive and uses two rules:

    * Extension-style patterns (a leading "." followed only by alphanumeric,
      dot-separated segments, e.g. ".js", ".chunk.js") must be a suffix of
      the URL *path*. A plain substring test would make ".js" reject
      "/api/users.json" or any URL on a host like "cdn.jsdelivr.net".
    * Every other pattern is a substring match against the whole URL.

    Args:
        url: Absolute or relative URL extracted by jsluice.
        exclude_patterns: Noise patterns; empty/None entries are ignored.

    Returns:
        False when the URL is empty, matches a pattern, or cannot be
        parsed/compared; True otherwise.
    """
    if not url:
        return False

    try:
        url_lower = url.lower()
        path_lower = (urlparse(url).path or "").lower()

        for pattern in exclude_patterns:
            if not pattern:
                continue
            needle = pattern.lower()
            if _is_extension_pattern(needle):
                if path_lower.endswith(needle):
                    return False
            elif needle in url_lower:
                return False
        return True
    except (AttributeError, TypeError, ValueError):
        # Non-string input, a non-iterable pattern list, or a URL that
        # urlparse rejects: fail closed so the URL is not probed.
        return False


def verify_jsluice_urls(
    urls: List[str],
    docker_image: str,
    threads: int,
    timeout: int,
    rate_limit: int,
    accept_status: List[int],
    exclude_patterns: List[str] = None,
    use_proxy: bool = False,
) -> Tuple[Set[str], Dict[str, int]]:
    """
    Verify jsluice-discovered URLs are live using httpx.

    This verifier fails closed: if probing fails or times out, unverified
    jsluice URLs are not returned for graph publication.

    Args:
        urls: URLs extracted by jsluice (duplicates are collapsed).
        docker_image: httpx Docker image to run.
        threads: Value passed to httpx ``-t``.
        timeout: Per-request timeout in seconds, passed to httpx ``-timeout``.
        rate_limit: Value passed to httpx ``-rl``.
        accept_status: HTTP status codes that count as "live".
        exclude_patterns: Noise patterns for filter_jsluice_url(); None or
            empty disables pre-filtering.
        use_proxy: When True, httpx is pointed at socks5://127.0.0.1:9050.

    Returns:
        ``(verified_urls, stats)`` where ``verified_urls`` is the set of URLs
        httpx reported with an accepted status and ``stats`` holds the
        ``jsluice_verify_*`` / ``jsluice_skipped_*`` / ``jsluice_verified``
        counters.
    """
    exclude_patterns = exclude_patterns or []
    stats = {
        "jsluice_verify_total": len(urls),
        "jsluice_verify_candidates": 0,
        "jsluice_skipped_blacklist": 0,
        "jsluice_verified": 0,
        "jsluice_skipped_unverified": 0,
    }

    if not urls:
        return set(), stats

    # De-duplicate and sort so the probe list is deterministic.
    candidates = []
    for url in sorted(set(urls)):
        if filter_jsluice_url(url, exclude_patterns):
            candidates.append(url)
        else:
            stats["jsluice_skipped_blacklist"] += 1

    stats["jsluice_verify_candidates"] = len(candidates)
    if not candidates:
        stats["jsluice_skipped_unverified"] = 0
        print(f"[*][jsluice] Verification skipped: all {len(urls)} URLs matched noise filters")
        return set(), stats

    # Normalise accepted codes before probing so a bad setting fails up front
    # instead of after the whole httpx run has already been paid for.
    accept_codes = {int(code) for code in accept_status}

    print(f"\n[*][jsluice] Verifying {len(candidates)} jsluice URLs...")
    if stats["jsluice_skipped_blacklist"]:
        print(f"[*][jsluice] Skipped {stats['jsluice_skipped_blacklist']} URLs via noise filters")

    temp_dir = _create_temp_dir("jsluice_verify")
    try:
        urls_file = temp_dir / "urls.txt"
        output_file = temp_dir / "verified.json"

        with open(urls_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{url}\n" for url in candidates)

        cmd = [
            "docker", "run", "--rm",
            "-v", f"{temp_dir}:/data",
            docker_image,
            "-l", "/data/urls.txt",
            "-o", "/data/verified.json",
            "-json",
            "-silent",
            "-nc",
            "-t", str(threads),
            "-timeout", str(timeout),
            "-rl", str(rate_limit),
        ]

        if use_proxy:
            # NOTE(review): no network flag is passed to `docker run`, so
            # 127.0.0.1 presumably resolves inside the container - confirm the
            # SOCKS proxy is actually reachable from there.
            cmd.extend(["-proxy", "socks5://127.0.0.1:9050"])

        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        except subprocess.TimeoutExpired:
            print("[!][jsluice] URL verification timeout; dropping unverified jsluice URLs")
            stats["jsluice_skipped_unverified"] = len(candidates)
            return set(), stats
        except Exception as e:
            print(f"[!][jsluice] URL verification error: {e}; dropping unverified jsluice URLs")
            stats["jsluice_skipped_unverified"] = len(candidates)
            return set(), stats

        if proc.returncode != 0:
            # Surface docker/httpx failures (missing image, bad flag, ...);
            # otherwise they look like "0 URLs are live". Any output that was
            # written is still parsed below.
            stderr_lines = (proc.stderr or "").strip().splitlines()
            reason = stderr_lines[-1] if stderr_lines else "no stderr output"
            print(f"[!][jsluice] httpx exited with code {proc.returncode}: {reason}")

        verified = set()

        if output_file.exists():
            with open(output_file, 'r', encoding='utf-8', errors='replace') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # Valid JSON that is not an object cannot be a result row.
                    if not isinstance(entry, dict):
                        continue

                    url = entry.get('url', '')
                    status = entry.get('status_code') or entry.get('status-code')
                    try:
                        status = int(status)
                    except (TypeError, ValueError):
                        continue

                    if url and status in accept_codes:
                        verified.add(url)

        stats["jsluice_verified"] = len(verified)
        # Clamp at zero: httpx may report URLs that are not textually
        # identical to the submitted candidates.
        stats["jsluice_skipped_unverified"] = max(0, len(candidates) - len(verified))
        print(f"[+][jsluice] Verified: {len(verified)}/{len(candidates)} URLs are live")
        return verified, stats
    finally:
        _cleanup_temp_dir(temp_dir)


def _extract_urls_for_base(base_url, file_entries, concurrency, timeout, allowed_hosts):
"""Extract URLs from JS files for a single base URL."""
extracted_urls = []
Expand Down
59 changes: 58 additions & 1 deletion recon/main_recon_modules/resource_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@
pull_hakrawler_docker_image,
merge_hakrawler_into_by_base_url,
# jsluice helpers
DEFAULT_JSLUICE_EXCLUDE_PATTERNS,
run_jsluice_analysis,
merge_jsluice_into_by_base_url,
verify_jsluice_urls,
# FFuf helpers
run_ffuf_discovery,
pull_ffuf_binary_check,
Expand Down Expand Up @@ -165,6 +167,9 @@ def run_resource_enum(recon_data: dict, output_file: Optional[Path] = None, sett
("JSLUICE_ENABLED", "jsluice"),
("JSLUICE_MAX_FILES", "jsluice"),
("JSLUICE_PARALLELISM", "jsluice"),
("JSLUICE_VERIFY_URLS", "jsluice"),
("JSLUICE_VERIFY_RATE_LIMIT", "jsluice"),
("JSLUICE_VERIFY_THREADS", "jsluice"),
("ARJUN_ENABLED", "Arjun"),
("ARJUN_THREADS", "Arjun"),
("ARJUN_RATE_LIMIT", "Arjun"),
Expand Down Expand Up @@ -216,6 +221,19 @@ def run_resource_enum(recon_data: dict, output_file: Optional[Path] = None, sett
JSLUICE_EXTRACT_SECRETS = settings.get('JSLUICE_EXTRACT_SECRETS', True)
JSLUICE_CONCURRENCY = settings.get('JSLUICE_CONCURRENCY', 5)
JSLUICE_PARALLELISM = settings.get('JSLUICE_PARALLELISM', 3)
JSLUICE_VERIFY_URLS = settings.get('JSLUICE_VERIFY_URLS', True)
JSLUICE_VERIFY_DOCKER_IMAGE = settings.get('JSLUICE_VERIFY_DOCKER_IMAGE', 'projectdiscovery/httpx:latest')
JSLUICE_VERIFY_TIMEOUT = settings.get('JSLUICE_VERIFY_TIMEOUT', 5)
JSLUICE_VERIFY_RATE_LIMIT = settings.get('JSLUICE_VERIFY_RATE_LIMIT', 50)
JSLUICE_VERIFY_THREADS = settings.get('JSLUICE_VERIFY_THREADS', 50)
JSLUICE_VERIFY_ACCEPT_STATUS = settings.get(
'JSLUICE_VERIFY_ACCEPT_STATUS',
[200, 201, 301, 302, 307, 308, 401, 403]
)
JSLUICE_EXCLUDE_PATTERNS = list(settings.get(
'JSLUICE_EXCLUDE_PATTERNS',
DEFAULT_JSLUICE_EXCLUDE_PATTERNS,
))

# FFuf settings
FFUF_ENABLED = settings.get('FFUF_ENABLED', False)
Expand Down Expand Up @@ -415,6 +433,12 @@ def run_resource_enum(recon_data: dict, output_file: Optional[Path] = None, sett
print(f"[*][jsluice] Extract URLs: {JSLUICE_EXTRACT_URLS}")
print(f"[*][jsluice] Extract secrets: {JSLUICE_EXTRACT_SECRETS}")
print(f"[*][jsluice] Parallelism: {JSLUICE_PARALLELISM} concurrent base URLs")
print(f"[*][jsluice] URL verification: {JSLUICE_VERIFY_URLS}")
if JSLUICE_VERIFY_URLS:
print(f"[*][jsluice] Verify rate limit: {JSLUICE_VERIFY_RATE_LIMIT} req/s")
print(f"[*][jsluice] Verify threads: {JSLUICE_VERIFY_THREADS}")
print(f"[*][jsluice] Verify timeout: {JSLUICE_VERIFY_TIMEOUT}s")
print(f"[*][jsluice] Noise filter patterns: {len(JSLUICE_EXCLUDE_PATTERNS)}")
# FFuf settings
print(f"[*][FFuf] Enabled: {FFUF_ENABLED}")
if FFUF_ENABLED:
Expand Down Expand Up @@ -693,11 +717,17 @@ def _run_kr_wordlist(wordlist_name):
"jsluice_parsed": 0,
"jsluice_new": 0,
"jsluice_overlap": 0,
"jsluice_verify_total": 0,
"jsluice_verify_candidates": 0,
"jsluice_skipped_blacklist": 0,
"jsluice_verified": 0,
"jsluice_skipped_unverified": 0,
}

if JSLUICE_ENABLED and (JSLUICE_EXTRACT_URLS or JSLUICE_EXTRACT_SECRETS):
all_crawl_urls = list(set(katana_urls + hakrawler_urls))
if all_crawl_urls:
verify_stats = {}
jsluice_result = run_jsluice_analysis(
all_crawl_urls,
JSLUICE_MAX_FILES,
Expand All @@ -710,15 +740,41 @@ def _run_kr_wordlist(wordlist_name):
use_proxy
)

if jsluice_result.get("urls"):
if JSLUICE_VERIFY_URLS:
verified_jsluice_urls, verify_stats = verify_jsluice_urls(
jsluice_result["urls"],
JSLUICE_VERIFY_DOCKER_IMAGE,
JSLUICE_VERIFY_THREADS,
JSLUICE_VERIFY_TIMEOUT,
JSLUICE_VERIFY_RATE_LIMIT,
JSLUICE_VERIFY_ACCEPT_STATUS,
JSLUICE_EXCLUDE_PATTERNS,
use_proxy,
)
jsluice_result["urls"] = sorted(verified_jsluice_urls)
jsluice_stats.update(verify_stats)
else:
jsluice_stats["jsluice_verify_total"] = len(jsluice_result["urls"])
jsluice_stats["jsluice_verify_candidates"] = len(jsluice_result["urls"])
jsluice_stats["jsluice_verified"] = len(jsluice_result["urls"])

if jsluice_result.get("urls"):
print("\n[*][jsluice] Merging extracted URLs into results...")
organized_data['by_base_url'], jsluice_stats = merge_jsluice_into_by_base_url(
organized_data['by_base_url'], merge_stats = merge_jsluice_into_by_base_url(
jsluice_result["urls"],
organized_data['by_base_url'],
)
jsluice_stats.update(merge_stats)
jsluice_stats.update(verify_stats)
print(f"[+][jsluice] Total URLs: {jsluice_stats['jsluice_total']}")
print(f"[+][jsluice] New endpoints: {jsluice_stats['jsluice_new']}")
print(f"[+][jsluice] Overlap: {jsluice_stats['jsluice_overlap']}")
if JSLUICE_VERIFY_URLS:
print(f"[+][jsluice] Skipped (blacklist): {jsluice_stats['jsluice_skipped_blacklist']}")
print(f"[+][jsluice] Skipped (unverified): {jsluice_stats['jsluice_skipped_unverified']}")
elif JSLUICE_VERIFY_URLS and jsluice_stats.get("jsluice_verify_total", 0) > 0:
print(f"[-][jsluice] No URLs survived validation ({jsluice_stats['jsluice_skipped_blacklist']} blacklisted, {jsluice_stats['jsluice_skipped_unverified']} unverified)")

# FFuf directory fuzzing (runs after crawlers and jsluice, before GAU merge)
ffuf_stats = {
Expand Down Expand Up @@ -1069,6 +1125,7 @@ def _run_kr_wordlist(wordlist_name):
# jsluice metadata
'jsluice_enabled': JSLUICE_ENABLED,
'jsluice_max_files': JSLUICE_MAX_FILES if JSLUICE_ENABLED else None,
'jsluice_verify_enabled': JSLUICE_VERIFY_URLS if JSLUICE_ENABLED else False,
'jsluice_urls_found': len(jsluice_in_scope_urls),
'jsluice_secrets_found': len(jsluice_result.get("secrets", [])),
'jsluice_stats': jsluice_stats,
Expand Down
27 changes: 27 additions & 0 deletions recon/project_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,26 @@
'JSLUICE_EXTRACT_SECRETS': True,
'JSLUICE_CONCURRENCY': 5,
'JSLUICE_PARALLELISM': 5,
'JSLUICE_VERIFY_URLS': True,
'JSLUICE_VERIFY_DOCKER_IMAGE': 'projectdiscovery/httpx:latest',
'JSLUICE_VERIFY_TIMEOUT': 5,
'JSLUICE_VERIFY_RATE_LIMIT': 50,
'JSLUICE_VERIFY_THREADS': 50,
'JSLUICE_VERIFY_ACCEPT_STATUS': [200, 201, 301, 302, 307, 308, 401, 403],
'JSLUICE_EXCLUDE_PATTERNS': [
'/_next/image', '/_next/static', '/_next/data', '/__nextjs',
'/_nuxt/', '/__nuxt',
'/runtime.', '/polyfills.', '/vendor.',
'/webpack', '/chunk.', '.chunk.js', '.bundle.js', 'hot-update',
'/static/', '/public/', '/dist/', '/build/', '/lib/', '/vendor/', '/node_modules/',
'.js', '.mjs', '.map', '.css', '.scss', '.sass', '.less',
'.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.webp', '.avif',
'.woff', '.woff2', '.ttf', '.eot', '.otf',
'.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm',
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
'.zip', '.rar', '.7z', '.tar', '.gz',
'/rxjs/', '/react/', '/angular/', '/lodash/', '/zone.js/',
],

# ========== JS RECON SCANNER ==========
'JS_RECON_ENABLED': False,
Expand Down Expand Up @@ -882,6 +902,13 @@ def fetch_project_settings(project_id: str, webapp_url: str) -> dict[str, Any]:
settings['JSLUICE_EXTRACT_SECRETS'] = project.get('jsluiceExtractSecrets', DEFAULT_SETTINGS['JSLUICE_EXTRACT_SECRETS'])
settings['JSLUICE_CONCURRENCY'] = project.get('jsluiceConcurrency', DEFAULT_SETTINGS['JSLUICE_CONCURRENCY'])
settings['JSLUICE_PARALLELISM'] = project.get('jsluiceParallelism', DEFAULT_SETTINGS['JSLUICE_PARALLELISM'])
settings['JSLUICE_VERIFY_URLS'] = project.get('jsluiceVerifyUrls', DEFAULT_SETTINGS['JSLUICE_VERIFY_URLS'])
settings['JSLUICE_VERIFY_DOCKER_IMAGE'] = project.get('jsluiceVerifyDockerImage', DEFAULT_SETTINGS['JSLUICE_VERIFY_DOCKER_IMAGE'])
settings['JSLUICE_VERIFY_TIMEOUT'] = project.get('jsluiceVerifyTimeout', DEFAULT_SETTINGS['JSLUICE_VERIFY_TIMEOUT'])
settings['JSLUICE_VERIFY_RATE_LIMIT'] = project.get('jsluiceVerifyRateLimit', DEFAULT_SETTINGS['JSLUICE_VERIFY_RATE_LIMIT'])
settings['JSLUICE_VERIFY_THREADS'] = project.get('jsluiceVerifyThreads', DEFAULT_SETTINGS['JSLUICE_VERIFY_THREADS'])
settings['JSLUICE_VERIFY_ACCEPT_STATUS'] = project.get('jsluiceVerifyAcceptStatus', DEFAULT_SETTINGS['JSLUICE_VERIFY_ACCEPT_STATUS'])
settings['JSLUICE_EXCLUDE_PATTERNS'] = project.get('jsluiceExcludePatterns', DEFAULT_SETTINGS['JSLUICE_EXCLUDE_PATTERNS'])

# JS Recon Scanner
settings['JS_RECON_ENABLED'] = project.get('jsReconEnabled', DEFAULT_SETTINGS['JS_RECON_ENABLED'])
Expand Down
Loading