# Source code for automation_file.remote.url_validator

"""SSRF guard for outbound HTTP requests.

``validate_http_url`` rejects non-http(s) schemes, resolves the host, and
rejects private / loopback / link-local / reserved IP ranges. Every remote
function that accepts a user-supplied URL must pass it through here first.
"""

from __future__ import annotations

import ipaddress
import socket
from urllib.parse import urlparse

from automation_file.exceptions import UrlValidationException

_ALLOWED_SCHEMES = frozenset({"http", "https"})


def _require_host(url: str) -> str:
    """Return the hostname of ``url`` after checking its basic shape.

    Raises :class:`UrlValidationException` when ``url`` is not a non-empty
    string, cannot be parsed, uses a scheme outside ``_ALLOWED_SCHEMES``,
    or has no host component.
    """
    if not isinstance(url, str) or not url:
        raise UrlValidationException("url must be a non-empty string")
    try:
        parsed = urlparse(url)
    except ValueError as error:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets)
        # with a plain ValueError; translate it so callers only ever have to
        # catch UrlValidationException.
        raise UrlValidationException(f"cannot parse url: {url!r}") from error
    if parsed.scheme not in _ALLOWED_SCHEMES:
        raise UrlValidationException(f"disallowed scheme: {parsed.scheme!r}")
    host = parsed.hostname
    if not host:
        raise UrlValidationException("url must contain a host")
    return host


def _resolve_ips(host: str) -> list[str]:
    try:
        infos = socket.getaddrinfo(host, None)
    except socket.gaierror as error:
        raise UrlValidationException(f"cannot resolve host: {host}") from error
    return [str(info[4][0]) for info in infos]


def _is_disallowed_ip(ip_obj: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
    return (
        ip_obj.is_private
        or ip_obj.is_loopback
        or ip_obj.is_link_local
        or ip_obj.is_reserved
        or ip_obj.is_multicast
        or ip_obj.is_unspecified
    )


def validate_http_url(url: str, *, allow_private: bool = False) -> str:
    """Return ``url`` if safe; raise :class:`UrlValidationException` otherwise.

    ``allow_private=True`` relaxes the private/loopback/link-local checks for
    callers that need to reach LAN services (e.g. on-prem WebDAV). Scheme and
    host checks still apply, and multicast / unspecified addresses are
    rejected even in permissive mode. Callers must opt in explicitly — the
    default remains strict SSRF blocking.

    Every address the host resolves to is checked; a single disallowed
    address rejects the URL.

    NOTE(review): the host is resolved here at validation time and the
    original URL (not a pinned IP) is returned, so a later request presumably
    resolves the name again — confirm callers account for DNS changes between
    the two lookups.
    """
    host = _require_host(url)
    resolved = _resolve_ips(host)
    if not resolved:
        # Fail closed: with no addresses the loop below would check nothing
        # and the URL would be accepted unvalidated.
        raise UrlValidationException(f"cannot resolve host: {host}")
    for ip_str in resolved:
        try:
            ip_obj = ipaddress.ip_address(ip_str)
        except ValueError as error:
            raise UrlValidationException(f"cannot parse resolved ip: {ip_str}") from error
        if not allow_private and _is_disallowed_ip(ip_obj):
            raise UrlValidationException(f"disallowed ip: {ip_str}")
        if allow_private and (ip_obj.is_multicast or ip_obj.is_unspecified):
            raise UrlValidationException(f"disallowed ip even in permissive mode: {ip_str}")
    return url