lucasvl.nl/lib/python3.11/site-packages/werkzeug/urls.py

from __future__ import annotations

import codecs
import re
import typing as t
from urllib.parse import quote
from urllib.parse import unquote
from urllib.parse import urlencode
from urllib.parse import urlsplit
from urllib.parse import urlunsplit

from .datastructures import iter_multi_items


def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:
    """Codec error handler that percent-encodes the data a codec rejected.

    Used by :func:`uri_to_iri`: after unquoting, any bytes that cannot be
    decoded are quoted again instead of being dropped or replaced.

    :param e: The error raised by the codec. Its ``object``, ``start`` and
        ``end`` attributes identify the offending slice.
    :return: The quoted replacement text and the position at which the codec
        should resume.
    """
    # The concrete UnicodeError subclasses carry these attributes, but mypy
    # does not see them on the base class, hence the ignores.
    resume_at = e.end  # type: ignore
    rejected = e.object[e.start : resume_at]  # type: ignore
    return quote(rejected, safe=""), resume_at


# Register the handler by name so it can be passed as the ``errors`` argument
# when decoding; ``_make_unquote_part`` hands this name to ``unquote``.
codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)


def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:
    """Build an unquoting function for one component of a URL.

    The returned callable decodes every percent-encoded sequence in its input
    except the escapes of the characters in ``chars``, which are left exactly
    as written. That way as much as possible is shown unquoted without
    altering what the URL part means.

    :param name: Suffix used for the returned function's ``__name__``
        (``_unquote_<name>``).
    :param chars: Characters whose percent-encoded form must stay quoted.
    :return: A function mapping a quoted URL part to its partially unquoted
        form.
    """
    protected = "|".join(f"{ord(ch):02X}" for ch in sorted(chars))
    pattern = re.compile(f"((?:%(?:{protected}))+)", re.I)

    def _unquote_partial(value: str) -> str:
        # Splitting on a pattern with a single capturing group alternates
        # between free text (even indexes) and runs of protected escapes
        # (odd indexes). Only the free text is unquoted.
        pieces = pattern.split(value)
        return "".join(
            piece if index % 2 else unquote(piece, "utf-8", "werkzeug.url_quote")
            for index, piece in enumerate(pieces)
        )

    _unquote_partial.__name__ = f"_unquote_{name}"
    return _unquote_partial


# Characters whose percent-encoded form is preserved when unquoting URL parts,
# based on https://url.spec.whatwg.org/#percent-encoded-bytes
# Every part keeps the controls and space (0x00-0x20), "%" (0x25) and DEL
# (0x7F) quoted; each part then adds the delimiters that are significant to it.
_always_unsafe = "".join(map(chr, (*range(0x21), 0x25, 0x7F)))
_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)
_unquote_query = _make_unquote_part("query", f"{_always_unsafe}&=+#")
_unquote_path = _make_unquote_part("path", f"{_always_unsafe}/?#")
_unquote_user = _make_unquote_part("user", f"{_always_unsafe}:@/?#")


def uri_to_iri(uri: str) -> str:
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :return: The IRI, reassembled from the individually unquoted parts.
    :raises ValueError: If the URI contains an invalid port; this is raised
        when the ``port`` attribute of the split result is read.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
        are removed.

    .. versionchanged:: 2.3
        Which characters remain quoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved and invalid characters remain quoted. Previously,
        only some reserved characters were preserved, and invalid bytes
        were replaced instead of left quoted.

    .. versionadded:: 0.6
    """
    parts = urlsplit(uri)
    # Each part has its own set of characters that must stay quoted.
    path = _unquote_path(parts.path)
    query = _unquote_query(parts.query)
    fragment = _unquote_fragment(parts.fragment)

    if parts.hostname:
        netloc = _decode_idna(parts.hostname)
    else:
        netloc = ""

    if ":" in netloc:
        # ``hostname`` comes back without the brackets of an IPv6 literal,
        # so they are put back before a port or credentials are attached.
        netloc = f"[{netloc}]"

    # Compare against None instead of relying on truthiness so that an
    # explicit port 0 is kept rather than silently dropped.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        auth = _unquote_user(parts.username)

        if parts.password:
            password = _unquote_user(parts.password)
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))


def iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
    quoted. If the URL has a domain, it is encoded to Punycode.

    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'

    :param iri: The IRI to convert.
    :return: The URI, reassembled from the individually quoted parts.
    :raises ValueError: If the IRI contains an invalid port; this is raised
        when the ``port`` attribute of the split result is read.

    .. versionchanged:: 3.0
        Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
        and the ``safe_conversion`` parameter, are removed.

    .. versionchanged:: 2.3
        Which characters remain unquoted is specific to each part of the URL.

    .. versionchanged:: 0.15
        All reserved characters remain unquoted. Previously, only some reserved
        characters were left unquoted.

    .. versionchanged:: 0.9.6
       The ``safe_conversion`` parameter was added.

    .. versionadded:: 0.6
    """
    parts = urlsplit(iri)
    # safe = https://url.spec.whatwg.org/#url-path-segment-string
    # as well as percent for things that are already quoted
    path = quote(parts.path, safe="%!$&'()*+,/:;=@")
    query = quote(parts.query, safe="%!$&'()*+,/:;=?@")
    fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")

    if parts.hostname:
        netloc = parts.hostname.encode("idna").decode("ascii")
    else:
        netloc = ""

    if ":" in netloc:
        # ``hostname`` comes back without the brackets of an IPv6 literal,
        # so they are put back before a port or credentials are attached.
        netloc = f"[{netloc}]"

    # Compare against None instead of relying on truthiness so that an
    # explicit port 0 is kept rather than silently dropped.
    if parts.port is not None:
        netloc = f"{netloc}:{parts.port}"

    if parts.username:
        # ":" and "@" are not in the safe set here because they delimit the
        # credentials from each other and from the host.
        auth = quote(parts.username, safe="%!$&'()*+,;=")

        if parts.password:
            password = quote(parts.password, safe="%!$&'()*+,;=")
            auth = f"{auth}:{password}"

        netloc = f"{auth}@{netloc}"

    return urlunsplit((parts.scheme, netloc, path, query, fragment))


def _invalid_iri_to_uri(iri: str) -> str:
    """Convert an IRI to a URI, passing values that already look safe through
    untouched.

    Some URL schemes, such as ``itms-services://``, must keep the ``//`` even
    though they have no host component, and there may be other invalid schemes
    like it. Responses currently always run the redirect ``Location`` header
    through ``iri_to_uri``, which removes the ``//``. As a workaround, an IRI
    that is pure ASCII and that splits on whitespace into exactly one token is
    returned as-is; anything else is converted with ``iri_to_uri``. In
    Werkzeug 3.0, this should become a ``response.process_location`` flag.

    :meta private:
    """
    if iri.isascii() and len(iri.split(None, 1)) == 1:
        return iri

    return iri_to_uri(iri)


def _decode_idna(domain: str) -> str:
    """Decode an IDNA (Punycode) encoded domain to its Unicode form.

    A domain that is not pure ASCII is treated as already decoded and is
    returned unchanged. Otherwise the whole domain is decoded at once; if
    that fails, each dot-separated label is decoded on its own and any label
    that cannot be decoded is kept in its ASCII form.

    :param domain: The host name to decode.
    :return: The decoded domain, with invalid labels left as they were.
    """
    try:
        data = domain.encode("ascii")
    except UnicodeEncodeError:
        # If the domain is not ASCII, it's decoded already.
        return domain

    try:
        # Try decoding in one shot.
        return data.decode("idna")
    except UnicodeError:
        # Catch the base class: the stdlib idna codec is not guaranteed to
        # raise the UnicodeDecodeError subclass for an invalid label, and a
        # plain UnicodeError would otherwise skip the fallback below.
        pass

    # Decode each part separately, leaving invalid parts as punycode.
    parts = []

    for part in data.split(b"."):
        try:
            parts.append(part.decode("idna"))
        except UnicodeError:
            parts.append(part.decode("ascii"))

    return ".".join(parts)


def _urlencode(query: t.Mapping[str, str] | t.Iterable[tuple[str, str]]) -> str:
    """Encode ``query`` as a URL query string, skipping items whose value is
    ``None``.

    :param query: A mapping or an iterable of ``(key, value)`` pairs; it is
        flattened with ``iter_multi_items`` before encoding.
    :return: The encoded query string.
    """
    pairs = []

    for item in iter_multi_items(query):
        if item[1] is not None:
            pairs.append(item)

    # safe = https://url.spec.whatwg.org/#percent-encoded-bytes
    return urlencode(pairs, safe="!$'()*,/:;?@")
added venv and start.sh 2024-03-04 21:31:20 -06:00			`from __future__ import annotations`

			`import codecs`
			`import re`
			`import typing as t`
			`from urllib.parse import quote`
			`from urllib.parse import unquote`
			`from urllib.parse import urlencode`
			`from urllib.parse import urlsplit`
			`from urllib.parse import urlunsplit`

			`from .datastructures import iter_multi_items`


			`def _codec_error_url_quote(e: UnicodeError) -> tuple[str, int]:`
			"""Used in :func:`uri_to_iri` after unquoting to re-quote any
			`invalid bytes.`
			`"""`
			`# the docs state that UnicodeError does have these attributes,`
			`# but mypy isn't picking them up`
			`out = quote(e.object[e.start : e.end], safe="") # type: ignore`
			`return out, e.end # type: ignore`


			`codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)`


			`def _make_unquote_part(name: str, chars: str) -> t.Callable[[str], str]:`
			`"""Create a function that unquotes all percent encoded characters except those`
			`given. This allows working with unquoted characters if possible while not changing`
			`the meaning of a given part of a URL.`
			`"""`
			`choices = "\|".join(f"{ord(c):02X}" for c in sorted(chars))`
			`pattern = re.compile(f"((?:%(?:{choices}))+)", re.I)`

			`def _unquote_partial(value: str) -> str:`
			`parts = iter(pattern.split(value))`
			`out = []`

			`for part in parts:`
			`out.append(unquote(part, "utf-8", "werkzeug.url_quote"))`
			`out.append(next(parts, ""))`

			`return "".join(out)`

			`_unquote_partial.__name__ = f"_unquote_{name}"`
			`return _unquote_partial`


			`# characters that should remain quoted in URL parts`
			`# based on https://url.spec.whatwg.org/#percent-encoded-bytes`
			`# always keep all controls, space, and % quoted`
			`_always_unsafe = bytes((*range(0x21), 0x25, 0x7F)).decode()`
			`_unquote_fragment = _make_unquote_part("fragment", _always_unsafe)`
			`_unquote_query = _make_unquote_part("query", _always_unsafe + "&=+#")`
			`_unquote_path = _make_unquote_part("path", _always_unsafe + "/?#")`
			`_unquote_user = _make_unquote_part("user", _always_unsafe + ":@/?#")`


			`def uri_to_iri(uri: str) -> str:`
			`"""Convert a URI to an IRI. All valid UTF-8 characters are unquoted,`
			`leaving all reserved and invalid characters quoted. If the URL has`
			`a domain, it is decoded from Punycode.`

			`>>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")`
			`'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'`

			`:param uri: The URI to convert.`

			`.. versionchanged:: 3.0`
			Passing a tuple or bytes, and the ``charset`` and ``errors`` parameters,
			`are removed.`

			`.. versionchanged:: 2.3`
			`Which characters remain quoted is specific to each part of the URL.`

			`.. versionchanged:: 0.15`
			`All reserved and invalid characters remain quoted. Previously,`
			`only some reserved characters were preserved, and invalid bytes`
			`were replaced instead of left quoted.`

			`.. versionadded:: 0.6`
			`"""`
			`parts = urlsplit(uri)`
			`path = _unquote_path(parts.path)`
			`query = _unquote_query(parts.query)`
			`fragment = _unquote_fragment(parts.fragment)`

			`if parts.hostname:`
			`netloc = _decode_idna(parts.hostname)`
			`else:`
			`netloc = ""`

			`if ":" in netloc:`
			`netloc = f"[{netloc}]"`

			`if parts.port:`
			`netloc = f"{netloc}:{parts.port}"`

			`if parts.username:`
			`auth = _unquote_user(parts.username)`

			`if parts.password:`
			`password = _unquote_user(parts.password)`
			`auth = f"{auth}:{password}"`

			`netloc = f"{auth}@{netloc}"`

			`return urlunsplit((parts.scheme, netloc, path, query, fragment))`


			`def iri_to_uri(iri: str) -> str:`
			`"""Convert an IRI to a URI. All non-ASCII and unsafe characters are`
			`quoted. If the URL has a domain, it is encoded to Punycode.`

			`>>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')`
			`'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'`

			`:param iri: The IRI to convert.`

			`.. versionchanged:: 3.0`
			Passing a tuple or bytes, the ``charset`` and ``errors`` parameters,
			and the ``safe_conversion`` parameter, are removed.

			`.. versionchanged:: 2.3`
			`Which characters remain unquoted is specific to each part of the URL.`

			`.. versionchanged:: 0.15`
			`All reserved characters remain unquoted. Previously, only some reserved`
			`characters were left unquoted.`

			`.. versionchanged:: 0.9.6`
			The ``safe_conversion`` parameter was added.

			`.. versionadded:: 0.6`
			`"""`
			`parts = urlsplit(iri)`
			`# safe = https://url.spec.whatwg.org/#url-path-segment-string`
			`# as well as percent for things that are already quoted`
			`path = quote(parts.path, safe="%!$&'()*+,/:;=@")`
			`query = quote(parts.query, safe="%!$&'()*+,/:;=?@")`
			`fragment = quote(parts.fragment, safe="%!#$&'()*+,/:;=?@")`

			`if parts.hostname:`
			`netloc = parts.hostname.encode("idna").decode("ascii")`
			`else:`
			`netloc = ""`

			`if ":" in netloc:`
			`netloc = f"[{netloc}]"`

			`if parts.port:`
			`netloc = f"{netloc}:{parts.port}"`

			`if parts.username:`
			`auth = quote(parts.username, safe="%!$&'()*+,;=")`

			`if parts.password:`
			`password = quote(parts.password, safe="%!$&'()*+,;=")`
			`auth = f"{auth}:{password}"`

			`netloc = f"{auth}@{netloc}"`

			`return urlunsplit((parts.scheme, netloc, path, query, fragment))`


			`def _invalid_iri_to_uri(iri: str) -> str:`
			"""The URL scheme ``itms-services://`` must contain the ``//`` even though it does
			`not have a host component. There may be other invalid schemes as well. Currently,`
			responses will always call ``iri_to_uri`` on the redirect ``Location`` header, which
			removes the ``//``. For now, if the IRI only contains ASCII and does not contain
			`spaces, pass it on as-is. In Werkzeug 3.0, this should become a`
			``response.process_location`` flag.

			`:meta private:`
			`"""`
			`try:`
			`iri.encode("ascii")`
			`except UnicodeError:`
			`pass`
			`else:`
			`if len(iri.split(None, 1)) == 1:`
			`return iri`

			`return iri_to_uri(iri)`


			`def _decode_idna(domain: str) -> str:`
			`try:`
			`data = domain.encode("ascii")`
			`except UnicodeEncodeError:`
			`# If the domain is not ASCII, it's decoded already.`
			`return domain`

			`try:`
			`# Try decoding in one shot.`
			`return data.decode("idna")`
			`except UnicodeDecodeError:`
			`pass`

			`# Decode each part separately, leaving invalid parts as punycode.`
			`parts = []`

			`for part in data.split(b"."):`
			`try:`
			`parts.append(part.decode("idna"))`
			`except UnicodeDecodeError:`
			`parts.append(part.decode("ascii"))`

			`return ".".join(parts)`


			`def _urlencode(query: t.Mapping[str, str] \| t.Iterable[tuple[str, str]]) -> str:`
			`items = [x for x in iter_multi_items(query) if x[1] is not None]`
			`# safe = https://url.spec.whatwg.org/#percent-encoded-bytes`
			`return urlencode(items, safe="!$'()*,/:;?@")`