in lib/lambda/layers/aws-sap-odp-extractor/python/urllib3/util/url.py [0:0]
def parse_url(url):
"""
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
performed to parse incomplete urls. Fields not provided will be None.
This parser is RFC 3986 compliant.
:param str url: URL to parse into a :class:`.Url` namedtuple.
Partly backwards-compatible with :mod:`urlparse`.
Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
>>> parse_url('google.com:80')
Url(scheme=None, host='google.com', port=80, path=None, ...)
>>> parse_url('/foo?bar')
Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
"""
if not url:
# Empty
return Url()
is_string = not isinstance(url, six.binary_type)
# RFC 3986 doesn't like URLs that have a host but don't start
# with a scheme and we support URLs like that so we need to
# detect that problem and add an empty scheme indication.
# We don't get hurt on path-only URLs here as it's stripped
# off and given an empty scheme anyways.
if not SCHEME_REGEX.search(url):
url = "//" + url
def idna_encode(name):
if name and any([ord(x) > 128 for x in name]):
try:
import idna
except ImportError:
raise LocationParseError("Unable to parse URL without the 'idna' module")
try:
return idna.encode(name.lower(), strict=True, std3_rules=True)
except idna.IDNAError:
raise LocationParseError(u"Name '%s' is not a valid IDNA label" % name)
return name
try:
split_iri = misc.IRI_MATCHER.match(compat.to_str(url)).groupdict()
iri_ref = rfc3986.IRIReference(
split_iri['scheme'], split_iri['authority'],
_encode_invalid_chars(split_iri['path'], PATH_CHARS),
_encode_invalid_chars(split_iri['query'], QUERY_CHARS),
_encode_invalid_chars(split_iri['fragment'], FRAGMENT_CHARS)
)
has_authority = iri_ref.authority is not None
uri_ref = iri_ref.encode(idna_encoder=idna_encode)
except (ValueError, RFC3986Exception):
return six.raise_from(LocationParseError(url), None)
# rfc3986 strips the authority if it's invalid
if has_authority and uri_ref.authority is None:
raise LocationParseError(url)
# Only normalize schemes we understand to not break http+unix
# or other schemes that don't follow RFC 3986.
if uri_ref.scheme is None or uri_ref.scheme.lower() in NORMALIZABLE_SCHEMES:
uri_ref = uri_ref.normalize()
# Validate all URIReference components and ensure that all
# components that were set before are still set after
# normalization has completed.
validator = Validator()
try:
validator.check_validity_of(
*validator.COMPONENT_NAMES
).validate(uri_ref)
except ValidationError:
return six.raise_from(LocationParseError(url), None)
# For the sake of backwards compatibility we put empty
# string values for path if there are any defined values
# beyond the path in the URL.
# TODO: Remove this when we break backwards compatibility.
path = uri_ref.path
if not path:
if (uri_ref.query is not None
or uri_ref.fragment is not None):
path = ""
else:
path = None
# Ensure that each part of the URL is a `str` for
# backwards compatibility.
def to_input_type(x):
if x is None:
return None
elif not is_string and not isinstance(x, six.binary_type):
return x.encode('utf-8')
return x
return Url(
scheme=to_input_type(uri_ref.scheme),
auth=to_input_type(uri_ref.userinfo),
host=to_input_type(uri_ref.host),
port=int(uri_ref.port) if uri_ref.port is not None else None,
path=to_input_type(path),
query=to_input_type(uri_ref.query),
fragment=to_input_type(uri_ref.fragment)
)