in cpp/src/arrow/vendored/uriparser/UriNormalize.c [532:767]
/*
 * Worker shared by the syntax normalization entry points. Which of its
 * two modes runs is decided by <outMask>:
 *
 * - <outMask> != NULL (inspection): <inMask> is not consulted. The
 *   components of <uri> are only checked and *<outMask> receives the OR
 *   of the URI_NORMALIZE_* flags of all components found to need work.
 *   For a NULL <uri>, *<outMask> is set to URI_NORMALIZED.
 *   This mode always returns URI_SUCCESS.
 *
 * - <outMask> == NULL (modification): the components selected by
 *   <inMask> are normalized. <memory> must not be NULL (asserted).
 *   If <uri> is an owner its text is fixed in place; otherwise each
 *   touched component is replaced through the *Malloc helpers and, at
 *   the very end, MakeOwner is run and <uri>->owner is set to URI_TRUE.
 *   An <inMask> of URI_NORMALIZED returns right away, before any of that.
 *
 * Returns URI_SUCCESS, URI_ERROR_NULL (modification mode with a NULL
 * <uri>) or URI_ERROR_MALLOC (a helper reported failure; PreventLeakage
 * has been called before returning).
 */
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
		unsigned int inMask, unsigned int * outMask,
		UriMemoryManager * memory) {
	/* Flags of the components that were re-allocated so far; this is
	 * what PreventLeakage is told about when bailing out */
	unsigned int dupedMask = URI_NORMALIZED;

	/* Not just doing inspection? -> memory manager required! */
	if (outMask == NULL) {
		assert(memory != NULL);
	}

	if (uri == NULL) {
		if (outMask == NULL) {
			return URI_ERROR_NULL;
		}
		*outMask = URI_NORMALIZED;
		return URI_SUCCESS;
	}

	/* ------------------------------------------------------------
	 * Inspection mode: report what would need work, then leave
	 * ------------------------------------------------------------ */
	if (outMask != NULL) {
		const URI_TYPE(PathSegment) * segment;
		UriBool schemeHasUpper;
		UriBool hostHasUpper;
		UriBool queryIsUgly;
		UriBool fragmentIsUgly;

		/* Start from a clean mask */
		*outMask = URI_NORMALIZED;

		/* Scheme, host */
		schemeHasUpper = URI_FUNC(ContainsUppercaseLetters)(
				uri->scheme.first, uri->scheme.afterLast);
		hostHasUpper = URI_FUNC(ContainsUppercaseLetters)(
				uri->hostText.first, uri->hostText.afterLast);
		if (schemeHasUpper) {
			*outMask |= URI_NORMALIZE_SCHEME;
		}
		/* The percent-encoding check on the host is only run when the
		 * letter case check did not already flag it */
		if (hostHasUpper
				|| URI_FUNC(ContainsUglyPercentEncoding)(
					uri->hostText.first, uri->hostText.afterLast)) {
			*outMask |= URI_NORMALIZE_HOST;
		}

		/* User info */
		if (URI_FUNC(ContainsUglyPercentEncoding)(
				uri->userInfo.first, uri->userInfo.afterLast)) {
			*outMask |= URI_NORMALIZE_USER_INFO;
		}

		/* Path: the first offending segment settles it */
		for (segment = uri->pathHead; segment != NULL; segment = segment->next) {
			const URI_CHAR * const begin = segment->text.first;
			const URI_CHAR * const end = segment->text.afterLast;

			/* Unset or empty segments have nothing to normalize */
			if ((begin == NULL) || (end == NULL) || (end <= begin)) {
				continue;
			}

			/* ".", ".." or ugly percent-encoding */
			if ((((end - begin) == 1)
						&& (begin[0] == _UT('.')))
					|| (((end - begin) == 2)
						&& (begin[0] == _UT('.'))
						&& (begin[1] == _UT('.')))
					|| URI_FUNC(ContainsUglyPercentEncoding)(begin, end)) {
				*outMask |= URI_NORMALIZE_PATH;
				break;
			}
		}

		/* Query, fragment */
		queryIsUgly = URI_FUNC(ContainsUglyPercentEncoding)(
				uri->query.first, uri->query.afterLast);
		fragmentIsUgly = URI_FUNC(ContainsUglyPercentEncoding)(
				uri->fragment.first, uri->fragment.afterLast);
		if (queryIsUgly) {
			*outMask |= URI_NORMALIZE_QUERY;
		}
		if (fragmentIsUgly) {
			*outMask |= URI_NORMALIZE_FRAGMENT;
		}

		return URI_SUCCESS;
	}

	/* ------------------------------------------------------------
	 * Modification mode
	 * ------------------------------------------------------------ */
	if (inMask == URI_NORMALIZED) {
		/* Nothing was asked for */
		return URI_SUCCESS;
	}

	/* Scheme */
	if (((inMask & URI_NORMALIZE_SCHEME) != 0) && (uri->scheme.first != NULL)) {
		if (!uri->owner) {
			if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first),
					&(uri->scheme.afterLast), memory)) {
				goto failedMalloc;
			}
			dupedMask |= URI_NORMALIZE_SCHEME;
		} else {
			URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast);
		}
	}

	/* Host */
	if ((inMask & URI_NORMALIZE_HOST) != 0) {
		if (uri->hostData.ipFuture.first != NULL) {
			/* IPvFuture */
			if (!uri->owner) {
				if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first),
						&(uri->hostData.ipFuture.afterLast), memory)) {
					goto failedMalloc;
				}
				dupedMask |= URI_NORMALIZE_HOST;
			} else {
				URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first,
						uri->hostData.ipFuture.afterLast);
			}
			/* Keep the host text range pointing at the IPvFuture range */
			uri->hostText.first = uri->hostData.ipFuture.first;
			uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
		} else if ((uri->hostText.first != NULL)
				&& (uri->hostData.ip4 == NULL)
				&& (uri->hostData.ip6 == NULL)) {
			/* Regname: percent-encoding first, letter case second */
			if (!uri->owner) {
				if (!URI_FUNC(FixPercentEncodingMalloc)(
						&(uri->hostText.first),
						&(uri->hostText.afterLast),
						memory)) {
					goto failedMalloc;
				}
				dupedMask |= URI_NORMALIZE_HOST;
			} else {
				URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first,
						&(uri->hostText.afterLast));
			}
			URI_FUNC(LowercaseInplace)(uri->hostText.first,
					uri->hostText.afterLast);
		}
	}

	/* User info */
	if (((inMask & URI_NORMALIZE_USER_INFO) != 0) && (uri->userInfo.first != NULL)) {
		if (!uri->owner) {
			if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first),
					&(uri->userInfo.afterLast), memory)) {
				goto failedMalloc;
			}
			dupedMask |= URI_NORMALIZE_USER_INFO;
		} else {
			URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first,
					&(uri->userInfo.afterLast));
		}
	}

	/* Path */
	if ((inMask & URI_NORMALIZE_PATH) != 0) {
		URI_TYPE(PathSegment) * segment;
		UriBool pathTextOwned;
		/* Relative means: neither a scheme nor an absolute path */
		const UriBool relative = ((uri->scheme.first != NULL)
				|| uri->absolutePath) ? URI_FALSE : URI_TRUE;

		/* Fix percent-encoding for each segment */
		if (!uri->owner) {
			/* NOTE(review): URI_NORMALIZE_PATH only enters the mask once
			 * every segment went through, so on a failure part-way the
			 * segments already replaced are not reported to
			 * PreventLeakage -- confirm whether that can leak. */
			for (segment = uri->pathHead; segment != NULL; segment = segment->next) {
				if (!URI_FUNC(FixPercentEncodingMalloc)(&(segment->text.first),
						&(segment->text.afterLast), memory)) {
					goto failedMalloc;
				}
			}
			dupedMask |= URI_NORMALIZE_PATH;
		} else {
			for (segment = uri->pathHead; segment != NULL; segment = segment->next) {
				URI_FUNC(FixPercentEncodingInplace)(segment->text.first,
						&(segment->text.afterLast));
			}
		}

		/* 6.2.2.3 Path Segment Normalization */
		pathTextOwned = (uri->owner == URI_TRUE)
				|| ((dupedMask & URI_NORMALIZE_PATH) != 0);
		if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative, pathTextOwned, memory)) {
			goto failedMalloc;
		}
		URI_FUNC(FixEmptyTrailSegment)(uri, memory);
	}

	/* Query */
	if (((inMask & URI_NORMALIZE_QUERY) != 0) && (uri->query.first != NULL)) {
		if (!uri->owner) {
			if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first),
					&(uri->query.afterLast), memory)) {
				goto failedMalloc;
			}
			dupedMask |= URI_NORMALIZE_QUERY;
		} else {
			URI_FUNC(FixPercentEncodingInplace)(uri->query.first,
					&(uri->query.afterLast));
		}
	}

	/* Fragment */
	if (((inMask & URI_NORMALIZE_FRAGMENT) != 0) && (uri->fragment.first != NULL)) {
		if (!uri->owner) {
			if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first),
					&(uri->fragment.afterLast), memory)) {
				goto failedMalloc;
			}
			dupedMask |= URI_NORMALIZE_FRAGMENT;
		} else {
			URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first,
					&(uri->fragment.afterLast));
		}
	}

	/* Dup all not duped yet */
	if (!uri->owner) {
		if (!URI_FUNC(MakeOwner)(uri, &dupedMask, memory)) {
			goto failedMalloc;
		}
		uri->owner = URI_TRUE;
	}

	return URI_SUCCESS;

failedMalloc:
	/* Single exit for every allocation failure above; the mask holds
	 * whatever had been flagged as duplicated when the jump happened */
	URI_FUNC(PreventLeakage)(uri, dupedMask, memory);
	return URI_ERROR_MALLOC;
}