bool inner_parse()

in Source/Shared/http_utils.cpp [1359:1547]


            // Splits a NUL-terminated, already-encoded URI string into its components.
            //
            // Parameters:
            //   encoded   - the URI text to parse; must be non-null and NUL-terminated.
            //   *_begin / *_end
            //             - output half-open ranges [begin, end) pointing into `encoded`.
            //               Both pointers of a pair are left as nullptr when the component
            //               is absent. An empty authority ("file:///x") leaves the host
            //               and user-info ranges as nullptr.
            //   port      - output port number; 0 when no port is present (or it is empty).
            //
            // Returns true when the whole string was consumed using only characters that
            // are legal for the component they appear in, false otherwise.
            // NOTE: on a false return the outputs may be partially filled in (everything is
            // reset at entry, then components are written as they are recognised), so
            // callers should not use them unless the function returned true.
            bool inner_parse(
                const char* encoded,
                const char** scheme_begin, const char** scheme_end,
                const char** uinfo_begin, const char** uinfo_end,
                const char** host_begin, const char** host_end,
                _Out_ int* port,
                const char** path_begin, const char** path_end,
                const char** query_begin, const char** query_end,
                const char** fragment_begin, const char** fragment_end)
            {
                // start from a clean slate so absent components read back as nullptr / 0
                *scheme_begin = nullptr;
                *scheme_end = nullptr;
                *uinfo_begin = nullptr;
                *uinfo_end = nullptr;
                *host_begin = nullptr;
                *host_end = nullptr;
                *port = 0;
                *path_begin = nullptr;
                *path_end = nullptr;
                *query_begin = nullptr;
                *query_end = nullptr;
                *fragment_begin = nullptr;
                *fragment_end = nullptr;

                const char* p = encoded;

                // IMPORTANT -- A uri may either be an absolute uri, or a relative-reference
                // Absolute: 'http://host.com'
                // Relative-Reference: '//host.com', '/path1/path2?query', './path1:path2'
                // A Relative-Reference can be disambiguated by looking for a ':' before the first slash

                bool is_relative_reference = true;
                for (const char* p2 = p; *p2 != '/' && *p2 != '\0'; p2++)
                {
                    if (*p2 == ':')
                    {
                        // found a colon, the first portion is a scheme
                        is_relative_reference = false;
                        break;
                    }
                }

                if (!is_relative_reference)
                {
                    // the first character of a scheme must be a letter
                    // (cast: passing a negative char to isalpha is undefined behaviour)
                    if (!isalpha(static_cast<unsigned char>(*p)))
                    {
                        return false;
                    }

                    // start parsing the scheme, it's always delimited by a colon
                    // (the scan above guarantees one is present before any '/' or EOS)
                    *scheme_begin = p++;
                    for (; *p != ':'; p++)
                    {
                        if (!is_scheme_character(*p))
                        {
                            return false;
                        }
                    }
                    *scheme_end = p;

                    // skip over the colon
                    p++;
                }

                // if we see two slashes next, then we're going to parse the authority portion
                // later on we'll break up the authority into the user info, host and port
                if (*p == '/' && p[1] == '/')
                {
                    // skip over the slashes
                    p += 2;
                    const char* const authority_begin = p;

                    // the authority is delimited by a slash (resource), question-mark (query) or octothorpe (fragment)
                    // or by EOS. The authority could be empty ('file:///C:\file_name.txt')
                    for (; *p != '/' && *p != '?' && *p != '#' && *p != '\0'; p++)
                    {
                        // anything is_authority_character() rejects makes the whole uri invalid
                        // (the original note here says IPv6 and IPvFuture hosts are not supported)
                        if (!is_authority_character(*p))
                        {
                            return false;
                        }
                    }
                    const char* const authority_end = p;

                    // now lets see if we have a port specified -- by working back from the end
                    if (authority_begin != authority_end)
                    {
                        // the port is made up of all digits; walk backwards over them
                        // (cast: passing a negative char to isdigit is undefined behaviour)
                        const char* port_begin = authority_end - 1;
                        while (port_begin != authority_begin && isdigit(static_cast<unsigned char>(*port_begin)))
                        {
                            port_begin--;
                        }

                        if (*port_begin == ':')
                        {
                            // has a port
                            *host_begin = authority_begin;
                            *host_end = port_begin;

                            // skip the colon
                            port_begin++;

                            // RFC 3986 allows an empty port ("host:"); leave *port at 0 in that
                            // case rather than asking the converter to parse an empty string
                            if (port_begin != authority_end)
                            {
                                *port = convert::scan_string<int>(xsapi_internal_string(port_begin, authority_end), std::locale::classic());
                            }
                        }
                        else
                        {
                            // no port
                            *host_begin = authority_begin;
                            *host_end = authority_end;
                        }

                        // look for a user_info component: a run of user-info characters
                        // at the front of the authority that is terminated by '@'
                        const char* u_end = *host_begin;
                        while (u_end != *host_end && is_user_info_character(*u_end))
                        {
                            u_end++;
                        }

                        if (*u_end == '@')
                        {
                            *host_begin = u_end + 1;
                            *uinfo_begin = authority_begin;
                            *uinfo_end = u_end;
                        }
                        else
                        {
                            // no user info -- write through the out-pointers (the previous code
                            // reassigned the parameters themselves, which had no effect)
                            *uinfo_begin = nullptr;
                            *uinfo_end = nullptr;
                        }
                    }
                }

                // if we see a slash, or any other legal path character, parse the path next
                if (*p == '/' || is_path_character(*p))
                {
                    *path_begin = p;

                    // the path is delimited by a question-mark (query) or octothorpe (fragment) or by EOS
                    for (; *p != '?' && *p != '#' && *p != '\0'; p++)
                    {
                        if (!is_path_character(*p))
                        {
                            return false;
                        }
                    }
                    *path_end = p;
                }

                // if we see a ?, then the query is next
                if (*p == '?')
                {
                    // skip over the question mark
                    p++;
                    *query_begin = p;

                    // the query is delimited by a '#' (fragment) or EOS
                    for (; *p != '#' && *p != '\0'; p++)
                    {
                        if (!is_query_character(*p))
                        {
                            return false;
                        }
                    }
                    *query_end = p;
                }

                // if we see a #, then the fragment is next
                if (*p == '#')
                {
                    // skip over the hash mark
                    p++;
                    *fragment_begin = p;

                    // the fragment is delimited by EOS
                    for (; *p != '\0'; p++)
                    {
                        if (!is_fragment_character(*p))
                        {
                            return false;
                        }
                    }
                    *fragment_end = p;
                }

                return true;
            }