int httpd_parse_request()

in netutils/thttpd/libhttpd.c [2653:3166]


/* Name: httpd_parse_request
 *
 * Description:
 *   Parse an HTTP request using the lines handed back by bufgets(hc).
 *   The request line is split in place into method, URL and protocol;
 *   when hc->mime_flag is set the header lines are then scanned and the
 *   recognised ones are recorded in hc.  Finally the decoded URL is turned
 *   into origfilename / expnfilename / pathinfo and the expanded name is
 *   checked against the permitted directory tree.
 *
 *   Most header values (referer, useragent, hdrhost, acceptl, cookie,
 *   contenttype, authorization, protocol, encodedurl) are stored as
 *   pointers into the lines returned by bufgets(); they are not copied.
 *   Only reqhost, decodedurl, origfilename, query, accept, accepte,
 *   expnfilename and pathinfo are copied into buffers sized through
 *   httpd_realloc_str().
 *
 * Input Parameters:
 *   hc - The connection whose request is to be parsed.
 *
 * Returned Value:
 *   0 on success.  On failure an error response has already been issued
 *   through httpd_send_err() and -1 is returned.
 */

int httpd_parse_request(httpd_conn *hc)
{
  char *buf;
  char *method_str;
  char *url;
  char *protocol;
  char *reqhost;
  char *eol;
  char *cp;
  char *pi;

  hc->checked_idx = 0;          /* reset */
  method_str      = bufgets(hc);

  /* bufgets() can return NULL (the header loop below tests for it), so do
   * not hand a NULL request line to the logger or to strpbrk().
   */

  if (!method_str)
    {
      BADREQUEST("method_str");
      httpd_send_err(hc, 400, httpd_err400title, "", httpd_err400form, "");
      return -1;
    }

  ninfo("method_str: \"%s\"\n", method_str);

  /* The method ends at the first blank / CR / LF; the URL follows. */

  url = strpbrk(method_str, " \t\012\015");
  if (!url)
    {
      BADREQUEST("url-1");
      httpd_send_err(hc, 400, httpd_err400title, "", httpd_err400form, "");
      return -1;
    }

  *url++ = '\0';
  url   += strspn(url, " \t\012\015");
  ninfo("url: \"%s\"\n", url);

  protocol = strpbrk(url, " \t\012\015");
  ninfo("protocol: \"%s\"\n", protocol ? protocol : "<null>");

  if (!protocol)
    {
      /* No protocol token at all: treat as HTTP/0.9, which carries no
       * MIME headers.
       */

      protocol      = "HTTP/0.9";
      hc->mime_flag = false;
    }
  else
    {
      *protocol++ = '\0';
      protocol += strspn(protocol, " \t\012\015");
      if (*protocol != '\0')
        {
          eol = strpbrk(protocol, " \t\012\015");
          if (eol)
            {
              *eol = '\0';
            }

          /* Any non-empty protocol other than HTTP/1.0 is handled as
           * HTTP/1.1.
           */

          if (strcasecmp(protocol, "HTTP/1.0") != 0)
            {
              hc->one_one = true;
            }
        }
    }

  hc->protocol = protocol;

  /* Check for HTTP/1.1 absolute URL. */

  if (strncasecmp(url, "http://", 7) == 0)
    {
      if (!hc->one_one)
        {
          BADREQUEST("one_one");
          httpd_send_err(hc, 400, httpd_err400title, "",
                         httpd_err400form, "");
          return -1;
        }

      reqhost = url + 7;
      url     = strchr(reqhost, '/');
      if (!url)
        {
          BADREQUEST("reqhost-1");
          httpd_send_err(hc, 400, httpd_err400title, "",
                         httpd_err400form, "");
          return -1;
        }

      /* Temporarily terminate the host part so it can be validated and
       * copied; the '/' is put back afterwards.
       */

      *url = '\0';

      if (strchr(reqhost, '/') != NULL || reqhost[0] == '.')
        {
          BADREQUEST("reqhost-2");
          httpd_send_err(hc, 400, httpd_err400title, "",
                         httpd_err400form, "");
          return -1;
        }

      httpd_realloc_str(&hc->reqhost, &hc->maxreqhost, strlen(reqhost));
      strlcpy(hc->reqhost, reqhost, hc->maxreqhost + 1);
      *url = '/';
    }

  if (*url != '/')
    {
      BADREQUEST("url-2");
      httpd_send_err(hc, 400, httpd_err400title, "", httpd_err400form, "");
      return -1;
    }

  /* Only GET, HEAD and POST are accepted. */

  if (strcasecmp(method_str, httpd_method_str(METHOD_GET)) == 0)
    {
      hc->method = METHOD_GET;
    }
  else if (strcasecmp(method_str, httpd_method_str(METHOD_HEAD)) == 0)
    {
      hc->method = METHOD_HEAD;
    }
  else if (strcasecmp(method_str, httpd_method_str(METHOD_POST)) == 0)
    {
      hc->method = METHOD_POST;
    }
  else
    {
      NOTIMPLEMENTED(method_str);
      httpd_send_err(hc, 501, err501title, "", err501form, method_str);
      return -1;
    }

  /* Keep the encoded URL in place and build the decoded copy. */

  hc->encodedurl = url;
  httpd_realloc_str(&hc->decodedurl, &hc->maxdecodedurl,
                    strlen(hc->encodedurl));
  httpd_strdecode(hc->decodedurl, hc->encodedurl);

  /* origfilename is the decoded URL without its leading '/'. */

  httpd_realloc_str(&hc->origfilename, &hc->maxorigfilename,
                    strlen(hc->decodedurl));
  strlcpy(hc->origfilename, &hc->decodedurl[1], hc->maxorigfilename + 1);

  /* Special case for top-level URL. */

  if (hc->origfilename[0] == '\0')
    {
      strlcpy(hc->origfilename, ".", hc->maxorigfilename + 1);
    }

  /* Extract query string from encoded URL. */

  cp = strchr(hc->encodedurl, '?');
  if (cp)
    {
      ++cp;
      httpd_realloc_str(&hc->query, &hc->maxquery, strlen(cp));
      strlcpy(hc->query, cp, hc->maxquery + 1);

      /* Remove query from (decoded) origfilename. */

      cp = strchr(hc->origfilename, '?');
      if (cp)
        {
          *cp = '\0';
        }
    }

  /* After de_dotdot() the name must be neither absolute nor start with a
   * ".." path component.
   */

  de_dotdot(hc->origfilename);
  if (hc->origfilename[0] == '/' ||
      (hc->origfilename[0] == '.' && hc->origfilename[1] == '.' &&
       (hc->origfilename[2] == '\0' || hc->origfilename[2] == '/')))
    {
      BADREQUEST("origfilename");
      httpd_send_err(hc, 400, httpd_err400title, "", httpd_err400form, "");
      return -1;
    }

  if (hc->mime_flag)
    {
      /* Read the MIME headers. */

      while ((buf = bufgets(hc)) != NULL)
        {
          /* An empty line ends the header section. */

          if (buf[0] == '\0')
            {
              break;
            }

          if (strncasecmp(buf, "Referer:", 8) == 0)
            {
              cp = &buf[8];
              cp += strspn(cp, " \t");
              hc->referer = cp;
            }
          else if (strncasecmp(buf, "User-Agent:", 11) == 0)
            {
              cp = &buf[11];
              cp += strspn(cp, " \t");
              hc->useragent = cp;
            }
          else if (strncasecmp(buf, "Host:", 5) == 0)
            {
              cp = &buf[5];
              cp += strspn(cp, " \t");
              hc->hdrhost = cp;

              /* Drop everything from the first ':' on (the port). */

              cp = strchr(hc->hdrhost, ':');
              if (cp)
                {
                  *cp = '\0';
                }

              if (hc->hdrhost[0] == '.' ||
                  strchr(hc->hdrhost, '/') != NULL)
                {
                  BADREQUEST("hdrhost");
                  httpd_send_err(hc, 400, httpd_err400title, "",
                                 httpd_err400form, "");
                  return -1;
                }
            }
          else if (strncasecmp(buf, "Accept:", 7) == 0)
            {
              /* Repeated Accept: headers are joined with ", ".  The size
               * passed to strlcat() must be the capacity of hc->accept
               * (maxaccept), not that of the Accept-Encoding buffer.
               */

              cp = &buf[7];
              cp += strspn(cp, " \t");
              if (hc->accept[0] != '\0')
                {
                  if (strlen(hc->accept) > CONFIG_THTTPD_MAXREALLOC)
                    {
                      nerr("ERROR: %s way too much Accept: data\n",
                           httpd_ntoa(&hc->client_addr));
                      continue;
                    }

                  httpd_realloc_str(&hc->accept, &hc->maxaccept,
                                    strlen(hc->accept) + 2 + strlen(cp));
                  strlcat(hc->accept, ", ", hc->maxaccept + 1);
                }
              else
                {
                  httpd_realloc_str(&hc->accept, &hc->maxaccept, strlen(cp));
                }

              strlcat(hc->accept, cp, hc->maxaccept + 1);
            }
          else if (strncasecmp(buf, "Accept-Encoding:", 16) == 0)
            {
              /* Same accumulation scheme as Accept: above. */

              cp = &buf[16];
              cp += strspn(cp, " \t");
              if (hc->accepte[0] != '\0')
                {
                  if (strlen(hc->accepte) > CONFIG_THTTPD_MAXREALLOC)
                    {
                      nerr("ERROR: %s way too much Accept-Encoding: data\n",
                            httpd_ntoa(&hc->client_addr));
                      continue;
                    }

                  httpd_realloc_str(&hc->accepte, &hc->maxaccepte,
                                    strlen(hc->accepte) + 2 + strlen(cp));
                  strlcat(hc->accepte, ", ", hc->maxaccepte + 1);
                }
              else
                {
                  httpd_realloc_str(&hc->accepte, &hc->maxaccepte,
                                    strlen(cp));
                }

              /* Append (not copy): a copy would discard the values that
               * were accumulated from earlier Accept-Encoding: headers.
               */

              strlcat(hc->accepte, cp, hc->maxaccepte + 1);
            }
          else if (strncasecmp(buf, "Accept-Language:", 16) == 0)
            {
              cp = &buf[16];
              cp += strspn(cp, " \t");
              hc->acceptl = cp;
            }
          else if (strncasecmp(buf, "If-Modified-Since:", 18) == 0)
            {
              cp = &buf[18];
              hc->if_modified_since = tdate_parse(cp);
              if (hc->if_modified_since == (time_t) - 1)
                {
                  nerr("ERROR: unparsable time: %s\n", cp);
                }
            }
          else if (strncasecmp(buf, "Cookie:", 7) == 0)
            {
              cp = &buf[7];
              cp += strspn(cp, " \t");
              hc->cookie = cp;
            }
          else if (strncasecmp(buf, "Range:", 6) == 0)
            {
              /* Only support %d- and %d-%d, not %d-%d,%d-%d or -%d. */

              if (strchr(buf, ',') == NULL)
                {
                  char *cp_dash;
                  cp = strpbrk(buf, "=");
                  if (cp)
                    {
                      /* Require at least one character between '=' and
                       * '-', which rejects the "-%d" suffix form.
                       */

                      cp_dash = strchr(cp + 1, '-');
                      if (cp_dash != NULL && cp_dash != cp + 1)
                        {
                          *cp_dash = '\0';
                          hc->got_range = true;
                          hc->range_start = atoll(cp + 1);
                          if (hc->range_start < 0)
                            {
                              hc->range_start = 0;
                            }

                          /* <ctype.h> functions need an unsigned char
                           * value; a plain char may be negative.
                           */

                          if (isdigit((unsigned char)cp_dash[1]))
                            {
                              hc->range_end = atoll(cp_dash + 1);
                              if (hc->range_end < 0)
                                {
                                  hc->range_end = -1;
                                }
                            }
                        }
                    }
                }
            }
          else if (strncasecmp(buf, "Range-If:", 9) == 0 ||
                   strncasecmp(buf, "If-Range:", 9) == 0)
            {
              cp = &buf[9];
              hc->range_if = tdate_parse(cp);
              if (hc->range_if == (time_t) - 1)
                {
                  nerr("ERROR: unparsable time: %s\n", cp);
                }
            }
          else if (strncasecmp(buf, "Content-Type:", 13) == 0)
            {
              cp = &buf[13];
              cp += strspn(cp, " \t");
              hc->contenttype = cp;
            }
          else if (strncasecmp(buf, "Content-Length:", 15) == 0)
            {
              cp = &buf[15];
              hc->contentlength = atol(cp);
            }
          else if (strncasecmp(buf, "Authorization:", 14) == 0)
            {
              cp = &buf[14];
              cp += strspn(cp, " \t");
              hc->authorization = cp;
            }
          else if (strncasecmp(buf, "Connection:", 11) == 0)
            {
              cp = &buf[11];
              cp += strspn(cp, " \t");
              if (strcasecmp(cp, "keep-alive") == 0)
                {
                  hc->keep_alive = true;
                }
            }
#ifdef LOG_UNKNOWN_HEADERS
          else if (strncasecmp(buf, "Accept-Charset:", 15) == 0 ||
                   strncasecmp(buf, "Accept-Language:", 16) == 0 ||
                   strncasecmp(buf, "Agent:", 6) == 0 ||
                   strncasecmp(buf, "Cache-Control:", 14) == 0 ||
                   strncasecmp(buf, "Cache-Info:", 11) == 0 ||
                   strncasecmp(buf, "Charge-To:", 10) == 0 ||
                   strncasecmp(buf, "Client-IP:", 10) == 0 ||
                   strncasecmp(buf, "Date:", 5) == 0 ||
                   strncasecmp(buf, "Extension:", 10) == 0 ||
                   strncasecmp(buf, "Forwarded:", 10) == 0 ||
                   strncasecmp(buf, "From:", 5) == 0 ||
                   strncasecmp(buf, "HTTP-Version:", 13) == 0 ||
                   strncasecmp(buf, "Max-Forwards:", 13) == 0 ||
                   strncasecmp(buf, "Message-Id:", 11) == 0 ||
                   strncasecmp(buf, "MIME-Version:", 13) == 0 ||
                   strncasecmp(buf, "Negotiate:", 10) == 0 ||
                   strncasecmp(buf, "Pragma:", 7) == 0 ||
                   strncasecmp(buf, "Proxy-Agent:", 12) == 0 ||
                   strncasecmp(buf, "Proxy-Connection:", 17) == 0 ||
                   strncasecmp(buf, "Security-Scheme:", 16) == 0 ||
                   strncasecmp(buf, "Session-Id:", 11) == 0 ||
                   strncasecmp(buf, "UA-Color:", 9) == 0 ||
                   strncasecmp(buf, "UA-CPU:", 7) == 0 ||
                   strncasecmp(buf, "UA-Disp:", 8) == 0 ||
                   strncasecmp(buf, "UA-OS:", 6) == 0 ||
                   strncasecmp(buf, "UA-Pixels:", 10) == 0 ||
                   strncasecmp(buf, "User:", 5) == 0 ||
                   strncasecmp(buf, "Via:", 4) == 0 ||
                   strncasecmp(buf, "X-", 2) == 0)
            ;                   /* ignore */
          else
            {
              nwarn("WARNING: unknown request header: %s\n", buf);
            }
#endif /* LOG_UNKNOWN_HEADERS */
        }
    }

  if (hc->one_one)
    {
      /* Check that HTTP/1.1 requests specify a host, as required. */

      if (hc->reqhost[0] == '\0' && hc->hdrhost[0] == '\0')
        {
          BADREQUEST("reqhost-3");
          httpd_send_err(hc, 400, httpd_err400title, "",
                         httpd_err400form, "");
          return -1;
        }

      /* If the client wants to do keep-alive, it might also be doing
       * pipelining.  There's no way for us to tell.  Since we don't
       * implement keep-alive yet, if we close such a connection there
       * might be unread pipelined requests waiting.  So, we have to do a
       * lingering close.
       */

      if (hc->keep_alive)
        {
          hc->should_linger = true;
        }
    }

  /* Ok, the request has been parsed.  Now we resolve stuff that may require
   * the entire request.
   */

  /* Copy original filename to expanded filename. */

  httpd_realloc_str(&hc->expnfilename, &hc->maxexpnfilename,
                    strlen(hc->origfilename));
  strlcpy(hc->expnfilename, hc->origfilename, hc->maxexpnfilename + 1);

  /* Tilde mapping. */

  if (hc->expnfilename[0] == '~')
    {
#ifdef CONFIG_THTTPD_TILDE_MAP1
      if (!httpd_tilde_map1(hc))
        {
          httpd_send_err(hc, 404, err404title, "",
                         err404form, hc->encodedurl);
          return -1;
        }

#endif
#ifdef CONFIG_THTTPD_TILDE_MAP2
      if (!httpd_tilde_map2(hc))
        {
          httpd_send_err(hc, 404, err404title, "",
                         err404form, hc->encodedurl);
          return -1;
        }

#endif
    }

  /* Virtual host mapping. */

#ifdef CONFIG_THTTPD_VHOST
  if (!vhost_map(hc))
    {
      INTERNALERROR("VHOST");
      httpd_send_err(hc, 500, err500title, "", err500form, hc->encodedurl);
      return -1;
    }
#endif

  /* Expand the filename */

  cp = expand_filename(hc->expnfilename, &pi, hc->tildemapped);
  if (!cp)
    {
      INTERNALERROR(hc->expnfilename);
      httpd_send_err(hc, 500, err500title, "", err500form, hc->encodedurl);
      return -1;
    }

  httpd_realloc_str(&hc->expnfilename, &hc->maxexpnfilename, strlen(cp));
  strlcpy(hc->expnfilename, cp, hc->maxexpnfilename + 1);
  httpd_realloc_str(&hc->pathinfo, &hc->maxpathinfo, strlen(pi));
  strlcpy(hc->pathinfo, pi, hc->maxpathinfo + 1);
  ninfo("expnfilename: \"%s\" pathinfo: \"%s\"\n",
         hc->expnfilename, hc->pathinfo);

  /* Remove pathinfo stuff from the original filename too. */

  if (hc->pathinfo[0] != '\0')
    {
      int i;

      /* If origfilename ends with pathinfo, cut it off at the character
       * immediately before that suffix.
       */

      i = strlen(hc->origfilename) - strlen(hc->pathinfo);
      if (i > 0 && strcmp(&hc->origfilename[i], hc->pathinfo) == 0)
        {
          hc->origfilename[i - 1] = '\0';
        }
    }

  /* If the expanded filename is an absolute path, check that it's still
   * within the current directory or the alternate directory.
   *
   * NOTE(review): the httpd_root test is a plain prefix comparison; unlike
   * the altdir test it does not require the following character to be
   * '\0' or '/'.  Presumably httpd_root is defined so that a sibling
   * directory sharing the prefix cannot match -- confirm.
   */

  if (hc->expnfilename[0] == '/')
    {
      if (strncmp(hc->expnfilename, httpd_root, strlen(httpd_root)) == 0)
        {
        }
#ifdef CONFIG_THTTPD_TILDE_MAP2
      else if (hc->altdir[0] != '\0' &&
               (!strncmp(hc->expnfilename, hc->altdir, strlen(hc->altdir)) &&
                (hc->expnfilename[strlen(hc->altdir)] == '\0' ||
                 hc->expnfilename[strlen(hc->altdir)] == '/')))
        {
        }
#endif
      else
        {
          nwarn("WARNING: %s URL \"%s\" goes outside the web tree\n",
                 httpd_ntoa(&hc->client_addr), hc->encodedurl);
          httpd_send_err(hc, 403, err403title, "",
                         ERROR_FORM(err403form,
                                    "The requested URL '%s' resolves to a "
                                    "file outside the permitted web server "
                                    "directory tree.\n"),
                         hc->encodedurl);
          return -1;
        }
    }

  return 0;
}