CURLcode CurlSession::ReadStatusLineAndHeadersFromRawResponse()

in sdk/core/azure-core/src/http/curl/curl.cpp [822:1066]


/**
 * @brief Reads and parses the HTTP status line and response headers, then derives the session
 * state needed to read the response body.
 *
 * The function keeps feeding bytes to a ResponseBufferParser until the parser reports that all
 * headers were parsed. After that it:
 *  - stores the parsed response in m_response and its status code in m_lastStatusCode;
 *  - decides whether the connection may be reused (m_httpKeepAlive);
 *  - shuts the connection down when the response carries "Connection: close", and flags
 *    m_connectionUpgraded when it carries "Connection: upgrade";
 *  - determines how the body is delimited: m_contentLength is set to 0 for HEAD requests and for
 *    204/304 responses, to the Content-Length header value when present, or to -1 otherwise; for
 *    a chunked Transfer-Encoding, m_isChunkedResponseType is set and the first chunk size is
 *    parsed.
 *
 * @param context Passed through to the socket reads and to ParseChunkSize.
 * @param reuseInternalBuffer When true, the first parse attempt consumes the bytes already held in
 * m_readBuffer (starting at m_bodyStartInBuffer) instead of reading from the socket. Any further
 * data needed is read from the socket.
 *
 * @return CURLE_OK on success; CURLE_RECV_ERROR when a socket read returns 0 bytes.
 *
 * @remark std::stoull is used to parse Content-Length, so a non-numeric or out-of-range value
 * makes this function throw std::invalid_argument or std::out_of_range.
 */
CURLcode CurlSession::ReadStatusLineAndHeadersFromRawResponse(
    Context const& context,
    bool reuseInternalBuffer)
{
  auto parser = ResponseBufferParser();
  auto bufferSize = size_t();

  // Keep reading until all headers were read
  while (!parser.IsParseCompleted())
  {
    size_t bytesParsed = 0;
    if (reuseInternalBuffer)
    {
      // Parse from the internal buffer. This means the previous read from the server got more than
      // one response. This happens when the server returns a 100-continue plus an error code.
      bufferSize = this->m_innerBufferSize - this->m_bodyStartInBuffer;
      bytesParsed = parser.Parse(this->m_readBuffer + this->m_bodyStartInBuffer, bufferSize);
      // If parsing from the internal buffer is not enough, do the next read from the wire.
      reuseInternalBuffer = false;
      // Reset body start
      this->m_bodyStartInBuffer = _detail::DefaultLibcurlReaderSize;
    }
    else
    {
      // Try to fill the internal buffer from the socket.
      // If the response is smaller than the buffer, we will get back the size of the response.
      bufferSize = m_connection->ReadFromSocket(
          this->m_readBuffer, _detail::DefaultLibcurlReaderSize, context);
      if (bufferSize == 0)
      {
        // Closed connection: prevent the application from trying to pull more bytes from the wire.
        Log::Write(Logger::Level::Error, "Failed to read from socket");
        return CURLE_RECV_ERROR;
      }
      // Returns the number of bytes parsed up to the body start
      bytesParsed = parser.Parse(this->m_readBuffer, bufferSize);
    }

    if (bytesParsed < bufferSize)
    {
      // The parser stopped before the end of the buffer: the remaining bytes belong to the body.
      this->m_bodyStartInBuffer = bytesParsed; // Body Start
    }
  }

  this->m_response = parser.ExtractResponse();
  this->m_innerBufferSize = bufferSize;
  this->m_lastStatusCode = this->m_response->GetStatusCode();

  // The logic below comes from the expectation that Azure services, particularly Storage, may not
  // conform to HTTP standards when it comes to handling 100-continue requests, and not send
  // "Connection: close" when they should. We do not know for sure if this is true, but this logic
  // did exist for libcurl transport in earlier C++ SDK versions.
  //
  // The idea is the following: if status code is not 2xx, and request header contains "Expect:
  // 100-continue" and request body length is not zero, we don't reuse the connection.
  //
  // More detailed description of what might happen if we don't have this logic:
  // 1. Storage SDK sends a PUT request with a non-empty request body (which means Content-Length
  // request header is not 0, let's say it's 6) and Expect: 100-continue request header, but it
  // doesn't send the request body unless server returns 100 Continue status code.
  // 2. Storage service returns 4xx status code and response headers, but it doesn't want to close
  // this connection, so there's no Connection: close in response headers.
  // 3. Now both client and server agree to continue using this connection. But they do not agree
  // on the current status of this connection.
  //    3.1. Client side thinks the previous request is finished because it has received a status
  //    code and response headers. It should send a new HTTP request if there's any.
  //    3.2. Server side thinks the previous request is not finished because it hasn't received the
  //    request body. I tend to think this is a bug of server-side.
  // 4. Client side sends a new request, for example,
  //    HEAD /whatever/path HTTP/1.1
  //    host: foo.bar.com
  //    ...
  // 5. Server side takes the first 6 bytes (HEAD /) of the sent request and thinks this is the
  // request body of the first request and discards it.
  // 6. Server side keeps reading the remaining data on the wire and thinks the first part
  // (whatever/path) is an HTTP verb. It fails the request with 400 invalid verb.
  bool non2xxAfter100ContinueWithNonzeroContentLength = false;
  {
    auto responseHttpCodeInt
        = static_cast<std::underlying_type<Http::HttpStatusCode>::type>(m_lastStatusCode);
    if (responseHttpCodeInt < 200 || responseHttpCodeInt >= 300)
    {
      const auto requestExpectHeader = m_request.GetHeader("Expect");
      if (requestExpectHeader.HasValue())
      {
        const auto requestExpectHeaderValueLowercase
            = Core::_internal::StringExtensions::ToLower(requestExpectHeader.Value());
        if (requestExpectHeaderValueLowercase == "100-continue")
        {
          const auto requestContentLengthHeaderValue = m_request.GetHeader("Content-Length");
          if (requestContentLengthHeaderValue.HasValue()
              && requestContentLengthHeaderValue.Value() != "0")
          {
            non2xxAfter100ContinueWithNonzeroContentLength = true;
          }
        }
      }
    }
  }

  if (non2xxAfter100ContinueWithNonzeroContentLength)
  {
    m_httpKeepAlive = false;
  }
  else
  {
    // HTTP <=1.0 is "close" by default. HTTP 1.1 is "keep-alive" by default.
    // The value can also be "keep-alive, close" (i.e. "both are fine"), in which case we are
    // preferring to treat it as keep-alive.
    // (https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection)
    // Should it come to HTTP/2 and HTTP/3, they are "keep-alive", but any response from HTTP/2 or
    // /3 containing a "Connection" header should be considered malformed.
    // (HTTP/2: https://httpwg.org/specs/rfc9113.html#ConnectionSpecific
    //  HTTP/3: https://httpwg.org/specs/rfc9114.html#rfc.section.4.2)
    //
    // HTTP/2+ are supposed to create persistent ("keep-alive") connections per host,
    // and close them by inactivity timeout. Given that we don't have such mechanism implemented
    // at this moment, we are closing all non-1.x connections immediately, which, in the worst case,
    // would only mean there's a perf hit, but the communication flow is expected to be correct.
    if (m_response->GetMajorVersion() == 1)
    {
      std::string connectionHeaderValue;
      {
        const Core::CaseInsensitiveMap& responseHeaders = m_response->GetHeaders();
        const auto connectionHeader = responseHeaders.find("Connection");
        if (connectionHeader != responseHeaders.cend())
        {
          connectionHeaderValue
              = Core::_internal::StringExtensions::ToLower(connectionHeader->second);
        }
      }

      const bool hasConnectionKeepAlive
          = connectionHeaderValue.find("keep-alive") != std::string::npos;

      if (m_response->GetMinorVersion() >= 1)
      {
        // HTTP/1.1+
        const bool hasConnectionClose = connectionHeaderValue.find("close") != std::string::npos;

        m_httpKeepAlive = (!hasConnectionClose || hasConnectionKeepAlive);
      }
      else
      {
        // HTTP/1.0
        m_httpKeepAlive = hasConnectionKeepAlive;
      }
    }
    else
    {
      // We don't expect HTTP/0.9, 2.0 or 3.0 in responses.
      // Barring rejecting as malformed, the safest thing to do here is to assume the connection is
      // not reusable.
      m_httpKeepAlive = false;
    }
  }

  // For Head request, set the length of body response to 0.
  // Response will give us content-length as if we were not doing Head saying what would it be the
  // length of the body. However, Server won't send body
  // For NoContent status code, also need to set contentLength to 0.
  // https://github.com/Azure/azure-sdk-for-cpp/issues/406
  if (this->m_request.GetMethod() == HttpMethod::Head
      || this->m_lastStatusCode == HttpStatusCode::NoContent
      || this->m_lastStatusCode == HttpStatusCode::NotModified)
  {
    this->m_contentLength = 0;
    this->m_bodyStartInBuffer = _detail::DefaultLibcurlReaderSize;
    return CURLE_OK;
  }

  // headers are already lowerCase at this point
  auto const& headers = this->m_response->GetHeaders();

  // Check if the server has returned the connection header. This header can be used to stop
  // re-using the connection. The `Iot Edge Blob Storage Module` is known to return this after some
  // time re-using the same http secured channel.
  auto connectionHeader = headers.find("connection");
  if (connectionHeader != headers.end())
  {
    if (Azure::Core::_internal::StringExtensions::LocaleInvariantCaseInsensitiveEqual(
            connectionHeader->second, "close"))
    {
      // Use connection shut-down so it won't be moved back to the connection pool.
      m_connection->Shutdown();
    }
    // If the server indicated that the connection header is "upgrade", it means that this
    // is a WebSocket connection so the caller may be upgrading the connection.
    if (Azure::Core::_internal::StringExtensions::LocaleInvariantCaseInsensitiveEqual(
            connectionHeader->second, "upgrade"))
    {
      m_connectionUpgraded = true;
    }
  }

  auto isContentLengthHeaderInResponse = headers.find("content-length");
  if (isContentLengthHeaderInResponse != headers.end())
  {
    // Note: std::stoull throws if the header value is not a valid unsigned number.
    this->m_contentLength
        = static_cast<int64_t>(std::stoull(isContentLengthHeaderInResponse->second));
    return CURLE_OK;
  }

  // No content-length from headers, check transfer-encoding
  this->m_contentLength = -1;
  auto isTransferEncodingHeaderInResponse = headers.find("transfer-encoding");
  if (isTransferEncodingHeaderInResponse != headers.end())
  {
    // Transfer-coding names are case-insensitive (RFC 7230, section 4), so normalize the value
    // before looking for "chunked"; otherwise e.g. "Chunked" would be missed and the chunk framing
    // would leak into the body.
    const auto transferEncodingLowercase
        = Core::_internal::StringExtensions::ToLower(isTransferEncodingHeaderInResponse->second);
    const bool isChunked = transferEncodingLowercase.find("chunked") != std::string::npos;

    if (isChunked)
    {
      // set curl session to know response is chunked
      // This will be used to remove chunked info while reading
      this->m_isChunkedResponseType = true;

      // Need to move body start after chunk size
      if (this->m_bodyStartInBuffer >= this->m_innerBufferSize)
      { // if nothing on inner buffer, pull from wire
        this->m_innerBufferSize = m_connection->ReadFromSocket(
            this->m_readBuffer, _detail::DefaultLibcurlReaderSize, context);
        if (this->m_innerBufferSize == 0)
        {
          // Closed connection: prevent the application from trying to pull more bytes from the
          // wire.
          Log::Write(Logger::Level::Error, "Failed to read from socket");
          return CURLE_RECV_ERROR;
        }
        this->m_bodyStartInBuffer = 0;
      }

      ParseChunkSize(context);
      return CURLE_OK;
    }
  }
  /*
  https://tools.ietf.org/html/rfc7230#section-3.3.3
   7.  Otherwise, this is a response message without a declared message
       body length, so the message body length is determined by the
       number of octets received prior to the server closing the
       connection.
  */
  return CURLE_OK;
}