core/ebpf/protocol/http/HttpParser.cpp (243 lines of code) (raw):

// Copyright 2025 iLogtail Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "HttpParser.h" #include <map> #include "common/StringTools.h" #include "ebpf/type/NetworkObserverEvent.h" #include "ebpf/util/TraceId.h" #include "logger/Logger.h" namespace logtail::ebpf { inline constexpr char kContentLength[] = "Content-Length"; inline constexpr char kTransferEncoding[] = "Transfer-Encoding"; inline constexpr char kUpgrade[] = "Upgrade"; std::vector<std::shared_ptr<AbstractRecord>> HTTPProtocolParser::Parse(struct conn_data_event_t* dataEvent, const std::shared_ptr<Connection>& conn, const std::shared_ptr<Sampler>& sampler) { auto record = std::make_shared<HttpRecord>(conn); record->SetEndTsNs(dataEvent->end_ts); record->SetStartTsNs(dataEvent->start_ts); auto spanId = GenerateSpanID(); // slow request if (record->GetLatencyMs() > 500 || sampler->ShouldSample(spanId)) { record->MarkSample(); } // ParseResponse may set SAMPLE flag, depending on HTTP status code ... if (dataEvent->response_len > 0) { std::string_view buf(dataEvent->msg + dataEvent->request_len, dataEvent->response_len); ParseState state = http::ParseResponse(buf, record, true, false); if (state != ParseState::kSuccess) { LOG_DEBUG(sLogger, ("[HTTPProtocolParser]: Parse HTTP response failed", int(state))); return {}; } } if (dataEvent->request_len > 0) { std::string_view buf(dataEvent->msg, dataEvent->request_len); ParseState state = http::ParseRequest(buf, record, false); if (state != ParseState::kSuccess) { LOG_DEBUG(sLogger, ("[HTTPProtocolParser]: Parse HTTP request failed", int(state))); return {}; } } if (record->ShouldSample()) { record->SetSpanId(std::move(spanId)); record->SetTraceId(GenerateTraceID()); } return {record}; } namespace http { HeadersMap GetHTTPHeadersMap(const phr_header* headers, size_t numHeaders) { HeadersMap result; for (size_t i = 0; i < numHeaders; i++) { std::string name(headers[i].name, headers[i].name_len); std::string value(headers[i].value, headers[i].value_len); result.emplace(std::move(name), std::move(value)); } return result; } int ParseHttpRequest(std::string_view& buf, HTTPRequest& result) { return phr_parse_request(buf.data(), buf.size(), &result.mMethod, &result.mMethodLen, &result.mPath, &result.mPathLen, &result.mMinorVersion, result.mHeaders, &result.mNumHeaders, /*last_len*/ 0); } const std::string kRootPath = "/"; const char kQuestionMark = '?'; const std::string kHttP1Prefix = "http1."; ParseState ParseRequest(std::string_view& buf, std::shared_ptr<HttpRecord>& result, bool forceSample) { HTTPRequest req; int retval = http::ParseHttpRequest(buf, req); if (retval >= 0) { buf.remove_prefix(retval); auto orginPath = std::string(req.mPath, req.mPathLen); auto trimPath = TrimString(orginPath); std::size_t pos = trimPath.find(kQuestionMark); if (trimPath.empty() || (pos != std::string::npos && pos == 0)) { result->SetPath(kRootPath); result->SetRealPath(kRootPath); } else if (pos != std::string::npos) { result->SetPath(trimPath.substr(0, pos)); } else { result->SetPath(trimPath); result->SetRealPath(trimPath); } if (result->ShouldSample() || forceSample) { result->SetProtocolVersion(kHttP1Prefix + std::to_string(req.mMinorVersion)); result->SetMethod(std::string(req.mMethod, req.mMethodLen)); result->SetReqHeaderMap(http::GetHTTPHeadersMap(req.mHeaders, req.mNumHeaders)); return ParseRequestBody(buf, result); } return ParseState::kSuccess; } if (retval == -2) { return ParseState::kNeedsMoreData; } return ParseState::kInvalid; } ParseState PicoParseChunked(std::string_view& data, size_t bodySizeLimitBytes, std::string& result, size_t& bodySize) { // Make a copy of the data because phr_decode_chunked mutates the input, // and if the original parse fails due to a lack of data, we need the original // state to be preserved. std::string dataCopy(data); phr_chunked_decoder chunkDecoder = {}; chunkDecoder.consume_trailer = 1; char* buf = dataCopy.data(); size_t bufSize = dataCopy.size(); ssize_t retval = phr_decode_chunked(&chunkDecoder, buf, &bufSize); if (retval == -1) { // Parse failed. return ParseState::kInvalid; } if (retval == -2) { // Incomplete message. return ParseState::kNeedsMoreData; } if (retval >= 0) { // Found a complete message. dataCopy.resize(std::min(bufSize, bodySizeLimitBytes)); // data_copy.resize(buf_size); dataCopy.shrink_to_fit(); result = std::move(dataCopy); bodySize = bufSize; // phr_decode_chunked rewrites the buffer in place, removing chunked-encoding headers. // So we cannot simply remove the prefix, but rather have to shorten the buffer too. // This is done via retval, which specifies how many unprocessed bytes are left. data.remove_prefix(data.size() - retval); return ParseState::kSuccess; } return ParseState::kUnknown; } ParseState ParseChunked(std::string_view& data, size_t bodySizeLimitBytes, std::string& result, size_t& bodySize) { return PicoParseChunked(data, bodySizeLimitBytes, result, bodySize); } ParseState ParseRequestBody(std::string_view& buf, std::shared_ptr<HttpRecord>& result) { // Case 1: Content-Length const auto contentLengthIter = result->GetReqHeaderMap().find(kContentLength); if (contentLengthIter != result->GetReqHeaderMap().end()) { std::string_view contentLenStr = contentLengthIter->second; auto r = ParseContent(contentLenStr, buf, 256, result->mReqBody, result->mReqBodySize); return r; } // Case 2: Chunked transfer. const auto transferEncodingIter = result->GetReqHeaderMap().find(kTransferEncoding); if (transferEncodingIter != result->GetReqHeaderMap().end() && transferEncodingIter->second == "chunked") { auto s = ParseChunked(buf, 256, result->mReqBody, result->mReqBodySize); return s; } // Case 3: Message has no Content-Length or Transfer-Encoding. // An HTTP request with no Content-Length and no Transfer-Encoding should not have a body when // no Content-Length or Transfer-Encoding is set: // "A user agent SHOULD NOT send a Content-Length header field when the request message does // not contain a payload body and the method semantics do not anticipate such a body." // // We apply this to all methods, since we have no better strategy in other cases. result->mReqBody = ""; return ParseState::kSuccess; } int ParseHttpResponse(std::string_view buf, HTTPResponse* result) { return phr_parse_response(buf.data(), buf.size(), &result->mMinorVersion, &result->mStatus, &result->mMsg, &result->mMsgLen, result->mHeaders, &result->mNumHeaders, /*last_len*/ 0); } bool ParseContentLength(const std::string_view& contentLenStr, size_t* len) { if (len == nullptr) { return false; } try { size_t pos; std::stoull(contentLenStr.data()); *len = std::stoull(std::string(contentLenStr), &pos); if (pos != contentLenStr.size()) { return false; } } catch (const std::exception& e) { return false; } return true; } ParseState ParseContent(std::string_view& contentLenStr, std::string_view& data, size_t bodySizeLimitBytes, std::string& result, size_t& bodySize) { size_t len; if (!ParseContentLength(contentLenStr, &len)) { return ParseState::kInvalid; } if (data.size() < len) { return ParseState::kNeedsMoreData; } result = data.substr(0, std::min(len, bodySizeLimitBytes)); // *result = data->substr(0, len); bodySize = len; data.remove_prefix(std::min(len, data.size())); return ParseState::kSuccess; } bool StartsWithHttp(const std::string_view& buf) { if (buf.empty()) { return false; } static const std::string_view kPrefix = "HTTP"; return buf.size() >= kPrefix.size() && buf.substr(0, kPrefix.size()) == kPrefix; } ParseState ParseResponseBody(std::string_view& buf, std::shared_ptr<HttpRecord>& result, bool closed) { HTTPResponse r; bool adjacentResp = StartsWithHttp(buf) && (ParseHttpResponse(buf, &r) > 0); if (adjacentResp || (buf.empty() && closed)) { return ParseState::kSuccess; } // Case 1: Content-Length const auto contentLengthIter = result->GetRespHeaderMap().find(kContentLength); if (contentLengthIter != result->GetRespHeaderMap().end()) { std::string_view contentLenStr = contentLengthIter->second; auto s = ParseContent(contentLenStr, buf, 256, result->mRespBody, result->mRespBodySize); // CTX_DCHECK_LE(result->body.size(), FLAGS_http_body_limit_bytes); return s; } // Case 2: Chunked transfer. const auto transferEncodingIter = result->GetRespHeaderMap().find(kTransferEncoding); if (transferEncodingIter != result->GetRespHeaderMap().end() && transferEncodingIter->second == "chunked") { auto s = ParseChunked(buf, 256, result->mRespBody, result->mRespBodySize); // CTX_DCHECK_LE(result->body.size(), FLAGS_http_body_limit_bytes); return s; } // Case 3: Responses where we can assume no body. // The status codes below MUST not have a body, according to the spec. // See: https://tools.ietf.org/html/rfc2616#section-4.4 if ((result->mCode >= 100 && result->mCode < 200) || result->mCode == 204 || result->mCode == 304) { result->mRespBody = ""; // Status 101 is an even more special case. if (result->mCode == 101) { const auto upgradeIter = result->GetRespHeaderMap().find(kUpgrade); if (upgradeIter == result->GetRespHeaderMap().end()) { } return ParseState::kEOS; } return ParseState::kSuccess; } // Case 4: Response where we can't assume no body, but where no Content-Length or // Transfer-Encoding is provided. In these cases we should wait for close(). // According to HTTP/1.1 standard: // https://www.w3.org/Protocols/HTTP/1.0/draft-ietf-http-spec.html#BodyLength // such messages are terminated by the close of the connection. // TODO(yzhao): For now we just accumulate messages, let probe_close() submit a message to // perf buffer, so that we can terminate such messages. result->mRespBody = buf; buf.remove_prefix(buf.size()); return ParseState::kSuccess; } ParseState ParseResponse(std::string_view& buf, std::shared_ptr<HttpRecord>& result, bool closed, bool forceSample) { HTTPResponse resp; int retval = ParseHttpResponse(buf, &resp); if (retval >= 0) { buf.remove_prefix(retval); result->SetStatusCode(resp.mStatus); // for 4xx 5xx if (result->GetStatusCode() >= 400) { result->MarkSample(); } if (result->ShouldSample() || forceSample) { result->SetRespHeaderMap(http::GetHTTPHeadersMap(resp.mHeaders, resp.mNumHeaders)); result->SetRespMsg(std::string(resp.mMsg, resp.mMsgLen)); return ParseResponseBody(buf, result, closed); } return ParseState::kSuccess; } if (retval == -2) { return ParseState::kNeedsMoreData; } return ParseState::kInvalid; } } // namespace http } // namespace logtail::ebpf