Skip to content

Commit b0799da

Browse files
Harden Transfer-Encoding (#21737)
### What does this PR do? ### How did you verify your code works? --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent a67ba81 commit b0799da

File tree

2 files changed

+475
-24
lines changed

2 files changed

+475
-24
lines changed

packages/bun-uws/src/HttpParser.h

Lines changed: 86 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,78 @@ namespace uWS
222222
return std::string_view(nullptr, 0);
223223
}
224224

225+
/* Summary of what getTransferEncoding() found in the request headers.
 * Every flag occupies a single bit and starts out cleared. */
struct TransferEncoding {
    /* A Transfer-Encoding header with a non-empty value was found */
    bool has : 1 = false;

    /* The final transfer coding listed is "chunked" */
    bool chunked : 1 = false;

    /* The header(s) must be rejected; set by getTransferEncoding() and refined by its caller */
    bool invalid : 1 = false;
};
230+
231+
/* Returns true when the `length` bytes at `token` spell the "chunked" transfer coding.
 * Transfer-coding names are case-insensitive (RFC 9112, section 7), so ASCII upper-case
 * letters are folded before comparing. The fold is done by hand to stay locale-independent. */
static bool isChunkedToken(const char *token, size_t length)
{
    const char *chunked = "chunked";
    if (length != 7) {
        return false;
    }
    for (size_t i = 0; i < length; i++) {
        char c = token[i];
        if (c >= 'A' && c <= 'Z') {
            c = static_cast<char>(c + ('a' - 'A'));
        }
        if (c != chunked[i]) {
            return false;
        }
    }
    return true;
}

/* Scans every Transfer-Encoding header of this request and summarizes the result.
 *
 * Returned flags:
 *   has     - at least one non-empty transfer coding was listed (in any header)
 *   chunked - the final transfer coding listed is "chunked" (compared case-insensitively)
 *   invalid - "chunked" was followed by anything else: another Transfer-Encoding header,
 *             or another coding in the same comma-separated list (including a second
 *             "chunked"). The scan stops as soon as this is detected.
 *
 * Empty list elements (",,") and optional spaces/tabs around elements are ignored.
 * The caller is expected to additionally reject `has && !chunked`. */
TransferEncoding getTransferEncoding()
{
    TransferEncoding te;

    /* Cheap pre-check: skip the header walk when the header cannot be present */
    if (!bf.mightHave("transfer-encoding")) {
        return te;
    }

    /* The walk starts at headers[1] and stops at the first entry with an empty key */
    for (Header *h = headers; (++h)->key.length();) {
        if (h->key.length() != 17 || strncmp(h->key.data(), "transfer-encoding", 17)) {
            continue;
        }

        /* "chunked" must be the very last coding; a further header after it is an error */
        if (te.chunked) [[unlikely]] {
            te.invalid = true;
            return te;
        }

        const auto value = h->value;
        const size_t length = value.length();
        size_t pos = 0;

        while (pos < length) {
            /* Skip leading whitespace */
            while (pos < length && (value[pos] == ' ' || value[pos] == '\t')) {
                pos++;
            }

            /* The token runs until the next comma or the end of the value */
            const size_t tokenStart = pos;
            while (pos < length && value[pos] != ',') {
                pos++;
            }

            /* Trim trailing whitespace from the token */
            size_t tokenEnd = pos;
            while (tokenEnd > tokenStart && (value[tokenEnd - 1] == ' ' || value[tokenEnd - 1] == '\t')) {
                tokenEnd--;
            }

            if (tokenEnd > tokenStart) {
                /* Same rule as for separate headers: nothing may follow "chunked" */
                if (te.chunked) [[unlikely]] {
                    te.invalid = true;
                    return te;
                }

                /* Accumulate instead of overwrite, so a later empty header cannot hide
                 * a coding that an earlier header announced */
                te.has = true;
                te.chunked = isChunkedToken(value.data() + tokenStart, tokenEnd - tokenStart);
            }

            /* The inner scan stopped either at the end or on a comma; step over the comma */
            if (pos < length) {
                pos++;
            }
        }
    }

    return te;
}
296+
225297

226298
std::string_view getUrl()
227299
{
@@ -771,14 +843,16 @@ namespace uWS
771843
* the Transfer-Encoding overrides the Content-Length. Such a message might indicate an attempt
772844
* to perform request smuggling (Section 11.2) or response splitting (Section 11.1) and
773845
* ought to be handled as an error. */
774-
std::string_view transferEncodingString = req->getHeader("transfer-encoding");
775-
std::string_view contentLengthString = req->getHeader("content-length");
776-
777-
auto transferEncodingStringLen = transferEncodingString.length();
778-
auto contentLengthStringLen = contentLengthString.length();
779-
if (transferEncodingStringLen && contentLengthStringLen) {
780-
/* We could be smart and set an error in the context along with this, to indicate what
781-
* http error response we might want to return */
846+
const std::string_view contentLengthString = req->getHeader("content-length");
847+
const auto contentLengthStringLen = contentLengthString.length();
848+
849+
/* Check Transfer-Encoding header validity and conflicts */
850+
HttpRequest::TransferEncoding transferEncoding = req->getTransferEncoding();
851+
852+
transferEncoding.invalid = transferEncoding.invalid || (transferEncoding.has && (contentLengthStringLen || !transferEncoding.chunked));
853+
854+
if (transferEncoding.invalid) [[unlikely]] {
855+
/* Invalid Transfer-Encoding (multiple headers or chunked not last - request smuggling attempt) */
782856
return HttpParserResult::error(HTTP_ERROR_400_BAD_REQUEST, HTTP_PARSER_ERROR_INVALID_TRANSFER_ENCODING);
783857
}
784858

@@ -789,7 +863,7 @@ namespace uWS
789863
// lets check if content len is valid before calling requestHandler
790864
if(contentLengthStringLen) {
791865
remainingStreamingBytes = toUnsignedInteger(contentLengthString);
792-
if (remainingStreamingBytes == UINT64_MAX) {
866+
if (remainingStreamingBytes == UINT64_MAX) [[unlikely]] {
793867
/* Parser error */
794868
return HttpParserResult::error(HTTP_ERROR_400_BAD_REQUEST, HTTP_PARSER_ERROR_INVALID_CONTENT_LENGTH);
795869
}
@@ -813,20 +887,8 @@ namespace uWS
813887
/* RFC 9112 6.3
814888
* If a message is received with both a Transfer-Encoding and a Content-Length header field,
815889
* the Transfer-Encoding overrides the Content-Length. */
816-
if (transferEncodingStringLen) {
817-
818-
/* If a proxy sent us the transfer-encoding header that 100% means it must be chunked or else the proxy is
819-
* not RFC 9112 compliant. Therefore it is always better to assume this is the case, since that entirely eliminates
820-
* all forms of transfer-encoding obfuscation tricks. We just rely on the header. */
821-
822-
/* RFC 9112 6.3
823-
* If a Transfer-Encoding header field is present in a request and the chunked transfer coding is not the
824-
* final encoding, the message body length cannot be determined reliably; the server MUST respond with the
825-
* 400 (Bad Request) status code and then close the connection. */
826-
827-
/* In this case we fail later by having the wrong interpretation (assuming chunked).
828-
* This could be made stricter but makes no difference either way, unless forwarding the identical message as a proxy. */
829-
890+
if (transferEncoding.has) {
891+
/* We already validated that chunked is last if present, before calling the handler */
830892
remainingStreamingBytes = STATE_IS_CHUNKED;
831893
/* If consume minimally, we do not want to consume anything but we want to mark this as being chunked */
832894
if constexpr (!ConsumeMinimally) {
@@ -835,7 +897,7 @@ namespace uWS
835897
for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
836898
dataHandler(user, chunk, chunk.length() == 0);
837899
}
838-
if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
900+
if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) [[unlikely]] {
839901
// TODO: what happen if we already responded?
840902
return HttpParserResult::error(HTTP_ERROR_400_BAD_REQUEST, HTTP_PARSER_ERROR_INVALID_CHUNKED_ENCODING);
841903
}

0 commit comments

Comments
 (0)