tools/test-proxy/Azure.Sdk.Tools.TestProxy/Common/MultipartUtilities.cs (221 lines of code) (raw):
using Microsoft.AspNetCore.WebUtilities;
using Microsoft.Extensions.Primitives;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;
using System.Text;
using System;
using Microsoft.Net.Http.Headers;
using Microsoft.Extensions.Logging;
using Azure.Sdk.Tools.TestProxy.Common.Exceptions;
using Newtonsoft.Json.Linq;
using System.Net;
namespace Azure.Sdk.Tools.TestProxy.Common
{
public static class MultipartUtilities
{
public static readonly byte[] CrLf = new byte[] { (byte)'\r', (byte)'\n' };
public static byte[] ReadAllBytes(Stream s)
{
if (s is MemoryStream ms && ms.TryGetBuffer(out ArraySegment<byte> seg))
return seg.AsSpan(seg.Offset, seg.Count).ToArray();
using var copy = new MemoryStream();
int first = s.ReadByte();
if (first == -1)
return Array.Empty<byte>();
copy.WriteByte((byte)first);
s.CopyTo(copy);
return copy.ToArray();
}
/// <summary>
/// This function is necessary because while the MultipartReader REQUIRES a payload that follows the spec for multipart/mixed,
/// azure services don't actually return totally compliant mixed bodies. A lot of the time they merely include LF--boundaryabc123 instead of the spec-required
/// CRLF--boundaryabc123
///
/// This function rewrites a complete multipart entity so that every header line
/// (from the delimiter up to the first blank line) ends with CR LF,
/// and every delimiter line starts with CR LF. The body region is
/// left byte‑for‑byte intact.
/// </summary>
/// <param name="src">The byte buffer we need to update.</param>
/// <returns></returns>
public static byte[] NormalizeBareLf(byte[] src)
{
const byte CR = 0x0D, LF = 0x0A, DASH = 0x2D;
var dst = new byte[src.Length + 1000];
int w = 0;
bool atLineStart = true;
bool inHeaders = false;
for (int i = 0; i < src.Length; i++)
{
byte b = src[i];
// 1. a delimiter line means the next lines are headers
if (atLineStart && b == DASH && i + 1 < src.Length && src[i + 1] == DASH)
inHeaders = true;
// 2. inside headers, look ahead for the pattern LF LF
if (inHeaders && b == LF && i + 1 < src.Length && src[i + 1] == LF)
{
// we’re on the *first* LF of LF LF
// ensure we output CR LF CR LF
if (w == 0 || dst[w - 1] != CR) dst[w++] = CR;
dst[w++] = LF; // current LF
dst[w++] = CR; // injected CR before second LF
dst[w++] = LF; // second LF
i++; // skip over original second LF
atLineStart = true;
inHeaders = false; // blank line ends header block
continue;
}
// 3. bare LF at end of a non‑blank header line
if (inHeaders && b == LF && (w == 0 || dst[w - 1] != CR))
dst[w++] = CR;
dst[w++] = b;
atLineStart = b == LF;
// 4. CR LF CR LF already correct → leave header mode
if (inHeaders && atLineStart &&
i + 1 < src.Length && (src[i + 1] == CR || src[i + 1] == LF))
inHeaders = false;
}
var fixedBytes = w == src.Length ? src : dst.AsSpan(0, w).ToArray();
if (src.Length != w)
{
if (DebugLogger.CheckLogLevel(LogLevel.Debug))
{
var beforeText = Convert.ToBase64String(src);
var afterText = Convert.ToBase64String(fixedBytes);
DebugLogger.LogDebug($"We updated the multipart body from length {src.Length} to length {w}");
DebugLogger.LogDebug($"Base64 before: {beforeText}");
DebugLogger.LogDebug($"Base64 after: {afterText}");
}
}
return fixedBytes;
}
public static string ResolveFirstBoundary(string boundary, byte[] raw)
{
// Boundary might have been sanitised to "REDACTED"
if (boundary == "REDACTED" || boundary.EndsWith("00000000-0000-0000-0000-000000000000"))
{
ReadOnlySpan<byte> crlf = stackalloc byte[] { 0x0D, 0x0A };
int idx = raw.AsSpan().IndexOf(crlf);
if (idx == -1) throw new InvalidDataException("Multipart body missing CRLF.");
boundary = Encoding.ASCII.GetString(raw, 2, idx - 2); // skip leading "--"
}
return boundary;
}
public static bool IsNestedMultipart(
IDictionary<string, StringValues> headers,
out string boundary)
{
boundary = null;
if (!headers.TryGetValue("Content-Type", out var v)) return false;
if (!MediaTypeHeaderValue.TryParse(v[0], out var mt)) return false;
if (!mt.MediaType.StartsWith("multipart/", StringComparison.OrdinalIgnoreCase))
return false;
boundary = mt.Boundary.Value?.Trim('"');
return !string.IsNullOrEmpty(boundary);
}
public static void WriteTextBody(Utf8JsonWriter w, ReadOnlySpan<char> text)
{
while (true)
{
int idx = text.IndexOf('\n');
if (idx == -1) break;
idx += 1; // keep '\n'
w.WriteStringValue(text[..idx]);
text = text[idx..];
}
if (!text.IsEmpty) w.WriteStringValue(text);
}
public static void DumpAscii(ReadOnlySpan<byte> bytes, int count = 256)
{
var sb = new StringBuilder();
int n = Math.Min(count, bytes.Length);
for (int i = 0; i < n; i++)
{
byte b = bytes[i];
sb.Append(b switch
{
0x0D => '␍', // CR
0x0A => '␊', // LF
_ => (char)b
});
}
DebugLogger.LogInformation("‑‑‑‑‑‑‑‑‑‑ first " + n + " bytes ‑‑‑‑‑‑‑‑‑‑");
DebugLogger.LogInformation(sb.ToString() + Environment.NewLine);
}
public static void SerializeMultipartBody(
Utf8JsonWriter jsonWriter,
string name,
byte[] raw,
string boundary)
{
jsonWriter.WriteStartArray(name);
// Boundary might have been sanitised to "REDACTED"
boundary = ResolveFirstBoundary(boundary, raw);
// Only run the LF→CRLF fixer once at the outermost level
byte[] fixedRaw = NormalizeBareLf(raw);
WriteMultipartLines(jsonWriter,
new MemoryStream(fixedRaw, writable: false),
boundary);
jsonWriter.WriteEndArray();
}
public static void WriteMultipartLines(
Utf8JsonWriter jsonWriter,
Stream stream,
string boundary)
{
byte[] buf = NormalizeBareLf(ReadAllBytes(stream));
var reader = new MultipartReader(boundary, new MemoryStream(buf, false));
string open = $"--{boundary}\r\n";
string close = $"--{boundary}--\r\n";
try
{
MultipartSection part;
while ((part = reader.ReadNextSectionAsync().GetAwaiter().GetResult()) != null)
{
jsonWriter.WriteStringValue(open);
foreach (var h in part.Headers)
jsonWriter.WriteStringValue($"{h.Key}: {h.Value}\r\n");
jsonWriter.WriteStringValue("\r\n");
if (IsNestedMultipart(part.Headers, out var childBoundary))
{
WriteMultipartLines(jsonWriter, part.Body, childBoundary);
}
else if (ContentTypeUtilities.IsTextContentType(part.Headers, out var enc))
{
WriteTextBody(jsonWriter, enc.GetString(ReadAllBytes(part.Body)));
}
else
{
byte[] bytes = ReadAllBytes(part.Body);
if (bytes.Length == 0)
{
jsonWriter.WriteStartArray(); jsonWriter.WriteEndArray();
}
else
{
jsonWriter.WriteStringValue($"b64:{Convert.ToBase64String(bytes)}");
}
}
jsonWriter.WriteStringValue("\r\n");
}
}
catch (IOException ex)
{
var byteContent = Convert.ToBase64String(buf);
string message = $$"""
The test-proxy is unexpectedly unable to read this section of the config during serialization: \"{{ex.Message}}\"
File an issue on Azure/azure-sdk-tools and include this base64 string for reproducibility:
{{byteContent}}
""";
throw new HttpException(HttpStatusCode.InternalServerError, message);
}
jsonWriter.WriteStringValue(close);
}
public static byte[] DeserializeMultipartBody(JsonElement property, string boundary)
{
// this is a patch for the _old_ way of storing `multipart/mixed` recordings. On disk, `ResponseBody` was just a pure base64 string.
// the bytes just need to be read exactly as they are.
if (property.ValueKind == JsonValueKind.String)
{
return Convert.FromBase64String(property.GetString());
}
using var ms = new MemoryStream();
foreach (var item in property.EnumerateArray())
{
// Handle the “empty binary part” marker: []
if (item.ValueKind == JsonValueKind.Array)
{
// nothing to write – it really was a 204 / empty body
continue;
}
var segment = item.GetString();
if (segment.StartsWith("b64:", StringComparison.Ordinal))
{
var bytes = Convert.FromBase64String(segment.Substring(4));
ms.Write(bytes);
}
else
{
// Delimiter lines, headers, and text bodies are ASCII by spec.
ms.Write(Encoding.ASCII.GetBytes(segment));
}
}
return ms.ToArray();
}
}
}