src/WebJobs.Script/Sanitizer.cs (142 lines of code) (raw):
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.
using System;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Linq;
namespace Microsoft.Azure.WebJobs.Logging
{
/// <summary>
/// Utility class for sanitizing logging strings.
/// </summary>
// Note: This file is shared between the WebJobs SDK and Script repos. Update both if changes are needed.
internal static class Sanitizer
{
    /// <summary>
    /// Text substituted for every credential that is detected.
    /// </summary>
    public const string SecretReplacement = "[Hidden Credential]";

    // Characters that mark the end of a credential value: the opening of a tag, or a double/single quote.
    private static readonly char[] ValueTerminators = new char[] { '<', '"', '\'' };

    // List of keywords that should not be replaced with [Hidden Credential]
    private static readonly string[] AllowedTokens = new string[] { "PublicKeyToken=" };

    // Markers that introduce a credential. They are matched case-insensitively; everything from the marker up to
    // the next value terminator (or the end of the string) is replaced.
    // The "&amp;" variants cover query strings that appear inside HTML/XML-encoded text, where the plain
    // "&sig=" / "&code=" markers cannot match because the '&' has been encoded.
    internal static readonly string[] CredentialTokens = new string[] { "Token=", "DefaultEndpointsProtocol=http", "AccountKey=", "Data Source=", "Server=", "Password=", "pwd=", "&amp;sig=", "&sig=", "?sig=", "SharedAccessKey=", "&amp;code=", "&code=", "?code=", "/code=", "key=" };

    // Fragments of JSON property names that mark the property value as a credential (compared case-insensitively).
    private static readonly string[] CredentialNameFragments = new[] { "password", "pwd", "key", "secret", "token", "sas" };

    // Pattern of format : "<protocol>://<username>:<password>@<address>:<port>"
    private static readonly string Pattern = @"
                                    \b([a-zA-Z]+)            # Capture protocol
                                    :\/\/                    # '://'
                                    ([^:/\s]+)               # Capture username
                                    :                        # ':'
                                    ([^@/\s]+)               # Capture password
                                    @                        # '@'
                                    ([^:/\s]+)               # Capture address
                                    :                        # ':'
                                    ([0-9]+)\b               # Capture port number
                                    ";

    private static readonly Regex Regex = new Regex(Pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    /// <summary>
    /// Removes well-known credential strings from strings.
    /// </summary>
    /// <param name="input">The string to sanitize.</param>
    /// <returns>
    /// The sanitized string. <see cref="string.Empty"/> is returned when <paramref name="input"/> is null or empty,
    /// and <paramref name="input"/> itself is returned when it cannot contain a credential.
    /// </returns>
    internal static string Sanitize(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        // Every credential token contains '=' and the URL pattern requires ':', so an input with neither
        // character can never be changed. This is a quick/simple win for the high traffic case.
        if (!MayContainCredentials(input))
        {
            return input;
        }

        string sanitized = input;

        // A same-length copy of the input in which allowed tokens are overwritten with '#'. Token searches run
        // against this copy so allowed tokens are never matched, while indexes stay valid for 'sanitized'.
        string searchable = input;
        foreach (string allowedToken in AllowedTokens)
        {
            if (searchable.Contains(allowedToken))
            {
                string mask = new string('#', allowedToken.Length);
                searchable = searchable.Replace(allowedToken, mask);
            }
        }

        foreach (string token in CredentialTokens)
        {
            int startIndex = 0;
            while (true)
            {
                // search for the next token instance
                startIndex = searchable.IndexOf(token, startIndex, StringComparison.OrdinalIgnoreCase);
                if (startIndex == -1)
                {
                    break;
                }

                // Find the end of the secret: the next tag opening '<' or quote character, else the end of the string.
                int credentialEnd = sanitized.IndexOfAny(ValueTerminators, startIndex);
                bool hasTail = credentialEnd != -1;

                // Apply the same edit to both strings so that they stay the same length and share indexes.
                sanitized = sanitized.Substring(0, startIndex) + SecretReplacement + (hasTail ? sanitized.Substring(credentialEnd) : string.Empty);
                searchable = searchable.Substring(0, startIndex) + SecretReplacement + (hasTail ? searchable.Substring(credentialEnd) : string.Empty);

                if (!hasTail)
                {
                    // The replacement consumed the rest of the string; nothing is left to scan for this token.
                    break;
                }

                // Resume after the inserted replacement so the scan always moves forward and never
                // re-examines text that was just written.
                startIndex += SecretReplacement.Length;
            }
        }

        // This check avoids unnecessary regex evaluation if the input does not contain any url
        if (input.Contains(":"))
        {
            sanitized = Regex.Replace(sanitized, SecretReplacement);
        }

        return sanitized;
    }

    /// <summary>
    /// Builds a sanitized copy of a JSON object.
    /// </summary>
    /// <remarks>
    /// Top-level property names are only filtered through <paramref name="selector"/>; they are not compared with
    /// the credential name fragments. Inside nested objects, a property whose name contains one of the fragments
    /// has its value replaced by <see cref="SecretReplacement"/>. String values are passed through
    /// <see cref="Sanitize(string)"/>, arrays are sanitized element by element, and other tokens are kept as they are.
    /// </remarks>
    /// <param name="obj">The object to sanitize.</param>
    /// <param name="selector">
    /// Optional filter on top-level property names. When supplied, properties for which it returns false are omitted.
    /// </param>
    /// <returns>A new <see cref="JObject"/> holding the sanitized properties.</returns>
    internal static JObject Sanitize(JObject obj, Func<string, bool> selector = null)
    {
        // True when the property name contains any credential name fragment, ignoring case.
        static bool IsPotentialCredential(string name)
        {
            foreach (string fragment in CredentialNameFragments)
            {
                if (name.Contains(fragment, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        // Recursively sanitizes a token: objects and arrays are rebuilt, strings are scrubbed, the rest is returned as is.
        static JToken Sanitize(JToken token)
        {
            if (token is JObject nestedObject)
            {
                JObject sanitizedNested = new JObject();
                foreach (var prop in nestedObject)
                {
                    if (IsPotentialCredential(prop.Key))
                    {
                        sanitizedNested[prop.Key] = Sanitizer.SecretReplacement;
                    }
                    else
                    {
                        sanitizedNested[prop.Key] = Sanitize(prop.Value);
                    }
                }

                return sanitizedNested;
            }

            if (token is JArray array)
            {
                JArray sanitizedArray = new JArray();
                foreach (var value in array)
                {
                    sanitizedArray.Add(Sanitize(value));
                }

                return sanitizedArray;
            }

            if (token.Type == JTokenType.String)
            {
                // Qualified with the class name because the local function hides the string overload.
                return Sanitizer.Sanitize(token.ToString());
            }

            return token;
        }

        JObject sanitizedObject = new JObject();
        foreach (var prop in obj)
        {
            string propName = prop.Key;
            if (selector != null && !selector(propName))
            {
                continue;
            }

            var propValue = prop.Value;
            if (propValue != null)
            {
                sanitizedObject[propName] = Sanitize(propValue);
            }
        }

        return sanitizedObject;
    }

    /// <summary>
    /// Checks if a string even *possibly* contains one of our <see cref="CredentialTokens"/> or a credential URL.
    /// Useful for short-circuiting more expensive checks and replacements if it's known we wouldn't do anything.
    /// </summary>
    /// <param name="input">The string to inspect; must not be null.</param>
    /// <returns>True when <paramref name="input"/> contains '=' or ':'.</returns>
    internal static bool MayContainCredentials(string input) => input.Contains("=") || input.Contains(":");
}
}