func customS3Decoder()

in v3/client/decrypt_middleware.go [21:71]


// customS3Decoder undoes S3's non-standard "double encoding" of object
// metadata. S3 takes each byte of the original UTF-8 material description,
// treats it as a Latin-1 codepoint (U+0080..U+00FF), UTF-8 encodes that
// codepoint again, and finally MIME (RFC 2047) encodes the result. This
// function reverses both layers: it MIME-decodes matDesc, then collapses
// each two-byte re-encoded sequence back to the single original byte, and
// decodes the recovered bytes as UTF-8.
//
// It returns the recovered string, or an error if MIME decoding fails or
// the decoded string ends with a truncated multi-byte sequence.
func customS3Decoder(matDesc string) (decoded string, e error) {
	// First layer: MIME (RFC 2047 encoded-word) decoding. Plain ASCII
	// input passes through DecodeHeader unchanged.
	decoder := new(mime.WordDecoder)
	s, err := decoder.DecodeHeader(matDesc)
	if err != nil {
		return "", fmt.Errorf("error while decoding material description: %s\n from S3 object metadata: %w", matDesc, err)
	}
	var sb strings.Builder

	skipNext := false
	// utf8buffer accumulates the recovered original UTF-8 bytes of one
	// (or more consecutive) non-ASCII characters until an ASCII byte —
	// or the end of input — tells us the sequence is complete.
	var utf8buffer []byte
	// Iterate over the bytes in the string
	for i, b := range []byte(s) {
		r := rune(b)
		// Check if the rune (code point) is non-US-ASCII
		if r > 127 && !skipNext {
			// Non-ASCII characters need special treatment due to
			// double-encoding: each original UTF-8 byte was turned
			// into a codepoint in U+0080..U+00FF, which occupies
			// exactly two bytes in UTF-8. So take two bytes at a time.
			if i+1 >= len(s) {
				// The input ends in the middle of a multi-byte
				// sequence; the original code would panic here
				// with an index out of range.
				return "", fmt.Errorf("truncated multi-byte sequence at end of material description: %s", matDesc)
			}
			buf := []byte{s[i], s[i+1]}
			// Decode the two-byte sequence to its codepoint. That
			// codepoint's numeric value IS the original byte
			// (U+0080..U+00FF fits in one byte), so truncating
			// recovers it. utf16.Encode of a rune below U+10000 is
			// the identity on its value.
			wrongRune := string(buf)
			encd := utf16.Encode([]rune(wrongRune))[0]
			// Buffer the recovered original byte so it can be
			// UTF-8 decoded later.
			utf8buffer = append(utf8buffer, byte(encd))
			skipNext = true
		} else if r > 127 && skipNext {
			// Second byte of the pair was already consumed above;
			// only skip once.
			skipNext = false
		} else {
			// ASCII byte: the pending multi-byte sequence (if any)
			// is complete. Decode the buffered bytes as UTF-8,
			// recovering the original characters, then emit the
			// ASCII byte itself.
			for len(utf8buffer) > 0 {
				rb, size := utf8.DecodeRune(utf8buffer)
				sb.WriteRune(rb)
				utf8buffer = utf8buffer[size:]
			}
			sb.WriteByte(b)
		}
	}
	// Flush anything still buffered when the input ends on a non-ASCII
	// character. For material description this is normally unreachable
	// (the string is JSON, so it ends with ASCII '}'), but the original
	// code silently dropped these bytes for other inputs.
	for len(utf8buffer) > 0 {
		rb, size := utf8.DecodeRune(utf8buffer)
		sb.WriteRune(rb)
		utf8buffer = utf8buffer[size:]
	}
	return sb.String(), nil
}