ext/Objects/unicodeobject-test.cpp (2,578 lines of code) (raw):

// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) #include <cstring> #include "Python.h" #include "gtest/gtest.h" #include "capi-fixture.h" #include "capi-testing.h" extern "C" int _Py_EncodeUTF8Ex(const wchar_t*, char**, size_t*, const char**, int, _Py_error_handler); extern "C" wchar_t* _Py_DecodeUTF8_surrogateescape(const char*, Py_ssize_t, size_t*); extern "C" int _Py_DecodeUTF8Ex(const char*, Py_ssize_t, wchar_t**, size_t*, const char**, _Py_error_handler); extern "C" int _Py_normalize_encoding(const char*, char*, size_t); namespace py { namespace testing { using UnicodeExtensionApiTest = ExtensionApi; TEST_F(UnicodeExtensionApiTest, AsEncodedStringFromNonStringReturnsNull) { EXPECT_EQ(PyUnicode_AsEncodedString(Py_None, nullptr, nullptr), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringWithNullSizeReturnsUTF8) { const char* str = "utf-8 \xc3\xa8"; PyObjectPtr pyunicode(PyUnicode_FromString(str)); PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, nullptr, nullptr)); EXPECT_TRUE(isBytesEqualsCStr(bytes, str)); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIUsesErrorHandler) { PyObjectPtr pyunicode(PyUnicode_FromString("non\xc3\xa8-ascii")); PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "ascii", "ignore")); EXPECT_TRUE(isBytesEqualsCStr(bytes, "non-ascii")); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringLatin1ReturnsLatin1) { PyObjectPtr pyunicode(PyUnicode_FromString("latin-1 \xc3\xa8")); PyObjectPtr bytes(PyUnicode_AsEncodedString(pyunicode, "latin-1", nullptr)); EXPECT_TRUE(isBytesEqualsCStr(bytes, "latin-1 \xe8")); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringASCIIWithSubClassReturnsASCII) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr("some string") )"); PyObjectPtr substr(mainModuleGet("substr")); const char* expected = "some string"; PyObjectPtr bytes(PyUnicode_AsEncodedString(substr, "ascii", nullptr)); 
EXPECT_TRUE(isBytesEqualsCStr(bytes, expected)); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringWithBytearrayReturnRaisesWarning) { CaptureStdStreams streams; PyRun_SimpleString(R"( import _codecs def encoder(s): return bytearray(b"expected"), "two" def lookup_function(encoding): if encoding == "encode-with-bytearray-return": return encoder, 0, 0, 0 _codecs.register(lookup_function) substr = "some test" )"); PyObjectPtr substr(mainModuleGet("substr")); PyObjectPtr bytes(PyUnicode_AsEncodedString( substr, "encode-with-bytearray-return", nullptr)); EXPECT_TRUE(isBytesEqualsCStr(bytes, "expected")); EXPECT_EQ(PyErr_Occurred(), nullptr); EXPECT_NE(streams.err().find( "RuntimeWarning: encoder encode-with-bytearray-return " "returned bytearray instead of bytes; use codecs.encode() to " "encode to arbitrary types\n"), std::string::npos); } TEST_F(UnicodeExtensionApiTest, AsEncodedStringWithNonBytelikeReturnRaisesError) { PyRun_SimpleString(R"( import _codecs def encoder(s): return "not-byteslike", "two" def lookup_function(encoding): if encoding == "encode-with-non-bytelike-return": return encoder, 0, 0, 0 _codecs.register(lookup_function) substr = "some test" )"); PyObjectPtr substr(mainModuleGet("substr")); EXPECT_EQ(PyUnicode_AsEncodedString(substr, "encode-with-non-bytelike-return", nullptr), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, AsUTF8FromNonStringReturnsNull) { // Pass a non string object const char* cstring = PyUnicode_AsUTF8AndSize(Py_None, nullptr); EXPECT_EQ(nullptr, cstring); } TEST_F(UnicodeExtensionApiTest, AsUTF8WithNullSizeReturnsCString) { const char* str = "Some C String"; PyObjectPtr pyunicode(PyUnicode_FromString(str)); // Pass a nullptr size const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, nullptr); ASSERT_NE(nullptr, cstring); EXPECT_STREQ(str, cstring); } TEST_F(UnicodeExtensionApiTest, AsUTF8WithSubClassReturnsCString) { PyRun_SimpleString(R"( class SubStr(str): pass substr = 
SubStr("some string") )"); PyObjectPtr substr(mainModuleGet("substr")); Py_ssize_t size = 0; const char* expected = "some string"; const char* c_str = PyUnicode_AsUTF8AndSize(substr, &size); ASSERT_NE(c_str, nullptr); EXPECT_STREQ(c_str, expected); } TEST_F(UnicodeExtensionApiTest, AsUTF8WithReferencedSizeReturnsCString) { const char* str = "Some C String"; PyObjectPtr pyunicode(PyUnicode_FromString(str)); // Pass a size reference Py_ssize_t size = 0; const char* cstring = PyUnicode_AsUTF8AndSize(pyunicode, &size); ASSERT_NE(nullptr, cstring); EXPECT_STREQ(str, cstring); EXPECT_EQ(size, static_cast<Py_ssize_t>(std::strlen(str))); // Repeated calls should return the same buffer and still set the size. size = 0; const char* cstring2 = PyUnicode_AsUTF8AndSize(pyunicode, &size); ASSERT_NE(cstring2, nullptr); EXPECT_EQ(cstring2, cstring); } TEST_F(UnicodeExtensionApiTest, AsUTF8ReturnsCString) { const char* str = "Some other C String"; PyObjectPtr pyobj(PyUnicode_FromString(str)); const char* cstring = PyUnicode_AsUTF8(pyobj); ASSERT_NE(cstring, nullptr); EXPECT_STREQ(cstring, str); // Make sure repeated calls on the same object return the same buffer. 
const char* cstring2 = PyUnicode_AsUTF8(pyobj); ASSERT_NE(cstring2, nullptr); EXPECT_EQ(cstring2, cstring); } TEST_F(UnicodeExtensionApiTest, AsUTF8WithSurrogatesRaisesUnicodeEncodeError) { PyObjectPtr str(PyUnicode_DecodeLocale("hello\x80world", "surrogateescape")); EXPECT_EQ(PyUnicode_AsUTF8(str), nullptr); PyObject *exc, *value, *tb; PyErr_Fetch(&exc, &value, &tb); ASSERT_NE(exc, nullptr); ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeEncodeError)); Py_ssize_t temp; PyObjectPtr msg(PyUnicodeEncodeError_GetReason(value)); EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "surrogates not allowed")); PyUnicodeEncodeError_GetStart(value, &temp); EXPECT_EQ(temp, 5); PyUnicodeEncodeError_GetEnd(value, &temp); EXPECT_EQ(temp, 6); Py_DECREF(exc); Py_DECREF(value); Py_XDECREF(tb); } TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithNonStringReturnsNull) { PyObjectPtr bytes(_PyUnicode_AsUTF8String(Py_None, nullptr)); ASSERT_EQ(bytes, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, AsUTF8StringReturnsBytes) { PyObjectPtr unicode(PyUnicode_FromString("foo")); PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_EQ(PyBytes_Size(bytes), 3); EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); } TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithInvalidCodepointRaisesEncodeError) { PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape")); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(unicode)); PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, nullptr)); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); EXPECT_EQ(bytes, nullptr); } TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithReplaceErrorsReturnsBytes) { PyObjectPtr unicode(PyUnicode_DecodeASCII("foo\x80", 4, "surrogateescape")); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(unicode)); 
PyObjectPtr bytes(_PyUnicode_AsUTF8String(unicode, "replace")); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_EQ(PyBytes_Size(bytes), 4); EXPECT_STREQ(PyBytes_AsString(bytes), "foo?"); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithNonStringReturnsNull) { // Pass a non string object. Py_UCS4* ucs4_string = PyUnicode_AsUCS4(Py_None, nullptr, 0, 0); EXPECT_EQ(nullptr, ucs4_string); } TEST_F(UnicodeExtensionApiTest, AsUTF8StringWithSubClassReturnsBytes) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr("foo") )"); PyObjectPtr substr(mainModuleGet("substr")); PyObjectPtr bytes(_PyUnicode_AsUTF8String(substr, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_EQ(PyBytes_Size(bytes), 3); EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithNullBufferReturnsNull) { PyObjectPtr unicode(PyUnicode_FromString("foo")); Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, nullptr, 0, 0); EXPECT_EQ(nullptr, ucs4_string); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithShortBufferWithoutCopyNullReturnsNotNullTerminated) { PyObjectPtr unicode(PyUnicode_FromString("abc")); Py_UCS4 target[4]; target[0] = 1; Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, target, 2, 0 /* copy_null */); EXPECT_EQ(nullptr, ucs4_string); EXPECT_EQ(Py_UCS4{1}, target[0]); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithShortBufferWithCopyNullReturnsNullTerminated) { PyObjectPtr unicode(PyUnicode_FromString("abc")); Py_UCS4 target[4]; target[0] = 1; Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, target, 2, 1 /* copy_null */); EXPECT_EQ(nullptr, ucs4_string); EXPECT_EQ(Py_UCS4{0}, target[0]); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithoutCopyNullReturnsNotNullTerminated) { Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0}; PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); Py_UCS4 target[6]; target[5] = 1; Py_UCS4* ucs4_string = 
PyUnicode_AsUCS4(unicode, target, 5, 0 /* copy_null */); EXPECT_EQ(target, ucs4_string); EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]); EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]); EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]); EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]); EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]); EXPECT_EQ(Py_UCS4{1}, ucs4_string[5]); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithCopyNullReturnsNullTerminated) { Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0}; PyObjectPtr unicode(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); Py_UCS4 target[6]; target[5] = 1; Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, target, 6, 1 /* copy_null */); EXPECT_EQ(target, ucs4_string); EXPECT_EQ(Py_UCS4{0x1F192}, ucs4_string[0]); EXPECT_EQ(Py_UCS4{'h'}, ucs4_string[1]); EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]); EXPECT_EQ(Py_UCS4{'l'}, ucs4_string[3]); EXPECT_EQ(Py_UCS4{0x2CC0}, ucs4_string[4]); EXPECT_EQ(Py_UCS4{0}, ucs4_string[5]); } TEST_F(UnicodeExtensionApiTest, AsUCS4WithSubClassAndCopyNullReturnsNullTerminatedString) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr("foo") )"); PyObjectPtr unicode(mainModuleGet("substr")); Py_UCS4 target[4]; Py_UCS4* ucs4_string = PyUnicode_AsUCS4(unicode, target, 4, 1 /* copy_null */); EXPECT_EQ(Py_UCS4{'f'}, ucs4_string[0]); EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[1]); EXPECT_EQ(Py_UCS4{'o'}, ucs4_string[2]); EXPECT_EQ(Py_UCS4{0}, ucs4_string[3]); } // Delegating testing to AsUCS4. 
TEST_F(UnicodeExtensionApiTest,
       AsUCS4WithNonAsciiReturnsCodePointsNullTerminated) {
  PyObjectPtr unicode(PyUnicode_FromString("ab\u00e4p"));
  Py_UCS4* ucs4_string = PyUnicode_AsUCS4Copy(unicode);
  // Stop here on failure instead of dereferencing a null pointer below.
  ASSERT_NE(ucs4_string, nullptr);
  EXPECT_EQ(Py_UCS4{'a'}, ucs4_string[0]);
  EXPECT_EQ(Py_UCS4{'b'}, ucs4_string[1]);
  EXPECT_EQ(Py_UCS4{0xE4}, ucs4_string[2]);
  EXPECT_EQ(Py_UCS4{'p'}, ucs4_string[3]);
  EXPECT_EQ(Py_UCS4{0}, ucs4_string[4]);
  PyMem_Free(ucs4_string);
}

// ---------------------------------------------------------------------------
// PyUnicode_AsWideChar
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, AsWideCharWithNullptrRaisesSystemError) {
  wchar_t wide_string[1];
  EXPECT_EQ(
      PyUnicode_AsWideChar(nullptr, wide_string, Py_ARRAY_LENGTH(wide_string)),
      -1);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
}

TEST_F(UnicodeExtensionApiTest, AsWideCharWithNonStringRaisesTypeError) {
  PyObjectPtr not_string(PyTuple_New(0));
  wchar_t wide_string[1];
  EXPECT_EQ(PyUnicode_AsWideChar(not_string, wide_string,
                                 Py_ARRAY_LENGTH(wide_string)),
            -1);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest,
       AsWideCharWithNonASCIICodePointReturnsNullTerminatedWideCharString) {
  PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
  wchar_t wide_string[4];
  EXPECT_EQ(Py_ssize_t{3}, PyUnicode_AsWideChar(unicode, wide_string,
                                                Py_ARRAY_LENGTH(wide_string)));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ(0xe5, wide_string[1]);
  EXPECT_EQ('z', wide_string[2]);
  EXPECT_EQ(0, wide_string[3]);
}

TEST_F(UnicodeExtensionApiTest, AsWideCharCopiesUpToSizeElements) {
  PyObjectPtr unicode(PyUnicode_FromString("abcdef"));
  // Pre-fill with 'x' so untouched slots can be told apart from copied ones.
  wchar_t wide_string[5] = {'x', 'x', 'x', 'x', 'x'};
  EXPECT_EQ(Py_ssize_t{3}, PyUnicode_AsWideChar(unicode, wide_string, 3));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ('b', wide_string[1]);
  EXPECT_EQ('c', wide_string[2]);
  EXPECT_EQ('x', wide_string[3]);
  EXPECT_EQ('x', wide_string[4]);
}

TEST_F(UnicodeExtensionApiTest, AsWideCharWithEmbeddedNullWritesNullChar) {
  PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
  wchar_t wide_string[5];
  EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string,
                                    Py_ARRAY_LENGTH(wide_string)));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ('b', wide_string[1]);
  EXPECT_EQ('\0', wide_string[2]);
  EXPECT_EQ('c', wide_string[3]);
  EXPECT_EQ('\0', wide_string[4]);
}

TEST_F(UnicodeExtensionApiTest,
       AsWideCharWithSizeEqualsBufferSizeDoesNotWriteNul) {
  PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
  wchar_t wide_string[4];
  EXPECT_EQ(4, PyUnicode_AsWideChar(unicode, wide_string, 4));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ('b', wide_string[1]);
  EXPECT_EQ('\0', wide_string[2]);
  EXPECT_EQ('c', wide_string[3]);
}

TEST_F(UnicodeExtensionApiTest,
       AsWideCharWithBufferSizeLessThanStringSizeWritesUpToBufferSize) {
  PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
  wchar_t wide_string[2];
  EXPECT_EQ(2, PyUnicode_AsWideChar(unicode, wide_string,
                                    Py_ARRAY_LENGTH(wide_string)));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ('b', wide_string[1]);
}

// ---------------------------------------------------------------------------
// PyUnicode_AsWideCharString
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNullptrRaisesSystemError) {
  EXPECT_EQ(PyUnicode_AsWideCharString(nullptr, nullptr), nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
}

TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonStringRaisesTypeError) {
  PyObjectPtr not_string(PyTuple_New(0));
  EXPECT_EQ(PyUnicode_AsWideCharString(not_string, nullptr), nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(
    UnicodeExtensionApiTest,
    AsWideCharStringWithNonASCIICodePointReturnsNullTerminatedWideCharString) {
  PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
  wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, nullptr);
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  // Stop here on failure instead of dereferencing a null pointer below.
  ASSERT_NE(wide_string, nullptr);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ(0xe5, wide_string[1]);
  EXPECT_EQ('z', wide_string[2]);
  EXPECT_EQ(0, wide_string[3]);
  PyMem_Free(wide_string);
}

TEST_F(UnicodeExtensionApiTest, AsWideCharStringWithNonNullSizeSetsSize) {
  PyObjectPtr unicode(PyUnicode_FromString("a\xc3\xa5z"));
  // Start from a recognizable junk value so an unset size is detected.
  Py_ssize_t size = 0xdeadbeef;
  wchar_t* wide_string = PyUnicode_AsWideCharString(unicode, &size);
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  // Stop here on failure instead of dereferencing a null pointer below.
  ASSERT_NE(wide_string, nullptr);
  EXPECT_EQ(size, 3);
  EXPECT_EQ('a', wide_string[0]);
  EXPECT_EQ(0xe5, wide_string[1]);
  EXPECT_EQ('z', wide_string[2]);
  EXPECT_EQ(0, wide_string[3]);
  PyMem_Free(wide_string);
}

TEST_F(UnicodeExtensionApiTest,
       AsWideCharStringWithEmbeddedNullRaisesValueError) {
  PyObjectPtr unicode(PyUnicode_FromStringAndSize("ab\0c", 4));
  EXPECT_EQ(PyUnicode_AsWideCharString(unicode, nullptr), nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

// ---------------------------------------------------------------------------
// PyUnicode_Check / PyUnicode_CheckExact
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, CheckWithStrReturnsTrue) {
  PyObjectPtr str(PyUnicode_FromString("ab\u00e4p"));
  EXPECT_TRUE(PyUnicode_Check(str));
  EXPECT_TRUE(PyUnicode_CheckExact(str));
}

TEST_F(UnicodeExtensionApiTest, CheckWithSubClassIsNotExact) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
substr = SubStr('ok')
)");
  PyObjectPtr substr(mainModuleGet("substr"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyUnicode_Check(substr));
  EXPECT_FALSE(PyUnicode_CheckExact(substr));
}

TEST_F(UnicodeExtensionApiTest, CheckWithUnrelatedTypeReturnsFalse) {
  PyObjectPtr pylong(PyLong_FromLong(10));
  EXPECT_FALSE(PyUnicode_Check(pylong));
  EXPECT_FALSE(PyUnicode_CheckExact(pylong));
}

// ---------------------------------------------------------------------------
// PyUnicode_DATA
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, DATAReturnsCStringContainingStrContents) {
  const char* cstr = "hello";
  PyObjectPtr str(PyUnicode_FromString(cstr));
  void* data = PyUnicode_DATA(str.get());
  EXPECT_STREQ(reinterpret_cast<char*>(data), cstr);
}

TEST_F(UnicodeExtensionApiTest, DATAReturnsSamePointer) {
  PyObjectPtr str(PyUnicode_FromString("hello"));
  void* p1 = PyUnicode_DATA(str.get());
  void* p2 = PyUnicode_DATA(str.get());
  EXPECT_EQ(p1, p2);
}

// ---------------------------------------------------------------------------
// PyUnicode_Format
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, FormatWithNullFormatRaisesBadInternalCall) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(nullptr, PyUnicode_Format(nullptr, str));
  ASSERT_NE(nullptr, PyErr_Occurred());
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
}

TEST_F(UnicodeExtensionApiTest, FormatWithNullArgsRaisesBadInternalCall) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(nullptr, PyUnicode_Format(str, nullptr));
  ASSERT_NE(nullptr, PyErr_Occurred());
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
}

TEST_F(UnicodeExtensionApiTest, FormatWithNonStrFormatRaisesTypeError) {
  PyObjectPtr format(PyLong_FromLong(10));
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(nullptr, PyUnicode_Format(format, str));
  ASSERT_NE(nullptr, PyErr_Occurred());
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest,
       FormatWithMismatchedFormatAndArgsRaisesTypeError) {
  PyObjectPtr str(PyUnicode_FromString("foo%s%s"));
  PyObjectPtr args(PyUnicode_FromString("bar"));
  EXPECT_EQ(nullptr, PyUnicode_Format(str, args));
  ASSERT_NE(nullptr, PyErr_Occurred());
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, FormatWithStrArgsReturnsStr) {
  PyObjectPtr str(PyUnicode_FromString("foo%s"));
  PyObjectPtr args(PyUnicode_FromString("bar"));
  PyObjectPtr result(PyUnicode_Format(str, args));
  EXPECT_NE(nullptr, result);
  EXPECT_EQ(nullptr, PyErr_Occurred());
  EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobar"));
}

TEST_F(UnicodeExtensionApiTest, FormatWithTupleArgsReturnsStr) {
  PyObjectPtr str(PyUnicode_FromString("foo%s%s"));
  // PyTuple_Pack does not steal references to its items, so the two strings
  // are owned here; passing the temporaries directly would leak them.
  PyObjectPtr bar(PyUnicode_FromString("bar"));
  PyObjectPtr baz(PyUnicode_FromString("baz"));
  PyObjectPtr args(PyTuple_Pack(2, bar.get(), baz.get()));
  PyObjectPtr result(PyUnicode_Format(str, args));
  EXPECT_NE(nullptr, result);
  EXPECT_EQ(nullptr, PyErr_Occurred());
  EXPECT_TRUE(isUnicodeEqualsCStr(result, "foobarbaz"));
}

// ---------------------------------------------------------------------------
// PyUnicode_FSDecoder
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, FSDecoderWithStrSetsString) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  PyObject* result;
  EXPECT_EQ(PyUnicode_FSDecoder(str, &result), Py_CLEANUP_SUPPORTED);
  EXPECT_TRUE(isUnicodeEqualsCStr(result, "foo"));
  // Second call with a null object; the test expects `result` to be reset.
  EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1);
  EXPECT_EQ(result, nullptr);
}

TEST_F(UnicodeExtensionApiTest, FSDecoderWithBytesSetsString) {
  const char bytes[] = "bar";
  PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1));
  PyObject* result;
  EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), Py_CLEANUP_SUPPORTED);
  EXPECT_TRUE(isUnicodeEqualsCStr(result, bytes));
  // Second call with a null object; the test expects `result` to be reset.
  EXPECT_EQ(PyUnicode_FSDecoder(nullptr, &result), 1);
  EXPECT_EQ(result, nullptr);
}

TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesValueError) {
  // sizeof - 1 keeps the embedded NUL but drops the terminating one.
  const char bytes[] = "foo\0bar";
  PyObjectPtr pybytes(PyBytes_FromStringAndSize(bytes, sizeof(bytes) - 1));
  PyObject* result;
  EXPECT_EQ(PyUnicode_FSDecoder(pybytes, &result), 0);
  EXPECT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

TEST_F(UnicodeExtensionApiTest, FSDecoderRaisesTypeError) {
  PyObjectPtr pyint(PyLong_FromLong(42));
  PyObject* result;
  EXPECT_EQ(PyUnicode_FSDecoder(pyint, &result), 0);
  EXPECT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

// ---------------------------------------------------------------------------
// PyUnicode_Find
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, FindWithNonStrSelfRaisesTypeError) {
  PyObject* self = Py_None;
  PyObjectPtr sub(PyUnicode_FromString("ll"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, FindWithNonStrSubRaisesTypeError) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  PyObject* sub = Py_None;
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -2);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, FindForwardReturnsLeftmostStartIndex) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  PyObjectPtr sub(PyUnicode_FromString("ll"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       FindForwardWithSubClassReturnsLeftmostStartIndex) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
substr = SubStr('hello')
)");
  PyObjectPtr self(mainModuleGet("substr"));
  PyObjectPtr sub(PyUnicode_FromString("ll"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), 2);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindReturnsNegativeOneWithNonexistentSubstr) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  PyObjectPtr sub(PyUnicode_FromString("xx"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, 1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       FindReverseReturnsNegativeOneWithNonexistentSubstr) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  PyObjectPtr sub(PyUnicode_FromString("xx"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 5, -1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindReverseReturnsRightmostStartIndex) {
  PyObjectPtr self(PyUnicode_FromString("helloll"));
  PyObjectPtr sub(PyUnicode_FromString("ll"));
  EXPECT_EQ(PyUnicode_Find(self, sub, 0, 7, -1), 5);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// ---------------------------------------------------------------------------
// PyUnicode_FindChar
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeStartSearchesFromEnd) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  EXPECT_EQ(4, PyUnicode_FindChar(self, Py_UCS4{'o'}, -2, 5, 1));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithNegativeEndSearchesFromEnd) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  EXPECT_EQ(1, PyUnicode_FindChar(self, Py_UCS4{'e'}, 0, -3, 1));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       FindCharWithExistentCharEndGreaterThanLengthClipsEnd) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'h';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       FindCharWithNonExistentCharEndGreaterThanLengthClipsEnd) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'q';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 100, 1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharFindsChar) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'h';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithStrSubClassReturnsLeftmostIndex) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
substr = SubStr('hello')
)");
  PyObjectPtr self(mainModuleGet("substr"));
  Py_UCS4 ch = 'h';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharFindsCharInMiddleOfString) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'l';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), 2);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharReverseFindsCharInMiddleOfString) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'l';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, -1), 3);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithNonExistentCharDoesNotFindChar) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'q';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 5, 1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithCharBeforeWindowDoesNotFindChar) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'h';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 2, 5, 1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithCharAfterWindowDoesNotFindChar) {
  PyObjectPtr self(PyUnicode_FromString("hello"));
  Py_UCS4 ch = 'o';
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FindCharWithUnicodeCharFindsChar) {
  PyObjectPtr self(PyUnicode_FromString("h\u00e9llo"));
  Py_UCS4 ch = 0xE9;
  EXPECT_EQ(PyUnicode_FindChar(self, ch, 0, 3, 1), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// ---------------------------------------------------------------------------
// PyUnicode_FromStringAndSize
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesEmptyString) {
  PyObjectPtr pyuni(PyUnicode_FromStringAndSize(nullptr, 0));
  EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, ""));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSizedString) {
  const char* str = "Some string";
  PyObjectPtr pyuni(PyUnicode_FromStringAndSize(str, 11));
  EXPECT_TRUE(isUnicodeEqualsCStr(pyuni, str));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FromStringAndSizeCreatesSmallerString) {
  PyObjectPtr str(PyUnicode_FromStringAndSize("1234567890", 5));
  EXPECT_TRUE(isUnicodeEqualsCStr(str, "12345"));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, FromStringAndSizeFailsNegSize) {
  PyObjectPtr pyuni(PyUnicode_FromStringAndSize("a", -1));
  ASSERT_EQ(pyuni, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError));
}

TEST_F(UnicodeExtensionApiTest, FromStringAndSizeIncrementsRefCount) {
  PyObject* pyuni = PyUnicode_FromStringAndSize("Some string", 11);
  ASSERT_NE(pyuni, nullptr);
  EXPECT_GE(Py_REFCNT(pyuni), 1);
  Py_DECREF(pyuni);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// ---------------------------------------------------------------------------
// PyUnicode_READ / PyUnicode_READ_CHAR
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, READWithOneByteKindReturnsCharAtIndex) {
  const char* str = "foo";
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 0), Py_UCS4{'f'});
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 1), Py_UCS4{'o'});
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, str, 2), Py_UCS4{'o'});
}

TEST_F(UnicodeExtensionApiTest, READWithTwoByteKindReturnsCharAtIndex) {
  const char* str = "quux";
  // This assumes little-endian architecture. No static assert because we can't
  // include that enum and macro in these tests.
  EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 0),
            Py_UCS4{0x7571});  // qu
  EXPECT_EQ(PyUnicode_READ(PyUnicode_2BYTE_KIND, str, 1),
            Py_UCS4{0x7875});  // ux
}

TEST_F(UnicodeExtensionApiTest, READWithFourByteKindReturnsCharAtIndex) {
  const char* str = "quux";
  // This assumes little-endian architecture. No static assert because we can't
  // include that enum and macro in these tests.
  EXPECT_EQ(PyUnicode_READ(PyUnicode_4BYTE_KIND, str, 0), Py_UCS4{0x78757571});
}

TEST_F(UnicodeExtensionApiTest, READCHARReturnsCharAtIndex) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{'f'});
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'o'});
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 2), Py_UCS4{'o'});
  // Index == length: the test expects a NUL to be read back.
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 3), Py_UCS4{'\0'});
}

TEST_F(UnicodeExtensionApiTest, READCHARReturnsUnicodeCodePointAtIndex) {
  PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88"));
  EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1);
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 0), Py_UCS4{0x10348});
  EXPECT_EQ(PyUnicode_READ_CHAR(str.get(), 1), Py_UCS4{'\0'});

  PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me"));
  EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5);
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 0), Py_UCS4{'c'});
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 1), Py_UCS4{'r'});
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 2), Py_UCS4{0xE9});
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 3), Py_UCS4{'m'});
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 4), Py_UCS4{'e'});
  EXPECT_EQ(PyUnicode_READ_CHAR(dessert.get(), 5), Py_UCS4{'\0'});
}

TEST_F(UnicodeExtensionApiTest, READReadsCharsFromDATA) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  void* data = PyUnicode_DATA(str.get());
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 0), Py_UCS4{'f'});
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 1), Py_UCS4{'o'});
  EXPECT_EQ(PyUnicode_READ(PyUnicode_1BYTE_KIND, data, 2), Py_UCS4{'o'});
}
// ---------------------------------------------------------------------------
// PyUnicode_ReadChar
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, ReadCharReturnsCharAtIndex) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{'f'});
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{'o'});
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 2), Py_UCS4{'o'});
}

TEST_F(UnicodeExtensionApiTest, ReadCharReturnsUnicodeCodePointAtIndex) {
  PyObjectPtr str(PyUnicode_FromString("\xF0\x90\x8D\x88"));
  EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), 1);
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 0), Py_UCS4{0x10348});
  // Reading at index == length is expected to fail with IndexError.
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 1), Py_UCS4{0xFFFFFFFF});
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
  PyErr_Clear();

  PyObjectPtr dessert(PyUnicode_FromString("cr\xc3\xa9me"));
  EXPECT_EQ(PyUnicode_GET_LENGTH(dessert.get()), 5);
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 0), Py_UCS4{'c'});
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 1), Py_UCS4{'r'});
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 2), Py_UCS4{0xE9});
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 3), Py_UCS4{'m'});
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 4), Py_UCS4{'e'});
  EXPECT_EQ(PyUnicode_ReadChar(dessert.get(), 5), Py_UCS4{0xFFFFFFFF});
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
  PyErr_Clear();
}

TEST_F(UnicodeExtensionApiTest, ReadCharWithNonStrRaisesTypeError) {
  PyObjectPtr list(PyList_New(3));
  EXPECT_EQ(PyUnicode_ReadChar(list.get(), 0), Py_UCS4{0xFFFFFFFF});
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, ReadCharWithOutOfBoundIndexRaisesIndexError) {
  PyObjectPtr str(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_ReadChar(str.get(), 3), Py_UCS4{0xFFFFFFFF});
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError));
}

// ---------------------------------------------------------------------------
// PyUnicode_READY
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, ReadyReturnsZero) {
  PyObject* pyunicode = PyUnicode_FromString("some string");
  int is_ready = PyUnicode_READY(pyunicode);
  EXPECT_EQ(0, is_ready);
  Py_DECREF(pyunicode);
}

// ---------------------------------------------------------------------------
// PyUnicode_Replace
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, ReplaceWithStrOfNonStringTypeReturnsNull) {
  PyObjectPtr non_str(PyBool_FromLong(1));
  PyObjectPtr substr(PyUnicode_FromString("some string"));
  PyObjectPtr replstr(PyUnicode_FromString("some string"));
  EXPECT_EQ(PyUnicode_Replace(non_str, substr, replstr, -1), nullptr);
  EXPECT_NE(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, ReplaceWithSubstrOfNonStringTypeReturnsNull) {
  PyObjectPtr non_str(PyBool_FromLong(1));
  PyObjectPtr str(PyUnicode_FromString("some string"));
  PyObjectPtr replstr(PyUnicode_FromString("some string"));
  EXPECT_EQ(PyUnicode_Replace(str, non_str, replstr, -1), nullptr);
  EXPECT_NE(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, ReplaceWithReplstrOfNonStringTypeReturnsNull) {
  PyObjectPtr non_str(PyBool_FromLong(1));
  PyObjectPtr str(PyUnicode_FromString("some string"));
  PyObjectPtr substr(PyUnicode_FromString("some string"));
  EXPECT_EQ(PyUnicode_Replace(str, substr, non_str, -1), nullptr);
  EXPECT_NE(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       ReplaceWithStrSubclassReturnStrWithSameContent) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
subclass_instance = SubStr("hello world!")
)");
  PyObjectPtr subclass_instance(mainModuleGet("subclass_instance"));
  // "some string" does not occur in the input, so nothing is replaced.
  PyObjectPtr substr(PyUnicode_FromString("some string"));
  PyObjectPtr replstr(PyUnicode_FromString("some string"));
  PyObjectPtr result(PyUnicode_Replace(subclass_instance, substr, replstr, -1));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyUnicode_CheckExact(result));
  EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world!"));
}

TEST_F(UnicodeExtensionApiTest,
       ReplaceWithNegativeMaxcountReturnsResultReplacingAllSubstr) {
  PyObjectPtr str(PyUnicode_FromString("22122122122122122"));
  PyObjectPtr substr(PyUnicode_FromString("22"));
  PyObjectPtr replstr(PyUnicode_FromString("*"));
  PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*"));
  PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1));
  EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       ReplaceWithSubClassAndNegativeMaxcountReturnsResultReplacingAllSubstr) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
str_val = SubStr("22122122122122122")
substr = SubStr("22")
replstr = SubStr("*")
)");
  PyObjectPtr str(mainModuleGet("str_val"));
  PyObjectPtr substr(mainModuleGet("substr"));
  PyObjectPtr replstr(mainModuleGet("replstr"));
  PyObjectPtr expected(PyUnicode_FromString("*1*1*1*1*1*"));
  PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, -1));
  EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest,
       ReplaceWithLimitedMaxcountReturnsResultReplacingUpToMaxcount) {
  PyObjectPtr str(PyUnicode_FromString("22122122122122122"));
  PyObjectPtr substr(PyUnicode_FromString("22"));
  PyObjectPtr replstr(PyUnicode_FromString("*"));
  PyObjectPtr expected(PyUnicode_FromString("*1*1*122122122"));
  PyObjectPtr actual(PyUnicode_Replace(str, substr, replstr, 3));
  EXPECT_EQ(_PyUnicode_EQ(actual, expected), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// ---------------------------------------------------------------------------
// PyUnicode_Compare
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, Compare) {
  // Owned through PyObjectPtr, matching CompareWithSubClass, so no manual
  // Py_DECREF bookkeeping is needed.
  PyObjectPtr s1(PyUnicode_FromString("some string"));
  PyObjectPtr s2(PyUnicode_FromString("some longer string"));
  PyObjectPtr s22(PyUnicode_FromString("some longer string"));

  int result = PyUnicode_Compare(s1, s2);
  EXPECT_EQ(result, 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);

  result = PyUnicode_Compare(s2, s1);
  EXPECT_EQ(result, -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);

  result = PyUnicode_Compare(s2, s22);
  EXPECT_EQ(result, 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, CompareWithSubClass) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
substr = SubStr("some string")
)");
  PyObjectPtr s1(mainModuleGet("substr"));
  PyObjectPtr s2(PyUnicode_FromString("some longer string"));
  PyObjectPtr s22(PyUnicode_FromString("some longer string"));

  int result = PyUnicode_Compare(s1, s2);
  EXPECT_EQ(result, 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);

  result = PyUnicode_Compare(s2, s1);
  EXPECT_EQ(result, -1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);

  result = PyUnicode_Compare(s2, s22);
  EXPECT_EQ(result, 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, CompareBadInput) {
  // Owned through PyObjectPtr: a failing ASSERT_* returns from the test body
  // early, which would skip trailing Py_DECREF calls and leak both objects.
  PyObjectPtr str_obj(PyUnicode_FromString("this is a string"));
  PyObjectPtr int_obj(PyLong_FromLong(1234));

  PyUnicode_Compare(str_obj, int_obj);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
  PyErr_Clear();

  PyUnicode_Compare(int_obj, str_obj);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
  PyErr_Clear();

  PyUnicode_Compare(int_obj, int_obj);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
  PyErr_Clear();
}

// ---------------------------------------------------------------------------
// _PyUnicode_EQ
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, EqSameLength) {
  PyObject* str1 = PyUnicode_FromString("some string");

  PyObject* str2 = PyUnicode_FromString("some other string");
  EXPECT_EQ(_PyUnicode_EQ(str1, str2), 0);
  EXPECT_EQ(_PyUnicode_EQ(str2, str1), 0);
  Py_DECREF(str2);

  PyObject* str3 = PyUnicode_FromString("some string");
  EXPECT_EQ(_PyUnicode_EQ(str1, str3), 1);
  EXPECT_EQ(_PyUnicode_EQ(str3, str1), 1);
  Py_DECREF(str3);
  Py_DECREF(str1);
}

TEST_F(UnicodeExtensionApiTest, EqWithSubClassSameLength) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
substr = SubStr("some string")
)");
  PyObjectPtr str(mainModuleGet("substr"));
  PyObjectPtr str1(PyUnicode_FromString("some string"));
  EXPECT_EQ(_PyUnicode_EQ(str1.get(), str.get()), 1);

  PyObjectPtr str2(PyUnicode_FromString("some other string"));
  EXPECT_EQ(_PyUnicode_EQ(str2.get(), str.get()), 0);
}

TEST_F(UnicodeExtensionApiTest, EqDifferentLength) {
  PyObject* small = PyUnicode_FromString("123");
  PyObject* large = PyUnicode_FromString("1234567890");
  EXPECT_EQ(_PyUnicode_EQ(small,
large), 0); EXPECT_EQ(_PyUnicode_EQ(large, small), 0); Py_DECREF(large); Py_DECREF(small); } TEST_F(UnicodeExtensionApiTest, EqualToASCIIString) { PyObject* unicode = PyUnicode_FromString("here's another string"); EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string")); EXPECT_FALSE( _PyUnicode_EqualToASCIIString(unicode, "here is another string")); Py_DECREF(unicode); } TEST_F(UnicodeExtensionApiTest, EqualToASCIIStringWithSubClass) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr("here's another string") )"); PyObjectPtr unicode(mainModuleGet("substr")); EXPECT_TRUE(_PyUnicode_EqualToASCIIString(unicode, "here's another string")); EXPECT_FALSE( _PyUnicode_EqualToASCIIString(unicode, "here is another string")); } TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCIINul) { PyObjectPtr pyunicode(PyUnicode_FromStringAndSize("large\0st", 8)); // Less EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "largz"), -1); // Greater EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large"), 1); } TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringASCII) { PyObjectPtr pyunicode(PyUnicode_FromString("large string")); // Equal EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large string"), 0); // Less EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strings"), -1); EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large tbigger"), -1); // Greater EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large strin"), 1); EXPECT_EQ(PyUnicode_CompareWithASCIIString(pyunicode, "large smaller"), 1); } TEST_F(UnicodeExtensionApiTest, CompareWithASCIIStringWithSubClass) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr("large string") )"); PyObjectPtr substr(mainModuleGet("substr")); // Equal EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large string"), 0); // Less EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strings"), -1); EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, 
"large tbigger"), -1); // Greater EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large strin"), 1); EXPECT_EQ(PyUnicode_CompareWithASCIIString(substr, "large smaller"), 1); } TEST_F(UnicodeExtensionApiTest, GetLengthWithEmptyStrReturnsZero) { PyObjectPtr str(PyUnicode_FromString("")); Py_ssize_t expected = 0; EXPECT_EQ(PyUnicode_GetLength(str), expected); EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); } TEST_F(UnicodeExtensionApiTest, GetLengthWithNonEmptyString) { PyObjectPtr str(PyUnicode_FromString("foo")); Py_ssize_t expected = 3; EXPECT_EQ(PyUnicode_GetLength(str), expected); EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); } TEST_F(UnicodeExtensionApiTest, GetLengthWithSubClassOfNonEmptyString) { PyRun_SimpleString(R"( class SubStr(str): pass substr = SubStr('foo') )"); PyObjectPtr str(mainModuleGet("substr")); Py_ssize_t expected = 3; EXPECT_EQ(PyUnicode_GetLength(str), expected); EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); } TEST_F(UnicodeExtensionApiTest, GetLengthWithUTF8ReturnsCodePointLength) { PyObjectPtr str(PyUnicode_FromString("\xc3\xa9")); Py_ssize_t expected = 1; EXPECT_EQ(PyUnicode_GetLength(str), expected); EXPECT_EQ(PyUnicode_GET_LENGTH(str.get()), expected); EXPECT_EQ(PyUnicode_GET_SIZE(str.get()), expected); } TEST_F(UnicodeExtensionApiTest, GetLengthWithNonStrRaisesTypeError) { PyObjectPtr list(PyList_New(3)); EXPECT_EQ(PyUnicode_GetLength(list), -1); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, GetSizeWithNonStrRaisesTypeError) { PyObjectPtr list(PyList_New(3)); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" EXPECT_EQ(PyUnicode_GetSize(list), -1); #pragma GCC diagnostic pop ASSERT_NE(PyErr_Occurred(), nullptr); 
EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, GetSizeWithStrReturnsLength) { PyObjectPtr unicode(PyUnicode_FromString("abc")); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" EXPECT_EQ(PyUnicode_GetSize(unicode), 3); #pragma GCC diagnostic pop EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, FromUnicodeWithASCIIReturnsString) { PyObjectPtr unicode(PyUnicode_FromUnicode(L"abc123-", 7)); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-")); } TEST_F(UnicodeExtensionApiTest, FromUnicodeWithNullBufferAbortsPyro) { EXPECT_DEATH(PyUnicode_FromUnicode(nullptr, 2), "unimplemented: _PyUnicode_New"); } TEST_F(UnicodeExtensionApiTest, FromOrdinalWithNegativeCodePointRaisesValueError) { EXPECT_EQ(PyUnicode_FromOrdinal(-1), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); } TEST_F(UnicodeExtensionApiTest, FromOrdinalWithHugeCodePointRaisesValueError) { EXPECT_EQ(PyUnicode_FromOrdinal(0xFFFFFFFF), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); } TEST_F(UnicodeExtensionApiTest, FromOrdinalWithValidCodePointReturnsString) { PyObjectPtr str(PyUnicode_FromOrdinal(1488)); EXPECT_EQ(PyErr_Occurred(), nullptr); EXPECT_NE(str, nullptr); ASSERT_TRUE(PyUnicode_Check(str)); EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xD7\x90"); } TEST_F(UnicodeExtensionApiTest, FromWideCharWithNullBufferAndZeroSizeReturnsEmpty) { PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 0)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_Check(empty)); EXPECT_EQ(PyUnicode_GetLength(empty), 0); } TEST_F(UnicodeExtensionApiTest, FromWideCharWithNullBufferReturnsError) { PyObjectPtr empty(PyUnicode_FromWideChar(nullptr, 1)); ASSERT_EQ(empty, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, 
FromWideCharWithUnknownSizeReturnsString) { PyObjectPtr unicode(PyUnicode_FromWideChar(L"abc123-", -1)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123-")); } TEST_F(UnicodeExtensionApiTest, FromWideCharWithGivenSizeReturnsString) { PyObjectPtr unicode(PyUnicode_FromWideChar(L"abc123-", 6)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "abc123")); } TEST_F(UnicodeExtensionApiTest, FromWideCharWithBufferAndZeroSizeReturnsEmpty) { PyObjectPtr empty(PyUnicode_FromWideChar(L"abc", 0)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_Check(empty)); EXPECT_EQ(PyUnicode_GetLength(empty), 0); } TEST_F(UnicodeExtensionApiTest, DecodeWithNullEncodingReturnsUTF8) { const char* str = "utf-8 \xc3\xa8"; PyObjectPtr uni(PyUnicode_Decode(str, 8, nullptr, nullptr)); ASSERT_TRUE(PyUnicode_CheckExact(uni)); EXPECT_STREQ(PyUnicode_AsUTF8(uni), str); } TEST_F(UnicodeExtensionApiTest, DecodeASCIIUsesErrorHandler) { PyObjectPtr uni(PyUnicode_Decode("non\xc3\xa8-ascii", 11, "ascii", "ignore")); ASSERT_TRUE(PyUnicode_CheckExact(uni)); EXPECT_STREQ(PyUnicode_AsUTF8(uni), "non-ascii"); } TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsLatin1) { PyObjectPtr uni(PyUnicode_Decode("latin-1 \xe8", 9, "latin-1", nullptr)); ASSERT_TRUE(PyUnicode_CheckExact(uni)); EXPECT_STREQ(PyUnicode_AsUTF8(uni), "latin-1 \xc3\xa8"); } TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultCreatesString) { PyObjectPtr unicode(PyUnicode_DecodeFSDefault("hello")); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello")); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultAndSizeReturnsString) { PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 5)); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello")); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, DecodeFSDefaultAndSizeWithSmallerSizeReturnsString) { PyObjectPtr unicode(PyUnicode_DecodeFSDefaultAndSize("hello", 2)); 
EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "he")); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, DecodeASCIIReturnsString) { PyObjectPtr str(PyUnicode_DecodeASCII("hello world", 11, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); } TEST_F(UnicodeExtensionApiTest, DecodeLatin1ReturnsString) { const char* c_str = "\xBFhello world?"; PyObjectPtr str(PyUnicode_DecodeLatin1(c_str, std::strlen(c_str), nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_EQ(PyUnicode_CheckExact(str), 1); EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xC2\xBFhello world?"); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterPrepareWithLenZeroReturnsZero) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 0, 127), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWithOverallocateSetOverallocates) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); writer.overallocate = 0; ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0); ASSERT_EQ(writer.size, 5); _PyUnicodeWriter_Dealloc(&writer); _PyUnicodeWriter_Init(&writer); writer.overallocate = 1; ASSERT_EQ(_PyUnicodeWriter_Prepare(&writer, 5, 127), 0); ASSERT_GT(writer.size, 5); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterCreatesEmptyString) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr empty(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_Check(empty)); EXPECT_EQ(PyUnicode_GetLength(empty), 0); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesASCIIStrings) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); 
ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello", 5), 0); ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, " world", 6), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); } TEST_F(UnicodeExtensionApiTest, WriteASCIIStringWithNegativeLengthReturnsString) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); ASSERT_EQ(_PyUnicodeWriter_WriteASCIIString(&writer, "hello world", -1), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); } TEST_F(UnicodeExtensionApiTest, WriteASCIIStringWithNonASCIIDeathTestPyro) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); EXPECT_DEATH(_PyUnicodeWriter_WriteASCIIString(&writer, "\xA0", 1), "_PyUnicodeWriter_WriteASCIIString only takes ASCII"); _PyUnicodeWriter_Dealloc(&writer); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesChars) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'a'), 0); ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 0xA0), 0); ASSERT_EQ(_PyUnicodeWriter_WriteChar(&writer, 'g'), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); PyObjectPtr test(PyUnicode_FromString("a\xc2\xa0g")); EXPECT_TRUE(_PyUnicode_EQ(unicode, test)); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWritesLatin1String) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, "hello\xA0", 6), 0); ASSERT_EQ(_PyUnicodeWriter_WriteLatin1String(&writer, " world", 6), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); PyObjectPtr test(PyUnicode_FromString("hello\xc2\xa0 world")); EXPECT_TRUE(_PyUnicode_EQ(unicode, test)); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteStrWritesStringObject) { 
_PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr hello_str(PyUnicode_FromString("hello")); PyObjectPtr world_str(PyUnicode_FromString(" \xf0\x9f\x90\x8d world")); ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0); ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello \xf0\x9f\x90\x8d world")); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteStrWithSubClassWritesStringObject) { PyRun_SimpleString(R"( class SubStr(str): pass hello_str = SubStr("hello") world_str = SubStr(" world") )"); _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr hello_str(mainModuleGet("hello_str")); PyObjectPtr world_str(mainModuleGet("world_str")); ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, hello_str), 0); ASSERT_EQ(_PyUnicodeWriter_WriteStr(&writer, world_str), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteSubstringWritesSubStringObject) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr str(PyUnicode_FromString("hello \xf0\x9f\x90\x8d world")); ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 8, 13), 0); ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 8), 0); ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "world \xf0\x9f\x90\x8d hello")); } TEST_F(UnicodeExtensionApiTest, PyUnicodeWriterWriteSubstringWithSubClassWritesSubStringObject) { PyRun_SimpleString(R"( class SubStr(str): pass str_value = SubStr("hello world") )"); _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr str(mainModuleGet("str_value")); 
ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 5), 0); ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 5, 11), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "hello world")); } TEST_F(UnicodeExtensionApiTest, WriteSubstringWithZeroEndReturnsString) { _PyUnicodeWriter writer; _PyUnicodeWriter_Init(&writer); PyObjectPtr str(PyUnicode_FromString("hello")); ASSERT_EQ(_PyUnicodeWriter_WriteSubstring(&writer, str, 0, 0), 0); PyObjectPtr unicode(_PyUnicodeWriter_Finish(&writer)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(unicode, "")); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8ReturnsString) { PyObjectPtr str(PyUnicode_DecodeUTF8("hello world", 11, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8WithUnfinishedBytesRaisesUnicodeDecodeError) { EXPECT_EQ(PyUnicode_DecodeUTF8("hello world\xC3", 12, nullptr), nullptr); PyObject *exc, *value, *tb; PyErr_Fetch(&exc, &value, &tb); ASSERT_NE(exc, nullptr); ASSERT_TRUE(PyErr_GivenExceptionMatches(exc, PyExc_UnicodeDecodeError)); Py_ssize_t temp; PyObjectPtr msg(PyUnicodeDecodeError_GetReason(value)); EXPECT_TRUE(_PyUnicode_EqualToASCIIString(msg, "unexpected end of data")); PyUnicodeDecodeError_GetStart(value, &temp); EXPECT_EQ(temp, 11); PyUnicodeDecodeError_GetEnd(value, &temp); EXPECT_EQ(temp, 12); Py_XDECREF(exc); Py_XDECREF(value); Py_XDECREF(tb); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8StatefulReturnsString) { Py_ssize_t consumed; PyObjectPtr str( PyUnicode_DecodeUTF8Stateful("hello world", 11, nullptr, &consumed)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); EXPECT_EQ(consumed, 11); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8StatefulWithUnfinishedBytesReturnsString) { Py_ssize_t consumed; PyObjectPtr str( 
PyUnicode_DecodeUTF8Stateful("hello world\xC3", 12, nullptr, &consumed)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); EXPECT_EQ(consumed, 11); } TEST_F(UnicodeExtensionApiTest, DecodeUnicodeEscapeReturnsString) { PyObjectPtr str( PyUnicode_DecodeUnicodeEscape("hello \\\nworld", 13, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); } TEST_F(UnicodeExtensionApiTest, UnderDecodeUnicodeEscapeReturnsFirstInvalid) { const char* invalid; PyObjectPtr str( _PyUnicode_DecodeUnicodeEscape("hello \\yworld", 13, nullptr, &invalid)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello \\yworld")); EXPECT_EQ(*invalid, 'y'); } TEST_F(UnicodeExtensionApiTest, UnderDecodeUnicodeEscapeSetsFirstInvalidEscapeToNull) { const char* invalid = reinterpret_cast<const char*>(0x100); PyObjectPtr result( _PyUnicode_DecodeUnicodeEscape("hello", 5, nullptr, &invalid)); EXPECT_NE(result, nullptr); EXPECT_EQ(PyErr_Occurred(), nullptr); EXPECT_EQ(invalid, nullptr); } TEST_F(UnicodeExtensionApiTest, FromFormatWithNoArgsReturnsString) { PyObjectPtr str(PyUnicode_FromFormat("hello world")); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world")); } TEST_F(UnicodeExtensionApiTest, FromFormatWithManyArgsReturnsString) { PyObjectPtr str(PyUnicode_FromFormat("h%c%s%%%2.i", 'e', "llo world", 2)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello world% 2")); } TEST_F(UnicodeExtensionApiTest, FromFormatParsesNumberTypes) { { PyObjectPtr str(PyUnicode_FromFormat("%x", 123)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "7b")); } { PyObjectPtr str(PyUnicode_FromFormat("%d", 124)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "124")); } { PyObjectPtr str(PyUnicode_FromFormat("%i", 125)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "125")); } { PyObjectPtr str(PyUnicode_FromFormat("%ld", 126)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "126")); } { PyObjectPtr str(PyUnicode_FromFormat("%li", 127)); 
EXPECT_TRUE(isUnicodeEqualsCStr(str, "127")); } { PyObjectPtr str(PyUnicode_FromFormat("%lld", 128)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "128")); } { PyObjectPtr str(PyUnicode_FromFormat("%lli", 129)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "129")); } { PyObjectPtr str(PyUnicode_FromFormat("%u", 130)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "130")); } { PyObjectPtr str(PyUnicode_FromFormat("%lu", 131)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "131")); } { PyObjectPtr str(PyUnicode_FromFormat("%llu", 132)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "132")); } { PyObjectPtr str(PyUnicode_FromFormat("%zd", 133)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "133")); } { PyObjectPtr str(PyUnicode_FromFormat("%zu", 134)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "134")); } { PyObjectPtr str(PyUnicode_FromFormat("%zi", 135)); EXPECT_TRUE(isUnicodeEqualsCStr(str, "135")); } } TEST_F(UnicodeExtensionApiTest, FromFormatParsesCharacters) { PyObjectPtr str(PyUnicode_FromFormat("%c%c", 'h', 'w')); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hw")); } TEST_F(UnicodeExtensionApiTest, FromFormatParsesPointer) { long value = 0; void* test = &value; char buff[18]; std::snprintf(buff, 18, "%p", test); PyObjectPtr str(PyUnicode_FromFormat("%p", test)); EXPECT_TRUE(isUnicodeEqualsCStr(str, buff)); } TEST_F(UnicodeExtensionApiTest, FromFormatParsesString) { PyObjectPtr str(PyUnicode_FromFormat("%s", "UTF-8")); EXPECT_TRUE(isUnicodeEqualsCStr(str, "UTF-8")); } TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObject) { PyObjectPtr unicode(PyUnicode_FromString("hello")); PyObjectPtr str(PyUnicode_FromFormat("%U", static_cast<PyObject*>(unicode))); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello")); } TEST_F(UnicodeExtensionApiTest, FromFormatParsesStringObjectAndString) { PyObjectPtr unicode(PyUnicode_FromString("hello")); PyObjectPtr str( PyUnicode_FromFormat("%V", static_cast<PyObject*>(unicode), "world")); EXPECT_TRUE(isUnicodeEqualsCStr(str, "hello")); } TEST_F(UnicodeExtensionApiTest, 
FromFormatParsesNullAndString) { PyObjectPtr str(PyUnicode_FromFormat("%V", nullptr, "world")); EXPECT_TRUE(isUnicodeEqualsCStr(str, "world")); } TEST_F(UnicodeExtensionApiTest, ConcatWithNonStringFails) { PyObjectPtr i(PyLong_FromLong(1)); EXPECT_EQ(PyUnicode_Concat(i, i), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, ConcatWithEmptyArgumentReturnsString) { PyObjectPtr hello(PyUnicode_FromString("hello")); PyObjectPtr empty(PyUnicode_FromString("")); PyObjectPtr empty_right(PyUnicode_Concat(hello, empty)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(empty_right, "hello")); PyObjectPtr empty_left(PyUnicode_Concat(empty, hello)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(empty_left, "hello")); } TEST_F(UnicodeExtensionApiTest, ConcatWithTwoStringsReturnsString) { PyObjectPtr hello(PyUnicode_FromString("hello ")); PyObjectPtr world(PyUnicode_FromString("world")); PyObjectPtr result(PyUnicode_Concat(hello, world)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(result, "hello world")); } TEST_F(UnicodeExtensionApiTest, AppendWithNullFails) { PyUnicode_Append(nullptr, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, AppendWithNonStringFails) { PyObject* not_str = PyLong_FromLong(1); PyUnicode_Append(&not_str, not_str); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, AppendWithEmptyArgumentReturnsString) { PyObject* hello(PyUnicode_FromString("hello")); PyObject* empty(PyUnicode_FromString("")); PyUnicode_Append(&hello, empty); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello")); PyUnicode_Append(&empty, hello); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(empty, "hello")); 
Py_DECREF(hello); Py_DECREF(empty); } TEST_F(UnicodeExtensionApiTest, AppendWithTwoStringsReturnsString) { PyObject* hello = PyUnicode_FromString("hello "); PyObjectPtr world(PyUnicode_FromString("world")); PyUnicode_Append(&hello, world); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world")); Py_DECREF(hello); } TEST_F(UnicodeExtensionApiTest, AppendAndDelWithStringDecreasesRefcnt) { PyObject* hello = PyUnicode_FromString("hello "); PyObject* world = PyUnicode_FromString("world"); Py_INCREF(world); Py_ssize_t original_refcnt = Py_REFCNT(world); PyUnicode_AppendAndDel(&hello, world); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(isUnicodeEqualsCStr(hello, "hello world")); Py_DECREF(hello); EXPECT_LT(Py_REFCNT(world), original_refcnt); Py_DECREF(world); } TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultWithNonStringReturnsNull) { PyObjectPtr bytes(PyUnicode_EncodeFSDefault(Py_None)); EXPECT_EQ(bytes, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, EncodeFSDefaultReturnsBytes) { PyObjectPtr unicode(PyUnicode_FromString("foo")); PyObjectPtr bytes(PyUnicode_EncodeFSDefault(unicode)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_EQ(PyBytes_Size(bytes), 3); EXPECT_STREQ(PyBytes_AsString(bytes), "foo"); } TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithEmbeddedNulRaisesValueError) { PyObjectPtr nul_str(PyUnicode_FromStringAndSize("a\0b", 3)); PyObject* bytes = PyUnicode_EncodeLocale(nul_str, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_EQ(bytes, nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); } TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithUnknownErrorHandlerNameRaisesValueError) { PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3)); PyObject* bytes = PyUnicode_EncodeLocale(str, "nonexistant"); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_EQ(bytes, nullptr); 
EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); } TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithStrReturnsBytes) { PyObjectPtr str(PyUnicode_FromStringAndSize("abc", 3)); PyObjectPtr bytes(PyUnicode_EncodeLocale(str, nullptr)); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_STREQ(PyBytes_AsString(bytes), "abc"); } TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithStrictErrorsAndSurrogatesRaisesError) { PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape")); PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "strict")); ASSERT_NE(PyErr_Occurred(), nullptr); ASSERT_EQ(bytes, nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)); } TEST_F(UnicodeExtensionApiTest, EncodeLocaleWithSurrogateescapeAndSurrogatesReturnsBytes) { PyObjectPtr str(PyUnicode_DecodeLocale("abc\x80", "surrogateescape")); PyObjectPtr bytes(PyUnicode_EncodeLocale(str, "surrogateescape")); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyBytes_Check(bytes)); EXPECT_STREQ(PyBytes_AsString(bytes), "abc\x80"); } TEST_F(UnicodeExtensionApiTest, FSConverterWithNullSetAddrToNull) { PyObject* result = PyLong_FromLong(1); ASSERT_EQ(PyUnicode_FSConverter(nullptr, &result), 1); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_EQ(result, nullptr); } TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesReturnsBytes) { PyObjectPtr bytes(PyBytes_FromString("foo")); PyObject* result = nullptr; ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), Py_CLEANUP_SUPPORTED); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_NE(result, nullptr); EXPECT_TRUE(PyBytes_Check(result)); Py_DECREF(result); } TEST_F(UnicodeExtensionApiTest, FSConverterWithUnicodeReturnsBytes) { PyObjectPtr unicode(PyUnicode_FromString("foo")); PyObject* result = nullptr; ASSERT_EQ(PyUnicode_FSConverter(unicode, &result), Py_CLEANUP_SUPPORTED); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_NE(result, nullptr); EXPECT_TRUE(PyBytes_Check(result)); Py_DECREF(result); } 
// ---------------------------------------------------------------------------
// PyUnicode_FSConverter (continued)
// ---------------------------------------------------------------------------

// An object that is neither str nor bytes is converted through its
// __fspath__ method; the converter still hands back a bytes object.
TEST_F(UnicodeExtensionApiTest, FSConverterCallsDunderFspath) {
  PyRun_SimpleString(R"(
class C:
  def __fspath__(self):
    return "foo"
foo = C()
)");
  PyObjectPtr path(mainModuleGet("foo"));
  PyObject* result = nullptr;
  ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED);
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyBytes_Check(result));
  Py_DECREF(result);
}

// A bytes subclass instance is passed through as the very same object.
TEST_F(UnicodeExtensionApiTest, FSConverterWithBytesSubclassReturnsSubclass) {
  PyRun_SimpleString(R"(
class C(bytes):
  pass
foo = C()
)");
  PyObjectPtr path(mainModuleGet("foo"));
  PyObject* result = nullptr;
  ASSERT_EQ(PyUnicode_FSConverter(path, &result), Py_CLEANUP_SUPPORTED);
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyBytes_Check(result));
  EXPECT_EQ(result, path);
  Py_DECREF(result);
}

TEST_F(UnicodeExtensionApiTest, FSConverterWithEmbeddedNullRaisesValueError) {
  PyObjectPtr bytes(PyBytes_FromStringAndSize("foo \0 bar", 9));
  PyObject* result = nullptr;
  ASSERT_EQ(PyUnicode_FSConverter(bytes, &result), 0);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  ASSERT_EQ(result, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

// ---------------------------------------------------------------------------
// PyUnicode_InternInPlace / InternFromString
// ---------------------------------------------------------------------------

// Two distinct objects with equal contents: after `a` is interned, interning
// `b` must rewrite the caller's pointer to something other than its original
// address.
TEST_F(UnicodeExtensionApiTest, InternInPlaceWritesNewHandleBack) {
  PyObject* a = PyUnicode_FromString("hello world aaaaaaaaaa");
  PyObject* b = PyUnicode_FromString("hello world aaaaaaaaaa");
  PyObject* b_addr = b;
  EXPECT_NE(a, b);
  PyUnicode_InternInPlace(&a);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  PyUnicode_InternInPlace(&b);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_NE(b, b_addr);
  Py_DECREF(a);
  Py_DECREF(b);
}

TEST_F(UnicodeExtensionApiTest, InternFromStringReturnsStr) {
  PyObjectPtr result(PyUnicode_InternFromString("szechuan broccoli"));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyUnicode_CheckExact(result));
}

// ---------------------------------------------------------------------------
// PyUnicode_Join
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, JoinWithEmptySeqReturnsEmptyStr) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr seq(PyList_New(0));
  PyObjectPtr result(PyUnicode_Join(sep, seq));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(isUnicodeEqualsCStr(result, ""));
}

TEST_F(UnicodeExtensionApiTest, JoinWithSeqJoinsElements) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr seq(PyList_New(0));
  PyObjectPtr elt0(PyUnicode_FromString("a"));
  PyList_Append(seq, elt0);
  PyObjectPtr elt1(PyUnicode_FromString("b"));
  PyList_Append(seq, elt1);
  PyObjectPtr result(PyUnicode_Join(sep, seq));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(isUnicodeEqualsCStr(result, "a.b"));
}

TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingNonStrRaisesTypeError) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr seq(PyList_New(0));
  PyList_Append(seq, Py_None);
  PyObjectPtr result(PyUnicode_Join(sep, seq));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, JoinWithSeqContainingBytesRaisesTypeError) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr seq(PyList_New(0));
  PyObjectPtr elt0(PyBytes_FromString("a"));
  PyList_Append(seq, elt0);
  PyObjectPtr result(PyUnicode_Join(sep, seq));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

// ---------------------------------------------------------------------------
// PyUnicode_Partition / RPartition
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrStrRaisesTypeError) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr result(PyUnicode_Partition(Py_None, sep));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, PartitionWithNonStrSepRaisesTypeError) {
  PyObjectPtr str(PyUnicode_FromString("hello"));
  PyObjectPtr result(PyUnicode_Partition(str, Py_None));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, PartitionReturnsTuple) {
  PyObjectPtr str(PyUnicode_FromString("a.b"));
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr result(PyUnicode_Partition(str, sep));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyTuple_CheckExact(result));
}

TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrStrRaisesTypeError) {
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr result(PyUnicode_RPartition(Py_None, sep));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, RPartitionWithNonStrSepRaisesTypeError) {
  PyObjectPtr str(PyUnicode_FromString("hello"));
  PyObjectPtr result(PyUnicode_RPartition(str, Py_None));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, RPartitionReturnsTuple) {
  PyObjectPtr str(PyUnicode_FromString("a.b"));
  PyObjectPtr sep(PyUnicode_FromString("."));
  PyObjectPtr result(PyUnicode_RPartition(str, sep));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyTuple_CheckExact(result));
}

// ---------------------------------------------------------------------------
// PyUnicode_Splitlines
// ---------------------------------------------------------------------------

TEST_F(UnicodeExtensionApiTest, SplitlinesWithNonStrStrRaisesTypeError) {
  PyObjectPtr result(PyUnicode_Splitlines(Py_None, 0));
  EXPECT_EQ(result, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, SplitlinesReturnsList) {
  PyObjectPtr str(PyUnicode_FromString("hello\nworld"));
  PyObjectPtr result(PyUnicode_Splitlines(str, 1));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyList_CheckExact(result));
}

// The "\n" below sits inside a C++ raw string, so it reaches Python as a
// backslash escape and becomes a newline in the Python literal.
TEST_F(UnicodeExtensionApiTest, SplitlinesWithSubClassReturnsList) {
  PyRun_SimpleString(R"(
class SubStr(str): pass
str_val = SubStr('hello\nworld')
)");
  PyObjectPtr str(mainModuleGet("str_val"));
  PyObjectPtr result(PyUnicode_Splitlines(str, 1));
  EXPECT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_NE(result, nullptr);
  EXPECT_TRUE(PyList_CheckExact(result));
}
TEST_F(UnicodeExtensionApiTest, SplitlinesWithNoNewlinesReturnsIdEqualString) { PyObjectPtr str(PyUnicode_FromString("hello")); PyObjectPtr result(PyUnicode_Splitlines(str, 1)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_NE(result, nullptr); ASSERT_TRUE(PyList_CheckExact(result)); ASSERT_EQ(PyList_Size(result), 1); PyObject* str_elt = PyList_GetItem(result, 0); EXPECT_EQ(str, str_elt); } TEST_F(UnicodeExtensionApiTest, SplitWithNonStrStrRaisesTypeError) { PyObjectPtr sep(PyUnicode_FromString(".")); PyObjectPtr result(PyUnicode_Split(Py_None, sep, 5)); EXPECT_EQ(result, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, SplitWithNonStrSepRaisesTypeError) { PyObjectPtr str(PyUnicode_FromString("hello")); PyObjectPtr sep(PyLong_FromLong(8)); PyObjectPtr result(PyUnicode_Split(str, sep, 5)); EXPECT_EQ(result, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, SplitReturnsList) { PyObjectPtr str(PyUnicode_FromString("a.b")); PyObjectPtr sep(PyUnicode_FromString(".")); PyObjectPtr result(PyUnicode_Split(str, sep, 5)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_NE(result, nullptr); EXPECT_TRUE(PyList_CheckExact(result)); } TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrStrRaisesTypeError) { PyObjectPtr sep(PyUnicode_FromString(".")); PyObjectPtr result(PyUnicode_RSplit(Py_None, sep, 5)); EXPECT_EQ(result, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, RSplitWithNonStrSepRaisesTypeError) { PyObjectPtr str(PyUnicode_FromString("hello")); PyObjectPtr sep(PyLong_FromLong(8)); PyObjectPtr result(PyUnicode_RSplit(str, sep, 5)); EXPECT_EQ(result, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, RSplitReturnsList) { PyObjectPtr 
str(PyUnicode_FromString("a.b")); PyObjectPtr sep(PyUnicode_FromString(".")); PyObjectPtr result(PyUnicode_RSplit(str, sep, 5)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_NE(result, nullptr); EXPECT_TRUE(PyList_CheckExact(result)); } TEST_F(UnicodeExtensionApiTest, StrlenWithEmptyStrReturnsZero) { const wchar_t* str = L""; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" ASSERT_EQ(Py_UNICODE_strlen(str), 0U); #pragma GCC diagnostic pop } TEST_F(UnicodeExtensionApiTest, StrlenWithStrReturnsNumberOfChars) { const wchar_t* str = L"hello"; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" ASSERT_EQ(Py_UNICODE_strlen(str), 5U); #pragma GCC diagnostic pop } TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeStartRaisesIndexError) { PyObjectPtr str(PyUnicode_FromString("foo")); ASSERT_EQ(PyUnicode_Substring(str, -1, 3), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); } TEST_F(UnicodeExtensionApiTest, SubstringWithNegativeEndRaisesIndexError) { PyObjectPtr str(PyUnicode_FromString("foo")); ASSERT_EQ(PyUnicode_Substring(str, 0, -3), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_IndexError)); } TEST_F(UnicodeExtensionApiTest, SubstringWithFullStringReturnsSameObject) { PyObjectPtr str(PyUnicode_FromString("foo")); PyObjectPtr result(PyUnicode_Substring(str, 0, 5)); EXPECT_EQ(PyErr_Occurred(), nullptr); EXPECT_EQ(result, str); } TEST_F(UnicodeExtensionApiTest, SubstringWithSameStartAndEndReturnsEmpty) { PyObjectPtr str(PyUnicode_FromString("foo")); PyObjectPtr result(PyUnicode_Substring(str, 2, 2)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(result)); EXPECT_STREQ(PyUnicode_AsUTF8(result), ""); } TEST_F(UnicodeExtensionApiTest, SubstringWithASCIIReturnsSubstring) { PyObjectPtr str(PyUnicode_FromString("Hello world!")); PyObjectPtr result(PyUnicode_Substring(str, 3, 8)); 
EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(result)); EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo"); } TEST_F(UnicodeExtensionApiTest, SubstringWithSubClassReturnsSubstring) { PyRun_SimpleString(R"( class SubStr(str): pass str_val = SubStr('Hello world!') )"); PyObjectPtr str(mainModuleGet("str_val")); PyObjectPtr result(PyUnicode_Substring(str, 3, 8)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(result)); EXPECT_STREQ(PyUnicode_AsUTF8(result), "lo wo"); } TEST_F(UnicodeExtensionApiTest, SubstringCountsCodePoints) { PyObjectPtr str(PyUnicode_FromString("cre\u0300me bru\u0302le\u0301e")); PyObjectPtr result(PyUnicode_Substring(str, 2, 11)); EXPECT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(result)); EXPECT_STREQ(PyUnicode_AsUTF8(result), "e\u0300me bru\u0302"); } TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithEmptyStringsReturnsOne) { PyObjectPtr str(PyUnicode_FromString("")); PyObjectPtr substr(PyUnicode_FromString("")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, 1), 1); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithEmptyStringsReturnsOne) { PyObjectPtr str(PyUnicode_FromString("")); PyObjectPtr substr(PyUnicode_FromString("")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 0, 0, -1), 1); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithMatchReturnsOne) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr substr(PyUnicode_FromString("cde")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 9, -1), 1); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchPrefixWithoutMatchReturnsZero) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr substr(PyUnicode_FromString("cde")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 2, 4, -1), 0); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, -1), 0); PyObjectPtr substr2(PyUnicode_FromString("cdf")); 
EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 2, 6, -1), 0); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithMatchReturnsOne) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr substr(PyUnicode_FromString("cde")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 5, 1), 1); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 1, 6, 1), 1); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchSuffixWithoutMatchReturnsZero) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr substr(PyUnicode_FromString("cde")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0); PyObjectPtr substr2(PyUnicode_FromString("bde")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr2, 1, 5, 1), 0); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchWithLargerNeedleReturnsZero) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr substr(PyUnicode_FromString("bananas")); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, 1), 0); EXPECT_EQ(PyUnicode_Tailmatch(str, substr, 3, 5, -1), 0); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrHaystackRaisesTypeError) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr num(PyLong_FromLong(7)); EXPECT_EQ(PyUnicode_Tailmatch(num, str, 1, 6, 1), -1); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, TailmatchWithNonStrNeedleRaisesTypeError) { PyObjectPtr str(PyUnicode_FromString("abcde")); PyObjectPtr num(PyLong_FromLong(7)); EXPECT_EQ(PyUnicode_Tailmatch(str, num, 1, 6, 1), -1); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, NewWithInvalidSizeReturnsError) { EXPECT_EQ(PyUnicode_New(-1, 0), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, 
NewWithInvalidMaxCharReturnsError) { EXPECT_EQ(PyUnicode_New(1, 0x11FFFF), nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, NewWithZeroSizeAndInvalidMaxCharReturnsStr) { PyObjectPtr empty(PyUnicode_New(0, 0x11FFFF)); ASSERT_EQ(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyUnicode_CheckExact(empty)); EXPECT_TRUE(isUnicodeEqualsCStr(empty, "")); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithNegativeOneRaiseError) { char c = 'a'; PyObjectPtr empty(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, &c, -1)); EXPECT_EQ(empty, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError)); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithInvalidKindRaiseError) { char c = 'a'; PyObjectPtr empty(PyUnicode_FromKindAndData(100, &c, 1)); EXPECT_EQ(empty, nullptr); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_SystemError)); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithOneByteKindAndASCIICodePointsReturnsStr) { Py_UCS1 buffer[] = {'h', 'e', 'l', 'l', 'o'}; PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(str)); EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "hello")); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithOneByteKindAndLatin1CodePointsReturnsStr) { Py_UCS1 buffer[] = {'h', 0xe4, 'l', 'l', 'o'}; PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(str)); EXPECT_STREQ(PyUnicode_AsUTF8(str), "h\xc3\xa4llo"); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithTwoByteKindAndBMPCodePointsReturnsStr) { Py_UCS2 buffer[] = {'h', 0xe4, 'l', 0x2cc0, 'o'}; PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); 
ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(str)); EXPECT_STREQ(PyUnicode_AsUTF8(str), "h\xc3\xa4l\xe2\xb3\x80o"); } TEST_F(UnicodeExtensionApiTest, FromKindAndDataWithFourByteKindAndNonBMPCodePointsReturnsStr) { Py_UCS4 buffer[] = {0x1f192, 'h', 0xe4, 'l', 0x2cc0}; PyObjectPtr str(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, Py_ARRAY_LENGTH(buffer))); ASSERT_EQ(PyErr_Occurred(), nullptr); ASSERT_TRUE(PyUnicode_CheckExact(str)); EXPECT_STREQ(PyUnicode_AsUTF8(str), "\xf0\x9f\x86\x92h\xc3\xa4l\xe2\xb3\x80"); } TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrSelfRaisesTypeError) { PyObjectPtr self(PyLong_FromLong(7)); PyObjectPtr other(PyUnicode_FromString("hello")); EXPECT_EQ(PyUnicode_Contains(self, other), -1); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, ContainsWithNonStrOtherRaisesTypeError) { PyObjectPtr self(PyUnicode_FromString("hello")); PyObjectPtr other(PyLong_FromLong(7)); EXPECT_EQ(PyUnicode_Contains(self, other), -1); ASSERT_NE(PyErr_Occurred(), nullptr); EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError)); } TEST_F(UnicodeExtensionApiTest, ContainsWithPresentSubstrReturnsTrue) { PyObjectPtr self(PyUnicode_FromString("foo")); PyObjectPtr other(PyUnicode_FromString("f")); EXPECT_EQ(PyUnicode_Contains(self, other), 1); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, ContainsWithNotPresentSubstrReturnsTrue) { PyObjectPtr self(PyUnicode_FromString("foo")); PyObjectPtr other(PyUnicode_FromString("q")); EXPECT_EQ(PyUnicode_Contains(self, other), 0); EXPECT_EQ(PyErr_Occurred(), nullptr); } TEST_F(UnicodeExtensionApiTest, NormalizeEncodingEscapesMidStringPunctuation) { char buffer[11] = {0}; EXPECT_EQ(_Py_normalize_encoding("utf-8", buffer, sizeof(buffer)), 1); EXPECT_STREQ(buffer, "utf_8"); EXPECT_EQ(_Py_normalize_encoding("utf}8", buffer, sizeof(buffer)), 1); EXPECT_STREQ(buffer, "utf_8"); } 
// Leading and trailing punctuation is dropped rather than turned into '_'.
TEST_F(UnicodeExtensionApiTest,
       NormalizeEncodingIgnoresEndOfStringPunctuation) {
  char buffer[11] = {0};
  EXPECT_EQ(_Py_normalize_encoding("_utf8", buffer, sizeof(buffer)), 1);
  EXPECT_STREQ(buffer, "utf8");
  EXPECT_EQ(_Py_normalize_encoding("utf8_", buffer, sizeof(buffer)), 1);
  EXPECT_STREQ(buffer, "utf8");
}

TEST_F(UnicodeExtensionApiTest, NormalizeEncodingProperlyLowercases) {
  char buffer[11] = {0};
  EXPECT_EQ(_Py_normalize_encoding("ASCII", buffer, sizeof(buffer)), 1);
  EXPECT_STREQ(buffer, "ascii");
}

// NOTE: despite the "ReturnsEmptyString" suffix in the test name, the
// expectation is a 0 return value with the buffer holding the truncated
// "1234". The name is kept so existing test filters keep working.
TEST_F(UnicodeExtensionApiTest,
       NormalizeEncodingWithTooLongStringReturnsEmptyString) {
  char buffer[5] = {0};
  EXPECT_EQ(_Py_normalize_encoding("12345", buffer, sizeof(buffer)), 0);
  EXPECT_STREQ(buffer, "1234");
}

// --- PyUnicode_DecodeLocaleAndSize -----------------------------------------
// The first group passes nullptr as the error handler name, the second group
// passes an explicit name.

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithNullErrorValueEmbeddedNulRaisesValueError) {
  PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_EQ(self, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

// `data[1]` is 'b', so the byte at index `size` is not a NUL terminator.
TEST_F(
    UnicodeExtensionApiTest,
    DecodeLocaleAndSizeWithNullErrorValueNonNulTerminatedStrRaisesValueError) {
  const char data[] = {'a', 'b'};
  PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_EQ(self, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithNullErrorValueReturnsStr) {
  PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, nullptr));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(str));
  EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithNullErrorValueStrictAndSurrogatesRaisesError) {
  PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, nullptr);
  ASSERT_EQ(str, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithEmbeddedNulRaisesValueError) {
  PyObject* self = PyUnicode_DecodeLocaleAndSize("a\0b", 3, "strict");
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_EQ(self, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithNonNulTerminatedStrRaisesValueError) {
  const char data[] = {'a', 'b'};
  PyObject* self = PyUnicode_DecodeLocaleAndSize(data, 1, "strict");
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_EQ(self, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithUnknownErrorHandlerNameRaisesValueError) {
  PyObject* self = PyUnicode_DecodeLocaleAndSize("abc", 3, "nonexistant");
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_EQ(self, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_ValueError));
}

TEST_F(UnicodeExtensionApiTest, DecodeLocaleAndSizeWithStrictReturnsStr) {
  PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "strict"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(str));
  EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithSurrogateescapeReturnsStr) {
  PyObjectPtr str(PyUnicode_DecodeLocaleAndSize("abc", 3, "surrogateescape"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(str));
  EXPECT_TRUE(_PyUnicode_EqualToASCIIString(str, "abc"));
}

// The undecodable byte 0x80 is expected to come back as the lone surrogate
// U+DC80.
TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithSurrogateescapeAndSurrogatesReturnsStr) {
  PyObjectPtr str(
      PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "surrogateescape"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(str));
  Py_UCS4 data[] = {'a', 'b', 'c', 0xDC80};
  PyObjectPtr test(PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, data, 4));
  EXPECT_TRUE(_PyUnicode_EQ(str, test));
}

TEST_F(UnicodeExtensionApiTest,
       DecodeLocaleAndSizeWithStrictAndSurrogatesRaisesError) {
  PyObject* str = PyUnicode_DecodeLocaleAndSize("abc\x80", 4, "strict");
  ASSERT_EQ(str, nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
}

// --- _PyUnicode_AsASCIIString ----------------------------------------------

TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithNonStringReturnsNull) {
  PyObjectPtr bytes(_PyUnicode_AsASCIIString(Py_None, nullptr));
  ASSERT_EQ(bytes, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, AsASCIIStringReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("foo"));
  PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, nullptr));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 3);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
}

TEST_F(UnicodeExtensionApiTest,
       AsASCIIStringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF"));
  PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, nullptr));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
  EXPECT_EQ(bytes, nullptr);
}

TEST_F(UnicodeExtensionApiTest, AsASCIIStringWithReplaceErrorsReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("foo\u00EF"));
  PyObjectPtr bytes(_PyUnicode_AsASCIIString(unicode, "replace"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 4);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo?");
}

// --- _PyUnicode_AsLatin1String ---------------------------------------------

TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithNonStringReturnsNull) {
  PyObjectPtr bytes(_PyUnicode_AsLatin1String(Py_None, nullptr));
  ASSERT_EQ(bytes, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, AsLatin1StringReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("foo"));
  PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, nullptr));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 3);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo");
}

TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithLatin1ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("foo\u00E4"));
  PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, "replace"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 4);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo\xE4");
}

TEST_F(UnicodeExtensionApiTest,
       AsLatin1StringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr unicode(PyUnicode_FromString("foo\u01EF"));
  PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, nullptr));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
  EXPECT_EQ(bytes, nullptr);
}

TEST_F(UnicodeExtensionApiTest, AsLatin1StringWithReplaceErrorsReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("foo\u0AE4"));
  PyObjectPtr bytes(_PyUnicode_AsLatin1String(unicode, "replace"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 4);
  EXPECT_STREQ(PyBytes_AsString(bytes), "foo?");
}

// --- PyUnicode_AsUTF16String / _PyUnicode_EncodeUTF16 ----------------------
// The expected byte strings for the default byte order start with the BOM
// FF FE and store code units low byte first. memcmp is used because the data
// contains embedded NUL bytes.

TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithNonStringReturnsNull) {
  PyObjectPtr bytes(PyUnicode_AsUTF16String(Py_None));
  ASSERT_EQ(bytes, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, AsUTF16StringReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("hi"));
  PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 6);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00i\x00", 6), 0);
}

// The input str is built by decoding byte 0x80 with "surrogateescape"; the
// encode step is then expected to fail.
TEST_F(UnicodeExtensionApiTest,
       AsUTF16StringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(unicode));
  PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
  EXPECT_EQ(bytes, nullptr);
}

// U+1D1F0 is outside the BMP and is expected as the pair D834 DDF0.
TEST_F(UnicodeExtensionApiTest, AsUTF16StringWithUTF16ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(PyUnicode_AsUTF16String(unicode));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 10);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
                        "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10),
            0);
}

TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithUTF16ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 10);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
                        "\xff\xfeh\x00\x34\xd8\xf0\xddi\x00", 10),
            0);
}

// Byte order -1: little-endian output with no BOM.
TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16LeWithUTF16ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", -1));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 8);
  EXPECT_EQ(
      std::memcmp(PyBytes_AsString(bytes), "h\x00\x34\xd8\xf0\xddi\x00", 8),
      0);
}

// Byte order 1: big-endian output with no BOM.
TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16BeWithUTF16ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 1));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 8);
  EXPECT_EQ(
      std::memcmp(PyBytes_AsString(bytes), "\x00h\xd8\x34\xdd\xf0\x00i", 8),
      0);
}

// With "replace", the unencodable surrogate comes out as '?'.
TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF16WithReplaceReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 8);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8),
            0);
}

// Same expectation, with the surrogate supplied through a wide-char literal.
TEST_F(UnicodeExtensionApiTest, EncodeUTF16WithReplaceReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF16(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 8);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes), "\xff\xfeh\x00?\x00i\x00", 8),
            0);
}

// --- PyUnicode_AsUTF32String / _PyUnicode_EncodeUTF32 ----------------------
// Same layout as the UTF-16 tests above, with four bytes per code point and
// the BOM FF FE 00 00 for the default byte order.

TEST_F(UnicodeExtensionApiTest, AsUTF32StringWithNonStringReturnsNull) {
  PyObjectPtr bytes(PyUnicode_AsUTF32String(Py_None));
  ASSERT_EQ(bytes, nullptr);
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_TypeError));
}

TEST_F(UnicodeExtensionApiTest, AsUTF32StringReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("hi"));
  PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 12);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
                        "\xff\xfe\x00\x00h\x00\x00\x00i\x00\x00\x00", 12),
            0);
}

TEST_F(UnicodeExtensionApiTest,
       AsUTF32StringWithInvalidCodepointRaisesEncodeError) {
  PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyUnicode_CheckExact(unicode));
  PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
  ASSERT_NE(PyErr_Occurred(), nullptr);
  EXPECT_TRUE(PyErr_ExceptionMatches(PyExc_UnicodeEncodeError));
  EXPECT_EQ(bytes, nullptr);
}

TEST_F(UnicodeExtensionApiTest, AsUTF32StringWithUTF32ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(PyUnicode_AsUTF32String(unicode));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 16);
  EXPECT_EQ(
      std::memcmp(PyBytes_AsString(bytes),
                  "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00",
                  16),
      0);
}

TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithUTF32ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 16);
  EXPECT_EQ(
      std::memcmp(PyBytes_AsString(bytes),
                  "\xff\xfe\x00\x00h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00",
                  16),
      0);
}

// Byte order -1: little-endian output with no BOM.
TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32LeWithUTF32ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", -1));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 12);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
                        "h\x00\x00\x00\xf0\xd1\x01\x00i\x00\x00\x00", 12),
            0);
}

// Byte order 1: big-endian output with no BOM.
TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32BeWithUTF32ReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromString("h\U0001d1f0i"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 1));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 12);
  EXPECT_EQ(std::memcmp(PyBytes_AsString(bytes),
                        "\x00\x00\x00h\x00\x01\xd1\xf0\x00\x00\x00i", 12),
            0);
}

TEST_F(UnicodeExtensionApiTest, UnderEncodeUTF32WithReplaceReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_DecodeASCII("h\x80i", 3, "surrogateescape"));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 16);
  EXPECT_EQ(std::memcmp(
                PyBytes_AsString(bytes),
                "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16),
            0);
}

TEST_F(UnicodeExtensionApiTest, EncodeUTF32WithReplaceReturnsBytes) {
  PyObjectPtr unicode(PyUnicode_FromWideChar(L"h\xDC80i", 3));
  PyObjectPtr bytes(_PyUnicode_EncodeUTF32(unicode, "replace", 0));
  ASSERT_EQ(PyErr_Occurred(), nullptr);
  ASSERT_TRUE(PyBytes_Check(bytes));
  EXPECT_EQ(PyBytes_Size(bytes), 16);
  EXPECT_EQ(std::memcmp(
                PyBytes_AsString(bytes),
                "\xff\xfe\x00\x00h\x00\x00\x00?\x00\x00\x00i\x00\x00\x00", 16),
            0);
}

// --- PyUnicode_IS_ASCII / PyUnicode_IS_COMPACT_ASCII -----------------------

TEST_F(UnicodeExtensionApiTest, IsAsciiWithAsciiOnlyCharsReturnsOne) {
  PyObjectPtr unicode(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 1);
}

TEST_F(UnicodeExtensionApiTest, IsAsciiWithNonAsciiCharsReturnsZero) {
  PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o"));
  EXPECT_EQ(PyUnicode_IS_ASCII(unicode.get()), 0);
}

TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithAsciiOnlyCharsReturnsOne) {
  PyObjectPtr unicode(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 1);
}

TEST_F(UnicodeExtensionApiTest, IsCompactAsciiWithNonAsciiCharsReturnsZero) {
  PyObjectPtr unicode(PyUnicode_FromString("fo\u00e4o"));
  EXPECT_EQ(PyUnicode_IS_COMPACT_ASCII(unicode.get()), 0);
}

// --- PyUnicode_IsIdentifier ------------------------------------------------

TEST_F(UnicodeExtensionApiTest, IsIdentifierWithEmptyStringReturnsFalse) {
  PyObjectPtr unicode(PyUnicode_FromString(""));
  EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, IsIdentifierWithValidIdentifierReturnsTrue) {
  PyObjectPtr unicode(PyUnicode_FromString("foo"));
  EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 1);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

TEST_F(UnicodeExtensionApiTest, IsIdentifierWithInvalidIdentifierReturnsFalse) {
  PyObjectPtr unicode(PyUnicode_FromString("b$ar"));
  EXPECT_EQ(PyUnicode_IsIdentifier(unicode), 0);
  EXPECT_EQ(PyErr_Occurred(), nullptr);
}

// --- _Py_DecodeUTF8Ex ------------------------------------------------------
// These tests release the decoded buffer with PyMem_RawFree.

TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithEmptyStrReturnsZero) {
  const char* str = "";
  wchar_t* result = nullptr;
  EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/0, /*result=*/&result,
                                /*wlen=*/nullptr, /*reason=*/nullptr,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, L"");
  PyMem_RawFree(result);
}

TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrReturnsZero) {
  const char* str = "hello";
  wchar_t* result = nullptr;
  EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str),
                                /*result=*/&result,
                                /*wlen=*/nullptr, /*reason=*/nullptr,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(std::wcslen(result), size_t{5});
  EXPECT_EQ('h', result[0]);
  EXPECT_EQ('e', result[1]);
  EXPECT_EQ('l', result[2]);
  EXPECT_EQ('l', result[3]);
  EXPECT_EQ('o', result[4]);
  PyMem_RawFree(result);
}

// Only the first `size` bytes are decoded even though the C string is longer.
TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExDecodesUpToSizeBytes) {
  const char* str = "hello";
  wchar_t* result = nullptr;
  EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/3, /*result=*/&result,
                                /*wlen=*/nullptr, /*reason=*/nullptr,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(std::wcslen(result), size_t{3});
  EXPECT_EQ('h', result[0]);
  EXPECT_EQ('e', result[1]);
  EXPECT_EQ('l', result[2]);
  PyMem_RawFree(result);
}

TEST_F(UnicodeExtensionApiTest, DecodeUTF8ExWithASCIIStrSetsWlen) {
  const char* str = "hello";
  wchar_t* result = nullptr;
  size_t wlen = 0;
  EXPECT_EQ(0, _Py_DecodeUTF8Ex(str, /*size=*/std::strlen(str),
                                /*result=*/&result,
                                /*wlen=*/&wlen, /*reason=*/nullptr,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_EQ(std::wcslen(result), size_t{5});
  EXPECT_EQ('h', result[0]);
  EXPECT_EQ('e', result[1]);
  EXPECT_EQ('l', result[2]);
  EXPECT_EQ('l', result[3]);
  EXPECT_EQ('o', result[4]);
  EXPECT_EQ(wlen, size_t{5});
  PyMem_RawFree(result);
}

// --- _Py_EncodeUTF8Ex ------------------------------------------------------
// Buffers requested with raw_malloc=0 are released with PyMem_Free, those
// requested with raw_malloc=1 with PyMem_RawFree. In the failure tests the
// out-parameter starts as a sentinel pointer so the tests can check it was
// not written.

TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithEmptyStrReturnsZero) {
  const wchar_t* str = L"";
  char* result = nullptr;
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                /*reason=*/nullptr, /*raw_malloc=*/0,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, "");
  PyMem_Free(result);
}

TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithASCIIStrReturnsZero) {
  const wchar_t* str = L"hello";
  char* result = nullptr;
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                /*reason=*/nullptr, /*raw_malloc=*/0,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, "hello");
  PyMem_Free(result);
}

TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithRawMallocReturnsZero) {
  const wchar_t* str = L"hello";
  char* result = nullptr;
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                /*reason=*/nullptr, /*raw_malloc=*/1,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, "hello");
  PyMem_RawFree(result);
}

// Every non-ASCII character in the input is in the Latin-1 range and is
// expected as a two-byte UTF-8 sequence.
TEST_F(UnicodeExtensionApiTest, EncodeUTF8ExWithLatin1ReturnsZero) {
  // The literals are split before the final 'e' on purpose: hex escapes
  // consume every following hex digit, so "\xe9e" would be the single code
  // point U+0E9E rather than U+00E9 followed by 'e'.
  const wchar_t* str = L"cr\xe8me br\xfbl\xe9" L"e";
  char* result = nullptr;
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                /*reason=*/nullptr, /*raw_malloc=*/0,
                                _Py_ERROR_STRICT));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, u8"cr\xC3\xA8me br\xC3\xBBl\xC3\xA9" u8"e");
  PyMem_Free(result);
}

// Under the strict handler a lone surrogate fails with -2.
TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithoutSurrogateEscapeReturnsNegativeTwo) {
  const wchar_t* str = L"\x0000dc80";
  char* result = reinterpret_cast<char*>(0xdeadbeef);
  EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                 /*reason=*/nullptr, /*raw_malloc=*/0,
                                 _Py_ERROR_STRICT));
  EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
}

// On failure error_pos receives the index of the offending character (3).
TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithoutSurrogateEscapeAndErrorPosSetsErrorPos) {
  const wchar_t* str = L"foo\x0000dc80zip";
  char* result = reinterpret_cast<char*>(0xdeadbeef);
  size_t error_pos = 1337;
  EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
                                 /*reason=*/nullptr, /*raw_malloc=*/0,
                                 _Py_ERROR_STRICT));
  EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
  EXPECT_EQ(error_pos, size_t{3});
}

TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithoutSurrogateEscapeAndReasonSetsReason) {
  const wchar_t* str = L"\x0000dc80";
  char* result = reinterpret_cast<char*>(0xdeadbeef);
  const char* reason = nullptr;
  EXPECT_EQ(-2, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/nullptr,
                                 /*reason=*/&reason, /*raw_malloc=*/0,
                                 _Py_ERROR_STRICT));
  EXPECT_EQ(result, reinterpret_cast<char*>(0xdeadbeef));
  ASSERT_NE(reason, nullptr);
  EXPECT_STREQ(reason, "encoding error");
}

// With surrogateescape, U+DC80 is written as the raw byte 0x80; error_pos and
// reason keep their sentinel values on success.
TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithSurrogateEscapeEscapesSurrogate) {
  const wchar_t* str = L"\x0000dc80";
  char* result = nullptr;
  size_t error_pos = 1337;
  const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
                                /*reason=*/&reason, /*raw_malloc=*/0,
                                _Py_ERROR_SURROGATEESCAPE));
  EXPECT_EQ(error_pos, size_t{1337});
  EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, u8"\x80");
  PyMem_Free(result);
}

TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithThreeByteCodePointEncodesCodePoint) {
  const wchar_t* str = L"\x0000efff";
  char* result = nullptr;
  size_t error_pos = 1337;
  const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
  // &reason is passed so that the "unchanged on success" check below actually
  // observes what the encoder did with the out-parameter.
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
                                /*reason=*/&reason, /*raw_malloc=*/0,
                                _Py_ERROR_SURROGATEESCAPE));
  EXPECT_EQ(error_pos, size_t{1337});
  EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, u8"\xee\xbf\xbf");
  PyMem_Free(result);
}

TEST_F(UnicodeExtensionApiTest,
       EncodeUTF8ExWithFourByteCodePointEncodesCodePoint) {
  const wchar_t* str = L"\x10000";
  char* result = nullptr;
  size_t error_pos = 1337;
  const char* reason = const_cast<const char*>(reinterpret_cast<char*>(0x1337));
  // &reason is passed so that the "unchanged on success" check below actually
  // observes what the encoder did with the out-parameter.
  EXPECT_EQ(0, _Py_EncodeUTF8Ex(str, &result, /*error_pos=*/&error_pos,
                                /*reason=*/&reason, /*raw_malloc=*/0,
                                _Py_ERROR_SURROGATEESCAPE));
  EXPECT_EQ(error_pos, size_t{1337});
  EXPECT_EQ(reason, reinterpret_cast<char*>(0x1337));
  ASSERT_NE(result, nullptr);
  EXPECT_STREQ(result, u8"\xf0\x90\x80\x80");
  PyMem_Free(result);
}

// --- Py_FileSystemDefaultEncodeErrors --------------------------------------

// The C-level global must agree with what sys.getfilesystemencodeerrors()
// reports.
TEST_F(UnicodeExtensionApiTest,
       FileSystemDefaultEncodeErrorsMatchesSysGetfilesystemencodeerrors) {
  PyRun_SimpleString(R"(
import sys
errors = sys.getfilesystemencodeerrors()
)");
  PyObjectPtr errors(mainModuleGet("errors"));
  EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncodeErrors));
}

TEST_F(UnicodeExtensionApiTest, FileSystemDefaultEncodingMatchesSysGetfilesystemencoding) { PyRun_SimpleString(R"( import sys encoding = 
sys.getfilesystemencoding() )"); PyObjectPtr errors(mainModuleGet("encoding")); EXPECT_TRUE(isUnicodeEqualsCStr(errors, Py_FileSystemDefaultEncoding)); } TEST_F(UnicodeExtensionApiTest, GetDefaultEncodingMatchesSysGetdefaultencoding) { PyRun_SimpleString(R"( import sys sys_default = sys.getdefaultencoding() )"); PyObjectPtr sys_default(mainModuleGet("sys_default")); EXPECT_TRUE(isUnicodeEqualsCStr(sys_default, PyUnicode_GetDefaultEncoding())); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8SurrogateEscapeWithEmptyStringReturnsEmptyString) { size_t wlen; wchar_t* wpath = _Py_DecodeUTF8_surrogateescape("", 0, &wlen); EXPECT_STREQ(wpath, L""); EXPECT_EQ(wlen, size_t{0}); PyMem_RawFree(wpath); } TEST_F(UnicodeExtensionApiTest, DecodeUTF8SurrogateEscapeReturnsWideString) { const char* path = "/foo/bar/bat"; size_t len = std::strlen(path); size_t wlen; wchar_t* wpath = _Py_DecodeUTF8_surrogateescape(path, len, &wlen); EXPECT_STREQ(wpath, L"/foo/bar/bat"); EXPECT_EQ(wlen, len); PyMem_RawFree(wpath); } } // namespace testing } // namespace py