in language-extensions/python/src/PythonDataSet.cpp [1093:1235]
void PythonOutputDataSet::RetrieveStringColumnFromDataFrame(
string columnName,
SQLULEN &columnSize,
SQLSMALLINT &decimalDigits,
SQLSMALLINT &nullable)
{
LOG("PythonOutputDataSet::RetrieveStringColumnFromDataFrame");
vector<char> columnData;
SQLINTEGER *strLenOrNullMap = nullptr;
if (m_rowsNumber > 0)
{
strLenOrNullMap = new SQLINTEGER[m_rowsNumber];
}
decimalDigits = 0;
nullable = SQL_NO_NULLS;
np::ndarray column = ExtractArrayFromDataFrame(columnName);
string dType = bp::extract<string>(bp::str(column.get_dtype()));
// Store the data in a vector (columnData) while we calculate the total size we need
// for the contiguous char data.
//
SQLINTEGER maxLen = sizeof(char);
int fullSize = 0;
for (SQLULEN row = 0; row < m_rowsNumber; ++row)
{
string stringToAdd = "";
bp::object pyObj = column[row];
bool null = true;
if (!pyObj.is_none())
{
null = false;
// If we have a bytes object we want to take only the bytes, not the b'' around them
//
if (PyBytes_Check(pyObj.ptr()))
{
// Extract the size and bytes of the pyObj
//
PyObject *baseObj = pyObj.ptr();
int size = PyBytes_Size(baseObj);
char *bytes = static_cast<char *>(
static_cast<void *>(PyBytes_AsString(baseObj)));
stringToAdd = string(bytes, size);
}
else
{
// Extract a utf-8 encoded version of the string
//
stringToAdd = bp::extract<string>(bp::str(pyObj).encode("utf-8"));
// If we are dealing with date types in a streaming case,
// we have to clean up the datetime string so that SQL can convert properly
//
if (m_isStreaming && regex_match(dType, regex(".*date.*")))
{
// NaT counts as None / NULL for date types
//
if (stringToAdd.compare("NaT") == 0)
{
null = true;
}
else
{
// Python Datetimes will look like this: YYYY-MM-DDThh:mm:ss.fffffffff
// We trim trailing 0s in the fractional part of the timestamp
// (and the "." if the fraction is all 0) so SQL can auto-convert
// from string to the date type.
// Lower precision date types cannot handle trailing zeroes.
//
smatch matches;
string trimmedDate;
if (regex_search(stringToAdd, matches, regex("((.*)([.].*?))(0+)$")))
{
// "matches" will have this by index:
// 0. YYYY-MM-DDThh:mm:ss.fffff0000
// 1. YYYY-MM-DDThh:mm:ss.fffff
// 2. YYYY-MM-DDThh:mm:ss
// 3. .fffff
// If there is no fractional second then #3 is just ".", and #1 will
// have the trailing "." which we don't want, so we use #2 instead.
//
trimmedDate = matches[1].str();
if (matches[3].str() == ".")
{
trimmedDate = matches[2].str();
}
stringToAdd = trimmedDate;
}
}
}
}
}
if (!null)
{
fullSize += stringToAdd.size();
strLenOrNullMap[row] = stringToAdd.size();
columnData.insert(columnData.end(), stringToAdd.begin(), stringToAdd.end());
}
else
{
strLenOrNullMap[row] = SQL_NULL_DATA;
nullable = SQL_NULLABLE;
}
// Store the maximum length to find the widest the column needs to be
//
if (maxLen < strLenOrNullMap[row])
{
maxLen = strLenOrNullMap[row];
}
}
// Create a single block of memory that will hold all the data contiguously.
//
unique_ptr<char[]> dataPtr(new char[fullSize]);
// Copy our data from the vector of strings to the single chunk of memory.
//
memcpy(dataPtr.get(), columnData.data(), fullSize);
columnSize = maxLen;
if (m_rowsNumber > 0)
{
// Add the memory block to the m_data and give the memory handling job to m_data.
//
m_data.push_back(static_cast<SQLPOINTER>(dataPtr.release()));
}
else
{
m_data.push_back(nullptr);
}
m_columnNullMap.push_back(strLenOrNullMap);
}