in be/src/exec/text-converter.cc [110:352]
// Generates an LLVM IR function named "WriteSlot" that converts one text field into
// the slot described by 'slot_desc' inside a tuple whose layout comes from 'tuple_desc'.
//
// The generated function takes three arguments (tuple_arg, data, len) and returns a
// bool:
//  - true  if the slot was set to NULL because the field matched the NULL string, or
//          because 'len' is 0 and the slot is not a var-len string slot;
//  - true  if the field was assigned (var-len strings) or parsed successfully;
//  - false if parsing failed (and, when 'strict_mode' is set, if the parse result was
//          PARSE_OVERFLOW). In that case the slot's null indicator is set.
//
// Parameters:
//  codegen      - code generator used to look up IR functions and build the new one.
//  tuple_desc   - supplies the LLVM struct type of the tuple argument.
//  slot_desc    - slot to write; its type selects the string-assign/parse routine.
//  fn           - out parameter (must not be null); receives the generated function.
//  null_col_val - bytes of the NULL string literal; 'len' is its length. When it is
//                 exactly "\N" the IS_NULL_STRING IR function is used, otherwise
//                 GENERIC_IS_NULL_STRING is called with the literal as a constant.
//  check_null   - if false, the NULL-string comparison is replaced by constant false.
//  strict_mode  - if true, PARSE_OVERFLOW is treated like PARSE_FAILURE.
//
// Returns Status::OK() on success. Returns an error Status if the tuple's LLVM type
// cannot be generated, if no parse routine exists for the slot type, or if
// FinalizeFunction() returns null for the generated function.
Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, TupleDescriptor* tuple_desc,
    SlotDescriptor* slot_desc, llvm::Function** fn, const char* null_col_val, int len,
    bool check_null, bool strict_mode) {
  DCHECK(fn != nullptr);
  const auto& slot_type = slot_desc->type();
  DCHECK(SupportsCodegenWriteSlot(slot_type));
  const bool is_var_len_string = slot_type.IsVarLenStringType();

  // Codegen is_null_string. The default NULL literal "\N" has a specialised IR
  // function; any other literal goes through the generic comparison.
  const bool is_default_null =
      (len == 2 && null_col_val[0] == '\\' && null_col_val[1] == 'N');
  llvm::Function* is_null_string_fn;
  if (is_default_null) {
    is_null_string_fn = codegen->GetFunction(IRFunction::IS_NULL_STRING, false);
  } else {
    is_null_string_fn = codegen->GetFunction(IRFunction::GENERIC_IS_NULL_STRING, false);
  }
  DCHECK(is_null_string_fn != nullptr);

  llvm::StructType* tuple_type = tuple_desc->GetLlvmStruct(codegen);
  if (tuple_type == nullptr) {
    return Status("TextConverter::CodegenWriteSlot(): Failed to generate "
        "tuple type");
  }
  llvm::PointerType* tuple_ptr_type = tuple_type->getPointerTo();

  // Prototype: bool WriteSlot(tuple_arg, data, len)
  LlvmCodeGen::FnPrototype prototype(
      codegen, "WriteSlot", codegen->bool_type());
  prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple_arg", tuple_ptr_type));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("data", codegen->ptr_type()));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("len", codegen->i32_type()));

  LlvmBuilder builder(codegen->context());
  llvm::Value* args[3];
  *fn = prototype.GeneratePrototype(&builder, &args[0]);
  // Named aliases for the generated function's arguments, in prototype order.
  llvm::Value* const tuple_arg = args[0];
  llvm::Value* const data_arg = args[1];
  llvm::Value* const len_arg = args[2];

  llvm::BasicBlock* set_null_block = nullptr;
  llvm::BasicBlock* parse_slot_block = nullptr;
  llvm::BasicBlock* check_zero_block = nullptr;
  codegen->CreateIfElseBlocks(*fn, "set_null", "parse_slot",
      &set_null_block, &parse_slot_block);
  // Only non-string slots get the extra "empty field means NULL" check.
  if (!is_var_len_string) {
    check_zero_block = llvm::BasicBlock::Create(codegen->context(), "check_zero", *fn);
  }

  // Check if the data matches the configured NULL string.
  llvm::Value* is_null;
  if (check_null) {
    if (is_default_null) {
      is_null = builder.CreateCall(
          is_null_string_fn, llvm::ArrayRef<llvm::Value*>({data_arg, len_arg}));
    } else {
      llvm::Value* const null_col_ir_str =
          codegen->GetStringConstant(&builder, null_col_val, len);
      is_null = builder.CreateCall(is_null_string_fn,
          llvm::ArrayRef<llvm::Value*>(
              {data_arg, len_arg, null_col_ir_str, codegen->GetI32Constant(len)}));
    }
  } else {
    // Constant FALSE as branch condition. We rely on later optimization passes
    // to remove the branch and THEN block.
    is_null = codegen->false_value();
  }
  builder.CreateCondBr(is_null, set_null_block,
      is_var_len_string ? parse_slot_block : check_zero_block);

  if (!is_var_len_string) {
    builder.SetInsertPoint(check_zero_block);
    // If len == 0 and it is not a string col, set slot to NULL
    llvm::Value* null_len =
        builder.CreateICmpEQ(len_arg, codegen->GetI32Constant(0));
    builder.CreateCondBr(null_len, set_null_block, parse_slot_block);
  }

  // Codegen parse slot block
  builder.SetInsertPoint(parse_slot_block);
  llvm::Value* slot =
      builder.CreateStructGEP(nullptr, tuple_arg, slot_desc->llvm_field_idx(), "slot");

  if (is_var_len_string) {
    llvm::Function* str_assign_fn = codegen->GetFunction(
        IRFunction::STRING_VALUE_ASSIGN, false);
    // TODO codegen memory allocation for CHAR
    DCHECK(slot_type.type != TYPE_CHAR);
    if (slot_type.type == TYPE_VARCHAR) {
      // Determine if we need to truncate the string: assign min(len, declared max len).
      llvm::Value* maxlen = codegen->GetI32Constant(slot_type.len);
      llvm::Value* len_lt_maxlen =
          builder.CreateICmpSLT(len_arg, maxlen, "len_lt_maxlen");
      llvm::Value* minlen =
          builder.CreateSelect(len_lt_maxlen, len_arg, maxlen, "select_min_len");
      builder.CreateCall(str_assign_fn,
          llvm::ArrayRef<llvm::Value*>({slot, data_arg, minlen}));
    } else {
      builder.CreateCall(str_assign_fn,
          llvm::ArrayRef<llvm::Value*>({slot, data_arg, len_arg}));
    }
    builder.CreateRet(codegen->true_value());
  } else {
    // Select the IR parse routine matching the slot type.
    // NOTE(review): the error returns inside this switch happen after '*fn' was
    // assigned and its blocks were created, so '*fn' refers to a partially built
    // function on those paths; callers presumably must not use it when the returned
    // status is not OK - confirm against callers.
    IRFunction::Type parse_fn_enum;
    llvm::Function* parse_fn = nullptr;
    switch (slot_type.type) {
      case TYPE_BOOLEAN:
        parse_fn_enum = IRFunction::STRING_TO_BOOL;
        break;
      case TYPE_TINYINT:
        parse_fn_enum = IRFunction::STRING_TO_INT8;
        break;
      case TYPE_SMALLINT:
        parse_fn_enum = IRFunction::STRING_TO_INT16;
        break;
      case TYPE_INT:
        parse_fn_enum = IRFunction::STRING_TO_INT32;
        break;
      case TYPE_BIGINT:
        parse_fn_enum = IRFunction::STRING_TO_INT64;
        break;
      case TYPE_FLOAT:
        parse_fn_enum = IRFunction::STRING_TO_FLOAT;
        break;
      case TYPE_DOUBLE:
        parse_fn_enum = IRFunction::STRING_TO_DOUBLE;
        break;
      case TYPE_TIMESTAMP:
        parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP;
        break;
      case TYPE_DATE:
        parse_fn_enum = IRFunction::STRING_TO_DATE;
        break;
      case TYPE_DECIMAL:
        // The decimal parse routine is chosen by the slot's storage size in bytes.
        switch (slot_desc->slot_size()) {
          case 4:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL4;
            break;
          case 8:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL8;
            break;
          case 16:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL16;
            break;
          default:
            DCHECK(false);
            return Status("TextConverter::CodegenWriteSlot(): "
                "Decimal slots can't be this size.");
        }
        break;
      default:
        DCHECK(false);
        return Status("TextConverter::CodegenWriteSlot(): Codegen'd parser NYI for the "
            "slot_desc type");
    }
    parse_fn = codegen->GetFunction(parse_fn_enum, false);
    DCHECK(parse_fn != nullptr);

    // Set up trying to parse the string to the slot type
    llvm::BasicBlock* parse_success_block = nullptr;
    llvm::BasicBlock* parse_failed_block = nullptr;
    codegen->CreateIfElseBlocks(*fn, "parse_success", "parse_fail",
        &parse_success_block, &parse_failed_block);
    // The parse routines report their outcome through an i32 out-parameter.
    LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->i32_type());
    llvm::Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(*fn, parse_result);

    llvm::CallInst* parse_return;
    // Call Impala's StringTo* function
    // Function implementations in exec/hdfs-scanner-ir.cc
    if (slot_type.type == TYPE_DECIMAL) {
      // Special case for decimal since it has additional precision/scale parameters
      parse_return = builder.CreateCall(parse_fn, {data_arg, len_arg,
          codegen->GetI32Constant(slot_type.precision),
          codegen->GetI32Constant(slot_type.scale), parse_result_ptr});
    } else if (slot_type.type == TYPE_TIMESTAMP) {
      // If the return value is large (more than 16 bytes in our toolchain) the first
      // parameter would be a pointer to value parsed and the return value of callee
      // should be ignored
      parse_return =
          builder.CreateCall(parse_fn, {slot, data_arg, len_arg, parse_result_ptr});
    } else {
      parse_return = builder.CreateCall(parse_fn, {data_arg, len_arg, parse_result_ptr});
    }
    llvm::Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result");
    llvm::Value* failed_value =
        codegen->GetI32Constant(StringParser::PARSE_FAILURE);

    // Check for parse error.
    llvm::Value* parse_failed =
        builder.CreateICmpEQ(parse_result_val, failed_value, "failed");
    if (strict_mode) {
      // In strict_mode, also check if parse_result is PARSE_OVERFLOW.
      llvm::Value* overflow_value =
          codegen->GetI32Constant(StringParser::PARSE_OVERFLOW);
      llvm::Value* parse_overflow =
          builder.CreateICmpEQ(parse_result_val, overflow_value, "overflowed");
      parse_failed = builder.CreateOr(parse_failed, parse_overflow, "failed_or");
    }
    builder.CreateCondBr(parse_failed, parse_failed_block, parse_success_block);

    // Parse succeeded
    builder.SetInsertPoint(parse_success_block);
    // If the parsed value is in parse_return, move it into slot
    if (slot_type.type == TYPE_DECIMAL) {
#ifdef __aarch64__
      // On aarch64, the 4-byte decimal is still returned as i64, so it has to be
      // truncated before being stored.
      if (slot_desc->slot_size() == 4) {
        llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
        builder.CreateStore(temp_slot, slot);
      } else {
        llvm::Value* cast_slot =
            builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
        builder.CreateStore(parse_return, cast_slot);
      }
#else
      // For Decimal values, the return type generated by Clang is struct type rather than
      // integer so casting is necessary
      llvm::Value* cast_slot =
          builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
      builder.CreateStore(parse_return, cast_slot);
#endif
#ifdef __aarch64__
    } else if (slot_type.type == TYPE_DATE) {
      // On aarch64, for Date values, the return type generated by Clang is i64, not i32,
      // so truncating is necessary.
      llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
      builder.CreateStore(temp_slot, slot);
#endif
    } else if (slot_type.type != TYPE_TIMESTAMP) {
      // Timestamps were already written through the 'slot' pointer passed to the
      // parse call above, so only the remaining types store the returned value.
      builder.CreateStore(parse_return, slot);
    }
    builder.CreateRet(codegen->true_value());

    // Parse failed, set slot to null and return false
    builder.SetInsertPoint(parse_failed_block);
    slot_desc->CodegenSetNullIndicator(codegen, &builder, tuple_arg, codegen->true_value());
    builder.CreateRet(codegen->false_value());
  }

  // Case where data is \N or len == 0 and it is not a string col
  builder.SetInsertPoint(set_null_block);
  slot_desc->CodegenSetNullIndicator(codegen, &builder, tuple_arg, codegen->true_value());
  builder.CreateRet(codegen->true_value());

  if (codegen->FinalizeFunction(*fn) == nullptr) {
    return Status("TextConverter::CodegenWriteSlot(): codegen'd "
        "WriteSlot function failed verification, see log");
  }
  return Status::OK();
}