Status TextConverter::CodegenWriteSlot()

in be/src/exec/text-converter.cc [110:352]


/// Codegens an IR function named "WriteSlot" that converts one text field into the slot
/// described by 'slot_desc' inside a tuple described by 'tuple_desc'.
///
/// The generated function has the prototype
///   bool WriteSlot(<tuple struct>* tuple_arg, <ptr> data, i32 len)
/// and behaves as follows:
///   - If 'check_null' is true and 'data'/'len' match the NULL string
///     ('null_col_val' of length 'len'), the slot's null indicator is set and true is
///     returned.
///   - For slot types other than var-len strings, a zero-length field also sets the
///     null indicator and returns true.
///   - Var-len string slots are assigned via STRING_VALUE_ASSIGN (VARCHAR values are
///     truncated to the declared length) and true is returned.
///   - All other types are parsed with the matching StringTo* IR function. When the
///     parse result is PARSE_FAILURE (or, if 'strict_mode' is set, PARSE_OVERFLOW), the
///     null indicator is set and false is returned. Otherwise the parsed value is
///     stored into the slot and true is returned.
///
/// 'fn' must be non-null; it receives the generated function.
/// Returns a non-OK Status if the tuple's LLVM struct type cannot be generated, if the
/// slot type (or decimal slot size) has no codegen'd parser, or if the generated
/// function fails verification.
/// NOTE(review): '*fn' is assigned as soon as the prototype is generated, i.e. before
/// some of the error returns below; callers should presumably only use '*fn' when the
/// returned Status is OK -- confirm against callers.
Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen, TupleDescriptor* tuple_desc,
    SlotDescriptor* slot_desc, llvm::Function** fn, const char* null_col_val, int len,
    bool check_null, bool strict_mode) {
  DCHECK(fn != nullptr);
  DCHECK(SupportsCodegenWriteSlot(slot_desc->type()));

  const auto& slot_type = slot_desc->type();
  const bool is_var_len_string = slot_type.IsVarLenStringType();

  // Codegen is_null_string. The default NULL string "\N" has a dedicated IR function
  // taking only (data, len); any other NULL string uses the generic variant, which
  // additionally takes the NULL string and its length.
  const bool is_default_null =
      (len == 2 && null_col_val[0] == '\\' && null_col_val[1] == 'N');
  llvm::Function* is_null_string_fn;
  if (is_default_null) {
    is_null_string_fn = codegen->GetFunction(IRFunction::IS_NULL_STRING, false);
  } else {
    is_null_string_fn = codegen->GetFunction(IRFunction::GENERIC_IS_NULL_STRING, false);
  }

  DCHECK(is_null_string_fn != nullptr);

  llvm::StructType* tuple_type = tuple_desc->GetLlvmStruct(codegen);
  if (tuple_type == nullptr) {
    return Status("TextConverter::CodegenWriteSlot(): Failed to generate "
        "tuple type");
  }
  llvm::PointerType* tuple_ptr_type = tuple_type->getPointerTo();

  // Prototype: bool WriteSlot(tuple_arg, data, len).
  LlvmCodeGen::FnPrototype prototype(
      codegen, "WriteSlot", codegen->bool_type());
  prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple_arg", tuple_ptr_type));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("data", codegen->ptr_type()));
  prototype.AddArgument(LlvmCodeGen::NamedVariable("len", codegen->i32_type()));

  LlvmBuilder builder(codegen->context());
  // args[0] = tuple_arg, args[1] = data, args[2] = len.
  llvm::Value* args[3];
  *fn = prototype.GeneratePrototype(&builder, &args[0]);

  llvm::BasicBlock *set_null_block, *parse_slot_block, *check_zero_block = nullptr;
  codegen->CreateIfElseBlocks(*fn, "set_null", "parse_slot",
      &set_null_block, &parse_slot_block);

  // Only non-string slots treat an empty field as NULL, so only they need the extra
  // "check_zero" block.
  if (!is_var_len_string) {
    check_zero_block = llvm::BasicBlock::Create(codegen->context(), "check_zero", *fn);
  }

  // Check if the data matches the configured NULL string.
  llvm::Value* is_null;
  if (check_null) {
    if (is_default_null) {
      is_null = builder.CreateCall(
          is_null_string_fn, llvm::ArrayRef<llvm::Value*>({args[1], args[2]}));
    } else {
      llvm::Value* const null_col_ir_str =
          codegen->GetStringConstant(&builder, null_col_val, len);
      is_null = builder.CreateCall(is_null_string_fn,
          llvm::ArrayRef<llvm::Value*>(
              {args[1], args[2], null_col_ir_str, codegen->GetI32Constant(len)}));
    }
  } else {
    // Constant FALSE as branch condition. We rely on later optimization passes
    // to remove the branch and THEN block.
    is_null = codegen->false_value();
  }
  builder.CreateCondBr(is_null, set_null_block,
      is_var_len_string ? parse_slot_block : check_zero_block);

  if (!is_var_len_string) {
    builder.SetInsertPoint(check_zero_block);
    // If len == 0 and it is not a string col, set slot to NULL
    llvm::Value* null_len =
        builder.CreateICmpEQ(args[2], codegen->GetI32Constant(0));
    builder.CreateCondBr(null_len, set_null_block, parse_slot_block);
  }

  // Codegen parse slot block
  builder.SetInsertPoint(parse_slot_block);
  llvm::Value* slot =
      builder.CreateStructGEP(nullptr, args[0], slot_desc->llvm_field_idx(), "slot");

  if (is_var_len_string) {
    llvm::Function* str_assign_fn = codegen->GetFunction(
        IRFunction::STRING_VALUE_ASSIGN, false);

    // TODO codegen memory allocation for CHAR
    DCHECK(slot_type.type != TYPE_CHAR);
    if (slot_type.type == TYPE_VARCHAR) {
      // determine if we need to truncate the string: assign min(len, maxlen) bytes.
      llvm::Value* maxlen = codegen->GetI32Constant(slot_type.len);
      llvm::Value* len_lt_maxlen =
          builder.CreateICmpSLT(args[2], maxlen, "len_lt_maxlen");
      llvm::Value* minlen =
          builder.CreateSelect(len_lt_maxlen, args[2], maxlen, "select_min_len");
      builder.CreateCall(str_assign_fn,
          llvm::ArrayRef<llvm::Value*>({slot, args[1], minlen}));
    } else {
      builder.CreateCall(str_assign_fn,
          llvm::ArrayRef<llvm::Value*>({slot, args[1], args[2]}));
    }
    builder.CreateRet(codegen->true_value());
  } else {
    // Pick the StringTo* IR function that matches the slot's type.
    IRFunction::Type parse_fn_enum;
    llvm::Function* parse_fn = nullptr;
    switch (slot_type.type) {
      case TYPE_BOOLEAN:
        parse_fn_enum = IRFunction::STRING_TO_BOOL;
        break;
      case TYPE_TINYINT:
        parse_fn_enum = IRFunction::STRING_TO_INT8;
        break;
      case TYPE_SMALLINT:
        parse_fn_enum = IRFunction::STRING_TO_INT16;
        break;
      case TYPE_INT:
        parse_fn_enum = IRFunction::STRING_TO_INT32;
        break;
      case TYPE_BIGINT:
        parse_fn_enum = IRFunction::STRING_TO_INT64;
        break;
      case TYPE_FLOAT:
        parse_fn_enum = IRFunction::STRING_TO_FLOAT;
        break;
      case TYPE_DOUBLE:
        parse_fn_enum = IRFunction::STRING_TO_DOUBLE;
        break;
      case TYPE_TIMESTAMP:
        parse_fn_enum = IRFunction::STRING_TO_TIMESTAMP;
        break;
      case TYPE_DATE:
        parse_fn_enum = IRFunction::STRING_TO_DATE;
        break;
      case TYPE_DECIMAL:
        // Decimals have one parser per storage size (in bytes).
        switch (slot_desc->slot_size()) {
          case 4:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL4;
            break;
          case 8:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL8;
            break;
          case 16:
            parse_fn_enum = IRFunction::STRING_TO_DECIMAL16;
            break;
          default:
            DCHECK(false);
            return Status("TextConverter::CodegenWriteSlot(): "
                "Decimal slots can't be this size.");
        }
        break;
      default:
        DCHECK(false);
        return Status("TextConverter::CodegenWriteSlot(): Codegen'd parser NYI for the "
            "slot_desc type");
    }
    parse_fn = codegen->GetFunction(parse_fn_enum, false);
    DCHECK(parse_fn != nullptr);

    // Set up trying to parse the string to the slot type
    llvm::BasicBlock *parse_success_block, *parse_failed_block;
    codegen->CreateIfElseBlocks(*fn, "parse_success", "parse_fail",
        &parse_success_block, &parse_failed_block);
    // The parser reports its outcome through an i32 out-parameter.
    LlvmCodeGen::NamedVariable parse_result("parse_result", codegen->i32_type());
    llvm::Value* parse_result_ptr = codegen->CreateEntryBlockAlloca(*fn, parse_result);

    llvm::CallInst* parse_return;
    // Call Impala's StringTo* function
    // Function implementations in exec/hdfs-scanner-ir.cc
    if (slot_type.type == TYPE_DECIMAL) {
      // Special case for decimal since it has additional precision/scale parameters
      parse_return = builder.CreateCall(parse_fn, {args[1], args[2],
          codegen->GetI32Constant(slot_type.precision),
          codegen->GetI32Constant(slot_type.scale), parse_result_ptr});
    } else if (slot_type.type == TYPE_TIMESTAMP) {
      // If the return value is large (more than 16 bytes in our toolchain) the first
      // parameter would be a pointer to value parsed and the return value of callee
      // should be ignored
      parse_return =
          builder.CreateCall(parse_fn, {slot, args[1], args[2], parse_result_ptr});
    } else {
      parse_return = builder.CreateCall(parse_fn, {args[1], args[2], parse_result_ptr});
    }
    llvm::Value* parse_result_val = builder.CreateLoad(parse_result_ptr, "parse_result");
    llvm::Value* failed_value =
        codegen->GetI32Constant(StringParser::PARSE_FAILURE);

    // Check for parse error.
    llvm::Value* parse_failed =
        builder.CreateICmpEQ(parse_result_val, failed_value, "failed");
    if (strict_mode) {
      // In strict_mode, also check if parse_result is PARSE_OVERFLOW.
      llvm::Value* overflow_value =
          codegen->GetI32Constant(StringParser::PARSE_OVERFLOW);
      llvm::Value* parse_overflow =
          builder.CreateICmpEQ(parse_result_val, overflow_value, "overflowed");
      parse_failed = builder.CreateOr(parse_failed, parse_overflow, "failed_or");
    }
    builder.CreateCondBr(parse_failed, parse_failed_block, parse_success_block);

    // Parse succeeded
    builder.SetInsertPoint(parse_success_block);
    // If the parsed value is in parse_return, move it into slot
    if (slot_type.type == TYPE_DECIMAL) {
#ifdef __aarch64__
      // On aarch64, the 4-byte decimal parser still returns an i64, so the value must
      // be truncated before it is stored.
      if (slot_desc->slot_size() == 4) {
        llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
        builder.CreateStore(temp_slot, slot);
      } else {
        llvm::Value* cast_slot =
          builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
        builder.CreateStore(parse_return, cast_slot);
      }
#else
      // For Decimal values, the return type generated by Clang is struct type rather than
      // integer so casting is necessary
      llvm::Value* cast_slot =
          builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
      builder.CreateStore(parse_return, cast_slot);
#endif
#ifdef __aarch64__
    } else if (slot_type.type == TYPE_DATE) {
      // On aarch64, for Date values, the return type generated by Clang is i64, not i32,
      // so truncation is necessary.
      llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
      builder.CreateStore(temp_slot, slot);
#endif
    } else if (slot_type.type != TYPE_TIMESTAMP) {
      // Timestamps were already written through the 'slot' pointer passed to the
      // parser above, so only the remaining types need an explicit store.
      builder.CreateStore(parse_return, slot);
    }
    builder.CreateRet(codegen->true_value());

    // Parse failed, set slot to null and return false
    builder.SetInsertPoint(parse_failed_block);
    slot_desc->CodegenSetNullIndicator(codegen, &builder, args[0], codegen->true_value());
    builder.CreateRet(codegen->false_value());
  }

  // Case where data matches the NULL string, or len == 0 and it is not a string col
  builder.SetInsertPoint(set_null_block);
  slot_desc->CodegenSetNullIndicator(codegen, &builder, args[0], codegen->true_value());
  builder.CreateRet(codegen->true_value());

  if (codegen->FinalizeFunction(*fn) == nullptr) {
    return Status("TextConverter::CodegenWriteSlot(): codegen'd "
        "WriteSlot function failed verification, see log");
  }
  return Status::OK();
}