void csv_writer::csv_print()

in src/core/storage/sframe_data/csv_writer.cpp [71:196]


/**
 * Writes one flexible_type value to \p out as a single CSV field.
 *
 * How the field is rendered depends on the value's type and on the
 * writer's quote_level:
 *  - INTEGER / FLOAT: string form of the value, wrapped in quote_char only
 *    at QUOTE_ALL.
 *  - STRING: escaped via escape_string(); at levels below QUOTE_ALL the
 *    string is first scanned so that the lightest sufficient treatment can
 *    be chosen (see the comments in that branch).
 *  - LIST / DICT: rendered by csv_print_internal(), then passed through
 *    escape_string() with quoting unless the level is QUOTE_NONE.
 *  - UNDEFINED: na_value, wrapped in quote_char only at QUOTE_ALL.
 *  - everything else (including DATETIME and VECTOR): string form of the
 *    value, wrapped in quote_char unless the level is QUOTE_NONE.
 *
 * \param out                 Stream receiving the field text.
 * \param val                 Value to print.
 * \param allow_empty_output  When false, a zero-length string (at any level
 *                            below QUOTE_ALL) is written as two quote
 *                            characters instead of nothing.
 */
void csv_writer::csv_print(std::ostream& out,
                           const flexible_type& val,
                           bool allow_empty_output) {
  const bool quote_everything = (quote_level == csv_quote_level::QUOTE_ALL);
  const bool quote_nothing = (quote_level == csv_quote_level::QUOTE_NONE);

  // Streams the value's string conversion, optionally between quote chars.
  auto emit_plain = [&](bool quoted) {
    if (quoted) {
      out << quote_char << std::string(val) << quote_char;
    } else {
      out << std::string(val);
    }
  };

  // Runs escape_string() into the shared string buffer and writes the result.
  auto emit_escaped = [&](const flex_string& text,
                          bool honor_escape_char,
                          bool wrap_in_quotes) {
    escape_string(text, escape_char, honor_escape_char,
                  quote_char, wrap_in_quotes,
                  double_quote,
                  m_string_escape_buffer, m_string_escape_buffer_len);
    out.write(m_string_escape_buffer.c_str(), m_string_escape_buffer_len);
  };

  // True for characters that force the full escaping path: common control
  // characters, the escape character, and the first character of the line
  // terminator / delimiter (only the first character of each is compared).
  auto is_special = [&](char ch) -> bool {
    return ch == '\t' || ch == '\r' || ch == '\n' || ch == '\b' ||
           ch == escape_char ||
           (!line_terminator.empty() && ch == line_terminator[0]) ||
           (!delimiter.empty() && ch == delimiter[0]);
  };

  switch (val.get_type()) {
    case flex_type_enum::UNDEFINED: {
      if (quote_everything) {
        out << quote_char << na_value << quote_char;
      } else {
        out.write(na_value.c_str(), na_value.length());
      }
      break;
    }

    case flex_type_enum::INTEGER:
    case flex_type_enum::FLOAT: {
      // numbers are only quoted at QUOTE_ALL
      emit_plain(quote_everything);
      break;
    }

    case flex_type_enum::STRING: {
      const flex_string& text = val.get<flex_string>();

      if (quote_everything) {
        // always quoted, always through the whole escaping sequence
        emit_escaped(text, use_escape_char, true);
        break;
      }

      // Below QUOTE_ALL we look for the cheapest adequate output. Scan once
      // to learn (a) whether any special character is present and (b)
      // whether the quote character is present; stop as soon as both are
      // known to be true.
      bool needs_full_escape = false;
      bool contains_quote = false;
      for (auto it = text.begin();
           it != text.end() && !(needs_full_escape && contains_quote);
           ++it) {
        const char ch = *it;
        if (!needs_full_escape) needs_full_escape = is_special(ch);
        if (!contains_quote) contains_quote = (ch == quote_char);
      }

      if (!allow_empty_output && text.length() == 0) {
        // empty string that must not vanish: emit a bare pair of quotes
        out << quote_char << quote_char;
        break;
      }

      if (!needs_full_escape) {
        if (!contains_quote) {
          // nothing special at all: write the string verbatim
          out.write(text.c_str(), text.length());
          break;
        }
        if (double_quote) {
          // only quote characters to deal with, and double-quoting is on:
          // no surrounding quotes, escape character not used
          emit_escaped(text, false, false);
          break;
        }
      }

      // Remaining cases go through escape_string() with the configured
      // escape-character setting; surrounding quotes are added unless the
      // level is QUOTE_NONE.
      emit_escaped(text, use_escape_char, !quote_nothing);
      break;
    }

    case flex_type_enum::LIST:
    case flex_type_enum::DICT: {
      // render the nested value into the scratch string first
      m_complex_type_temporary.clear();
      csv_print_internal(m_complex_type_temporary, val);

      if (quote_nothing) {
        out.write(m_complex_type_temporary.c_str(),
                  m_complex_type_temporary.length());
      } else {
        escape_string(m_complex_type_temporary, escape_char, use_escape_char,
                      quote_char, true,
                      double_quote,
                      m_complex_type_escape_buffer,
                      m_complex_type_escape_buffer_len);
        out.write(m_complex_type_escape_buffer.c_str(),
                  m_complex_type_escape_buffer_len);
      }
      break;
    }

    case flex_type_enum::DATETIME:
    case flex_type_enum::VECTOR:
    default: {
      // quoted at every level above QUOTE_NONE
      emit_plain(!quote_nothing);
      break;
    }
  }
}