public void guessType()

in plugins/tech/google/src/main/java/org/apache/hop/pipeline/transforms/googlesheets/GoogleSheetsInputField.java [311:540]


  public void guessType() {
    // Guess fields...
    NumberFormat nf = NumberFormat.getInstance();
    DecimalFormat df = (DecimalFormat) nf;
    DecimalFormatSymbols dfs = new DecimalFormatSymbols();
    SimpleDateFormat daf = new SimpleDateFormat();

    daf.setLenient(false);

    // Start with a string...
    type = IValueMeta.TYPE_STRING;

    // If we have no samples, we assume a String...
    if (samples == null) {
      return;
    }

    // ////////////////////////////
    // DATES
    // ////////////////////////////

    // See if all samples can be transformed into a date...
    int datefmt_cnt = date_formats.length;
    boolean[] datefmt = new boolean[date_formats.length];
    for (int i = 0; i < date_formats.length; i++) {
      datefmt[i] = true;
    }
    int datenul = 0;

    for (int i = 0; i < samples.length; i++) {
      if (samples[i].length() > 0 && samples[i].equalsIgnoreCase(nullString)) {
        datenul++;
      } else {
        for (int x = 0; x < date_formats.length; x++) {
          if (samples[i] == null || Const.onlySpaces(samples[i]) || samples[i].length() == 0) {
            datefmt[x] = false;
            datefmt_cnt--;
          }

          if (datefmt[x]) {
            try {
              daf.applyPattern(date_formats[x]);
              Date date = daf.parse(samples[i]);

              Calendar cal = Calendar.getInstance();
              cal.setTime(date);
              int year = cal.get(Calendar.YEAR);

              if (year < 1800 || year > 2200) {
                datefmt[x] = false; // Don't try it again in the future.
                datefmt_cnt--; // One less that works..
              }
            } catch (Exception e) {
              datefmt[x] = false; // Don't try it again in the future.
              datefmt_cnt--; // One less that works..
            }
          }
        }
      }
    }

    // If it is a date, copy info over to the format etc. Then return with the info.
    // If all samples where NULL values, we can't really decide what the type is.
    // So we're certainly not going to take a date, just take a string in that case.
    if (datefmt_cnt > 0 && datenul != samples.length) {
      int first = -1;
      for (int i = 0; i < date_formats.length && first < 0; i++) {
        if (datefmt[i]) {
          first = i;
        }
      }

      type = IValueMeta.TYPE_DATE;
      format = date_formats[first];

      return;
    }

    // ////////////////////////////
    // NUMBERS
    // ////////////////////////////

    boolean isnumber = true;

    // Set decimal symbols to default
    decimalSymbol = "" + dfs.getDecimalSeparator();
    groupSymbol = "" + dfs.getGroupingSeparator();

    boolean[] numfmt = new boolean[number_formats.length];
    int[] maxprecision = new int[number_formats.length];
    for (int i = 0; i < numfmt.length; i++) {
      numfmt[i] = true;
      maxprecision[i] = -1;
    }
    int numfmt_cnt = number_formats.length;
    int numnul = 0;

    for (int i = 0; i < samples.length && isnumber; i++) {
      boolean contains_dot = false;
      boolean contains_comma = false;

      String field = samples[i];

      if (field.length() > 0 && field.equalsIgnoreCase(nullString)) {
        numnul++;
      } else {
        for (int x = 0; x < field.length() && isnumber; x++) {
          char ch = field.charAt(x);
          if (!Character.isDigit(ch)
              && ch != '.'
              && ch != ','
              && (ch != '-' || x > 0)
              && ch != 'E'
              && ch != 'e' // exponential
          ) {
            isnumber = false;
            numfmt_cnt = 0;
          } else {
            if (ch == '.') {
              contains_dot = true;
            }
            if (ch == ',') {
              contains_comma = true;
            }
          }
        }
        // If it's still a number, try to parse it as a double
        if (isnumber) {
          if (contains_dot && !contains_comma) { // American style 174.5

            dfs.setDecimalSeparator('.');
            decimalSymbol = ".";
            dfs.setGroupingSeparator(',');
            groupSymbol = ",";
          } else if (!contains_dot && contains_comma) { // European style 174,5

            dfs.setDecimalSeparator(',');
            decimalSymbol = ",";
            dfs.setGroupingSeparator('.');
            groupSymbol = ".";
          } else if (contains_dot && contains_comma) { // Both appear!

            // What's the last occurance: decimal point!
            int idx_dot = field.indexOf('.');
            int idx_com = field.indexOf(',');
            if (idx_dot > idx_com) {
              dfs.setDecimalSeparator('.');
              decimalSymbol = ".";
              dfs.setGroupingSeparator(',');
              groupSymbol = ",";
            } else {
              dfs.setDecimalSeparator(',');
              decimalSymbol = ",";
              dfs.setGroupingSeparator('.');
              groupSymbol = ".";
            }
          }

          // Try the remaining possible number formats!
          for (int x = 0; x < number_formats.length; x++) {
            if (numfmt[x]) {
              boolean islong = true;

              try {
                int prec = -1;
                // Try long integers first....
                if (!contains_dot && !contains_comma) {
                  try {
                    Long.parseLong(field);
                    prec = 0;
                  } catch (Exception e) {
                    islong = false;
                  }
                }

                if (!islong) { // Try the double

                  df.setDecimalFormatSymbols(dfs);
                  df.applyPattern(number_formats[x]);

                  double d = df.parse(field).doubleValue();
                  prec = guessPrecision(d);
                }
                if (prec > maxprecision[x]) {
                  maxprecision[x] = prec;
                }
              } catch (Exception e) {
                numfmt[x] = false; // Don't try it again in the future.
                numfmt_cnt--; // One less that works..
              }
            }
          }
        }
      }
    }

    // Still a number? Grab the result and return.
    // If all sample strings are empty or represent NULL values we can't take a number as type.
    if (numfmt_cnt > 0 && numnul != samples.length) {
      int first = -1;
      for (int i = 0; i < number_formats.length && first < 0; i++) {
        if (numfmt[i]) {
          first = i;
        }
      }

      type = IValueMeta.TYPE_NUMBER;
      format = number_formats[first];
      precision = maxprecision[first];

      // Wait a minute!!! What about Integers?
      // OK, only if the precision is 0 and the length <19 (java long integer)
      /*
       * if (length<19 && precision==0 && !containsDot && !containsComma) { type=ValueMetaInterface.TYPE_INTEGER;
       * decimalSymbol=""; groupSymbol=""; }
       */

      return;
    }

    //
    // Assume it's a string...
    //
    type = IValueMeta.TYPE_STRING;
    format = "";
    precision = -1;
    decimalSymbol = "";
    groupSymbol = "";
    currencySymbol = "";
  }