public void guessType()

in engine/src/main/java/org/apache/hop/core/file/TextFileInputField.java [330:552]


  public void guessType() {
    // Guess fields...
    NumberFormat nf = NumberFormat.getInstance();
    DecimalFormat df = (DecimalFormat) nf;
    DecimalFormatSymbols dfs = new DecimalFormatSymbols();
    SimpleDateFormat daf = new SimpleDateFormat();

    daf.setLenient(false);

    // Start with a string...
    type = IValueMeta.TYPE_STRING;

    // If we have no samples, we assume a String...
    if (samples == null) {
      return;
    }

    // ////////////////////////////
    // DATES
    // ////////////////////////////

    // See if all samples can be transformed into a date...
    int datefmtCnt = dateFormats.length;
    boolean[] datefmt = new boolean[dateFormats.length];
    for (int i = 0; i < dateFormats.length; i++) {
      datefmt[i] = true;
    }
    int datenul = 0;

    for (int i = 0; i < samples.length; i++) {
      if (samples[i].length() > 0 && samples[i].equalsIgnoreCase(nullString)) {
        datenul++;
      } else {
        for (int x = 0; x < dateFormats.length; x++) {
          if (samples[i] == null || Const.onlySpaces(samples[i]) || samples[i].length() == 0) {
            datefmt[x] = false;
            datefmtCnt--;
          }

          if (datefmt[x]) {
            try {
              daf.applyPattern(dateFormats[x]);
              Date date = daf.parse(samples[i]);

              Calendar cal = Calendar.getInstance();
              cal.setTime(date);
              int year = cal.get(Calendar.YEAR);

              if (year < 1800 || year > 2200) {
                datefmt[x] = false; // Don't try it again in the future.
                datefmtCnt--; // One less that works..
              }
            } catch (Exception e) {
              datefmt[x] = false; // Don't try it again in the future.
              datefmtCnt--; // One less that works..
            }
          }
        }
      }
    }

    // If it is a date, copy info over to the format etc. Then return with the info.
    // If all samples where NULL values, we can't really decide what the type is.
    // So we're certainly not going to take a date, just take a string in that case.
    if (datefmtCnt > 0 && datenul != samples.length) {
      int first = -1;
      for (int i = 0; i < dateFormats.length && first < 0; i++) {
        if (datefmt[i]) {
          first = i;
        }
      }

      type = IValueMeta.TYPE_DATE;
      format = dateFormats[first];

      return;
    }

    // ////////////////////////////
    // NUMBERS
    // ////////////////////////////

    boolean isnumber = true;

    // Set decimal symbols to default
    decimalSymbol = "" + dfs.getDecimalSeparator();
    groupSymbol = "" + dfs.getGroupingSeparator();

    boolean[] numfmt = new boolean[numberFormats.length];
    int[] maxprecision = new int[numberFormats.length];
    for (int i = 0; i < numfmt.length; i++) {
      numfmt[i] = true;
      maxprecision[i] = -1;
    }
    int numfmtCnt = numberFormats.length;
    int numnul = 0;

    for (int i = 0; i < samples.length && isnumber; i++) {
      boolean containsDot = false;
      boolean containsComma = false;

      String field = samples[i];

      if (field.length() > 0 && field.equalsIgnoreCase(nullString)) {
        numnul++;
      } else {
        for (int x = 0; x < field.length() && isnumber; x++) {
          char ch = field.charAt(x);
          if (!Character.isDigit(ch)
              && ch != '.'
              && ch != ','
              && (ch != '-' || x > 0)
              && ch != 'E'
              && ch != 'e' // exponential
          ) {
            isnumber = false;
            numfmtCnt = 0;
          } else {
            if (ch == '.') {
              containsDot = true;
            }
            if (ch == ',') {
              containsComma = true;
            }
          }
        }
        // If it's still a number, try to parse it as a double
        if (isnumber) {
          if (containsDot && !containsComma) { // American style 174.5

            dfs.setDecimalSeparator('.');
            decimalSymbol = ".";
            dfs.setGroupingSeparator(',');
            groupSymbol = ",";
          } else if (!containsDot && containsComma) { // European style 174,5

            dfs.setDecimalSeparator(',');
            decimalSymbol = ",";
            dfs.setGroupingSeparator('.');
            groupSymbol = ".";
          } else if (containsDot && containsComma) { // Both appear!

            // What's the last occurance: decimal point!
            int idxDot = field.indexOf('.');
            int idxCom = field.indexOf(',');
            if (idxDot > idxCom) {
              dfs.setDecimalSeparator('.');
              decimalSymbol = ".";
              dfs.setGroupingSeparator(',');
              groupSymbol = ",";
            } else {
              dfs.setDecimalSeparator(',');
              decimalSymbol = ",";
              dfs.setGroupingSeparator('.');
              groupSymbol = ".";
            }
          }

          // Try the remaining possible number formats!
          for (int x = 0; x < numberFormats.length; x++) {
            if (numfmt[x]) {
              boolean islong = true;

              try {
                int prec = -1;
                // Try long integers first....
                if (!containsDot && !containsComma) {
                  try {
                    Long.parseLong(field);
                    prec = 0;
                  } catch (Exception e) {
                    islong = false;
                  }
                }

                if (!islong) { // Try the double

                  df.setDecimalFormatSymbols(dfs);
                  df.applyPattern(numberFormats[x]);

                  double d = df.parse(field).doubleValue();
                  prec = guessPrecision(d);
                }
                if (prec > maxprecision[x]) {
                  maxprecision[x] = prec;
                }
              } catch (Exception e) {
                numfmt[x] = false; // Don't try it again in the future.
                numfmtCnt--; // One less that works..
              }
            }
          }
        }
      }
    }

    // Still a number? Grab the result and return.
    // If all sample strings are empty or represent NULL values we can't take a number as type.
    if (numfmtCnt > 0 && numnul != samples.length) {
      int first = -1;
      for (int i = 0; i < numberFormats.length && first < 0; i++) {
        if (numfmt[i]) {
          first = i;
        }
      }

      type = IValueMeta.TYPE_NUMBER;
      format = numberFormats[first];
      precision = maxprecision[first];

      return;
    }

    //
    // Assume it's a string...
    //
    type = IValueMeta.TYPE_STRING;
    format = "";
    precision = -1;
    decimalSymbol = "";
    groupSymbol = "";
    currencySymbol = "";
  }