in engine/src/main/java/org/apache/hop/pipeline/transforms/file/BaseFileField.java [334:556]
/**
 * Guesses the type of this field from the values in {@code samples} and stores the outcome in
 * {@code type}, {@code format}, {@code precision}, {@code decimalSymbol} and {@code groupSymbol}.
 *
 * <p>The detection runs in this order:
 *
 * <ol>
 *   <li>No sample array at all: the type is String and nothing else is touched.
 *   <li>Dates: every pattern in {@code dateFormats} is tried (non-lenient) on every sample. A
 *       pattern survives only if all non-null-marker samples parse with it and yield a year
 *       between 1800 and 2200. The first surviving pattern becomes {@code format}.
 *   <li>Numbers: every sample may only contain digits, '.', ',', a leading '-' and 'E'/'e'. The
 *       decimal and grouping symbols are derived from the separators found in the samples, and
 *       every pattern in {@code numberFormats} is tried. The first surviving pattern becomes
 *       {@code format} and the largest precision seen for it becomes {@code precision}.
 *   <li>Otherwise: String, with format, precision and symbols reset.
 * </ol>
 *
 * <p>A sample that equals {@code nullString} (ignoring case) is counted as a NULL value and does
 * not disqualify a type; if all samples are NULL values the type stays String. A {@code null},
 * empty or blank sample rules out Date, and a {@code null} or unparseable sample rules out
 * Number.
 */
public void guessType() {
  // Start with a string; this is also the answer when there is nothing to look at.
  type = IValueMeta.TYPE_STRING;
  if (samples == null) {
    return;
  }

  // ////////////////////////////
  // DATES
  // ////////////////////////////
  SimpleDateFormat daf = new SimpleDateFormat();
  daf.setLenient(false);

  // datefmt[x] stays true for as long as dateFormats[x] has parsed every sample so far.
  boolean[] datefmt = new boolean[dateFormats.length];
  for (int x = 0; x < datefmt.length; x++) {
    datefmt[x] = true;
  }
  int datefmtCnt = dateFormats.length; // number of entries in datefmt that are still true
  int datenul = 0; // number of samples that represent the NULL value

  for (int i = 0; i < samples.length; i++) {
    String sample = samples[i];
    if (sample != null && sample.length() > 0 && sample.equalsIgnoreCase(nullString)) {
      datenul++;
    } else if (sample == null || sample.length() == 0 || Const.onlySpaces(sample)) {
      // A missing or blank sample can never be a date: drop every remaining format at once.
      // This is checked before touching the sample so that a null entry cannot cause an NPE,
      // and outside the format loop so that the counter is not decremented more than once.
      for (int x = 0; x < datefmt.length; x++) {
        datefmt[x] = false;
      }
      datefmtCnt = 0;
    } else {
      for (int x = 0; x < dateFormats.length; x++) {
        if (!datefmt[x]) {
          continue;
        }
        boolean plausible;
        try {
          daf.applyPattern(dateFormats[x]);
          Date date = daf.parse(sample);
          Calendar cal = Calendar.getInstance();
          cal.setTime(date);
          int year = cal.get(Calendar.YEAR);
          // Reject dates that parse but land in an implausible year.
          plausible = year >= 1800 && year <= 2200;
        } catch (Exception e) {
          // Unparseable sample or unusable pattern: either way this format is out.
          plausible = false;
        }
        if (!plausible) {
          datefmt[x] = false; // Don't try it again in the future.
          datefmtCnt--; // One less that works..
        }
      }
    }
  }

  // If it is a date, copy the format over and return.
  // If all samples were NULL values we can't really decide what the type is,
  // so in that case we do not take a date.
  if (datefmtCnt > 0 && datenul != samples.length) {
    int first = -1;
    for (int i = 0; i < dateFormats.length && first < 0; i++) {
      if (datefmt[i]) {
        first = i;
      }
    }
    type = IValueMeta.TYPE_DATE;
    format = dateFormats[first];
    return;
  }

  // ////////////////////////////
  // NUMBERS
  // ////////////////////////////
  DecimalFormat df = (DecimalFormat) NumberFormat.getInstance();
  DecimalFormatSymbols dfs = new DecimalFormatSymbols();

  boolean isnumber = true;

  // Set decimal symbols to default
  decimalSymbol = String.valueOf(dfs.getDecimalSeparator());
  groupSymbol = String.valueOf(dfs.getGroupingSeparator());

  // numfmt[x] stays true for as long as numberFormats[x] has parsed every sample so far;
  // maxprecision[x] is the largest precision seen with that format (-1 = none yet).
  boolean[] numfmt = new boolean[numberFormats.length];
  int[] maxprecision = new int[numberFormats.length];
  for (int x = 0; x < numfmt.length; x++) {
    numfmt[x] = true;
    maxprecision[x] = -1;
  }
  int numfmtCnt = numberFormats.length; // number of entries in numfmt that are still true
  int numnul = 0; // number of samples that represent the NULL value

  for (int i = 0; i < samples.length && isnumber; i++) {
    String field = samples[i];

    if (field != null && field.length() > 0 && field.equalsIgnoreCase(nullString)) {
      numnul++;
      continue;
    }
    if (field == null) {
      // A missing sample cannot be parsed as a number (and must not be dereferenced).
      isnumber = false;
      numfmtCnt = 0;
      continue;
    }

    // Only digits, separators, a leading minus sign and an exponent marker are allowed.
    boolean containsDot = false;
    boolean containsComma = false;
    for (int x = 0; x < field.length() && isnumber; x++) {
      char ch = field.charAt(x);
      boolean leadingMinus = ch == '-' && x == 0;
      boolean exponent = ch == 'E' || ch == 'e';
      if (Character.isDigit(ch) || ch == '.' || ch == ',' || leadingMinus || exponent) {
        if (ch == '.') {
          containsDot = true;
        }
        if (ch == ',') {
          containsComma = true;
        }
      } else {
        isnumber = false;
        numfmtCnt = 0;
      }
    }
    if (!isnumber) {
      continue;
    }

    // Derive the decimal/grouping symbols from the separators present in this sample.
    // A sample without any separator leaves the symbols as they are.
    if (containsDot || containsComma) {
      boolean dotIsDecimal;
      if (containsDot && containsComma) {
        // Both appear: the separator that occurs last is the decimal symbol.
        dotIsDecimal = field.lastIndexOf('.') > field.lastIndexOf(',');
      } else {
        // Only one appears: American style 174.5 or European style 174,5
        dotIsDecimal = containsDot;
      }
      char decimalChar = dotIsDecimal ? '.' : ',';
      char groupChar = dotIsDecimal ? ',' : '.';
      dfs.setDecimalSeparator(decimalChar);
      dfs.setGroupingSeparator(groupChar);
      decimalSymbol = String.valueOf(decimalChar);
      groupSymbol = String.valueOf(groupChar);
    }

    // Try the remaining possible number formats!
    for (int x = 0; x < numberFormats.length; x++) {
      if (!numfmt[x]) {
        continue;
      }
      try {
        int prec = -1;

        // Try long integers first; a sample with a separator is never a plain long.
        boolean islong = false;
        if (!containsDot && !containsComma) {
          try {
            Long.parseLong(field);
            prec = 0;
            islong = true;
          } catch (NumberFormatException e) {
            // Not a plain long: fall through to the decimal parse below.
          }
        }

        if (!islong) { // Try the double
          df.setDecimalFormatSymbols(dfs);
          df.applyPattern(numberFormats[x]);
          double d = df.parse(field).doubleValue();
          prec = guessPrecision(d);
        }

        if (prec > maxprecision[x]) {
          maxprecision[x] = prec;
        }
      } catch (Exception e) {
        // Unparseable sample or unusable pattern: either way this format is out.
        numfmt[x] = false; // Don't try it again in the future.
        numfmtCnt--; // One less that works..
      }
    }
  }

  // Still a number? Grab the result and return.
  // If all sample strings represent NULL values we can't take a number as type.
  if (numfmtCnt > 0 && numnul != samples.length) {
    int first = -1;
    for (int i = 0; i < numberFormats.length && first < 0; i++) {
      if (numfmt[i]) {
        first = i;
      }
    }
    type = IValueMeta.TYPE_NUMBER;
    format = numberFormats[first];
    precision = maxprecision[first];
    return;
  }

  //
  // Assume it's a string...
  //
  type = IValueMeta.TYPE_STRING;
  format = "";
  precision = -1;
  decimalSymbol = "";
  groupSymbol = "";
  currencySymbol = "";
}