in core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/ApacheAccessLog.java [61:115]
/**
 * Pattern for one access-log line, compiled once instead of on every call.
 * Capture groups, in order: 1 client IP, 2 and 3 (unused here), 4 bracketed
 * timestamp with numeric zone offset, 5 quoted request line, 6 three-digit
 * status, 7 byte count or "-", 8 quoted referer, 9 quoted user agent.
 */
private static final Pattern ACCESS_LOG_LINE_PATTERN = Pattern.compile(
    "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})] \"(.+?)\" (\\d{3}) (\\d+|-) \"((?:[^\"]|\")+)\" \"([^\"]+)\"");

/**
 * Parses a single access-log line into the JSON form of an {@code ApacheAccessLog}.
 *
 * <p>The empty JSON object {@code "{}"} is returned when the line is
 * {@code null} or does not match the expected layout, when
 * {@code CrawlerDetection.checkKnownCrawler} reports the user agent as a
 * crawler, or when the lower-cased request contains any non-empty entry of
 * the comma-separated {@code MudrodConstants.BLACK_LIST_REQUEST} property.
 *
 * @param log   one raw line of the access log
 * @param props configuration; read for the request blacklist and handed to
 *              {@code CrawlerDetection}
 * @return the Gson serialization of the populated log entry, or {@code "{}"}
 *         if the line is rejected
 * @throws ParseException if the timestamp of a matching line cannot be parsed
 *                        with the pattern {@code dd/MM/yyyy:HH:mm:ss}
 */
public static String parseFromLogLine(String log, Properties props) throws ParseException {
  final String emptyJson = "{}";

  if (log == null) {
    return emptyJson;
  }

  // No separate group-count check is needed: groupCount() is a property of
  // the pattern (always 9 here), not of the matched input.
  Matcher matcher = ACCESS_LOG_LINE_PATTERN.matcher(log);
  if (!matcher.matches()) {
    return emptyJson;
  }

  // SwithtoNum is defined elsewhere in this class; presumably it rewrites the
  // textual month so that the numeric "MM" pattern below can parse it - confirm.
  String time = SwithtoNum(matcher.group(4));
  // NOTE(review): the pattern ends at the seconds, so the zone offset captured
  // in group 4 is not consumed by the parser; both formatters here run in the
  // JVM default time zone and the trailing 'Z' below is a literal - confirm
  // that this is the intended handling of the offset.
  SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss", Locale.ENGLISH);
  Date date = formatter.parse(time);

  // A "-" byte count means no size was logged; treat it as zero.
  String bytes = matcher.group(7);
  if ("-".equals(bytes)) {
    bytes = "0";
  }

  String agent = matcher.group(9);
  CrawlerDetection crawlerDe = new CrawlerDetection(props);
  if (crawlerDe.checkKnownCrawler(agent)) {
    return emptyJson;
  }

  // Blacklist matching is done against the lower-cased request line.
  String request = matcher.group(5).toLowerCase(Locale.ENGLISH);
  String blackList = props.getProperty(MudrodConstants.BLACK_LIST_REQUEST);
  if (blackList != null) {
    for (String mimeType : blackList.split(",")) {
      // Skip empty entries (empty property value, doubled comma):
      // String.contains("") is always true and would discard every line.
      if (!mimeType.isEmpty() && request.contains(mimeType)) {
        return emptyJson;
      }
    }
  }

  ApacheAccessLog accesslog = new ApacheAccessLog();
  accesslog.LogType = MudrodConstants.HTTP_LOG;
  accesslog.IP = matcher.group(1);
  accesslog.Request = matcher.group(5);
  accesslog.Response = matcher.group(6);
  accesslog.Bytes = Double.parseDouble(bytes);
  accesslog.Referer = matcher.group(8);
  accesslog.Browser = agent;

  // "SSS" is the millisecond field; the previous lower-case "sss" repeated the
  // seconds value in the fractional position (e.g. ":07.007").
  SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ENGLISH);
  accesslog.Time = df.format(date);

  return new Gson().toJson(accesslog);
}