public static String parseFromLogLine()

in core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/ApacheAccessLog.java [61:115]


  /**
   * Access-log line layout, compiled once because {@link Pattern} is immutable and thread-safe.
   * Capture groups: 1 = client IP, 2 and 3 = two whitespace-free fields (unused here),
   * 4 = bracketed timestamp with numeric offset, 5 = quoted request, 6 = three-digit status,
   * 7 = byte count or "-", 8 = quoted referer, 9 = quoted user agent.
   */
  private static final Pattern ACCESS_LOG_LINE_PATTERN = Pattern.compile(
      "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})] \"(.+?)\" (\\d{3}) (\\d+|-) \"((?:[^\"]|\")+)\" \"([^\"]+)\"");

  /**
   * Parses one access-log line and serializes it to JSON.
   *
   * <p>The line is dropped (the literal {@code "{}"} is returned) when it is {@code null}, does
   * not match the expected layout, comes from a user agent that {@code CrawlerDetection} reports
   * as a known crawler, or its lower-cased request contains any non-empty entry of the
   * comma-separated {@code MudrodConstants.BLACK_LIST_REQUEST} property.
   *
   * @param log   raw log line; {@code null} is treated as an unparseable line
   * @param props configuration passed to {@code CrawlerDetection} and read for the request
   *              black list; a missing black-list property means "filter nothing"
   * @return the JSON form of the populated {@code ApacheAccessLog}, or {@code "{}"} when the line
   *         is skipped
   * @throws ParseException if the timestamp of a matching line cannot be parsed
   */
  public static String parseFromLogLine(String log, Properties props) throws ParseException {
    final String emptyJson = "{}";
    if (log == null) {
      return emptyJson;
    }

    Matcher matcher = ACCESS_LOG_LINE_PATTERN.matcher(log);
    if (!matcher.matches()) {
      return emptyJson;
    }

    // SwithtoNum is defined elsewhere; presumably it rewrites the month name as a number so the
    // "dd/MM/yyyy" pattern below can read it -- TODO confirm.
    // NOTE(review): the parse pattern has no zone field, so the offset captured in group 4 looks
    // like it is ignored unless SwithtoNum accounts for it -- confirm.
    String time = SwithtoNum(matcher.group(4));
    SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss", Locale.ENGLISH);
    Date date = formatter.parse(time);

    // A "-" byte count is stored as zero so it can be parsed as a number below.
    String bytes = matcher.group(7);
    if ("-".equals(bytes)) {
      bytes = "0";
    }

    String agent = matcher.group(9);
    CrawlerDetection crawlerDe = new CrawlerDetection(props);
    if (crawlerDe.checkKnownCrawler(agent)) {
      return emptyJson;
    }

    String request = matcher.group(5).toLowerCase(Locale.ENGLISH);
    String blackList = props.getProperty(MudrodConstants.BLACK_LIST_REQUEST);
    if (blackList != null) {
      for (String mimeType : blackList.split(",")) {
        // Skip empty entries: contains("") is always true, so an empty property value or a
        // stray comma would otherwise discard every single line.
        if (!mimeType.isEmpty() && request.contains(mimeType)) {
          return emptyJson;
        }
      }
    }

    ApacheAccessLog accesslog = new ApacheAccessLog();
    accesslog.LogType = MudrodConstants.HTTP_LOG;
    accesslog.IP = matcher.group(1);
    accesslog.Request = matcher.group(5);
    accesslog.Response = matcher.group(6);
    accesslog.Bytes = Double.parseDouble(bytes);
    accesslog.Referer = matcher.group(8);
    accesslog.Browser = agent;
    // "SSS" is milliseconds; the previous lower-case "sss" repeated the seconds value in the
    // fractional position (e.g. ":45.045").
    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ENGLISH);
    accesslog.Time = df.format(date);

    return new Gson().toJson(accesslog);
  }