private int checkByRate()

in core/src/main/java/org/apache/sdap/mudrod/weblog/pre/CrawlerDetection.java [137:214]


  /**
   * Checks the per-minute request rate of one user (identified by IP) and, when
   * the rate is below the configured threshold, copies that user's log entries
   * into the cleanup type.
   *
   * <p>The peak rate is taken from a per-minute date histogram over the "Time"
   * field, ordered by descending document count, so the first bucket holds the
   * busiest minute. If that count reaches the threshold read from
   * {@code MudrodConstants.REQUEST_RATE}, nothing is copied.
   *
   * <p>Otherwise every matching log entry is re-indexed through the bulk
   * processor with two extra fields: "RequestUrl" (for HTTP logs, the base URL
   * plus the path extracted from the request line; otherwise the raw request)
   * and "ToLast" (absolute number of seconds between this entry and the
   * previously processed one, 0 for the first entry).
   *
   * @param es   Elasticsearch driver used for searching and bulk indexing
   * @param user value matched against the "IP" field of the log entries
   * @return 0 if the busiest minute reached the configured request rate,
   *         1 if the user's entries were copied to the cleanup type
   */
  private int checkByRate(ESDriver es, String user) {

    int rate = Integer.parseInt(props.getProperty(MudrodConstants.REQUEST_RATE));
    Pattern pattern = Pattern.compile("get (.*?) http/*");

    BoolQueryBuilder filterSearch = new BoolQueryBuilder();
    filterSearch.must(QueryBuilders.termQuery("IP", user));

    AggregationBuilder aggregation = AggregationBuilders
            .dateHistogram("by_minute")
            .field("Time")
            .dateHistogramInterval(DateHistogramInterval.MINUTE)
            .order(Order.COUNT_DESC);
    SearchResponse checkRobot = es.getClient()
            .prepareSearch(logIndex)
            .setTypes(httpType, ftpType)
            .setQuery(filterSearch)
            .setSize(0)
            .addAggregation(aggregation)
            .execute()
            .actionGet();

    Histogram agg = checkRobot.getAggregations().get("by_minute");

    // Buckets are sorted by count descending, so the first one is the peak.
    // An empty histogram (no matching entries with a "Time" value) means a
    // peak of zero rather than an IndexOutOfBoundsException.
    List<? extends Histogram.Bucket> botList = agg.getBuckets();
    long maxCount = botList.isEmpty() ? 0 : botList.get(0).getDocCount();
    if (maxCount >= rate) {
      return 0;
    }

    // Loop-invariant values, resolved once instead of once per hit.
    String baseUrl = props.getProperty(MudrodConstants.BASE_URL);
    DateTimeFormatter fmt = ISODateTimeFormat.dateTime();

    DateTime previousTime = null;
    SearchResponse scrollResp = es.getClient()
            .prepareSearch(logIndex)
            .setTypes(httpType, ftpType)
            .setScroll(new TimeValue(60000))
            .setQuery(filterSearch)
            .setSize(100)
            .execute()
            .actionGet();

    while (scrollResp.getHits().getHits().length > 0) {
      for (SearchHit hit : scrollResp.getHits().getHits()) {
        Map<String, Object> result = hit.getSource();
        String logtype = (String) result.get("LogType");

        // Constant-first comparison: an entry without "LogType" is handled
        // like a non-HTTP entry instead of raising a NullPointerException.
        if (MudrodConstants.HTTP_LOG.equals(logtype)) {
          String request = (String) result.get("Request");
          // Falls back to the unmodified request when the pattern does not
          // match; a missing request is stored as null, as the non-HTTP
          // branch would do.
          String requestUrl = request;
          if (request != null) {
            Matcher matcher = pattern.matcher(request.trim().toLowerCase(Locale.ENGLISH));
            // If the pattern matches several times, the last match wins.
            while (matcher.find()) {
              requestUrl = baseUrl + matcher.group(1);
            }
          }
          result.put("RequestUrl", requestUrl);
        } else {
          result.put("RequestUrl", result.get("Request"));
        }

        DateTime currentTime = fmt.parseDateTime((String) result.get("Time"));
        int toLast = previousTime == null
                ? 0
                : Math.abs(Seconds.secondsBetween(previousTime, currentTime).getSeconds());
        result.put("ToLast", toLast);

        IndexRequest ir = new IndexRequest(logIndex, cleanupType).source(result);
        es.getBulkProcessor().add(ir);
        previousTime = currentTime;
      }

      scrollResp = es.getClient()
              .prepareSearchScroll(scrollResp.getScrollId())
              .setScroll(new TimeValue(600000))
              .execute()
              .actionGet();
    }

    // Release the server-side scroll context now instead of leaving it open
    // until its keep-alive expires; this method runs once per user.
    es.getClient()
            .prepareClearScroll()
            .addScrollId(scrollResp.getScrollId())
            .execute()
            .actionGet();

    return 1;
  }