ees_network_drive/indexing_rule.py (50 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
"""The module defines methods used to check the rules to be followed while indexing the objects to
Enterprise Search.
"""
import re
from wcmatch import glob
class IndexingRules:
    """Apply the include/exclude indexing filters from the configuration to a file.

    Both ``include`` and ``exclude`` are read from the configuration as mappings of
    filter type (``size`` or any other key, which is treated as a path glob) to a
    list of pattern strings.
    """

    # Leading comparison operator of a size pattern, e.g. ">=" in ">=1024".
    _SIZE_OPERATOR = re.compile(r'[><=!]=?')
    # Runs of digits in a size pattern; the first one is used as the threshold.
    _SIZE_DIGITS = re.compile(r"[0-9]+")

    def __init__(self, config):
        # Either value may be falsy, in which case that rule set is skipped.
        self.include = config.get_value("include")
        self.exclude = config.get_value("exclude")

    def filter_size(self, file_details, symbol, pattern):
        """Compare the file size against the numeric part of a size pattern.

        :param file_details: dictionary containing file properties; ``file_size`` is read
        :param symbol: >,<,>=,<=,!,!=,=,== symbol
        :param pattern: numeric part of pattern as a string
        :returns: True or False denoting whether the file size satisfies the comparison,
            or None when ``symbol`` is not one of the supported operators
        """
        file_size = file_details['file_size']
        threshold = int(pattern)
        outcomes = {
            '>': file_size > threshold,
            '>=': file_size >= threshold,
            '<': file_size < threshold,
            '<=': file_size <= threshold,
            '!': file_size != threshold,
            '!=': file_size != threshold,
            '=': file_size == threshold,
            '==': file_size == threshold,
        }
        # An unsupported symbol is not an error here: the lookup yields None (falsy).
        return outcomes.get(symbol)

    def should_index(self, file_details):
        """Check whether the file satisfies both the include and the exclude rules.

        A rule set that is not configured (falsy) is treated as satisfied.

        :param file_details: dictionary containing file properties
        :returns: True or False denoting whether the file follows the indexing rules
        """
        passes_include, passes_exclude = True, True
        if self.include:
            passes_include = self.should_include_or_exclude(self.include, {}, file_details, 'include')
        if self.exclude:
            # Patterns that also appear under include are ignored for exclusion.
            passes_exclude = self.should_include_or_exclude(self.exclude, self.include, file_details, 'exclude')
        return passes_include and passes_exclude

    def should_include_or_exclude(self, pattern_dict, is_present_in_include, file_details, pattern_type):
        """Decide whether the file passes one rule set (include or exclude).

        Every filter type in ``pattern_dict`` is evaluated with
        ``follows_indexing_rule``. The file passes as soon as one filter type
        returns True; it fails when at least one filter type returned False and
        none returned True. Filter types with no effective patterns are ignored.

        :param pattern_dict: Dictionary containing key value pairs as filter type and list of patterns
        :param is_present_in_include: include patterns keyed by filter type; any pattern listed
            here for the same filter type is skipped (pass an empty dict to skip nothing)
        :param file_details: dictionary containing file properties
        :param pattern_type: include/exclude
        :returns: True or False denoting whether the file passes this rule set
        """
        should_index = True
        for filtertype, patterns in pattern_dict.items():
            already_included = (is_present_in_include or {}).get(filtertype) or []
            # Build a filtered copy instead of calling remove() on the list being
            # iterated: in-place removal skips the element after each removed one
            # and permanently alters the configured patterns, so repeated calls
            # on the same file could give different answers.
            effective = [value for value in (patterns or []) if value not in already_included]
            result = self.follows_indexing_rule(filtertype, effective, file_details, pattern_type)
            if result is True:
                return True
            if result is False:
                should_index = False
            # result is None: nothing to evaluate for this filter type.
        return should_index

    def follows_indexing_rule(self, filtertype, pattern, file_details, pattern_type):
        """Apply the patterns of one filter type to the file.

        For ``include`` the result is True as soon as one pattern matches the file.
        For ``exclude`` the result is True as soon as one pattern does NOT match.
        NOTE(review): for exclude this means a file is only rejected when it matches
        every exclude pattern of this filter type - confirm this is intended.

        A size pattern without a leading operator or without digits raises
        (TypeError / IndexError respectively), as the parsed parts are indexed
        without a check.

        :param filtertype: denotes the type of filter used: size/path_template
        :param pattern: list of include/exclude patterns provided for matching
        :param file_details: dictionary containing file properties
        :param pattern_type: include/exclude
        :returns: True or False as described above, or None when ``pattern`` is empty
        """
        if not pattern:
            return None
        for value in pattern:
            if filtertype == 'size':
                operator_match = self._SIZE_OPERATOR.match(value)
                digits = self._SIZE_DIGITS.findall(value)[0]
                result = self.filter_size(file_details, operator_match[0], digits)
            else:
                result = glob.globmatch(file_details['file_path'], value, flags=glob.GLOBSTAR)
            if (pattern_type == 'include' and result) or (pattern_type == 'exclude' and not result):
                return True
        return False