ees_sharepoint/sharepoint_client.py (94 lines of code) (raw):
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License 2.0;
# you may not use this file except in compliance with the Elastic License 2.0.
#
"""sharepoint_client allows to call Sharepoint or make queries for it."""
import time
import requests
from requests.exceptions import RequestException
from requests_ntlm import HttpNtlmAuth
class SharePoint:
"""This class encapsulates all module logic."""
def __init__(self, config, logger):
self.logger = logger
self.retry_count = int(config.get_value("retry_count"))
self.host = config.get_value("sharepoint.host_url")
self.domain = config.get_value("sharepoint.domain")
self.username = config.get_value("sharepoint.username")
self.password = config.get_value("sharepoint.password")
self.secure_connection = config.get_value("sharepoint.secure_connection")
self.certificate_path = config.get_value("sharepoint.certificate_path")
def get(self, rel_url, query, param_name):
""" Invokes a GET call to the Sharepoint server
:param rel_url: relative url to the sharepoint farm
:param query: query for passing arguments to the url
:param param_name: parameter name whether it is sites, lists, list_items, drive_items, permissions or deindex
Returns:
Response of the GET call"""
request_headers = {
"accept": "application/json;odata=verbose",
"content-type": "application/json;odata=verbose"
}
response_list = {"d": {"results": []}}
paginate_query = True
skip, top = 0, 5000
while paginate_query:
if param_name in ["sites", "lists"]:
paginate_query = query + f"&$skip={skip}&$top={top}"
elif skip == 0 and param_name in ["list_items", "drive_items"]:
paginate_query = query + f"&$top={top}"
elif param_name in ["permission_users", "permission_groups", "deindex", "attachment"]:
paginate_query = query
url = f"{self.host}/{rel_url}{paginate_query}"
skip += 5000
retry = 0
if self.secure_connection and self.certificate_path:
verify = self.certificate_path
else:
verify = self.secure_connection
while retry <= self.retry_count:
try:
response = requests.get(
url,
auth=HttpNtlmAuth(self.domain + "\\" + self.username, self.password),
headers=request_headers,
verify=verify,
)
if response.ok:
if param_name in ["sites", "lists"] and response:
response_data = response.json()
response_result = response_data.get("d", {}).get("results")
response_list["d"]["results"].extend(response_result)
if len(response_result) < 5000:
paginate_query = None
break
if param_name in ["list_items", "drive_items"] and response:
response_data = response.json()
response_list["d"]["results"].extend(response_data.get("d", {}).get("results"))
paginate_query = response_data.get("d", {}).get("__next", False)
break
return response
if response.status_code >= 400 and response.status_code < 500:
if not (param_name == 'deindex' and response.status_code == 404):
self.logger.exception(
f"Error: {response.reason}. Error while fetching from the sharepoint, url: {url}."
)
return response
self.logger.error(
f"Error while fetching from the sharepoint, url: {url}. Retry Count: {retry}. Error: {response.reason}"
)
# This condition is to avoid sleeping for the last time
if retry < self.retry_count:
time.sleep(2 ** retry)
retry += 1
paginate_query = None
continue
except RequestException as exception:
self.logger.exception(
f"Error while fetching from the sharepoint, url: {url}. Retry Count: {retry}. Error: {exception}"
)
# This condition is to avoid sleeping for the last time
if retry < self.retry_count:
time.sleep(2 ** retry)
else:
return False
retry += 1
if retry > self.retry_count:
return response
return response_list
@staticmethod
def get_query(start_time, end_time, param_name):
""" returns the query for each objects
:param start_time: start time of the interval for fetching the documents
:param end_time: end time of the interval for fetching the documents
Returns:
query: query for each object"""
query = ""
if param_name == "sites":
query = f"?$filter=(LastItemModifiedDate ge datetime'{start_time}') and (LastItemModifiedDate le datetime'{end_time}')"
elif param_name == "lists":
query = f"?$expand=RootFolder&$filter=(LastItemModifiedDate ge datetime'{start_time}') and (LastItemModifiedDate le datetime'{end_time}') and (Hidden eq false)"
else:
query = f"&$filter=(Modified ge datetime'{start_time}') and (Modified le datetime'{end_time}')"
return query