facebook_business/adobjects/serverside/normalize.py (112 lines of code) (raw):
# coding=utf-8
# Copyright 2014 Facebook, Inc.
# You are hereby granted a non-exclusive, worldwide, royalty-free license to
# use, copy, modify, and distribute this software in source code or binary
# form for use in connection with the web services and APIs provided by
# Facebook.
# As with any software that integrates with the Facebook platform, your use
# of this software is subject to the Facebook Developer Principles and
# Policies [http://developers.facebook.com/policy/]. This copyright notice
# shall be included in all copies or substantial portions of the software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import datetime
import hashlib
import pycountry
import re
# defined regex for normalization of data
location_excluded_chars = re.compile(r"[0-9.\s\-()]")
isocode_included_chars = re.compile(r"[^a-z]")
email_pattern = re.compile(r".+@.+\..+")
md5_pattern = re.compile(r"^[a-f0-9]{32}$")
sha256_pattern = re.compile(r"^[a-f0-9]{64}$")
year_pattern = re.compile(r"^[0-9]{4}$")
class Normalize(object):
@staticmethod
def normalize_field(field, data):
return Normalize.normalize(field, data, True)
@staticmethod
def normalize_field_skip_hashing(field, data):
return Normalize.normalize(field, data, False)
@staticmethod
def normalize(field, data, hash_field):
"""Computes the normalized value for the given field type and data.
:param field: The field name that is being normalized.
:param data: The data that is being normalized.
:return: Normalized value.
:rtype: str
"""
if field is None:
raise TypeError('Field Type must be passed for Normalization')
if data is None or len(data) == 0:
return None
normalized_data = data.lower().strip()
if Normalize.is_already_hashed(normalized_data):
return normalized_data
if field == "em":
normalized_data = Normalize.validate_email(normalized_data)
elif field == "ct":
# Remove numbers, space and period character
normalized_data = location_excluded_chars.sub("", normalized_data)
elif field == "zp":
normalized_data = re.sub(r"\s","", normalized_data)
normalized_data = normalized_data.split("-")[0]
elif field == "st":
# Remove numbers, space and period character
normalized_data = location_excluded_chars.sub("", normalized_data)
elif field == "country":
# Remove any non-alpha characters from the data
normalized_data = isocode_included_chars.sub("", normalized_data)
if not Normalize.is_valid_country_code(normalized_data):
raise TypeError("Invalid format for country:'" + data + "'.Please follow ISO 2-letter ISO 3166-1 standard for representing country. eg: us")
elif field == "currency":
# Remove any non-alpha characters from the data
normalized_data = isocode_included_chars.sub("", normalized_data)
if len(normalized_data) != 3:
raise TypeError("Invalid format for currency:'" + data + "'.Please follow ISO 3-letter ISO 4217 standard for representing currency. Eg: usd")
elif field == "ph":
# Remove spaces and parenthesis within phone number
normalized_data = re.sub(r"[\s\-()]", "", normalized_data)
# Removes the starting + and leading two 0's
normalized_data = re.sub(r"^\+?0{0,2}", "", normalized_data)
international_number = Normalize.get_international_number(normalized_data)
if international_number is None:
raise ValueError("Invalid format for phone number:'" + normalized_data + "'. Please check passed phone number.")
else:
normalized_data = international_number
elif field == "f5first" or field == "f5last":
normalized_data = normalized_data[:5]
elif field == "fi":
normalized_data = normalized_data[:1]
elif field == "dobd":
if len(normalized_data) == 1:
normalized_data = '0' + normalized_data
try:
dobd_int = int(normalized_data)
if dobd_int < 1 or dobd_int > 31:
raise ValueError
except ValueError:
raise ValueError("Invalid format for dobd: '%s'. Day should be specified in 'DD' format." % data)
elif field == "dobm":
if len(normalized_data) == 1:
normalized_data = '0' + normalized_data
try:
dobm_int = int(normalized_data)
if dobm_int < 1 or dobm_int > 12:
raise ValueError
except ValueError:
raise ValueError("Invalid format for dobm: '%s'. Month should be specified in 'MM' format." % data)
elif field == "doby":
if not year_pattern.match(normalized_data):
raise ValueError("Invalid format for doby: '%s'. Year should be specified in 'YYYY' format." % data)
if hash_field:
normalized_data = Normalize.hash_sha_256(normalized_data)
return normalized_data
"""
Validates the email field for RFC 5322
:param token: The email token that is being validates.
:return: validated email value.
:rtype: str
"""
@staticmethod
def validate_email(email):
result = email_pattern.match(email)
if result is None:
raise TypeError('Invalid email format for the passed email:' + email + '.Please check the passed email format.')
return email
"""
Checks if the given data is already hashed by MD5 or SHA256 Hash
:param data: The token that is being checked for hashed.
:return: boolean representing the {md5/sha256} hash state of the token.
:rtype: bool
"""
@staticmethod
def is_already_hashed(data):
md5_match = md5_pattern.match(data)
sha256_match = sha256_pattern.match(data)
if md5_match is None and sha256_match is None:
return False
return True
@staticmethod
def hash_sha_256(input):
if input is None:
return None
input = input.encode('utf-8')
return hashlib.sha256(input).hexdigest()
@staticmethod
def get_international_number(phone_number):
# Removes the + and leading two 0's
phone_number = re.sub(r"^\+?0{0,2}", "", phone_number)
if phone_number.startswith('0'):
return None
# International Phone number with country calling code.
international_number_regex = re.compile(r'^\d{1,4}\(?\d{2,3}\)?\d{4,}$')
matched_groups = international_number_regex.match(phone_number)
if matched_groups is None:
return None
return matched_groups.group()
"""
Checks if the given country code is present in the ISO list
:param country code: The code that is being checked for presence.
:return: boolean indicating whether country code is valid.
:rtype: bool
"""
@staticmethod
def is_valid_country_code(country_code):
country_data = pycountry.countries.get(alpha_2=country_code.upper())
return country_data is not None