in core/gfwlist/parse.py [0:0]
def parse_gfwlist(content):
gfwlist = content.splitlines(False)
domains = set()
for line in gfwlist:
if line.find('.*') >= 0:
continue
elif line.find('*') >= 0:
line = line.replace('*', '/')
if line.startswith('!'):
continue
elif line.startswith('['):
continue
elif line.startswith('@'):
# ignore white list
continue
elif line.startswith('||'):
add_domain_to_set(domains, line.lstrip('||'))
elif line.startswith('|'):
add_domain_to_set(domains, line.lstrip('|'))
elif line.startswith('.'):
add_domain_to_set(domains, line.lstrip('.'))
else:
add_domain_to_set(domains, line)
# TODO: reduce ['www.google.com', 'google.com'] to ['google.com']
return domains