in scripts/origin_content_classifier.py [0:0]
def classify_sitelist(file, tag, matchdict, field):
    """Record the URL of a saved response when one of its fields matches.

    Loads the JSON object stored in *file*, looks up ``data[field]`` and
    tests the entries of *matchdict* against it with the ``in`` operator
    (a substring test when the field value is a string, a key test when it
    is a dict).  If any entry matches, ``data['url']`` followed by a newline
    is appended to ``response_<field>_matches_<tag>.txt`` in the current
    working directory.  At most one line is written per call.

    NOTE(review): records are presumably produced by a fetch step that
    stores ``url``, ``text``, ``headers``, ``status_code`` and ``datetime``
    keys -- confirm against the producer.  Only ``url`` and *field* are
    required here.

    Args:
        file: Path of the JSON file to inspect.
        tag: Label embedded in the output file name.
        matchdict: Iterable of candidates to look for; iterating a dict
            yields its keys.  Iteration stops at the first match.
        field: Key of the JSON object whose value is searched; also
            embedded in the output file name.

    Returns:
        None.  Any ``Exception`` (unreadable file, invalid JSON, missing
        key, unsupported ``in`` operand, ...) is caught and reported with
        ``print`` instead of being propagated, so one bad file does not
        stop a batch run.
    """
    try:
        # Only the load needs the input handle; release it before matching
        # and before the output file is opened.
        with open(file, 'r') as f:
            data = json.load(f)

        data_url = data['url']
        data_field = data[field]

        # any() short-circuits: a single hit is enough to classify the file.
        if any(item in data_field for item in matchdict):
            out_name = "response_" + field + "_matches_" + tag + ".txt"
            # Append mode: successive calls accumulate URLs in one file.
            with open(out_name, "a") as ofile:
                ofile.write(data_url + '\n')
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in {file}: {e}")
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        print(f"Error processing {file}: {e}")