def classify_sitelist()

in scripts/origin_content_classifier.py [0:0]


def classify_sitelist(file, tag, matchdict, field):
  """
  Append a saved response's URL to a match list when a field matches.

  Loads the JSON document stored at ``file``, reads its ``'url'`` entry and
  the entry named by ``field``, and tests each item of ``matchdict`` against
  that entry with the ``in`` operator. When at least one item matches, the
  URL is appended (one per line) to ``response_<field>_matches_<tag>.txt``
  in the current working directory. The URL is written at most once per
  call, no matter how many items match.

  Args:
    file: Path of the JSON file to read.
    tag: Label embedded in the output file name.
    matchdict: Iterable of items to look for. Iterating a dict yields its
      keys, so only the keys take part in the test.
    field: Key of the JSON entry to search in.

  Returns:
    None.

  Errors (unreadable file, invalid JSON, missing key, a field value that
  does not support ``in``) are reported with ``print`` and are not
  re-raised, so one bad file does not stop a caller that processes many.
  """
  try:
    # JSON interchange text is UTF-8; do not depend on the locale default.
    with open(file, 'r', encoding='utf-8') as f:
      data = json.load(f)

    # NOTE(review): the files presumably look like
    #   {'url': ..., 'text': ..., 'headers': {...}, 'status_code': ...,
    #    'datetime': ...}
    # -- confirm against the code that writes them.
    data_url = data['url']
    data_field = data[field]

    # `in` is a substring test when the field is a string (e.g. 'text') and
    # a key test when it is a dict (e.g. 'headers'). any() stops at the
    # first hit instead of scanning the remaining items.
    if any(item in data_field for item in matchdict):
      out_name = "response_" + field + "_matches_" + tag + ".txt"
      with open(out_name, "a", encoding='utf-8') as ofile:
        ofile.write(data_url + '\n')

  except json.JSONDecodeError as e:
    print(f"Error decoding JSON in {file}: {e}")
  except Exception as e:
    # Deliberately broad: this is a best-effort step, so report and move on.
    print(f"Error processing {file}: {e}")