def classify_web_content_traits()

in scripts/origin_content_classifier.py [0:0]


def classify_web_content_traits(url):
  """Fetch *url* and report which web-content traits its response shows.

  The URL is requested once (10 second timeout) and that single response is
  handed to ``classify_origin`` for every trait.

  Returns a dict keyed by trait name. ``"compression-dictionary"`` is 1 when
  either the "text" check or the "headers" check is truthy and 0 otherwise;
  every other key holds the ``classify_origin`` result unchanged.

  Exceptions raised by ``requests.get`` are not caught here and propagate
  to the caller.
  """
  response = requests.get(url, timeout=10)

  # Both compression-dictionary probes always run (no short-circuit); the
  # trait is reported as a single 0/1 flag combining the two results.
  in_text = classify_origin(response, "compression-dictionary", mb_compressiondict, "text")
  in_headers = classify_origin(response, "compression-dictionary-header", mh_compressiondict, "headers")
  traits = {"compression-dictionary": 1 if (in_text or in_headers) else 0}

  # Remaining traits are each checked with the "text" selector; the tuple
  # order fixes both the call order and the key order of the result.
  text_matchers = (
    ("dns-prefetch", mb_dnsprefetch),
    ("google-publisher-tag", mb_gpt),
    ("preconnect", mb_preconnect),
    ("prefetch", mb_prefetch),
    ("preload", mb_preload),
    ("prerender", mb_prerender),
  )
  for trait, matcher in text_matchers:
    traits[trait] = classify_origin(response, trait, matcher, "text")

  return traits