def scan()

in src/plugins/scanners/discourse.py [0:0]


def _setForumStep(KibbleBit, source, status, running=True, good=True):
    """Overwrite source['steps']['forum'] and hand the source to updateSource.

    Args:
        KibbleBit: Object providing ``updateSource(source)``.
        source: Source definition dict; its ``steps['forum']`` entry is replaced.
        status: Human-readable status text stored under ``'status'``.
        running: Value stored under ``'running'``.
        good: Value stored under ``'good'``.
    """
    source['steps']['forum'] = {
        'time': time.time(),
        'status': status,
        'running': running,
        'good': good
    }
    KibbleBit.updateSource(source)


def scan(KibbleBit, source):
    """Scan the Discourse forum described by *source*.

    Nothing is done unless ``source['sourceURL']`` is an ``http://`` or
    ``https://`` URL. Otherwise the category list is fetched from
    ``<sourceURL>/categories_and_latest`` and used directly as the job queue
    shared by four ``discourseThread`` workers, which are started and then
    joined. Progress is written to ``source['steps']['forum']`` and passed to
    ``KibbleBit.updateSource`` at each stage.

    Args:
        KibbleBit: Scanner context. Only ``pprint`` and ``updateSource`` are
            called here; the object is also passed to every worker thread.
        source: Source definition dict. ``sourceURL`` and ``creds`` are read,
            ``steps['forum']`` is overwritten.

    Returns:
        None.
    """
    # Simple URL check
    if not re.match(r"(https?://.+)", source['sourceURL']):
        return

    _setForumStep(KibbleBit, source, 'Parsing Discourse topics...')

    KibbleBit.pprint("Parsing Discourse activity at %s" % source['sourceURL'])
    # NOTE(review): this status wording looks inherited from a version-control
    # scanner; it is kept verbatim because it is user-visible text - confirm
    # before rewording.
    _setForumStep(KibbleBit, source, 'Downloading changeset')

    # Discourse may need credentials (if basic auth)
    creds = None
    sourceCreds = source['creds']
    if sourceCreds and 'username' in sourceCreds and sourceCreds['username']:
        creds = "%s:%s" % (sourceCreds['username'], sourceCreds['password'])

    # Get the list of categories
    sURL = source['sourceURL']
    KibbleBit.pprint("Getting categories...")
    catjs = plugins.utils.jsonapi.get("%s/categories_and_latest" % sURL, auth=creds)

    # Directly assign the category list as pending jobs queue, ezpz.
    try:
        pendingJobs = catjs['category_list']['categories']
    except (TypeError, KeyError):
        # The listing did not have the expected shape (presumably a failed or
        # empty response - verify what jsonapi.get returns on failure).
        # Record the failure rather than leaving the step marked as running.
        KibbleBit.pprint("Could not get the category list from %s" % sURL)
        _setForumStep(
            KibbleBit,
            source,
            'Could not fetch the Discourse category list from %s' % sURL,
            running=False,
            good=False
        )
        return

    KibbleBit.pprint("Found %u categories" % len(pendingJobs))

    # Now fire off 4 threads to parse the categories; they all share the same
    # lock and the same pendingJobs list.
    block = threading.Lock()
    KibbleBit.pprint("Scanning jobs using 4 sub-threads")
    threads = []
    for _ in range(4):
        worker = discourseThread(block, KibbleBit, source, creds, pendingJobs)
        threads.append(worker)
        worker.start()

    # Wait for every worker before reporting completion.
    for worker in threads:
        worker.join()

    # We're all done, yaay
    KibbleBit.pprint("Done scanning %s" % source['sourceURL'])

    finishedAt = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime())
    _setForumStep(
        KibbleBit,
        source,
        'Discourse successfully scanned at ' + finishedAt,
        running=False,
        good=True
    )