def info_job()

in clutrr/utils/data_backend.py [0:0]


def info_job():
    """Print usage statistics for the gold and MTurk annotation collections.

    Connects to the database via ``DB(port=PORT)`` and writes to stdout:

    * a timestamp line,
    * one line per (relation_length, f_comb) group of the gold collection
      with the average ``used`` value (groups with relation_length 3 are
      not listed),
    * a summary of document counts, unique workers, annotation counts per
      relation length (1, 2, 3) and the total number of reviews.

    Returns nothing; the function only reads from the database and prints.
    """
    data = DB(port=PORT)
    print("Generating statistics at {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))

    # count_documents() is used for every count: Cursor.count() is deprecated
    # in PyMongo 3.7 and removed in 4.0.
    gold_c = data.gold.count_documents({})
    pending_c = data.gold.count_documents({'used': 0})

    # A $group over an empty collection yields no rows, so fall back to None
    # instead of leaking the empty result list into the report.
    avg_rows = list(data.gold.aggregate([
        {'$group': {'_id': None, 'avg': {'$avg': '$used'}}},
    ]))
    avg_used = avg_rows[0]['avg'] if avg_rows else None

    mturk_c = data.mturk.count_documents({})
    uniq_workers = len(data.mturk.distinct("worker_id"))

    # Annotation counts keyed by relation length.
    mturk_by_len = {
        length: data.mturk.count_documents({'relation_length': length})
        for length in (1, 2, 3)
    }

    # Average usage per (relation_length, f_comb), ordered by relation length.
    gold_agg = data.gold.aggregate([
        {'$group': {'_id': {'relation_length': '$relation_length', 'f_comb': '$f_comb'},
                    'avg': {'$avg': '$used'}}},
        {'$sort': {"_id.relation_length": 1}},
    ])

    # Sum of the sizes of every document's `reviewed_by` array; no rows are
    # returned when the collection is empty, in which case the total is 0.
    review_rows = list(data.mturk.aggregate([
        {'$group': {'_id': None, 'total_rev': {'$sum': {'$size': '$reviewed_by'}}}},
    ]))
    total_reviews = review_rows[0]['total_rev'] if review_rows else 0

    for rec in gold_agg:
        group = rec['_id']
        # NOTE(review): length-3 groups are deliberately left out of this
        # listing by the original code; the reason is not visible here.
        if group['relation_length'] != 3:
            print(group['relation_length'], '\t', group['f_comb'], '\t', rec['avg'])

    # The literals below (including their irregular spacing) are the report
    # format and are kept exactly as they were.
    print("Number of gold data : {} \n ".format(gold_c) +
          "Number of pending rows to annotate : {} \n ".format(pending_c) +
          "Average times each gold row has been used : {} \n ".format(avg_used) +
          "Number of annotations given : {} \n".format(mturk_c) +
          "Unique workers : {}\n".format(uniq_workers) +
          "Number of 1 relations annotated : {}\n".format(mturk_by_len[1]) +
          "Number of 2 relations annotated : {}\n".format(mturk_by_len[2]) +
          "Number of 3 relations annotated : {}\n".format(mturk_by_len[3]) +
          "Total reviews provided : {}\n".format(total_reviews))