in clutrr/utils/data_backend.py [0:0]
def info_job():
    """Print a progress report for the gold and mturk collections.

    Opens a connection with ``DB(port=PORT)`` and writes to stdout, in order:

    1. a timestamped header line,
    2. one line per (relation_length, f_comb) group of gold rows showing the
       group's average ``used`` value (groups whose relation_length is 3 are
       skipped),
    3. a summary block with overall counts.

    Returns:
        None. The report is only printed.
    """
    data = DB(port=PORT)
    print("Generating statistics at {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))

    # Use count_documents everywhere: Cursor.count() was deprecated in
    # PyMongo 3.7 and removed in PyMongo 4.
    gold_c = data.gold.count_documents({})
    pending_c = data.gold.count_documents({'used': 0})

    # A $group over an empty collection yields no rows at all, so fall back to
    # None instead of printing the empty result list as the average.
    avg_rows = list(data.gold.aggregate([
        {'$group': {'_id': None, 'avg': {'$avg': '$used'}}},
    ]))
    avg_used = avg_rows[0]['avg'] if avg_rows else None

    mturk_c = data.mturk.count_documents({})
    uniq_workers = len(data.mturk.distinct("worker_id"))
    mturk_c_1 = data.mturk.count_documents({'relation_length': 1})
    mturk_c_2 = data.mturk.count_documents({'relation_length': 2})
    mturk_c_3 = data.mturk.count_documents({'relation_length': 3})

    # Average usage of gold rows per (relation_length, f_comb) pair, ordered
    # by relation_length.
    gold_agg = list(data.gold.aggregate([
        {'$group': {'_id': {'relation_length': '$relation_length', 'f_comb': '$f_comb'},
                    'avg': {'$avg': '$used'}}},
        {'$sort': {"_id.relation_length": 1}},
    ]))

    # NOTE(review): $size requires an array operand, so this pipeline presumably
    # relies on every mturk document having a `reviewed_by` list - confirm.
    review_rows = list(data.mturk.aggregate([
        {'$group': {'_id': None, 'total_rev': {'$sum': {'$size': '$reviewed_by'}}}},
    ]))
    # No mturk documents means no group row; report zero reviews rather than
    # raising IndexError.
    total_reviews = review_rows[0]['total_rev'] if review_rows else 0

    for rec in gold_agg:
        group = rec['_id']
        if group['relation_length'] != 3:
            print(group['relation_length'], '\t', group['f_comb'], '\t', rec['avg'])

    print("Number of gold data : {} \n ".format(gold_c) +
          "Number of pending rows to annotate : {} \n ".format(pending_c) +
          "Average times each gold row has been used : {} \n ".format(avg_used) +
          "Number of annotations given : {} \n".format(mturk_c) +
          "Unique workers : {}\n".format(uniq_workers) +
          "Number of 1 relations annotated : {}\n".format(mturk_c_1) +
          "Number of 2 relations annotated : {}\n".format(mturk_c_2) +
          "Number of 3 relations annotated : {}\n".format(mturk_c_3) +
          "Total reviews provided : {}\n".format(total_reviews))