dataset-construction/src/ndb_data/sample_questions_1000.py [117:251]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                continue
                        else:

                            if (
                                len(question["answer"]) > 2
                                or len(question["derivations"]) == 1
                            ):
                                if random.random() < 0.95:
                                    continue
                            else:
                                if random.random() < 0.3:
                                    continue

                    #
                    if question["type"] == "bool" and "TRUE" in question["answer"]:
                        if random.random() < 0.3:
                            continue

                    # Less than 8 facts
                    if q_bin == 5:
                        # Drop half of the facts
                        if random.random() < 0.5:
                            continue

                    if q_bin == 4:
                        # Drop half of the facts
                        if random.random() < 0.5:
                            continue

                    if q_bin == 3:
                        # Skip 60% of the questions in this bin
                        if random.random() < 0.6:
                            continue

                    if q_bin == 2:
                        # Skip 90% of the questions in this bin
                        if random.random() < 0.9:
                            continue

                    if q_bin == 1:
                        # Bool questions are never skipped here (threshold 0); skip half of the others

                        if question["type"] == "bool":
                            if random.random() < 0:
                                continue

                        else:
                            if random.random() < 0.5:
                                continue

                    if q_bin == 0:
                        # Skip half of the questions in this bin
                        if random.random() < 0.5:
                            continue

                    if q_bin < 4:
                        if random.random() < 0.6:
                            continue

                    # if (
                    #     question["type"] == "bool"
                    #     and "FALSE" in question["answer"]
                    #     and random.randint(0, 100) > 55
                    # ):
                    #     continue
                    #
                    # if (
                    #     "complex" not in question["id"] and
                    #     question["type"] in {"argmax", "argmin"}
                    #     and random.randint(0, 100) > 30
                    # ):
                    #     continue
                    #
                    #
                    #
                    # if q_bin == 0 and random.randint(0, 100) > 30:
                    #     continue
                    # if (
                    #     question["type"] != "bool"
                    #     and q_bin == 1
                    #     and random.randint(0, 100) > 40
                    # ):
                    #     continue
                    # if (
                    #
                    #     question["type"] != "bool"
                    #     and q_bin == 2
                    #     and random.randint(0, 100) > 30
                    # ):
                    #     continue
                    # if q_bin == 3 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 4 and random.randint(0, 100) > 9:
                    #     continue
                    # if q_bin == 5 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 6 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 7 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 8 and random.randint(0, 100) > 90:
                    #     continue
                    counts_types[question["type"]] += 1
                    counts_facts[len(question["facts"])] += 1

                    if "complex" in question["id"] or "join" in question["id"]:
                        complex_counts_types[question["type"]] += 1
                        complex_counts_facts[len(question["facts"])] += 1

                    instance["queries"].append(question)
            del instance["all_queries"]

            if len(instance["queries"]):
                of.write(json.dumps(instance) + "\n")
                db_sizes[len(instance["queries"])] += 1

    for k, v in added_q_type_bin.items():
        print(k, len(v))
    print(added)
    # print(q_bin)
    # print(q_type)
    # print(q_type_bin)

    print("bins")
    for i in range(0, 9):
        print(i, counts_bins[i], complex_counts_bins[i])

    print("lens")
    for i in range(0, 26):
        print(i, counts_facts[i], complex_counts_facts[i])

    for k, v in counts_types.items():
        print(k, v, complex_counts_types[k])

    print(db_sizes)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dataset-construction/src/ndb_data/sample_questions_500.py [117:251]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                                continue
                        else:

                            if (
                                len(question["answer"]) > 2
                                or len(question["derivations"]) == 1
                            ):
                                if random.random() < 0.95:
                                    continue
                            else:
                                if random.random() < 0.3:
                                    continue

                    #
                    if question["type"] == "bool" and "TRUE" in question["answer"]:
                        if random.random() < 0.3:
                            continue

                    # Less than 8 facts
                    if q_bin == 5:
                        # Drop half of the facts
                        if random.random() < 0.5:
                            continue

                    if q_bin == 4:
                        # Drop half of the facts
                        if random.random() < 0.5:
                            continue

                    if q_bin == 3:
                        # Skip 60% of the questions in this bin
                        if random.random() < 0.6:
                            continue

                    if q_bin == 2:
                        # Skip 90% of the questions in this bin
                        if random.random() < 0.9:
                            continue

                    if q_bin == 1:
                        # Bool questions are never skipped here (threshold 0); skip half of the others

                        if question["type"] == "bool":
                            if random.random() < 0:
                                continue

                        else:
                            if random.random() < 0.5:
                                continue

                    if q_bin == 0:
                        # Skip half of the questions in this bin
                        if random.random() < 0.5:
                            continue

                    if q_bin < 4:
                        if random.random() < 0.6:
                            continue

                    # if (
                    #     question["type"] == "bool"
                    #     and "FALSE" in question["answer"]
                    #     and random.randint(0, 100) > 55
                    # ):
                    #     continue
                    #
                    # if (
                    #     "complex" not in question["id"] and
                    #     question["type"] in {"argmax", "argmin"}
                    #     and random.randint(0, 100) > 30
                    # ):
                    #     continue
                    #
                    #
                    #
                    # if q_bin == 0 and random.randint(0, 100) > 30:
                    #     continue
                    # if (
                    #     question["type"] != "bool"
                    #     and q_bin == 1
                    #     and random.randint(0, 100) > 40
                    # ):
                    #     continue
                    # if (
                    #
                    #     question["type"] != "bool"
                    #     and q_bin == 2
                    #     and random.randint(0, 100) > 30
                    # ):
                    #     continue
                    # if q_bin == 3 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 4 and random.randint(0, 100) > 9:
                    #     continue
                    # if q_bin == 5 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 6 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 7 and random.randint(0, 100) > 90:
                    #     continue
                    # if q_bin == 8 and random.randint(0, 100) > 90:
                    #     continue
                    counts_types[question["type"]] += 1
                    counts_facts[len(question["facts"])] += 1

                    if "complex" in question["id"] or "join" in question["id"]:
                        complex_counts_types[question["type"]] += 1
                        complex_counts_facts[len(question["facts"])] += 1

                    instance["queries"].append(question)
            del instance["all_queries"]

            if len(instance["queries"]):
                of.write(json.dumps(instance) + "\n")
                db_sizes[len(instance["queries"])] += 1

    for k, v in added_q_type_bin.items():
        print(k, len(v))
    print(added)
    # print(q_bin)
    # print(q_type)
    # print(q_type_bin)

    print("bins")
    for i in range(0, 9):
        print(i, counts_bins[i], complex_counts_bins[i])

    print("lens")
    for i in range(0, 26):
        print(i, counts_facts[i], complex_counts_facts[i])

    for k, v in counts_types.items():
        print(k, v, complex_counts_types[k])

    print(db_sizes)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



