huggingface / dataset-dedupe-estimator
Metrics

All numeric values measured by Sokrates.

Intro
Metrics
Metric Value
TOTAL_NUMBER_OF_FILES 525
NUMBER_OF_FILES_MAIN 11
LINES_OF_CODE_MAIN 1,418
NUMBER_OF_FILES_MAIN_EXT_PY 5
LINES_OF_CODE_MAIN_EXT_PY 782
NUMBER_OF_FILES_MAIN_EXT_RS 3
LINES_OF_CODE_MAIN_EXT_RS 389
NUMBER_OF_FILES_MAIN_EXT_JINJA2 1
LINES_OF_CODE_MAIN_EXT_JINJA2 201
NUMBER_OF_FILES_MAIN_EXT_TOML 2
LINES_OF_CODE_MAIN_EXT_TOML 46
NUMBER_OF_FILES_TEST 0
LINES_OF_CODE_TEST 0
TEST_VS_MAIN_LINES_OF_CODE_PERCENTAGE 0
NUMBER_OF_FILES_GENERATED 0
LINES_OF_CODE_GENERATED 0
NUMBER_OF_FILES_BUILD_AND_DEPLOYMENT 0
LINES_OF_CODE_BUILD_AND_DEPLOYMENT 0
NUMBER_OF_FILES_OTHER 1
LINES_OF_CODE_OTHER 432
NUMBER_OF_FILES_OTHER_EXT_MD 1
LINES_OF_CODE_OTHER_EXT_MD 432
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC 3
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC 389
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC_EXT_RS 3
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC_EXT_RS 389
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT 3
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT 247
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT_EXT_JINJA2 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT_EXT_JINJA2 201
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT_EXT_TOML 2
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT_EXT_TOML 46
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_DE 5
LINES_OF_CODE_DECOMPOSITION_PRIMARY_DE 782
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_DE_EXT_PY 5
LINES_OF_CODE_DECOMPOSITION_PRIMARY_DE_EXT_PY 782
NUMBER_OF_DEPENDENCY_LINKS_DECOMPOSITION_PRIMARY 0
NUMBER_OF_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_PLACES_WITH_CYCLIC_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_FILES_CONCERN_GENERAL_TODOS 0
LINES_OF_CODE_CONCERN_GENERAL_TODOS 0
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED 11
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED 1,418
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 5
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 782
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_RS 3
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_RS 389
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_JINJA2 1
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_JINJA2 201
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_TOML 2
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_TOML 46
NEGLIGIBLE_RISK_FILE_SIZE_COUNT 6
LOW_RISK_FILE_SIZE_COUNT 2
MEDIUM_RISK_FILE_SIZE_COUNT 3
HIGH_RISK_FILE_SIZE_COUNT 0
VERY_HIGH_RISK_FILE_SIZE_COUNT 0
NEGLIGIBLE_RISK_FILE_SIZE_LOC 187
LOW_RISK_FILE_SIZE_LOC 279
MEDIUM_RISK_FILE_SIZE_LOC 952
HIGH_RISK_FILE_SIZE_LOC 0
VERY_HIGH_RISK_FILE_SIZE_LOC 0
NUMBER_OF_UNITS 38
LINES_OF_CODE_IN_UNITS 490
LINES_OF_CODE_OUTSIDE_UNITS 928
UNIT_SIZE_NEGLIGIBLE_RISK_LOC 114
UNIT_SIZE_NEGLIGIBLE_RISK_PERCENTAGE 23.27
UNIT_SIZE_NEGLIGIBLE_RISK_COUNT 22
UNIT_SIZE_LOW_RISK_LOC 112
UNIT_SIZE_LOW_RISK_PERCENTAGE 22.86
UNIT_SIZE_LOW_RISK_COUNT 8
UNIT_SIZE_MEDIUM_RISK_LOC 210
UNIT_SIZE_MEDIUM_RISK_PERCENTAGE 42.86
UNIT_SIZE_MEDIUM_RISK_COUNT 7
UNIT_SIZE_HIGH_RISK_LOC 54
UNIT_SIZE_HIGH_RISK_PERCENTAGE 11.02
UNIT_SIZE_HIGH_RISK_COUNT 1
UNIT_SIZE_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_LOC 3
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_PERCENTAGE 3.75
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_COUNT 1
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_LOC 13
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_PERCENTAGE 16.25
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_COUNT 1
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_LOC 64
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_PERCENTAGE 80
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_COUNT 2
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_DE_NEGLIGIBLE_RISK_LOC 111
UNIT_SIZE_COMPONENT_DE_NEGLIGIBLE_RISK_PERCENTAGE 27.07
UNIT_SIZE_COMPONENT_DE_NEGLIGIBLE_RISK_COUNT 21
UNIT_SIZE_COMPONENT_DE_LOW_RISK_LOC 99
UNIT_SIZE_COMPONENT_DE_LOW_RISK_PERCENTAGE 24.15
UNIT_SIZE_COMPONENT_DE_LOW_RISK_COUNT 7
UNIT_SIZE_COMPONENT_DE_MEDIUM_RISK_LOC 146
UNIT_SIZE_COMPONENT_DE_MEDIUM_RISK_PERCENTAGE 35.61
UNIT_SIZE_COMPONENT_DE_MEDIUM_RISK_COUNT 5
UNIT_SIZE_COMPONENT_DE_HIGH_RISK_LOC 54
UNIT_SIZE_COMPONENT_DE_HIGH_RISK_PERCENTAGE 13.17
UNIT_SIZE_COMPONENT_DE_HIGH_RISK_COUNT 1
UNIT_SIZE_COMPONENT_DE_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_DE_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_DE_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_LOC 3
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_PERCENTAGE 3.75
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_COUNT 1
UNIT_SIZE_EXTENSION_RS_LOW_RISK_LOC 13
UNIT_SIZE_EXTENSION_RS_LOW_RISK_PERCENTAGE 16.25
UNIT_SIZE_EXTENSION_RS_LOW_RISK_COUNT 1
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_LOC 64
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_PERCENTAGE 80
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_COUNT 2
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_LOC 111
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_PERCENTAGE 27.07
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_COUNT 21
UNIT_SIZE_EXTENSION_PY_LOW_RISK_LOC 99
UNIT_SIZE_EXTENSION_PY_LOW_RISK_PERCENTAGE 24.15
UNIT_SIZE_EXTENSION_PY_LOW_RISK_COUNT 7
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_LOC 146
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_PERCENTAGE 35.61
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_COUNT 5
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_LOC 54
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_PERCENTAGE 13.17
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_COUNT 1
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_LOC 370
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_PERCENTAGE 75.51
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_COUNT 34
CONDITIONAL_COMPLEXITY_LOW_RISK_LOC 92
CONDITIONAL_COMPLEXITY_LOW_RISK_PERCENTAGE 18.78
CONDITIONAL_COMPLEXITY_LOW_RISK_COUNT 3
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_LOC 28
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_PERCENTAGE 5.71
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_LOC 51
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_PERCENTAGE 63.75
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_COUNT 3
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_LOC 29
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_PERCENTAGE 36.25
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_NEGLIGIBLE_RISK_LOC 319
CONDITIONAL_COMPLEXITY_COMPONENT_DE_NEGLIGIBLE_RISK_PERCENTAGE 77.8
CONDITIONAL_COMPLEXITY_COMPONENT_DE_NEGLIGIBLE_RISK_COUNT 31
CONDITIONAL_COMPLEXITY_COMPONENT_DE_LOW_RISK_LOC 63
CONDITIONAL_COMPLEXITY_COMPONENT_DE_LOW_RISK_PERCENTAGE 15.37
CONDITIONAL_COMPLEXITY_COMPONENT_DE_LOW_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_DE_MEDIUM_RISK_LOC 28
CONDITIONAL_COMPLEXITY_COMPONENT_DE_MEDIUM_RISK_PERCENTAGE 6.83
CONDITIONAL_COMPLEXITY_COMPONENT_DE_MEDIUM_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_DE_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_DE_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_LOC 51
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_PERCENTAGE 63.75
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_COUNT 3
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_LOC 29
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_PERCENTAGE 36.25
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_LOC 319
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_PERCENTAGE 77.8
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_COUNT 31
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_LOC 63
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_PERCENTAGE 15.37
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_LOC 28
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_PERCENTAGE 6.83
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_COUNT 0
FILE_CHANGE_HISTORY_TOTAL_AGE_DAYS 159
FILE_CHANGE_HISTORY_ACTIVE_DAYS 14
FILE_CHANGE_HISTORY_WEEKS 20
FILE_CHANGE_HISTORY_ESTIMATED_WORKING_DAYS 100
FILE_AGE_NEGLIGIBLE_RISK_COUNT 2
FILE_AGE_LOW_RISK_COUNT 3
FILE_AGE_MEDIUM_RISK_COUNT 6
FILE_AGE_HIGH_RISK_COUNT 0
FILE_AGE_VERY_HIGH_RISK_COUNT 0
FILE_AGE_NEGLIGIBLE_RISK_LOC 663
FILE_AGE_LOW_RISK_LOC 319
FILE_AGE_MEDIUM_RISK_LOC 436
FILE_AGE_HIGH_RISK_LOC 0
FILE_AGE_VERY_HIGH_RISK_LOC 0
NUMBER_OF_CONTRIBUTORS 1
DUPLICATION_NUMBER_OF_DUPLICATES 4
DUPLICATION_NUMBER_OF_FILES_WITH_DUPLICATES 2
DUPLICATION_NUMBER_OF_DUPLICATED_LINES 38
DUPLICATION_NUMBER_OF_CLEANED_LINES 1,348
DUPLICATION_PERCENTAGE 2.82
UNIT_DUPLICATES_COUNT 0
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_ROOT 26
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_ROOT 26
DUPLICATION_PERCENTAGE_PRIMARY_ROOT 100
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_DE 12
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_DE 12
DUPLICATION_PERCENTAGE_PRIMARY_DE 100
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_SRC 0
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_SRC 0
DUPLICATION_PERCENTAGE_PRIMARY_SRC -
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_JINJA2 26
DUPLICATION_NUMBER_OF_CLEANED_LINES_JINJA2 201
DUPLICATION_PERCENTAGE_JINJA2 12.94
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PY 12
DUPLICATION_NUMBER_OF_CLEANED_LINES_PY 747
DUPLICATION_PERCENTAGE_PY 1.61
TOTAL_ANALYSIS_TIME_IN_MILLIS 599