huggingface / datatrove
Metrics

All numeric values measured by Sokrates.

Intro
Metrics
Metric Value
TOTAL_NUMBER_OF_FILES 156
NUMBER_OF_FILES_MAIN 100
LINES_OF_CODE_MAIN 12,804
NUMBER_OF_FILES_MAIN_EXT_PY 96
LINES_OF_CODE_MAIN_EXT_PY 11,877
NUMBER_OF_FILES_MAIN_EXT_RS 2
LINES_OF_CODE_MAIN_EXT_RS 749
NUMBER_OF_FILES_MAIN_EXT_TOML 2
LINES_OF_CODE_MAIN_EXT_TOML 178
NUMBER_OF_FILES_TEST 29
LINES_OF_CODE_TEST 2,247
NUMBER_OF_FILES_TEST_EXT_PY 29
LINES_OF_CODE_TEST_EXT_PY 2,247
TEST_VS_MAIN_LINES_OF_CODE_PERCENTAGE 17.54
NUMBER_OF_FILES_GENERATED 0
LINES_OF_CODE_GENERATED 0
NUMBER_OF_FILES_BUILD_AND_DEPLOYMENT 0
LINES_OF_CODE_BUILD_AND_DEPLOYMENT 0
NUMBER_OF_FILES_OTHER 15
LINES_OF_CODE_OTHER 1,558
NUMBER_OF_FILES_OTHER_EXT_PY 10
LINES_OF_CODE_OTHER_EXT_PY 653
NUMBER_OF_FILES_OTHER_EXT_MD 2
LINES_OF_CODE_OTHER_EXT_MD 465
NUMBER_OF_FILES_OTHER_EXT_TXT 3
LINES_OF_CODE_OTHER_EXT_TXT 440
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT 152
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT_EXT_TOML 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT_EXT_TOML 152
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC 99
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC 12,652
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC_EXT_PY 96
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC_EXT_PY 11,877
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC_EXT_RS 2
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC_EXT_RS 749
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_SRC_EXT_TOML 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_SRC_EXT_TOML 26
NUMBER_OF_DEPENDENCY_LINKS_DECOMPOSITION_PRIMARY 0
NUMBER_OF_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_PLACES_WITH_CYCLIC_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_FILES_CONCERN_GENERAL_TODOS 6
LINES_OF_CODE_CONCERN_GENERAL_TODOS 696
NUMBER_OF_FILES_CONCERN_GENERAL_TODOS_EXT_PY 6
LINES_OF_CODE_CONCERN_GENERAL_TODOS_EXT_PY 696
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED 94
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED 12,108
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 90
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 11,181
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_RS 2
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_RS 749
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_TOML 2
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_TOML 178
NEGLIGIBLE_RISK_FILE_SIZE_COUNT 72
LOW_RISK_FILE_SIZE_COUNT 15
MEDIUM_RISK_FILE_SIZE_COUNT 12
HIGH_RISK_FILE_SIZE_COUNT 0
VERY_HIGH_RISK_FILE_SIZE_COUNT 1
NEGLIGIBLE_RISK_FILE_SIZE_LOC 2,713
LOW_RISK_FILE_SIZE_LOC 2,176
MEDIUM_RISK_FILE_SIZE_LOC 3,809
HIGH_RISK_FILE_SIZE_LOC 0
VERY_HIGH_RISK_FILE_SIZE_LOC 4,106
NUMBER_OF_UNITS 548
LINES_OF_CODE_IN_UNITS 5,296
LINES_OF_CODE_OUTSIDE_UNITS 7,508
UNIT_SIZE_NEGLIGIBLE_RISK_LOC 1,752
UNIT_SIZE_NEGLIGIBLE_RISK_PERCENTAGE 33.08
UNIT_SIZE_NEGLIGIBLE_RISK_COUNT 395
UNIT_SIZE_LOW_RISK_LOC 1,408
UNIT_SIZE_LOW_RISK_PERCENTAGE 26.59
UNIT_SIZE_LOW_RISK_COUNT 95
UNIT_SIZE_MEDIUM_RISK_LOC 1,374
UNIT_SIZE_MEDIUM_RISK_PERCENTAGE 25.94
UNIT_SIZE_MEDIUM_RISK_COUNT 46
UNIT_SIZE_HIGH_RISK_LOC 762
UNIT_SIZE_HIGH_RISK_PERCENTAGE 14.39
UNIT_SIZE_HIGH_RISK_COUNT 12
UNIT_SIZE_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_LOC 1,752
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_PERCENTAGE 33.08
UNIT_SIZE_COMPONENT_SRC_NEGLIGIBLE_RISK_COUNT 395
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_LOC 1,408
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_PERCENTAGE 26.59
UNIT_SIZE_COMPONENT_SRC_LOW_RISK_COUNT 95
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_LOC 1,374
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_PERCENTAGE 25.94
UNIT_SIZE_COMPONENT_SRC_MEDIUM_RISK_COUNT 46
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_LOC 762
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_PERCENTAGE 14.39
UNIT_SIZE_COMPONENT_SRC_HIGH_RISK_COUNT 12
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_SRC_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_LOC 1,716
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_PERCENTAGE 32.71
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_COUNT 390
UNIT_SIZE_EXTENSION_PY_LOW_RISK_LOC 1,394
UNIT_SIZE_EXTENSION_PY_LOW_RISK_PERCENTAGE 26.57
UNIT_SIZE_EXTENSION_PY_LOW_RISK_COUNT 94
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_LOC 1,374
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_PERCENTAGE 26.19
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_COUNT 46
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_LOC 762
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_PERCENTAGE 14.53
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_COUNT 12
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_LOC 36
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_PERCENTAGE 72
UNIT_SIZE_EXTENSION_RS_NEGLIGIBLE_RISK_COUNT 5
UNIT_SIZE_EXTENSION_RS_LOW_RISK_LOC 14
UNIT_SIZE_EXTENSION_RS_LOW_RISK_PERCENTAGE 28
UNIT_SIZE_EXTENSION_RS_LOW_RISK_COUNT 1
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_LOC 0
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_RS_MEDIUM_RISK_COUNT 0
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_RS_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_RS_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_LOC 2,835
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_PERCENTAGE 53.53
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_COUNT 463
CONDITIONAL_COMPLEXITY_LOW_RISK_LOC 997
CONDITIONAL_COMPLEXITY_LOW_RISK_PERCENTAGE 18.83
CONDITIONAL_COMPLEXITY_LOW_RISK_COUNT 48
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_LOC 1,338
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_PERCENTAGE 25.26
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_COUNT 35
CONDITIONAL_COMPLEXITY_HIGH_RISK_LOC 126
CONDITIONAL_COMPLEXITY_HIGH_RISK_PERCENTAGE 2.38
CONDITIONAL_COMPLEXITY_HIGH_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_LOC 126
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_LOC 2,835
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_PERCENTAGE 53.53
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_NEGLIGIBLE_RISK_COUNT 463
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_LOC 997
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_PERCENTAGE 18.83
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_LOW_RISK_COUNT 48
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_LOC 1,338
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_PERCENTAGE 25.26
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_MEDIUM_RISK_COUNT 35
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_LOC 126
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_PERCENTAGE 2.38
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_HIGH_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_SRC_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_LOC 2,785
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_PERCENTAGE 53.09
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_COUNT 457
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_LOC 997
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_PERCENTAGE 19
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_COUNT 48
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_LOC 1,338
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_PERCENTAGE 25.51
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_COUNT 35
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_LOC 126
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_PERCENTAGE 2.4
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_LOC 50
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_PERCENTAGE 100
CONDITIONAL_COMPLEXITY_COMPONENT_RS_NEGLIGIBLE_RISK_COUNT 6
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_LOW_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_MEDIUM_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_RS_VERY_HIGH_RISK_COUNT 0
FILE_CHANGE_HISTORY_TOTAL_AGE_DAYS 748
FILE_CHANGE_HISTORY_ACTIVE_DAYS 172
FILE_CHANGE_HISTORY_WEEKS 106
FILE_CHANGE_HISTORY_ESTIMATED_WORKING_DAYS 530
FILE_AGE_NEGLIGIBLE_RISK_COUNT 0
FILE_AGE_LOW_RISK_COUNT 23
FILE_AGE_MEDIUM_RISK_COUNT 14
FILE_AGE_HIGH_RISK_COUNT 20
FILE_AGE_VERY_HIGH_RISK_COUNT 40
FILE_AGE_NEGLIGIBLE_RISK_LOC 0
FILE_AGE_LOW_RISK_LOC 3,499
FILE_AGE_MEDIUM_RISK_LOC 1,388
FILE_AGE_HIGH_RISK_LOC 6,398
FILE_AGE_VERY_HIGH_RISK_LOC 1,516
NUMBER_OF_CONTRIBUTORS 49
DUPLICATION_NUMBER_OF_DUPLICATES 61
DUPLICATION_NUMBER_OF_FILES_WITH_DUPLICATES 18
DUPLICATION_NUMBER_OF_DUPLICATED_LINES 865
DUPLICATION_NUMBER_OF_CLEANED_LINES 12,007
DUPLICATION_PERCENTAGE 7.2
UNIT_DUPLICATES_COUNT 1
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_SRC 865
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_SRC 865
DUPLICATION_PERCENTAGE_PRIMARY_SRC 100
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_ROOT 0
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_ROOT 0
DUPLICATION_PERCENTAGE_PRIMARY_ROOT -
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_RS 356
DUPLICATION_NUMBER_OF_CLEANED_LINES_RS 667
DUPLICATION_PERCENTAGE_RS 53.37
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PY 509
DUPLICATION_NUMBER_OF_CLEANED_LINES_PY 11,162
DUPLICATION_PERCENTAGE_PY 4.56
TOTAL_ANALYSIS_TIME_IN_MILLIS 3,745