huggingface / fineweb-2
Metrics

All numeric values measured by Sokrates.

Intro
Metrics
Metric Value
TOTAL_NUMBER_OF_FILES 7,704
NUMBER_OF_FILES_MAIN 28
LINES_OF_CODE_MAIN 1,973
NUMBER_OF_FILES_MAIN_EXT_PY 28
LINES_OF_CODE_MAIN_EXT_PY 1,973
NUMBER_OF_FILES_TEST 0
LINES_OF_CODE_TEST 0
TEST_VS_MAIN_LINES_OF_CODE_PERCENTAGE 0
NUMBER_OF_FILES_GENERATED 0
LINES_OF_CODE_GENERATED 0
NUMBER_OF_FILES_BUILD_AND_DEPLOYMENT 1
LINES_OF_CODE_BUILD_AND_DEPLOYMENT 19
NUMBER_OF_FILES_BUILD_AND_DEPLOYMENT_EXT_SH 1
LINES_OF_CODE_BUILD_AND_DEPLOYMENT_EXT_SH 19
NUMBER_OF_FILES_OTHER 4,929
LINES_OF_CODE_OTHER 2,635,798
NUMBER_OF_FILES_OTHER_EXT_TXT 3,718
LINES_OF_CODE_OTHER_EXT_TXT 2,631,267
NUMBER_OF_FILES_OTHER_EXT_JSON 1,208
LINES_OF_CODE_OTHER_EXT_JSON 4,304
NUMBER_OF_FILES_OTHER_EXT_MD 3
LINES_OF_CODE_OTHER_EXT_MD 227
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ABLATIONS 4
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ABLATIONS 478
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ABLATIONS_EXT_PY 4
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ABLATIONS_EXT_PY 478
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT 227
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_ROOT_EXT_PY 1
LINES_OF_CODE_DECOMPOSITION_PRIMARY_ROOT_EXT_PY 227
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_MISC 23
LINES_OF_CODE_DECOMPOSITION_PRIMARY_MISC 1,268
NUMBER_OF_FILES_DECOMPOSITION_PRIMARY_MISC_EXT_PY 23
LINES_OF_CODE_DECOMPOSITION_PRIMARY_MISC_EXT_PY 1,268
NUMBER_OF_DEPENDENCY_LINKS_DECOMPOSITION_PRIMARY 0
NUMBER_OF_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_PLACES_WITH_CYCLIC_DEPENDENCIES_DECOMPOSITION_PRIMARY 0
NUMBER_OF_FILES_CONCERN_GENERAL_TODOS 0
LINES_OF_CODE_CONCERN_GENERAL_TODOS 0
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED 28
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED 1,973
NUMBER_OF_FILES_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 28
LINES_OF_CODE_CONCERN_GENERAL_UNCLASSIFIED_EXT_PY 1,973
NEGLIGIBLE_RISK_FILE_SIZE_COUNT 20
LOW_RISK_FILE_SIZE_COUNT 6
MEDIUM_RISK_FILE_SIZE_COUNT 2
HIGH_RISK_FILE_SIZE_COUNT 0
VERY_HIGH_RISK_FILE_SIZE_COUNT 0
NEGLIGIBLE_RISK_FILE_SIZE_LOC 756
LOW_RISK_FILE_SIZE_LOC 764
MEDIUM_RISK_FILE_SIZE_LOC 453
HIGH_RISK_FILE_SIZE_LOC 0
VERY_HIGH_RISK_FILE_SIZE_LOC 0
NUMBER_OF_UNITS 53
LINES_OF_CODE_IN_UNITS 781
LINES_OF_CODE_OUTSIDE_UNITS 1,192
UNIT_SIZE_NEGLIGIBLE_RISK_LOC 170
UNIT_SIZE_NEGLIGIBLE_RISK_PERCENTAGE 21.77
UNIT_SIZE_NEGLIGIBLE_RISK_COUNT 29
UNIT_SIZE_LOW_RISK_LOC 236
UNIT_SIZE_LOW_RISK_PERCENTAGE 30.22
UNIT_SIZE_LOW_RISK_COUNT 15
UNIT_SIZE_MEDIUM_RISK_LOC 228
UNIT_SIZE_MEDIUM_RISK_PERCENTAGE 29.19
UNIT_SIZE_MEDIUM_RISK_COUNT 8
UNIT_SIZE_HIGH_RISK_LOC 0
UNIT_SIZE_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_HIGH_RISK_COUNT 0
UNIT_SIZE_VERY_HIGH_RISK_LOC 147
UNIT_SIZE_VERY_HIGH_RISK_PERCENTAGE 18.82
UNIT_SIZE_VERY_HIGH_RISK_COUNT 1
UNIT_SIZE_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_LOC 30
UNIT_SIZE_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_PERCENTAGE 37.04
UNIT_SIZE_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_COUNT 4
UNIT_SIZE_COMPONENT_ABLATIONS_LOW_RISK_LOC 27
UNIT_SIZE_COMPONENT_ABLATIONS_LOW_RISK_PERCENTAGE 33.33
UNIT_SIZE_COMPONENT_ABLATIONS_LOW_RISK_COUNT 2
UNIT_SIZE_COMPONENT_ABLATIONS_MEDIUM_RISK_LOC 24
UNIT_SIZE_COMPONENT_ABLATIONS_MEDIUM_RISK_PERCENTAGE 29.63
UNIT_SIZE_COMPONENT_ABLATIONS_MEDIUM_RISK_COUNT 1
UNIT_SIZE_COMPONENT_ABLATIONS_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_ABLATIONS_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ABLATIONS_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_ABLATIONS_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_ABLATIONS_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ABLATIONS_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_ROOT_NEGLIGIBLE_RISK_LOC 2
UNIT_SIZE_COMPONENT_ROOT_NEGLIGIBLE_RISK_PERCENTAGE 100
UNIT_SIZE_COMPONENT_ROOT_NEGLIGIBLE_RISK_COUNT 1
UNIT_SIZE_COMPONENT_ROOT_LOW_RISK_LOC 0
UNIT_SIZE_COMPONENT_ROOT_LOW_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ROOT_LOW_RISK_COUNT 0
UNIT_SIZE_COMPONENT_ROOT_MEDIUM_RISK_LOC 0
UNIT_SIZE_COMPONENT_ROOT_MEDIUM_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ROOT_MEDIUM_RISK_COUNT 0
UNIT_SIZE_COMPONENT_ROOT_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_ROOT_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ROOT_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_ROOT_VERY_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_ROOT_VERY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_ROOT_VERY_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_MISC_NEGLIGIBLE_RISK_LOC 138
UNIT_SIZE_COMPONENT_MISC_NEGLIGIBLE_RISK_PERCENTAGE 19.77
UNIT_SIZE_COMPONENT_MISC_NEGLIGIBLE_RISK_COUNT 24
UNIT_SIZE_COMPONENT_MISC_LOW_RISK_LOC 209
UNIT_SIZE_COMPONENT_MISC_LOW_RISK_PERCENTAGE 29.94
UNIT_SIZE_COMPONENT_MISC_LOW_RISK_COUNT 13
UNIT_SIZE_COMPONENT_MISC_MEDIUM_RISK_LOC 204
UNIT_SIZE_COMPONENT_MISC_MEDIUM_RISK_PERCENTAGE 29.23
UNIT_SIZE_COMPONENT_MISC_MEDIUM_RISK_COUNT 7
UNIT_SIZE_COMPONENT_MISC_HIGH_RISK_LOC 0
UNIT_SIZE_COMPONENT_MISC_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_COMPONENT_MISC_HIGH_RISK_COUNT 0
UNIT_SIZE_COMPONENT_MISC_VERY_HIGH_RISK_LOC 147
UNIT_SIZE_COMPONENT_MISC_VERY_HIGH_RISK_PERCENTAGE 21.06
UNIT_SIZE_COMPONENT_MISC_VERY_HIGH_RISK_COUNT 1
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_LOC 170
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_PERCENTAGE 21.77
UNIT_SIZE_EXTENSION_PY_NEGLIGIBLE_RISK_COUNT 29
UNIT_SIZE_EXTENSION_PY_LOW_RISK_LOC 236
UNIT_SIZE_EXTENSION_PY_LOW_RISK_PERCENTAGE 30.22
UNIT_SIZE_EXTENSION_PY_LOW_RISK_COUNT 15
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_LOC 228
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_PERCENTAGE 29.19
UNIT_SIZE_EXTENSION_PY_MEDIUM_RISK_COUNT 8
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_LOC 0
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_PERCENTAGE 0
UNIT_SIZE_EXTENSION_PY_HIGH_RISK_COUNT 0
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_LOC 147
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_PERCENTAGE 18.82
UNIT_SIZE_EXTENSION_PY_VERY_HIGH_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_LOC 348
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_PERCENTAGE 44.56
CONDITIONAL_COMPLEXITY_NEGLIGIBLE_RISK_COUNT 38
CONDITIONAL_COMPLEXITY_LOW_RISK_LOC 372
CONDITIONAL_COMPLEXITY_LOW_RISK_PERCENTAGE 47.63
CONDITIONAL_COMPLEXITY_LOW_RISK_COUNT 13
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_LOC 61
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_PERCENTAGE 7.81
CONDITIONAL_COMPLEXITY_MEDIUM_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_HIGH_PLUS_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_LOC 30
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_PERCENTAGE 37.04
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_NEGLIGIBLE_RISK_COUNT 4
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_LOW_RISK_LOC 27
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_LOW_RISK_PERCENTAGE 33.33
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_LOW_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_MEDIUM_RISK_LOC 24
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_MEDIUM_RISK_PERCENTAGE 29.63
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_MEDIUM_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ABLATIONS_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_NEGLIGIBLE_RISK_LOC 2
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_NEGLIGIBLE_RISK_PERCENTAGE 100
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_NEGLIGIBLE_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_LOW_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_LOW_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_LOW_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_MEDIUM_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_MEDIUM_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_MEDIUM_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_ROOT_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_NEGLIGIBLE_RISK_LOC 316
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_NEGLIGIBLE_RISK_PERCENTAGE 45.27
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_NEGLIGIBLE_RISK_COUNT 33
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_LOW_RISK_LOC 345
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_LOW_RISK_PERCENTAGE 49.43
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_LOW_RISK_COUNT 11
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_MEDIUM_RISK_LOC 37
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_MEDIUM_RISK_PERCENTAGE 5.3
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_MEDIUM_RISK_COUNT 1
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_MISC_VERY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_LOC 348
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_PERCENTAGE 44.56
CONDITIONAL_COMPLEXITY_COMPONENT_PY_NEGLIGIBLE_RISK_COUNT 38
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_LOC 372
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_PERCENTAGE 47.63
CONDITIONAL_COMPLEXITY_COMPONENT_PY_LOW_RISK_COUNT 13
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_LOC 61
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_PERCENTAGE 7.81
CONDITIONAL_COMPLEXITY_COMPONENT_PY_MEDIUM_RISK_COUNT 2
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_HIGH_RISK_COUNT 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_LOC 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_PERCENTAGE 0
CONDITIONAL_COMPLEXITY_COMPONENT_PY_VERY_HIGH_RISK_COUNT 0
FILE_CHANGE_HISTORY_TOTAL_AGE_DAYS 208
FILE_CHANGE_HISTORY_ACTIVE_DAYS 9
FILE_CHANGE_HISTORY_WEEKS 29
FILE_CHANGE_HISTORY_ESTIMATED_WORKING_DAYS 145
FILE_AGE_NEGLIGIBLE_RISK_COUNT 5
FILE_AGE_LOW_RISK_COUNT 0
FILE_AGE_MEDIUM_RISK_COUNT 0
FILE_AGE_HIGH_RISK_COUNT 23
FILE_AGE_VERY_HIGH_RISK_COUNT 0
FILE_AGE_NEGLIGIBLE_RISK_LOC 317
FILE_AGE_LOW_RISK_LOC 0
FILE_AGE_MEDIUM_RISK_LOC 0
FILE_AGE_HIGH_RISK_LOC 1,656
FILE_AGE_VERY_HIGH_RISK_LOC 0
NUMBER_OF_CONTRIBUTORS 4
DUPLICATION_NUMBER_OF_DUPLICATES 18
DUPLICATION_NUMBER_OF_FILES_WITH_DUPLICATES 16
DUPLICATION_NUMBER_OF_DUPLICATED_LINES 441
DUPLICATION_NUMBER_OF_CLEANED_LINES 1,803
DUPLICATION_PERCENTAGE 24.46
UNIT_DUPLICATES_COUNT 1
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_MISC 409
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_MISC 409
DUPLICATION_PERCENTAGE_PRIMARY_MISC 100
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_ABLATIONS 32
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_ABLATIONS 32
DUPLICATION_PERCENTAGE_PRIMARY_ABLATIONS 100
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PRIMARY_ROOT 0
DUPLICATION_NUMBER_OF_CLEANED_LINES_PRIMARY_ROOT 0
DUPLICATION_PERCENTAGE_PRIMARY_ROOT -
DUPLICATION_NUMBER_OF_DUPLICATED_LINES_PY 441
DUPLICATION_NUMBER_OF_CLEANED_LINES_PY 1,803
DUPLICATION_PERCENTAGE_PY 24.46
TOTAL_ANALYSIS_TIME_IN_MILLIS 8,418