Apache Pig
File Size

Intro
  • File size measurements show how files are distributed by size.
  • Files are classified into five categories based on their size (lines of code): 1-100 (very small files), 101-200 (small files), 201-500 (medium size files), 501-1000 (long files), 1001+ (very long files).
  • It is good practice to keep files small. Long files may become "bloaters": code that has grown to such gargantuan proportions that it is hard to work with.
Learn more...
File Size Overall
  • There are 1,278 files with 156,535 lines of code.
    • 17 very long files (22,829 lines of code)
    • 32 long files (20,686 lines of code)
    • 155 medium size files (45,630 lines of code)
    • 233 small files (33,474 lines of code)
    • 841 very small files (33,916 lines of code)
14% | 13% | 29% | 21% | 21%
Legend:
1001+
501-1000
201-500
101-200
1-100
File Size per Extension
java: 14% | 13% | 29% | 21% | 21%
pig: 0% | 0% | 0% | 0% | 100%
Legend:
1001+
501-1000
201-500
101-200
1-100
File Size per Logical Decomposition
primary
src/org/apache/pig/backend: 16% | 16% | 24% | 22% | 19%
src/org/apache/pig/data: 44% | 0% | 28% | 15% | 10%
src/org/apache/pig/parser: 42% | 0% | 33% | 9% | 13%
src/org/apache/pig/newplan: 7% | 16% | 31% | 23% | 21%
lib-src/bzip2/org/apache/tools: 58% | 33% | 0% | 0% | 7%
src/org/apache/pig/tools: 15% | 7% | 36% | 26% | 13%
src/org/apache/pig/impl: 7% | 19% | 22% | 24% | 25%
src/org/apache/pig: 23% | 17% | 5% | 26% | 27%
src/org/apache/pig/pen: 32% | 0% | 48% | 3% | 14%
contrib/piggybank/java/src/main: 0% | 13% | 41% | 13% | 31%
src/org/apache/pig/builtin: 0% | 7% | 38% | 18% | 34%
src/org/apache/pig/scripting: 0% | 0% | 51% | 39% | 9%
lib-src/bzip2/org/apache/pig: 0% | 0% | 0% | 100% | 0%
src/org/apache/pig/validator: 0% | 0% | 0% | 69% | 30%
shims/src/hive3/org/apache: 0% | 0% | 0% | 100% | 0%
tutorial/src/org/apache/pig: 0% | 0% | 0% | 32% | 67%
shims/src/hive1/org/apache: 0% | 0% | 0% | 100% | 0%
tutorial/scripts: 0% | 0% | 0% | 0% | 100%
shims/src/hadoop2/org/apache: 0% | 0% | 0% | 0% | 100%
src/org/apache/pig/classification: 0% | 0% | 0% | 0% | 100%
Legend:
1001+
501-1000
201-500
101-200
1-100
backend
src/org/apache/pig/backend/hadoop: 16% | 16% | 24% | 22% | 18%
shims/src/hadoop2/org/apache/pig: 0% | 0% | 0% | 0% | 100%
src/org/apache/pig/backend/executionengine: 0% | 0% | 0% | 0% | 100%
src/org/apache/pig/backend: 0% | 0% | 0% | 0% | 100%
src/org/apache/pig/backend/datastorage: 0% | 0% | 0% | 0% | 100%
Legend:
1001+
501-1000
201-500
101-200
1-100
Alternative Visuals
Longest Files (Top 50)
File | # lines | # units
MRCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
2045 55
TezCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/tez/plan
2017 45
POCast.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators
1810 28
JobControlCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
1462 66
LogicalPlanBuilder.java
in src/org/apache/pig/parser
1451 102
LogToPhyTranslationVisitor.java
in src/org/apache/pig/newplan/logical/relational
1383 30
CBZip2OutputStream.java
in lib-src/bzip2/org/apache/tools/bzip2r
1264 33
SparkCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/spark/plan
1205 44
GruntParser.java
in src/org/apache/pig/tools/grunt
1199 64
Schema.java
in src/org/apache/pig/impl/logicalLayer/schema
1193 59
DataType.java
in src/org/apache/pig/data
1187 46
BinInterSedes.java
in src/org/apache/pig/data
1173 42
PigServer.java
in src/org/apache/pig
1172 103
TezDagBuilder.java
in src/org/apache/pig/backend/hadoop/executionengine/tez
1097 21
SchemaTuple.java
in src/org/apache/pig/data
1064 193
SchemaTupleClassGenerator.java
in src/org/apache/pig/data
1060 123
AugmentBaseDataVisitor.java
in src/org/apache/pig/pen
1047 33
TypeCheckingExpVisitor.java
in src/org/apache/pig/newplan/logical/visitor
983 47
OperatorPlan.java
in src/org/apache/pig/impl/plan
942 49
HBaseStorage.java
in src/org/apache/pig/backend/hadoop/hbase
931 44
Main.java
in src/org/apache/pig
881 25
MultiQueryOptimizer.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
850 34
TypeCheckingRelVisitor.java
in src/org/apache/pig/newplan/logical/visitor
742 27
CBZip2InputStream.java
in lib-src/bzip2/org/apache/tools/bzip2r
731 35
HiveUtils.java
in src/org/apache/pig/impl/util/hive
729 39
LogicalSchema.java
in src/org/apache/pig/newplan/logical/relational
726 43
POForEach.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
666 42
Over.java
in contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation
636 45
MapReduceLauncher.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
631 13
OrcStorage.java
in src/org/apache/pig/builtin
625 39
FileLocalizer.java
in src/org/apache/pig/impl/io
623 47
SparkLauncher.java
in src/org/apache/pig/backend/hadoop/executionengine/spark
623 25
PigContext.java
in src/org/apache/pig/impl
610 68
ScriptState.java
in src/org/apache/pig/tools/pigstats
595 78
LineageFindRelVisitor.java
in src/org/apache/pig/newplan/logical/visitor
595 37
TezOperator.java
in src/org/apache/pig/backend/hadoop/executionengine/tez/plan
571 126
IndexedStorage.java
in contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage
570 52
POPartialAgg.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
562 38
Utf8StorageConverter.java
in src/org/apache/pig/builtin
561 21
POMergeJoin.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
559 35
POLocalRearrange.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
549 40
AvroStorage.java
in contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro
543 32
SecondaryKeyOptimizerUtil.java
in src/org/apache/pig/backend/hadoop/executionengine/util
542 23
CombinerOptimizerUtil.java
in src/org/apache/pig/backend/hadoop/executionengine/util
539 30
AbstractAccumuloStorage.java
in src/org/apache/pig/backend/hadoop/accumulo
526 38
UnionOptimizer.java
in src/org/apache/pig/backend/hadoop/executionengine/tez/plan/optimizer
525 13
MapRedUtil.java
in src/org/apache/pig/backend/hadoop/executionengine/util
513 26
POUserFunc.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators
504 59
SecondaryKeyOptimizer.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
503 16
Utils.java
in src/org/apache/pig/impl/util
493 40
Files With Most Units (Top 50)
File | # lines | # units
SchemaTuple.java
in src/org/apache/pig/data
1064 193
TezOperator.java
in src/org/apache/pig/backend/hadoop/executionengine/tez/plan
571 126
SchemaTupleClassGenerator.java
in src/org/apache/pig/data
1060 123
PigServer.java
in src/org/apache/pig
1172 103
LogicalPlanBuilder.java
in src/org/apache/pig/parser
1451 102
PigWritableComparators.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
407 89
ScriptState.java
in src/org/apache/pig/tools/pigstats
595 78
Storage.java
in src/org/apache/pig/builtin/mock
421 69
PigContext.java
in src/org/apache/pig/impl
610 68
JobControlCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
1462 66
GruntParser.java
in src/org/apache/pig/tools/grunt
1199 64
PhysicalOperator.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer
355 62
MapReduceOper.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
351 62
PlanHelper.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util
390 60
Schema.java
in src/org/apache/pig/impl/logicalLayer/schema
1193 59
POUserFunc.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators
504 59
PhyPlanVisitor.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans
233 56
MRCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
2045 55
IndexedStorage.java
in contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage
570 52
GroovyAlgebraicEvalFunc.java
in src/org/apache/pig/scripting/groovy
269 52
PhyPlanSetter.java
in src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
288 52
OperatorPlan.java
in src/org/apache/pig/impl/plan
942 49
FileLocalizer.java
in src/org/apache/pig/impl/io
623 47
PigStats.java
in src/org/apache/pig/tools/pigstats
381 47
TypeCheckingExpVisitor.java
in src/org/apache/pig/newplan/logical/visitor
983 47
DataType.java
in src/org/apache/pig/data
1187 46
Over.java
in contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation
636 45
TezCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/tez/plan
2017 45
RubySchema.java
in src/org/apache/pig/scripting/jruby
466 44
SparkOperator.java
in src/org/apache/pig/backend/hadoop/executionengine/spark/plan
202 44
SparkCompiler.java
in src/org/apache/pig/backend/hadoop/executionengine/spark/plan
1205 44
HBaseStorage.java
in src/org/apache/pig/backend/hadoop/hbase
931 44
POProject.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators
442 43
LogicalSchema.java
in src/org/apache/pig/newplan/logical/relational
726 43
POForEach.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
666 42
BinInterSedes.java
in src/org/apache/pig/data
1173 42
IllustratorAttacher.java
in src/org/apache/pig/pen
321 41
Utils.java
in src/org/apache/pig/impl/util
493 40
POLocalRearrange.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
549 40
POStore.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
236 40
SparkPigSplit.java
in src/org/apache/pig/backend/hadoop/executionengine/spark
234 40
HiveUtils.java
in src/org/apache/pig/impl/util/hive
729 39
OrcStorage.java
in src/org/apache/pig/builtin
625 39
AbstractAccumuloStorage.java
in src/org/apache/pig/backend/hadoop/accumulo
526 38
POPartialAgg.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
562 38
PigException.java
in src/org/apache/pig
162 37
LineageFindRelVisitor.java
in src/org/apache/pig/newplan/logical/visitor
595 37
CBZip2InputStream.java
in lib-src/bzip2/org/apache/tools/bzip2r
731 35
StreamingCommand.java
in src/org/apache/pig/impl/streaming
240 35
POMergeJoin.java
in src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators
559 35