parquet-column/src/main/java/org/apache/parquet/filter2/predicate/FilterApi.java (5 lines):
 - line 61: // TODO: Support repeated columns (https://issues.apache.org/jira/browse/PARQUET-34)
 - line 63: // TODO: Support filtering on groups (eg, filter where this group is / isn't null)
 - line 64: // TODO: (https://issues.apache.org/jira/browse/PARQUET-43)
 - line 66: // TODO: Consider adding support for more column types that aren't coupled with parquet types, eg Column
 - line 67: // TODO: (https://issues.apache.org/jira/browse/PARQUET-35)

parquet-column/src/main/java/org/apache/parquet/filter2/recordlevel/FilteringPrimitiveConverter.java (4 lines):
 - line 43: // TODO: this works, but
 - line 44: // TODO: essentially turns off the benefits of dictionary support
 - line 45: // TODO: even if the underlying delegate supports it.
 - line 46: // TODO: we should support it here. (https://issues.apache.org/jira/browse/PARQUET-36)

parquet-common/src/main/java/org/apache/parquet/bytes/BytesUtils.java (3 lines):
 - line 82: // TODO: this is duplicated code in LittleEndianDataInputStream
 - line 157: // TODO: this is duplicated code in LittleEndianDataOutputStream
 - line 244: * TODO: the implementation is compatible with readZigZagVarInt. Is there a need for different functions?

parquet-column/src/main/java/org/apache/parquet/io/RecordReaderImplementation.java (3 lines):
 - line 251: this.recordRootConverter = recordMaterializer.getRootConverter(); // TODO: validator(wrap(recordMaterializer), validating, root.getType());
 - line 282: //TODO: when we use nextColumnIdxForRepLevel, should we provide current rep level or the rep level for next item
 - line 374: //TODO: have those wrappers for a converter

parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java (3 lines):
 - line 55: * TODO: detect if a column is optional or required and validate that eq(null)
 - line 56: * TODO: is not called on required fields (is that too strict?)
 - line 57: * TODO: (https://issues.apache.org/jira/browse/PARQUET-44)

parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java (3 lines):
 - line 143: // TODO: This file has become too long!
 - line 144: // TODO: Lets split it up: https://issues.apache.org/jira/browse/PARQUET-310
 - line 984: case BYTE_ARRAY: // TODO: rename BINARY and remove this switch

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java (3 lines):
 - line 443: // out.write(MAGIC); // TODO: add a magic delimiter
 - line 498: int compressedPageSize = (int)dictionaryPage.getBytes().size(); // TODO: fix casts
 - line 1090: // TODO: column/offset indexes are not copied

parquet-column/src/main/java/org/apache/parquet/filter2/recordlevel/IncrementallyUpdatedFilterPredicateEvaluator.java (2 lines):
 - line 33: * TODO: We could also build an evaluator that detects if enough values are known to determine the outcome
 - line 34: * TODO: of the predicate and quit the record assembly early. (https://issues.apache.org/jira/browse/PARQUET-37)

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java (2 lines):
 - line 1248: // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
 - line 1277: return null; // TODO: should this complain?
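The BytesUtils note at line 244 concerns the zigzag varint encoding shared by several readers. For reference, a minimal sketch of that mapping (hypothetical helper names, not the actual BytesUtils code):

```java
// Sketch of the zigzag mapping referenced by BytesUtils.readZigZagVarInt.
// Hypothetical helper class for illustration; not the actual implementation.
public final class ZigZagSketch {

  // Maps signed ints to unsigned so small magnitudes encode in few bytes:
  // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
  static int encodeZigZag(int n) {
    return (n << 1) ^ (n >> 31); // arithmetic shift replicates the sign bit
  }

  // Inverse of encodeZigZag.
  static int decodeZigZag(int raw) {
    return (raw >>> 1) ^ -(raw & 1);
  }

  public static void main(String[] args) {
    for (int n : new int[] {0, -1, 1, -2, 2, Integer.MIN_VALUE, Integer.MAX_VALUE}) {
      int enc = encodeZigZag(n);
      if (decodeZigZag(enc) != n) {
        throw new AssertionError("round trip failed for " + n);
      }
      System.out.println(n + " -> " + enc);
    }
  }
}
```

Because the mapping is its own varint-friendly bijection, a signed reader and an unsigned reader can share the same byte-level loop, which is what the "is there a need for different functions?" question is getting at.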
parquet-hadoop/src/main/java/org/apache/parquet/filter2/statisticslevel/StatisticsFilter.java (2 lines):
 - line 65: // TODO: this belongs in the parquet-column project, but some of the classes here need to be moved too
 - line 66: // TODO: (https://issues.apache.org/jira/browse/PARQUET-38)

parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java (2 lines):
 - line 37: * TODO: this has some overlap with {@link PrimitiveTypeName#javaType}
 - line 38: * TODO: (https://issues.apache.org/jira/browse/PARQUET-30)

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputCommitter.java (2 lines):
 - line 53: // TODO: This method should propagate errors, and we should clean up
 - line 54: // TODO: all the catching of Exceptions below -- see PARQUET-383

parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftRecordConverter.java (2 lines):
 - line 338: // TODO: check thrift has no float
 - line 349: // TODO: make subclass per type

parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java (2 lines):
 - line 135: // TODO: This is a bug! this should be REQUIRED but changing this will
 - line 264: // TODO: in the future, we should just filter these records out instead

parquet-column/src/main/java/org/apache/parquet/filter2/predicate/UserDefinedPredicate.java (2 lines):
 - line 29: // TODO: consider avoiding autoboxing and adding the specialized methods for each type
 - line 30: // TODO: downside is that's fairly unwieldy for users

parquet-common/src/main/java/org/apache/parquet/bytes/LittleEndianDataInputStream.java (2 lines):
 - line 342: // TODO: has this been benchmarked against two alternate implementations?
 - line 373: // TODO: see perf question above in readInt

parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java (2 lines):
 - line 103: // TODO: probably implement it separately
 - line 156: // TODO: update the packer to consume from an InputStream

parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java (2 lines):
 - line 137: // TODO: Show total column size and overall size per value in the column summary line
 - line 189: // TODO: the compressed size of a dictionary page is lost in Parquet

parquet-column/src/main/java/org/apache/parquet/column/page/PageReadStore.java (1 line):
 - line 28: * TODO: rename to RowGroup?

parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java (1 line):
 - line 162: // TODO: rework that

parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java (1 line):
 - line 94: // TODO: make this work for non-tbase types

parquet-column/src/main/java/org/apache/parquet/column/values/ValuesWriter.java (1 line):
 - line 38: // TODO: maybe consolidate into a getPage

parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java (1 line):
 - line 168: // TODO: use PrimitiveTuple accessors

parquet-common/src/main/java/org/apache/parquet/bytes/MultiBufferInputStream.java (1 line):
 - line 146: // TODO: use an allocator

parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLong.java (1 line):
 - line 122: // TODO: should this cache the packer?
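The perf question at LittleEndianDataInputStream line 342 is about how the four bytes of an int get assembled. The two usual candidates to benchmark look roughly like this (illustrative sketch; names and shapes are assumptions, not the class's actual internals):

```java
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

// Two common ways to read a little-endian int from a stream; illustrative only.
final class LittleEndianReadSketch {

  // Variant 1: four single-byte reads, assembled by shifting.
  static int readIntByteWise(InputStream in) throws IOException {
    int b0 = in.read(), b1 = in.read(), b2 = in.read(), b3 = in.read();
    if ((b0 | b1 | b2 | b3) < 0) { // any -1 (EOF) makes the OR negative
      throw new EOFException();
    }
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
  }

  // Variant 2: one bulk read into a reusable scratch array, decoded by ByteBuffer.
  static int readIntBuffered(InputStream in, byte[] scratch) throws IOException {
    int off = 0;
    while (off < 4) { // InputStream.read may return fewer bytes than requested
      int n = in.read(scratch, off, 4 - off);
      if (n < 0) {
        throw new EOFException();
      }
      off += n;
    }
    return ByteBuffer.wrap(scratch, 0, 4).order(ByteOrder.LITTLE_ENDIAN).getInt();
  }
}
```

Which variant wins depends on the underlying stream (buffered or not) and JIT behavior, which is presumably why the TODO asks for a benchmark rather than assuming an answer.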
parquet-column/src/main/java/org/apache/parquet/column/page/DictionaryPage.java (1 line):
 - line 43: this(bytes, (int)bytes.size(), dictionarySize, encoding); // TODO: fix sizes long or int

parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriter.java (1 line):
 - line 80: // TODO: remove this.

parquet-cli/src/main/java/org/apache/parquet/cli/csv/RecordBuilder.java (1 line):
 - line 165: // TODO: translate to enum class

parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java (1 line):
 - line 261: // TODO: what if the value is null?

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/rewrite/RewriteOptions.java (1 line):
 - line 128: * TODO: support rewrite by record to break the original row groups into reasonable ones.

parquet-column/src/main/java/org/apache/parquet/io/api/RecordConsumer.java (1 line):
 - line 130: //TODO: make this abstract in 2.0

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java (1 line):
 - line 619: // TODO: should we print decimal metadata too?

parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java (1 line):
 - line 58: * Only supports positive values (including 0) // TODO: is that ok? Should we make a signed version?

parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java (1 line):
 - line 52: // TODO: for 2.0.0, make this final (breaking change)

parquet-common/src/main/java/org/apache/parquet/bytes/BytesInput.java (1 line):
 - line 279: // TODO: more efficient

parquet-common/src/main/java/org/apache/parquet/bytes/LittleEndianDataOutputStream.java (1 line):
 - line 144: // TODO: see note in LittleEndianDataInputStream: maybe faster

parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/BitPacking.java (1 line):
 - line 29: // TODO: rework the whole thing. It does not need to use streams at all

parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java (1 line):
 - line 81: // TODO: rework this API. The bytes shall be retrieved before the encoding (encoding might be different otherwise)

parquet-common/src/main/java/org/apache/parquet/glob/GlobParser.java (1 line):
 - line 78: // TODO: maybe turn this check off?

parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/ReadBenchmarks.java (1 line):
 - line 108: //TODO how to handle lzo jar?

parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java (1 line):
 - line 108: // TODO: optimize this

parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java (1 line):
 - line 40: // TODO: show dictionary size in values and in bytes

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java (1 line):
 - line 245: // TODO: decide if we compress

parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java (1 line):
 - line 74: @SuppressWarnings("rawtypes") // TODO: fix type

parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java (1 line):
 - line 93: currentBuffer = new int[currentCount]; // TODO: reuse a buffer

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java (1 line):
 - line 280: // TODO - this outgoing buffer might be better off not being shared, this seems to

parquet-column/src/main/java/org/apache/parquet/column/Encoding.java (1 line):
 - line 112: * TODO: Should we rename this to be more clear?
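The "reuse a buffer" note in RunLengthBitPackingHybridDecoder (line 93) points at the usual grow-only scratch-buffer pattern, sketched below (hypothetical class, not the decoder's actual code):

```java
// Grow-only scratch buffer: reallocates only when a larger run arrives,
// instead of allocating a fresh int[] per bit-packed run. Illustrative only.
final class IntScratch {
  private int[] buf = new int[0];

  // Returns a buffer with capacity >= size; callers must track the valid
  // length themselves, since the array may be larger than requested.
  int[] ensure(int size) {
    if (buf.length < size) {
      // Doubling keeps the number of reallocations logarithmic in the peak size.
      buf = new int[Math.max(size, buf.length * 2)];
    }
    return buf;
  }
}
```

With this shape, the decoder's `currentBuffer = new int[currentCount]` would become something like `currentBuffer = scratch.ensure(currentCount)`, trading a little bookkeeping (array length no longer equals run length) for far fewer allocations on long pages.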
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java (1 line):
 - line 570: // TODO: figure out a way to use MessageOrBuilder

parquet-column/src/main/java/org/apache/parquet/io/EmptyRecordReader.java (1 line):
 - line 36: this.recordConsumer = recordMaterializer.getRootConverter(); // TODO: validator(wrap(recordMaterializer), validating, root.getType());

parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java (1 line):
 - line 124: //TODO how to handle lzo jar?

parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java (1 line):
 - line 56: // TODO: use proto custom options to override per field.

parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java (1 line):
 - line 338: // TODO: add these to the reader builder

parquet-column/src/main/java/org/apache/parquet/filter2/recordlevel/IncrementallyUpdatedFilterPredicateBuilderBase.java (1 line):
 - line 56: * TODO: UserDefinedPredicates still autobox however

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java (1 line):
 - line 51: * TODO: too many constructors (https://issues.apache.org/jira/browse/PARQUET-39)

parquet-column/src/main/java/org/apache/parquet/filter2/recordlevel/FilteringGroupConverter.java (1 line):
 - line 67: // TODO: making the assumption that getConverter(i) is only called once, is that valid?

parquet-pig/src/main/java/org/apache/parquet/pig/ParquetLoader.java (1 line):
 - line 339: // TODO use pig-0.12 setBytes api when its available

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/SerializationUtil.java (1 line):
 - line 37: * TODO: Refactor elephant-bird so that we can depend on utils like this without extra baggage.

parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java (1 line):
 - line 552: // TODO: should not have to materialize those bytes

parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java (1 line):
 - line 300: // TODO: this should only return something if the type can match rather than explicitly

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java (1 line):
 - line 52: * TODO: should this actually be called RowGroupImpl or something?
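The ParquetReader note (PARQUET-39, "too many constructors") describes the classic telescoping-constructor smell; the standard remedy is a builder, and newer parquet-hadoop releases do expose ParquetReader.builder(...). A generic sketch of the shape (hypothetical ReaderConfig class, shown only to illustrate the pattern, not the real API):

```java
// Builder in place of telescoping constructors. Hypothetical ReaderConfig;
// the field names and defaults here are illustrative assumptions.
final class ReaderConfig {
  private final String path;           // required
  private final boolean useDictionary; // optional
  private final int batchSize;         // optional

  private ReaderConfig(Builder b) {
    this.path = b.path;
    this.useDictionary = b.useDictionary;
    this.batchSize = b.batchSize;
  }

  static Builder builder(String path) {
    return new Builder(path);
  }

  static final class Builder {
    private final String path;
    private boolean useDictionary = true; // optional knobs get defaults
    private int batchSize = 1024;

    private Builder(String path) {
      this.path = path;
    }

    Builder useDictionary(boolean use) {
      this.useDictionary = use;
      return this;
    }

    Builder batchSize(int n) {
      this.batchSize = n;
      return this;
    }

    ReaderConfig build() {
      return new ReaderConfig(this);
    }
  }
}
```

Usage: `ReaderConfig.builder("/tmp/data.parquet").batchSize(4096).build();` adding a new option then means one new builder method instead of yet another constructor overload.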