hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java (7 lines):
- line 148: // TODO: is this this called precombine in 1.0. ..
- line 179: // TODO: is this necessary? won't we just use a table schema.
- line 283: // TODO: this has to be UTC. why is it not the default?
- line 333: // TODO: understand why is writing/changing all these. this has to work on both HDFS and Cloud.
- line 566: * TODO: this directory creation etc should happen in the HoodieTableMetaClient.
- line 624: // TODO: this can be eventually tightened to ensure all table configs are defined.
- line 634: // TODO: this can be tightened up, once all configs have a since version.
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ExpressionIndexSupport.scala (7 lines):
- line 248: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 346: // TODO encoding should be done internally w/in HoodieBackedTableMetadata
- line 363: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 498: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 507: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 518: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 564: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java (7 lines):
- line 136: // TODO support spark 3.3.x as it supports TimeStampNTZ (SPARK-35662)
- line 293: * TODO: support more types
- line 327: * TODO: support more types
- line 355: * TODO: support more types
- line 381: * TODO: support more types
- line 407: * TODO: support more types
- line 429: * TODO: support more types
hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java (6 lines):
- line 171: * TODO serialize other type of record.
- line 504: * TODO: See if we can always pass GenericRecord instead of SpecificBaseRecord in some cases.
- line 880: // TODO java-doc
- line 1149: // TODO: support more types
- line 1390: // TODO in HoodieFileSliceReader may partitionName=option#empty
- line 1544: // TODO add logical type decoding
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala (5 lines):
- line 249: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 319: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 346: // TODO encoding should be done internally w/in HoodieBackedTableMetadata
- line 363: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
- line 444: @inline private def formatColName(col: String, statName: String) = { // TODO add escaping for
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java (4 lines):
- line 45: // TODO : Get file status and create a rollback stat and file
- line 46: // TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
- line 66: // TODO : Get file status and create a rollback stat and file
- line 67: // TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java (4 lines):
- line 918: // (TODO: make this configurable)
- line 1793: // TODO we need to handle unions in general case as well
- line 1833: // TODO add support for those types
- line 2417: Collections.emptyList(), // TODO: support different merger classes, which is currently only known to write config
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieHadoopIOFactory.java (4 lines):
- line 55: //TODO: remove this case [HUDI-7746]
- line 64: //TODO: remove this case [HUDI-7746]
- line 83: //TODO: remove this case [HUDI-7746]
- line 92: //TODO: remove this case [HUDI-7746]
hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/HoodieAdbJdbcClient.java (3 lines):
- line 314: * TODO migrate to implementation of {@link #getAllPartitions(String)}
- line 343: * TODO align with {@link org.apache.hudi.sync.common.HoodieMetaSyncOperations#updateTableSchema}
- line 446: * TODO align with {@link HoodieSyncClient#getPartitionEvents}
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala (3 lines):
- line 565: // TODO For v2 commands, we will cast the string back to its actual value,
- line 856: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema
- line 3105: // TODO we need proper support for the NULL format.
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java (3 lines):
- line 63: * corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO : Change this to a class, something
- line 341: // TODO re-use buffer if stream is backed by buffer
- line 416: // TODO : fs.open() and return inputstream again, need to pass FS configuration
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java (3 lines):
- line 314: //TODO save history schema by metaTable
- line 1297: // TODO: this also does MT table management..
- line 1320: // TODO: this method will be removed with restore/rollback changes in MDT
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala (3 lines):
- line 466: // TODO move partition columns handling down into the handlers
- line 563: // TODO clean up
- line 887: // TODO HUDI-6286 should not delete old data if using `Overwrite` mode
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitTool.java (3 lines):
- line 45: // TODO: figure out how to integrate this in production
- line 50: // TODO: get clusterId as input parameters
- line 58: // TODO: add retry attempts
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java (3 lines):
- line 100: // TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link BaseHoodieWriteClient}.
- line 127: * are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO :
- line 150: // TODO : Write baseCommitTime is possible here ?
hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java (3 lines):
- line 127: // TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows
- line 162: // TODO replace w/ hasContentLength
- line 345: * hasNext is not idempotent. TODO - Fix this. It is okay for now - PR
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieInternalRowUtils.scala (3 lines):
- line 184: // TODO need to canonicalize schemas (casing)
- line 336: // TODO this has to be revisited to avoid loss of precision (for fps)
- line 395: // TODO revisit this (we need to align permitted casting w/ Spark)
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/update/strategy/FlinkConsistentBucketUpdateStrategy.java (3 lines):
- line 55: * TODO: remove this class when RowData mode writing is supported for COW.
- line 85: // TODO throw exception if exists bucket merge plan
- line 135: // TODO add option to skip dual update, i.e., write updates only to the new file group
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala (3 lines):
- line 563: // TODO For v2 commands, we will cast the string back to its actual value,
- line 854: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema
- line 3101: // TODO we need proper support for the NULL format.
hudi-aws/src/main/java/org/apache/hudi/aws/metrics/cloudwatch/CloudWatchReporter.java (3 lines):
- line 199: // TODO: Publish other Histogram metrics to cloud watch
- line 204: // TODO: Publish other Meter metrics to cloud watch
- line 209: // TODO: Publish other Timer metrics to cloud watch
hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala (3 lines):
- line 564: // TODO For v2 commands, we will cast the string back to its actual value,
- line 855: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema
- line 3104: // TODO we need proper support for the NULL format.
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java (3 lines):
- line 236: // TODO - fix the method naming here
- line 343: // TODO: struct field name is only translated to support special char($)
- line 499: // TODO - all partition fields should be part of the schema. datestr is treated as special.
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java (2 lines):
- line 84: // TODO : check if maxMemory is not greater than JVM or executor memory
- line 85: // TODO - rollback any compactions in flight
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java (2 lines):
- line 133: // TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used
- line 138: // TODO AvroSparkReader need
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala (2 lines):
- line 122: // TODO: if you move this into the closure it reverts to the default values.
- line 166: //TODO: HARDCODED TIMELINE OBJECT
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java (2 lines):
- line 201: // TODO : Generate updates for only N partitions.
- line 332: // TODO : Ensure that the difference between totalRecords and totalRecordsGenerated is not too big, if yes,
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala (2 lines):
- line 53: // TODO replace w/ DateTimeFormatter
- line 227: * TODO: standardize the key prefix so that we don't need this helper (HUDI-4935)
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkers.java (2 lines):
- line 87: // TODO If current is compact or clustering then create marker directly without early conflict detection.
- line 147: // TODO If current is compact or clustering then create marker directly without early conflict detection.
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala (2 lines):
- line 189: * TODO unify w/ HoodieFileIndex#listFiles
- line 437: // TODO support coercible expressions (ie attr-references casted to particular type), similar
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandleWithChangeLog.java (2 lines):
- line 72: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations
- line 87: // TODO Remove these unnecessary newInstance invocations
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java (2 lines):
- line 62: // TODO rebase records/content to leverage Either to warrant
- line 174: // TODO need convert record type
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines):
- line 125: // TODO: we can avoid boxing if future version of avro provide primitive accessors.
- line 349: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal?
hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines):
- line 125: // TODO: we can avoid boxing if future version of avro provide primitive accessors.
- line 349: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal?
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines):
- line 125: // TODO: we can avoid boxing if future version of avro provide primitive accessors.
- line 349: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal?
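Side note on the `TODO replace w/ DateTimeFormatter` entry in HoodieSqlCommonUtils.scala above (the same note recurs in the day-based compaction strategies further down): a minimal sketch of the swap these TODOs point at, assuming the formatter being replaced renders a date-based partition path such as `yyyy/MM/dd` (the pattern and class name here are illustrative, not Hudi API). Unlike `SimpleDateFormat`, `DateTimeFormatter` is immutable and thread-safe, so a single shared constant works without per-thread copies.

```java
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

public class PartitionDateFormatSketch {
  // One shared, thread-safe formatter instead of per-call/ThreadLocal SimpleDateFormat.
  private static final DateTimeFormatter PARTITION_FORMAT =
      DateTimeFormatter.ofPattern("yyyy/MM/dd"); // illustrative pattern only

  public static void main(String[] args) {
    // Formats a date the way a day-based partition path might look.
    System.out.println(LocalDate.of(2024, 5, 17).format(PARTITION_FORMAT)); // 2024/05/17
  }
}
```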
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/GenericRecordFullPayloadGenerator.java (2 lines):
- line 215: // TODO : pack remaining bytes into a complex field
- line 305: // TODO: Need to implement valid data generation for fixed type
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java (2 lines):
- line 207: // TODO make sure writing w/ and w/o meta fields is consistent (currently writing w/o
- line 284: // TODO extract to utils
hudi-common/src/main/java/org/apache/hudi/common/table/view/RocksDbBasedFileSystemView.java (2 lines):
- line 64: * TODO: vb The current implementation works in embedded server mode where each restarts blows away the view stores. To
- line 552: //TODO can we make this more efficient by storing reverse mapping (Instant -> FileGroupId) as well?
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala (2 lines):
- line 311: // TODO rebase onto JoinRecord
- line 577: * TODO rebase on Spark's SerializerSupport
hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/PulsarSource.java (2 lines):
- line 108: // TODO validate endpoints provided in the appropriate format
- line 158: // TODO support capping the amount of records fetched
hudi-common/src/main/java/org/apache/hudi/index/secondary/SecondaryIndexManager.java (2 lines):
- line 129: // TODO: build index
- line 165: // TODO: drop index data
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java (2 lines):
- line 363: // TODO: Check if logcompaction also needs to be included in this API.
- line 604: * @return Get the stream of completed instants in reverse order TODO Change code references to getInstants() that
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java (2 lines):
- line 144: * TODO: [HUDI-8294]
- line 1042: // TODO: fix `isTightBound` flag when stats based on log files are available
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/update/strategy/ConsistentBucketUpdateStrategy.java (2 lines):
- line 85: // TODO throw exception if exists bucket merge plan
- line 136: // TODO add option to skip dual update, i.e., write updates only to the new file group
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java (2 lines):
- line 117: // TODO support bulk insert for consistent bucket index
- line 358: // TODO support insert overwrite for consistent bucket index
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala (2 lines):
- line 107: * TODO explain workflow for MOR tables
- line 274: // TODO move to analysis phase
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java (2 lines):
- line 181: // TODO: consider skipping this method for non-partitioned table and simplify the checks
- line 224: // TODO: what do we do if both does not exist? should we throw an exception and let caller do the fallback ?
hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala (2 lines):
- line 131: // TODO: if you move this into the closure it reverts to the default values.
- line 177: //TODO: HARDCODED TIMELINE OBJECT
hudi-common/src/main/java/org/apache/hudi/avro/ConvertingGenericData.java (2 lines):
- line 47: // TODO re-enable upon upgrading to 1.10
- line 62: // TODO re-enable upon upgrading to 1.10
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java (2 lines):
- line 146: // TODO: vb - No logical way to represent parallel stream pos in a single long.
- line 159: // TODO fix to reflect scanner progress
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java (2 lines):
- line 198: // TODO - FIX THIS
- line 243: // TODO: we may directly rename original parquet file if there is not evolution/devolution of schema
hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java (2 lines):
- line 46: * TODO: The reader/writer may try to use relative paths based on the inlinepath and it may not work. Need to handle
- line 47: * this gracefully eg. the parquet summary metadata reading. TODO: If this shows promise, also support directly writing
hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/CachingPath.java (2 lines):
- line 135: // TODO java-doc
- line 141: // TODO java-doc
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandleWithChangeLog.java (2 lines):
- line 70: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations
- line 85: // TODO Remove these unnecessary newInstance invocations
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v2/TimelineArchiverV2.java (2 lines):
- line 256: //TODO: HARDCODED TIMELINE OBJECT
- line 309: //TODO: HARDCODED TIMELINE OBJECT
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelationV1.scala (2 lines):
- line 57: * TODO: rebase w/ HoodieBaseRelation HUDI-5362
- line 70: // TODO : Figure out a valid HoodieWriteConfig
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandleWithChangeLog.java (2 lines):
- line 89: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations
- line 103: // TODO Remove these unnecessary newInstance invocations
hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java (2 lines):
- line 151: // TODO better enable urlEncodePartitioningEnabled if hiveStylePartitioningEnabled is enabled?
- line 163: // TODO: Get the parallelism from HoodieWriteConfig
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala (2 lines):
- line 132: // TODO: if you move this into the closure it reverts to the default values.
- line 179: //TODO: HARDCODED TIMELINE OBJECT
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java (2 lines):
- line 55: // TODO : Make this bucketed so don't have a large number of files in a single directory
- line 64: // TODO : Handle failure case which may leave behind tons of small corrupt files
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java (1 line):
- line 284: // TODO (NA) : Clean only the earliest pending clean just like how we do for other table services
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieArrayWritableAvroUtils.java (1 line):
- line 55: //TODO: [HUDI-8261] add casting to the projection
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieBaseParquetWriter.java (1 line):
- line 85: // TODO - compute this compression ratio dynamically by looking at the bytes written to the
hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java (1 line):
- line 247: // TODO: How to obtain hive configs to register?
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java (1 line):
- line 594: // TODO this should cherry-pick only clustering properties
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala (1 line):
- line 85: * TODO rename to HoodieSparkSqlFileIndex
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java (1 line):
- line 65: * TODO: [HUDI-8294]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDDV2.scala (1 line):
- line 160: // TODO clean up, this lock is unnecessary
hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java (1 line):
- line 161: * TODO: This method is not needed, since log compaction plan is not a immutable plan.
packaging/bundle-validation/conf/hive-site.xml (1 line):
- line 40:
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java (1 line):
- line 500: // TODO: Use HoodieMetaTable to extract affected file directly.
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java (1 line):
- line 68: // TODO Handle Date value
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieAvroHFileWriter.java (1 line):
- line 95: // TODO - compute this compression ratio dynamically by looking at the bytes written to the
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala (1 line):
- line 153: * TODO: Revisit to return a concrete relation here when we support CREATE TABLE AS for Hudi with DataSource API.
hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java (1 line):
- line 168: // TODO - Better Exception handling
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java (1 line):
- line 252: // TODO: use infer function instead
hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java (1 line):
- line 155: // TODO add additional types when needed. default to string
hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java (1 line):
- line 46: * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro.
hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java (1 line):
- line 103: //TODO Duplicating it here from HMSDLExecutor as HiveQueryQL has no way of doing it on its own currently. Need to refactor it
hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/add/AddPrimitiveColumnSchemaPostProcessor.java (1 line):
- line 50: * TODO support complex types.
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java (1 line):
- line 117: // TODO: functional and secondary index are not supported with Flink yet, but we should fix the partition name when we support them.
hudi-platform-service/hudi-metaserver/hudi-metaserver-client/src/main/java/org/apache/hudi/common/table/HoodieTableMetaserverClient.java (1 line):
- line 74: // TODO: transfer table parameters to table config
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkBaseIndexSupport.scala (1 line):
- line 54: * TODO: The default implementation should be changed to throw
hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java (1 line):
- line 59: //TODO: [HUDI-6249] change the maps below to implement ConcurrentMap
hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java (1 line):
- line 124: // only JavaRDD is supported for Spark partitioner, but it is not enforced by BulkInsertPartitioner API. To improve this, TODO HUDI-3463
hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java (1 line):
- line 302: // fall back to legacy config for BWC. TODO consolidate in HUDI-6020
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala (1 line):
- line 249: * TODO support casing reconciliation
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark33NestedSchemaPruning.scala (1 line):
- line 55: // TODO generalize to any file-based relation
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieSparkTypeUtils.scala (1 line):
- line 22: // TODO unify w/ DataTypeUtils
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/vectorized/ColumnarBatchUtils.java (1 line):
- line 48: //TODO: [HUDI-8099] replace this with inplace projection by extending columnar batch
hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java (1 line):
- line 212: // TODO : readerSchema can change across blocks/log files, fix this inside Scanner
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala (1 line):
- line 75: //TODO: [HUDI-8303] Explicit conversion might not be required for Scala 2.12+
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelationV2.scala (1 line):
- line 53: * TODO: rebase w/ HoodieBaseRelation HUDI-5362
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java (1 line):
- line 120: // TODO: Add a rollback instant but for compaction
hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SchemaChangeUtils.java (1 line):
- line 48: * TODO: support more type update.
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/MarkerBasedCommitMetadataResolver.java (1 line):
- line 82: // TODO: refactor based on HoodieData
hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/main/java/org/apache/hudi/metaserver/HoodieMetaserver.java (1 line):
- line 96: // TODO: add metaserver factory.
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java (1 line):
- line 160: // TODO abstract this w/in HoodieDataBlock
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/strategy/BoundedPartitionAwareCompactionStrategy.java (1 line):
- line 45: // TODO replace w/ DateTimeFormatter
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala (1 line):
- line 355: // TODO dedup w/ HoodieAnalysis
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/GenericRecordPartialPayloadGenerator.java (1 line):
- line 51: if (setNull) { // TODO : DO NOT SET THE RECORD KEY FIELDS TO NULL
hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java (1 line):
- line 414: // TODO: This case has to be handled. HUDI-6352
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java (1 line):
- line 178: false, // TODO: Fix this to support incremental queries
hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-common/src/main/java/org/apache/hudi/common/util/HoodieCommonKryoRegistrar.java (1 line):
- line 92: // TODO need to relocate to hudi-common
hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java (1 line):
- line 71: * TODO: [HUDI-8294]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala (1 line):
- line 145: // TODO clean up
hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java (1 line):
- line 63: * TODO rename to AvroFallbackBaseKeyGenerator
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SecondaryIndexSupport.scala (1 line):
- line 97: * TODO: [HUDI-8302] Handle multiple secondary indexes (similar to expression index)
hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java (1 line):
- line 172: * TODO: (Lin) Delete this function after we remove the assume.date.partitioning config completely.
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDDV1.scala (1 line):
- line 157: // TODO clean up, this lock is unnecessary
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapMORRDD.scala (1 line):
- line 75: // TODO clean up, this lock is unnecessary see HoodieMergeOnReadRDD
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java (1 line):
- line 60: // TODO: add upgrade step if required.
hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java (1 line):
- line 116: // TODO Support different date format
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIdentifier.java (1 line):
- line 165: * TODO support different split criteria, e.g., distribute records evenly using statistics
hudi-common/src/main/java/org/apache/hudi/common/table/read/PositionBasedFileGroupRecordBuffer.java (1 line):
- line 119: // TODO: Return an iterator that can generate sequence number with the record.
hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-platform-service/hudi-metaserver/hudi-metaserver-client/src/main/java/org/apache/hudi/common/table/timeline/HoodieMetaserverBasedTimeline.java (1 line):
- line 104: * TODO [HUDI-6883] We should change HoodieMetaserverBasedTimeline to store completion time as well.
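On the HoodieTableFileSystemView entry above (`//TODO: [HUDI-6249] change the maps below to implement ConcurrentMap`): a minimal sketch of the kind of change that note suggests, with hypothetical field and method names; declaring a field against `ConcurrentMap`, backed by `ConcurrentHashMap`, gives atomic per-key updates without external synchronization.

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class ViewMapsSketch {
  // Hypothetical example field: safe for concurrent readers and writers.
  private final ConcurrentMap<String, Long> fileGroupSizes = new ConcurrentHashMap<>();

  void record(String fileGroupId, long sizeBytes) {
    fileGroupSizes.merge(fileGroupId, sizeBytes, Long::sum); // atomic update per key
  }

  public static void main(String[] args) {
    ViewMapsSketch view = new ViewMapsSketch();
    view.record("fg-1", 128L);
    view.record("fg-1", 64L);
    System.out.println(view.fileGroupSizes); // {fg-1=192}
  }
}
```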
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java (1 line):
- line 1812: // TODO: future support async clustering
hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java (1 line):
- line 125: // TODO we should remove it if we can read InternalRow from source.
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala (1 line):
- line 105: * TODO convert directly from GenericRecord into InternalRow instead
hudi-io/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java (1 line):
- line 96: * TODO: ReflectionUtils should throw a specific exception to indicate Reflection problem.
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v1/TimelineArchiverV1.java (1 line):
- line 194: // TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieMultiWriterTestSuiteJob.java (1 line):
- line 55: * TODO: [HUDI-8294]
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala (1 line):
- line 56: // TODO generalize to any file-based relation
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala (1 line):
- line 85: // TODO need to add meta-fields here
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/TimelineDTO.java (1 line):
- line 50: // TODO: For Now, we will assume, only active-timeline will be transferred.
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java (1 line):
- line 142: // TODO: On by default. Once stable, we will remove the other mode.
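On the ReflectionUtils entry above (`TODO: ReflectionUtils should throw a specific exception to indicate Reflection problem.`): a minimal sketch of what a reflection-specific exception could look like; the `ReflectionException` type and `newInstance` helper are hypothetical, not part of the Hudi API.

```java
import java.lang.reflect.InvocationTargetException;

// Hypothetical dedicated exception type signalling a reflective lookup/instantiation failure.
class ReflectionException extends RuntimeException {
  ReflectionException(String message, Throwable cause) {
    super(message, cause);
  }
}

final class ReflectionSketch {
  // Loads a class by name and invokes its no-arg constructor, wrapping every
  // reflection failure in the single dedicated exception type above.
  static Object newInstance(String className) {
    try {
      return Class.forName(className).getDeclaredConstructor().newInstance();
    } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException
        | IllegalAccessException | InvocationTargetException e) {
      throw new ReflectionException("Could not instantiate " + className, e);
    }
  }

  public static void main(String[] args) {
    System.out.println(newInstance("java.util.ArrayList")); // prints []
  }
}
```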
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala (1 line):
- line 154: // TODO support CDC with spark record
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java (1 line):
- line 38: // TODO: stop extending HiveSyncConfig and take all the variables needed from config file
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java (1 line):
- line 99: // TODO Auto-generated method stub
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala (1 line):
- line 184: * TODO move to HoodieCatalystExpressionUtils
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/sort/SpaceCurveSortingHelper.java (1 line):
- line 112: // TODO validate if we need Spark to re-partition
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala (1 line):
- line 52: // TODO scala-doc
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala (1 line):
- line 411: // TODO: make it work for non-hive style partitioning
hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java (1 line):
- line 348: // TODO: Make this configurable along with strategy specific config - For now, this is a generic enough strategy
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala (1 line):
- line 533: * TODO merge w/ ResolveImplementations
hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InMemoryFileSystem.java (1 line):
- line 42: // TODO: this needs to be per path to support num_cores > 1, and we should release the buffer once done
hudi-common/src/main/java/org/apache/hudi/common/table/view/SpillableMapBasedFileSystemView.java (1 line):
- line 216: //TODO should we make this more efficient by having reverse mapping of instant to file group id?
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/index/AbstractIndexingCatchupTask.java (1 line):
- line 166: * For new indexes added in 1.0.0, these flows are experimental. TODO: HUDI-8607.
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetStreamWriter.java (1 line):
- line 70: // TODO support populating the metadata
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/transaction/lock/HiveMetastoreBasedLockProvider.java (1 line):
- line 196: // TODO : FIX:Using the parameterized constructor throws MethodNotFound
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java (1 line):
- line 540: // TODO we can actually deduce file size purely from AppendResult (based on offset and size
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java (1 line):
- line 73: // TODO support spark orc reader
hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieInlineTestSuiteWriter.java (1 line):
- line 190: // TODO: fix clustering to be done async https://issues.apache.org/jira/browse/HUDI-1590
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecordPayload.java (1 line):
- line 152: //TODO: After we have merge mode working for writing, we should have a dummy payload that will throw exception when used
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java (1 line):
- line 40: // TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark3ParquetSchemaEvolutionUtils.scala (1 line):
- line 61: //TODO: HARDCODED TIMELINE OBJECT
hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java (1 line):
- line 542: // TODO: skip cascading when new fields in structs are added to the schema in last position
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/common/model/HoodieSparkRecord.java (1 line):
- line 304: // TODO HUDI-5282 support metaData
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/BaseConsistentHashingBucketClusteringPlanStrategy.java (1 line):
- line 79: * TODO maybe add force config to schedule the clustering. It could allow clustering on partitions that are not doing write operation.
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java (1 line):
- line 65: * TODO: [HUDI-8294]
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHiveRecord.java (1 line):
- line 195: // TODO HUDI-5282 support metaData
hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java (1 line):
- line 167: // TODO kafka connect config needs to support setting base file format
hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java (1 line):
- line 44: // TODO get this from HoodieConfig
hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line):
- line 208: // TODO clip for array,map,row types.
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java (1 line):
- line 47: * TODO: [HUDI-8294]
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala (1 line):
- line 46: * TODO move to HoodieBaseRelation, make private
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkMetadataWriterUtils.java (1 line):
- line 193: // TODO: HUDI-8848: Allow configurable storage level while computing expression index update
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetWriter.java (1 line):
- line 101: // TODO set partition path in ctor
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RowCustomColumnsSortPartitioner.java (1 line):
- line 81: // TODO: Remove the else block as sorting in row-writer mode is always used with populateMetaFields turned on as it allows easy access to hoodie meta fields
hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java (1 line):
- line 61: * TODO handle sample_writes sub-path clean-up w.r.t. rollback and insert overwrite. (HUDI-6044)
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java (1 line):
- line 159: * TODO: unify the incremental view code between hive/spark-sql and spark datasource
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java (1 line):
- line 85: // TODO maybe batch the operation to improve performance
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetRowDataWriter.java (1 line):
- line 348: * TODO: Leaving this here as there might be a requirement to support TIMESTAMP(9) in the future
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java (1 line):
- line 78: // TODO : UUID's can clash even for insert/insert, handle that case.
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieReaderContext.java (1 line):
- line 301: //TODO: [HUDI-8261] cover more types
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/BucketizedBloomCheckPartitioner.java (1 line):
- line 148: // TODO replace w/ more performant hash
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieFileProbingFunction.java (1 line):
- line 127: // TODO add assertion that file is checked only once
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieHBaseAvroHFileReader.java (1 line):
- line 213: // TODO eval whether seeking scanner would be faster than pread
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/BucketIndexConcurrentFileWritesConflictResolutionStrategy.java (1 line):
- line 39: // TODO : UUID's can clash even for insert/insert, handle that case.
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java (1 line):
- line 57: // TODO: Should we treat this fatal and throw exception?
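On the BucketizedBloomCheckPartitioner entry above (`// TODO replace w/ more performant hash`): a sketch of mapping a key to a partition with a fast non-cryptographic hash. Guava's Murmur3 is used here purely as an example and its availability on the classpath is an assumption; `partitionFor` is a hypothetical helper, not the partitioner's actual method.

```java
import com.google.common.hash.Hashing; // assumes Guava is on the classpath
import java.nio.charset.StandardCharsets;

public class FileIdPartitionerSketch {
  // Hypothetical helper: maps a file id to one of N partitions using Murmur3.
  static int partitionFor(String fileId, int numPartitions) {
    int hash = Hashing.murmur3_128().hashString(fileId, StandardCharsets.UTF_8).asInt();
    return Math.floorMod(hash, numPartitions); // floorMod keeps the result non-negative
  }

  public static void main(String[] args) {
    System.out.println(partitionFor("file-0001", 16));
  }
}
```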
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/SparkAvroDeltaInputWriter.java (1 line):
- line 35: // TODO : the base path has to be a new path every time for spark avro
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java (1 line):
- line 140: // TODO enable automatic predicate pushdown after fixing issues
hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java (1 line):
- line 53: // TODO: consolidate all checkpointing configs into HoodieCheckpointStrategy (HUDI-5906)
hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark34NestedSchemaPruning.scala (1 line):
- line 55: // TODO generalize to any file-based relation
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java (1 line):
- line 45: // TODO : Sort list by file size and take the median file status to ensure fair calculation and change to remote
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java (1 line):
- line 30: * TODO: enable docs gen by adding {@link ConfigClassProperty} after TSM is landed (HUDI-3475)
hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java (1 line):
- line 478: // TODO cleanup after Parquet upgrade to 1.12
hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line):
- line 142: // TODO Reduce the duplicated code
hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java (1 line):
- line 279: // TODO: Implement automatic schema evolution when you add a new column.
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDReadClient.java (1 line):
- line 66: * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/strategy/DayBasedCompactionStrategy.java (1 line):
- line 45: // TODO replace w/ DateTimeFormatter
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java (1 line):
- line 174: // TODO support bootstrap
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java (1 line):
- line 498: // TODO : Where is shouldComplete used ?
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/v1/ArchivedTimelineLoaderV1.java (1 line):
- line 106: // TODO If we can store additional metadata in datablock, we can skip parsing records
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieContinuousTestSuiteWriter.java (1 line):
- line 43: * TODO: [HUDI-8294]
hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieIndexer.java (1 line):
- line 66: * TODO: [HUDI-8294]
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java (1 line):
- line 107: // TODO Replace Java serialization with Avro approach to keep state compatibility.
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java (1 line):
- line 47: // TODO more indexes to be added
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/v2/ActiveTimelineV2.java (1 line):
- line 252: * TODO: [HUDI-6885] Depreciate HoodieActiveTimeline#getInstantFileName and fix related tests.
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java (1 line):
- line 167: // TODO : read record count from metadata
hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java (1 line):
- line 198: // TODO partition columns have to be appended in all read-paths
hudi-hadoop-common/src/main/java/org/apache/hudi/io/hadoop/HoodieParquetStreamWriter.java (1 line):
- line 77: // TODO support populating the metadata
hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieCDCLogRecordIterator.java (1 line):
- line 97: // TODO support cdc with spark record.
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCFileGroupIterator.scala (1 line):
- line 119: // TODO support CDC with spark record
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/FileStatsIndex.java (1 line):
- line 382: // TODO encoding should be done internally w/in HoodieBackedTableMetadata
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala (1 line):
- line 117: // TODO use mutable row, avoid re-allocating
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java (1 line):
- line 336: * TODO: Revisit this logic and validate that filtering for all
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/PathUtils.scala (1 line):
- line 26: * TODO convert to Java, move to hudi-common
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/BaseHoodieTimeline.java (1 line):
- line 348: //TODO: Make sure this change does not break existing functionality.
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala (1 line):
- line 115: // TODO fix, currently assuming parquet as underlying format