hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java (7 lines): - line 164: * TODO serialize other type of record. - line 498: * TODO: See if we can always pass GenericRecord instead of SpecificBaseRecord in some cases. - line 521: // TODO Unify the logical of rewriteRecordWithMetadata and rewriteEvolutionRecordWithMetadata, and delete this function. - line 889: // TODO java-doc - line 1139: // TODO: support more types - line 1286: // TODO in HoodieFileSliceReader may partitionName=option#empty - line 1425: // TODO add logical type decoding hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java (7 lines): - line 136: // TODO support spark 3.3.x as it supports TimeStampNTZ (SPARK-35662) - line 293: * TODO: support more types - line 327: * TODO: support more types - line 355: * TODO: support more types - line 381: * TODO: support more types - line 407: * TODO: support more types - line 429: * TODO: support more types hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/MergeOnReadRestoreActionExecutor.java (4 lines): - line 44: // TODO : Get file status and create a rollback stat and file - line 45: // TODO : Delete the .aux files along with the instant file, okay for now since the archival process will - line 65: // TODO : Get file status and create a rollback stat and file - line 66: // TODO : Delete the .aux files along with the instant file, okay for now since the archival process will hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala (4 lines): - line 60: // TODO elaborate on the ordering - line 67: // TODO elaborate on the ordering - line 73: // TODO elaborate on the ordering - line 536: * TODO merge w/ ResolveImplementations hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java (4 lines): - line 1174: // TODO we should delete records instead of stubbing them - line 1271: // TODO we need 
to handle unions in general case as well - line 1310: // TODO add support for those types - line 1863: Collections.emptyList(), // TODO: support different merger classes, which is currently only known to write config hudi-sync/hudi-adb-sync/src/main/java/org/apache/hudi/sync/adb/HoodieAdbJdbcClient.java (3 lines): - line 313: * TODO migrate to implementation of {@link #getAllPartitions(String)} - line 340: * TODO align with {@link org.apache.hudi.sync.common.HoodieMetaSyncOperations#updateTableSchema} - line 443: * TODO align with {@link HoodieSyncClient#getPartitionEvents} hudi-aws/src/main/java/org/apache/hudi/aws/cloudwatch/CloudWatchReporter.java (3 lines): - line 198: //TODO: Publish other Histogram metrics to cloud watch - line 203: //TODO: Publish other Meter metrics to cloud watch - line 208: //TODO: Publish other Timer metrics to cloud watch hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala (3 lines): - line 562: // TODO For v2 commands, we will cast the string back to its actual value, - line 853: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - line 3102: // TODO we need proper support for the NULL format. 
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java (3 lines): - line 57: * corresponding changes need to be made to {@link HoodieLogBlockVersion} TODO : Change this to a class, something - line 296: // TODO re-use buffer if stream is backed by buffer - line 316: // TODO : fs.open() and return inputstream again, need to pass FS configuration hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java (3 lines): - line 309: //TODO save history schema by metaTable - line 362: // TODO : MULTIWRITER -> check if failed bootstrap files can be cleaned later - line 1248: // TODO: this method will be removed with restore/rollback changes in MDT hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala (3 lines): - line 435: // TODO move partition columns handling down into the handlers - line 536: // TODO clean up - line 847: // TODO HUDI-6286 should not delete old data if using `Overwrite` mode hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitTool.java (3 lines): - line 45: // TODO: figure out how to integrate this in production - line 50: // TODO: get clusterId as input parameters - line 58: // TODO: add retry attempts hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java (3 lines): - line 95: // TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link BaseHoodieWriteClient}. - line 117: * are unknown across batches Inserts (which are new parquet files) are rolled back based on commit time. // TODO : - line 140: // TODO : Write baseCommitTime is possible here ? 
hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java (3 lines): - line 142: // TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows - line 177: // TODO replace w/ hasContentLength - line 360: * hasNext is not idempotent. TODO - Fix this. It is okay for now - PR hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieInternalRowUtils.scala (3 lines): - line 153: // TODO need to canonicalize schemas (casing) - line 298: // TODO this has to be revisited to avoid loss of precision (for fps) - line 357: // TODO revisit this (we need to align permitted casting w/ Spark) hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala (3 lines): - line 560: // TODO For v2 commands, we will cast the string back to its actual value, - line 851: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - line 3098: // TODO we need proper support for the NULL format. hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala (3 lines): - line 561: // TODO For v2 commands, we will cast the string back to its actual value, - line 852: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - line 3101: // TODO we need proper support for the NULL format. hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/HiveSchemaUtil.java (3 lines): - line 246: // TODO - fix the method naming here - line 353: // TODO: struct field name is only translated to support special char($) - line 526: // TODO - all partition fields should be part of the schema. datestr is treated as special. 
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala (3 lines): - line 334: // TODO encoding should be done internally w/in HoodieBackedTableMetadata - line 421: @inline private def formatColName(col: String, statName: String) = { // TODO add escaping for - line 470: // TODO fix hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_2ExtendedSqlAstBuilder.scala (3 lines): - line 558: // TODO For v2 commands, we will cast the string back to its actual value, - line 849: // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - line 3096: // TODO we need proper support for the NULL format. hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/BaseHoodieCompactionPlanGenerator.java (2 lines): - line 81: // TODO : check if maxMemory is not greater than JVM or executor memory - line 82: // TODO - rollback any compactions in flight hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java (2 lines): - line 142: // TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used - line 147: // TODO AvroSparkReader need hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java (2 lines): - line 339: *

TODO: [HUDI-6885] Deprecate HoodieActiveTimeline#getInstantFileName and fix related tests. - line 448: * TODO: This method is not needed, since log compaction plan is not an immutable plan. hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java (2 lines): - line 201: // TODO : Generate updates for only N partitions. - line 332: // TODO : Ensure that the difference between totalRecords and totalRecordsGenerated is not too big, if yes, hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala (2 lines): - line 49: // TODO replace w/ DateTimeFormatter - line 221: * TODO: standardize the key prefix so that we don't need this helper (HUDI-4935) hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkers.java (2 lines): - line 87: // TODO If current is compact or clustering then create marker directly without early conflict detection. - line 127: // TODO If current is compact or clustering then create marker directly without early conflict detection. hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala (2 lines): - line 178: * TODO unify w/ HoodieFileIndex#listFiles - line 415: // TODO support coercible expressions (ie attr-references casted to particular type), similar hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieConsistentBucketIndex.java (2 lines): - line 92: // TODO maybe batch the operation to improve performance - line 108: // TODO maybe parallel hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java (2 lines): - line 128: //TODO: Use a better naming convention for this. - line 334: //TODO: Make sure this change does not break existing functionality. 
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeAndReplaceHandleWithChangeLog.java (2 lines): - line 73: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations - line 87: // TODO Remove these unnecessary newInstance invocations hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java (2 lines): - line 60: // TODO rebase records/content to leverage Either to warrant - line 168: // TODO need convert record type hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 108: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 316: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 125: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 330: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 124: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 348: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 124: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 348: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 124: // TODO: we can avoid boxing if future version of avro provide primitive accessors. 
- line 348: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 112: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 320: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/GenericRecordFullPayloadGenerator.java (2 lines): - line 215: // TODO : pack remaining bytes into a complex field - line 305: // TODO: Need to implement valid data generation for fixed type hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java (2 lines): - line 207: // TODO make sure writing w/ and w/o meta fields is consistent (currently writing w/o - line 284: // TODO extract to utils hudi-common/src/main/java/org/apache/hudi/common/table/view/RocksDbBasedFileSystemView.java (2 lines): - line 58: * TODO: vb The current implementation works in embedded server mode where each restarts blows away the view stores. To - line 546: //TODO can we make this more efficient by storing reverse mapping (Instant -> FileGroupId) as well? 
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala (2 lines): - line 278: // TODO rebase onto JoinRecord - line 539: * TODO rebase on Spark's SerializerSupport hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/PulsarSource.java (2 lines): - line 106: // TODO validate endpoints provided in the appropriate format - line 156: // TODO support capping the amount of records fetched hudi-common/src/main/java/org/apache/hudi/index/secondary/SecondaryIndexManager.java (2 lines): - line 130: // TODO: build index - line 166: // TODO: drop index data hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java (2 lines): - line 155: // TODO: Check if logcompaction also needs to be included in this API. - line 374: * @return Get the stream of completed instants in reverse order TODO Change code references to getInstants() that hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java (2 lines): - line 115: // TODO support bulk insert for consistent bucket index - line 353: // TODO support insert overwrite for consistent bucket index hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala (2 lines): - line 86: * TODO explain workflow for MOR tables - line 271: // TODO move to analysis phase hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java (2 lines): - line 176: // TODO: consider skipping this method for non-partitioned table and simplify the checks - line 219: // TODO: what do we do if both does not exist? should we throw an exception and let caller do the fallback ? 
hudi-common/src/main/java/org/apache/hudi/avro/ConvertingGenericData.java (2 lines): - line 46: // TODO re-enable upon upgrading to 1.10 - line 61: // TODO re-enable upon upgrading to 1.10 hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala (2 lines): - line 176: // TODO: Use FileGroupReader here: HUDI-6942. - line 185: // TODO: Use FileGroupReader here: HUDI-6942. hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java (2 lines): - line 149: // TODO: vb - No logical way to represent parallel stream pos in a single long. - line 162: // TODO fix to reflect scanner progress hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java (2 lines): - line 178: // TODO - FIX THIS - line 224: // TODO: we may directly rename original parquet file if there is not evolution/devolution of schema hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InLineFileSystem.java (2 lines): - line 41: * TODO: The reader/writer may try to use relative paths based on the inlinepath and it may not work. Need to handle - line 42: * this gracefully eg. the parquet summary metadata reading. 
TODO: If this shows promise, also support directly writing hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/update/strategy/FlinkConsistentBucketUpdateStrategy.java (2 lines): - line 83: // TODO throw exception if exists bucket merge plan - line 129: // TODO add option to skip dual update, i.e., write updates only to the new file group hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/CachingPath.java (2 lines): - line 135: // TODO java-doc - line 141: // TODO java-doc hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandleWithChangeLog.java (2 lines): - line 71: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations - line 85: // TODO Remove these unnecessary newInstance invocations hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala (2 lines): - line 55: * TODO: rebase w/ HoodieBaseRelation HUDI-5362 - line 67: // TODO : Figure out a valid HoodieWriteConfig hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandleWithChangeLog.java (2 lines): - line 89: // TODO [HUDI-5019] Remove these unnecessary newInstance invocations - line 103: // TODO Remove these unnecessary newInstance invocations hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java (2 lines): - line 146: // TODO better enable urlEncodePartitioningEnabled if hiveStylePartitioningEnabled is enabled? - line 158: // TODO: Get the parallelism from HoodieWriteConfig hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala (2 lines): - line 91: // TODO: we can avoid boxing if future version of avro provide primitive accessors. - line 294: // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? 
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java (2 lines): - line 54: // TODO : Make this bucketed so don't have a large number of files in a single directory - line 63: // TODO : Handle failure case which may leave behind tons of small corrupt files hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java (2 lines): - line 270: // TODO: fix the filter to check for exact partition name, e.g. completedPartitions could have func_index_datestr, - line 342: * TODO: Revisit this logic and validate that filtering for all hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java (1 line): - line 280: // TODO (NA) : Clean only the earliest pending clean just like how we do for other table services hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala (1 line): - line 98: // TODO use HoodieSparkValidateDuplicateKeyRecordMerger when SparkRecordMerger is default hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java (1 line): - line 229: // TODO: How to obtain hive configs to register? hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java (1 line): - line 592: // TODO this should cherry-pick only clustering properties hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala (1 line): - line 71: * TODO rename to HoodieSparkSqlFileIndex hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/DeltaWriteProfile.java (1 line): - line 108: // TODO (NA) : Make this static part of utility hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodiePositionBasedFileGroupRecordBuffer.java (1 line): - line 99: // TODO: return an iterator that can generate sequence number with the record. 
hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala (1 line): - line 119: // TODO: if you move this into the closure it reverts to the default values. packaging/bundle-validation/conf/hive-site.xml (1 line): - line 40: hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java (1 line): - line 88: // TODO - compute this compression ratio dynamically by looking at the bytes written to the hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java (1 line): - line 508: // TODO: Use HoodieMetaTable to extract affected file directly. hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java (1 line): - line 68: // TODO Handle Date value hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32LegacyHoodieParquetFileFormat.scala (1 line): - line 115: // TODO: if you move this into the closure it reverts to the default values. hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala (1 line): - line 139: * TODO: Revisit to return a concrete relation here when we support CREATE TABLE AS for Hudi with DataSource API. hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java (1 line): - line 166: // TODO - Better Exception handling hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieBootstrapConfig.java (1 line): - line 252: // TODO: use infer function instead hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java (1 line): - line 155: // TODO add additional types when needed. default to string hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java (1 line): - line 46: * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro. 
hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line): - line 208: // TODO clip for array,map,row types. hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java (1 line): - line 103: //TODO Duplicating it here from HMSDLExecutor as HiveQueryQL has no way of doing it on its own currently. Need to refactor it hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/postprocessor/add/AddPrimitiveColumnSchemaPostProcessor.java (1 line): - line 50: * TODO support complex types. hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java (1 line): - line 147: // TODO: vb - Preserving for avoiding cascading changes. This constructor will be updated in subsequent PR hudi-platform-service/hudi-metaserver/hudi-metaserver-client/src/main/java/org/apache/hudi/common/table/HoodieTableMetaserverClient.java (1 line): - line 73: // TODO: transfer table parameters to table config hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/stats/ColumnStatsIndices.java (1 line): - line 291: // TODO encoding should be done internally w/in HoodieBackedTableMetadata hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTableFileSystemView.java (1 line): - line 57: //TODO: [HUDI-6249] change the maps below to implement ConcurrentMap hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line): - line 142: // TODO Reduce the duplicated code hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark31SqlAstBuilder.scala (1 line): - line 26: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java (1 line): - line 124: // only JavaRDD is supported for Spark partitioner, but it is 
not enforced by BulkInsertPartitioner API. To improve this, TODO HUDI-3463 hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java (1 line): - line 227: // fall back to legacy config for BWC. TODO consolidate in HUDI-6020 hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSchemaUtils.scala (1 line): - line 212: * TODO support casing reconciliation hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark33NestedSchemaPruning.scala (1 line): - line 54: // TODO generalize to any file-based relation hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/HoodieSparkTypeUtils.scala (1 line): - line 22: // TODO unify w/ DataTypeUtils hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java (1 line): - line 216: // TODO : readerSchema can change across blocks/log files, fix this inside Scanner hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java (1 line): - line 249: // TODO: refactor this method to avoid doing the json tree walking (HUDI-4822). hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java (1 line): - line 121: // TODO: Add a rollback instant but for compaction hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SchemaChangeUtils.java (1 line): - line 48: * TODO: support more type update. hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala (1 line): - line 177: // TODO clean up, this lock is unnecessary hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/main/java/org/apache/hudi/metaserver/HoodieMetaserver.java (1 line): - line 96: // TODO: add metaserver factory. 
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_1ExtendedSqlParser.scala (1 line): - line 26: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java (1 line): - line 218: // TODO abstract this w/in HoodieDataBlock hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/strategy/BoundedPartitionAwareCompactionStrategy.java (1 line): - line 43: // TODO replace w/ DateTimeFormatter hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/hudi/Spark30ResolveHudiAlterTableCommand.scala (1 line): - line 38: * TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.0.x hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/GenericRecordPartialPayloadGenerator.java (1 line): - line 51: if (setNull) { // TODO : DO NOT SET THE RECORD KEY FIELDS TO NULL hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java (1 line): - line 285: // TODO: This case has to be handled. HUDI-6352 hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java (1 line): - line 176: false, // TODO: Fix this to support incremental queries hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark24LegacyHoodieParquetFileFormat.scala (1 line): - line 89: // TODO: if you move this into the closure it reverts to the default values. 
hudi-common/src/main/java/org/apache/hudi/common/util/HoodieCommonKryoRegistrar.java (1 line): - line 78: // TODO need to relocate to hudi-common hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line): - line 142: // TODO Reduce the duplicated code hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java (1 line): - line 436: // TODO cleanup after Parquet upgrade to 1.12 hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala (1 line): - line 144: // TODO clean up hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line): - line 208: // TODO clip for array,map,row types. hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java (1 line): - line 67: * TODO rename to AvroFallbackBaseKeyGenerator hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java (1 line): - line 183: * TODO: (Lin) Delete this function after we remove the assume.date.partitioning config completely. hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line): - line 208: // TODO clip for array,map,row types. hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapMORRDD.scala (1 line): - line 74: // TODO clean up, this lock is unnecessary see HoodieMergeOnReadRDD hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line): - line 208: // TODO clip for array,map,row types. hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java (1 line): - line 57: // TODO: add upgrade step if required. 
hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala (1 line): - line 351: // TODO dedup w/ HoodieAnalysis hudi-common/src/main/java/org/apache/hudi/internal/schema/Type.java (1 line): - line 116: // TODO Support different date format hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIdentifier.java (1 line): - line 161: * TODO support different split criteria, e.g., distribute records evenly using statistics hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java (1 line): - line 81: // TODO - compute this compression ratio dynamically by looking at the bytes written to the hudi-platform-service/hudi-metaserver/hudi-metaserver-client/src/main/java/org/apache/hudi/common/table/timeline/HoodieMetaserverBasedTimeline.java (1 line): - line 94: * TODO [HUDI-6883] We should change HoodieMetaserverBasedTimeline to store completion time as well. hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java (1 line): - line 1695: // TODO: future support async clustering hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/Spark31ResolveHudiAlterTableCommand.scala (1 line): - line 38: * TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java (1 line): - line 117: // TODO we should remove it if we can read InternalRow from source. hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala (1 line): - line 105: * TODO convert directly from GenericRecord into InternalRow instead hudi-io/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java (1 line): - line 96: * TODO: ReflectionUtils should throw a specific exception to indicate Reflection problem. 
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala (1 line): - line 55: // TODO generalize to any file-based relation hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala (1 line): - line 82: // TODO need to add meta-fields here hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/TimelineDTO.java (1 line): - line 47: // TODO: For Now, we will assume, only active-timeline will be transferred. hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/HoodieCDCRDD.scala (1 line): - line 151: // TODO support CDC with spark record hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java (1 line): - line 141: // TODO: On by default. Once stable, we will remove the other mode. hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java (1 line): - line 38: // TODO: stop extending HiveSyncConfig and take all the variables needed from config file hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java (1 line): - line 94: // TODO Auto-generated method stub hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line): - line 142: // TODO Reduce the duplicated code hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala (1 line): - line 179: * TODO move to HoodieCatalystExpressionUtils hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark31LegacyHoodieParquetFileFormat.scala (1 line): - line 112: // TODO: if you move this into the closure it reverts to the default values. 
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/sort/SpaceCurveSortingHelper.java (1 line): - line 113: // TODO validate if we need Spark to re-partition hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala (1 line): - line 52: // TODO scala-doc hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java (1 line): - line 413: // TODO: Make this configurable along with strategy specific config - For now, this is a generic enough strategy hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark30LegacyHoodieParquetFileFormat.scala (1 line): - line 112: // TODO: if you move this into the closure it reverts to the default values. hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/inline/InMemoryFileSystem.java (1 line): - line 43: // TODO: this needs to be per path to support num_cores > 1, and we should release the buffer once done hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala (1 line): - line 411: // TODO: make it work for non-hive style partitioning hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala (1 line): - line 128: // TODO: if you move this into the closure it reverts to the default values. hudi-common/src/main/java/org/apache/hudi/common/table/view/SpillableMapBasedFileSystemView.java (1 line): - line 208: //TODO should we make this more efficient by having reverse mapping of instant to file group id? 
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetStreamWriter.java (1 line): - line 68: // TODO support populating the metadata hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/transaction/lock/HiveMetastoreBasedLockProvider.java (1 line): - line 183: // TODO : FIX:Using the parameterized constructor throws MethodNotFound hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark30NestedSchemaPruning.scala (1 line): - line 53: // TODO generalize to any file-based relation hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java (1 line): - line 532: // TODO we can actually deduce file size purely from AppendResult (based on offset and size hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line): - line 142: // TODO Reduce the duplicated code hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java (1 line): - line 69: // TODO support spark orc reader hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieInlineTestSuiteWriter.java (1 line): - line 178: // TODO: fix clustering to be done async https://issues.apache.org/jira/browse/HUDI-1590 hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java (1 line): - line 43: // TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/common/model/HoodieSparkRecord.java (1 line): - line 296: // TODO HUDI-5282 support metaData hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark31NestedSchemaPruning.scala (1 line): - line 54: // TODO generalize to any file-based relation 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/BaseConsistentHashingBucketClusteringPlanStrategy.java (1 line): - line 78: * TODO maybe add force config to schedule the clustering. It could allow clustering on partitions that are not doing write operation. hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java (1 line): - line 166: // TODO kafka connect config needs to support setting base file format hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java (1 line): - line 44: // TODO get this from HoodieConfig hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseAvroHFileReader.java (1 line): - line 220: // TODO eval whether seeking scanner would be faster than pread hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java (1 line): - line 208: // TODO clip for array,map,row types. hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/hudi/command/Spark30AlterTableCommand.scala (1 line): - line 54: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.0.x hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala (1 line): - line 45: * TODO move to HoodieBaseRelation, make private hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetWriter.java (1 line): - line 99: // TODO set partition path in ctor hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java (1 line): - line 62: * TODO handle sample_writes sub-path clean-up w.r.t. rollback and insert overwrite. 
(HUDI-6044) hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java (1 line): - line 157: * TODO: unify the incremental view code between hive/spark-sql and spark datasource hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/parquet/ParquetRowDataWriter.java (1 line): - line 348: * TODO: Leaving this here as there might be a requirement to support TIMESTAMP(9) in the future hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java (1 line): - line 77: // TODO : UUID's can clash even for insert/insert, handle that case. hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/BucketizedBloomCheckPartitioner.java (1 line): - line 147: // TODO replace w/ more performant hash hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieFileProbingFunction.java (1 line): - line 127: // TODO add assertion that file is checked only once hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/hudi/command/Spark31AlterTableCommand.scala (1 line): - line 54: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.1x hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/BucketIndexConcurrentFileWritesConflictResolutionStrategy.java (1 line): - line 39: // TODO : UUID's can clash even for insert/insert, handle that case. hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java (1 line): - line 58: // TODO: Should we treat this fatal and throw exception? 
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/SparkAvroDeltaInputWriter.java (1 line): - line 35: // TODO : the base path has to be a new path every time for spark avro hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java (1 line): - line 97: // TODO enable automatic predicate pushdown after fixing issues hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark34NestedSchemaPruning.scala (1 line): - line 54: // TODO generalize to any file-based relation hudi-utilities/src/main/java/org/apache/hudi/utilities/config/HoodieStreamerConfig.java (1 line): - line 53: // TODO: consolidate all checkpointing configs into HoodieCheckpointStrategy (HUDI-5906) hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java (1 line): - line 30: * TODO: enable docs gen by adding {@link ConfigClassProperty} after TSM is landed (HUDI-3475) hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java (1 line): - line 45: // TODO : Sort list by file size and take the median file status to ensure fair calculation and change to remote hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java (1 line): - line 142: // TODO Reduce the duplicated code hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java (1 line): - line 282: // TODO: Implement automatic schema evolution when you add a new column. 
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/MetadataConversionUtils.java (1 line): - line 238: // TODO revisit requested commit file standardization https://issues.apache.org/jira/browse/HUDI-1739 hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDReadClient.java (1 line): - line 66: * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java (1 line): - line 172: // TODO support bootstrap hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/strategy/DayBasedCompactionStrategy.java (1 line): - line 43: // TODO replace w/ DateTimeFormatter hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java (1 line): - line 467: // TODO : Where is shouldComplete used ? hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java (1 line): - line 107: // TODO Replace Java serialization with Avro approach to keep state compatibility. 
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java (1 line): - line 48: // TODO more indexes to be added hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark30SqlAstBuilder.scala (1 line): - line 26: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.0.x hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java (1 line): - line 225: // TODO partition columns have to be appended in all read-paths hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java (1 line): - line 165: // TODO : read record count from metadata hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala (1 line): - line 129: // TODO: if you move this into the closure it reverts to the default values. hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieCDCLogRecordIterator.java (1 line): - line 98: // TODO support cdc with spark record. 
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCFileGroupIterator.scala (1 line): - line 111: // TODO support CDC with spark record hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark32NestedSchemaPruning.scala (1 line): - line 55: // TODO generalize to any file-based relation hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_0ExtendedSqlParser.scala (1 line): - line 25: // TODO: we should remove this file when we support datasourceV2 for hoodie on spark3.0.x hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java (1 line): - line 137: // TODO (NA) : Make this static part of utility hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala (1 line): - line 115: // TODO use mutable row, avoid re-allocating hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/PathUtils.scala (1 line): - line 24: * TODO convert to Java, move to hudi-common hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyRelation.scala (1 line): - line 114: // TODO fix, currently assuming parquet as underlying format hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java (1 line): - line 74: // TODO support populating the metadata