cpp-ch/local-engine/Common/GlutenSignalHandler.cpp (8 lines): - line 231: std::string build_id; // TODO : Build ID - line 233: std::string stored_binary_hash; // TODO: binary checksum - line 309: /// TODO: Please keep the below log messages in-sync with the ones in ~programs/server/Server.cpp~ - line 339: /// FIXME: Write crash to system.crash_log table if available. - line 343: ///TODO: Send crash report to developers (if configured) - line 346: /// TODO: SentryWriter::onFault(sig, error_message, stack_trace); - line 348: /// TODO: Advice the user to send it manually. - line 392: /// TODO:: Set up Poco ErrorHandler for Poco Threads. cpp/velox/substrait/SubstraitToVeloxPlan.cc (7 lines): - line 197: // TODO Simplify Velox's aggregation steps - line 798: // TODO: support non-literal expression. - line 806: // TODO: support non-literal expression. - line 1220: // TODO: Use the names as the output names for the whole computing. - line 1278: // TODO: Only and relation is supported here. - line 1383: // TODO: support push down of Not In. - line 2009: // TODO: open it when the Velox's modification is ready. cpp-ch/local-engine/proto/substrait/algebra.proto (5 lines): - line 50: // TODO: nodes, cpu threads/%, memory, iops, etc. - line 511: //TODO add PK/constraints/indexes/etc..? - line 911: // greater than the upper bound, TODO (null range/no records passed? - line 921: // less than the lower bound, TODO (null range/no records passed? - line 1216: // TODO: should allow expressions gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto (5 lines): - line 50: // TODO: nodes, cpu threads/%, memory, iops, etc. - line 511: //TODO add PK/constraints/indexes/etc..? - line 911: // greater than the upper bound, TODO (null range/no records passed? - line 921: // less than the lower bound, TODO (null range/no records passed? 
- line 1216: // TODO: should allow expressions gluten-core/src/main/scala/io/glutenproject/expression/ConverterUtils.scala (4 lines): - line 91: // TODO: This is used only by `BasicScanExecTransformer`, - line 349: case BooleanType => // TODO: Not in Substrait yet. - line 365: // TODO: different with Substrait due to more details here. - line 370: // TODO: different with Substrait due to more details here. backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala (4 lines): - line 88: // TODO: Best effort cleanup - line 113: // TODO: timezones? - line 114: // TODO: enable validatePartitionColumns? - line 233: // TODO: we can also try delete the addedFiles as a best-effort cleanup. backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala (4 lines): - line 80: // TODO: When limit is positive, CH result is wrong, fix it later - line 94: // TODO: CH substringIndexUTF8 function only support string literal as delimiter - line 99: // TODO: CH substringIndexUTF8 function only support single character as delimiter - line 135: // TODO: CH formatDateTimeInJodaSyntax/fromUnixTimestampInJodaSyntax only support backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/files/MergeTreeCommitProtocol.scala (4 lines): - line 83: // TODO: Best effort cleanup - line 108: // TODO: timezones? - line 109: // TODO: enable validatePartitionColumns? - line 231: // TODO: we can also try delete the addedFiles as a best-effort cleanup. cpp/velox/compute/VeloxBackend.cc (3 lines): - line 209: // FIXME It's known that if spill compression is disabled, the actual spill file size may - line 248: // TODO: this is not tracked by Spark. - line 251: // TODO: this is not tracked by Spark. 
backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/ClickHouseSparkCatalog.scala (3 lines): - line 135: // TODO: Generate WriteClickHouseTableCommand - line 151: // TODO: remove this operation after implementing write mergetree into table - line 162: // TODO: support to list all children paths gluten-core/src/main/scala/io/glutenproject/extension/columnar/TransformHintRule.scala (3 lines): - line 255: * FIXME To be removed: Since Velox backend is the only one to use the strategy, and we already - line 591: // FIXME Hongze: In following codes we perform a lot of if-else conditions to - line 635: // FIXME did we consider the case that AQE: OFF && Reuse: ON ? cpp-ch/local-engine/Storages/ch_parquet/OptimizedArrowColumnToCHColumn.cpp (3 lines): - line 266: chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast(chunk.Value(value_i))); // TODO: copy column - line 508: // TODO: read JSON as a string? - line 509: // TODO: read UUID as a string? cpp/core/jni/JniWrapper.cc (2 lines): - line 1038: // TODO: Add coalesce option and maximum coalesced size. - line 1280: // TODO: move memory manager into Runtime then we can use more general Runtime. cpp-ch/local-engine/Parser/SerializedPlanParser.cpp (2 lines): - line 478: // TODO: We still maintain the old logic of parsing LocalFiles or ExtensionTable in RealRel - line 2228: // TODO: make it the same as spark, it's too simple at present. 
gluten-core/src/main/scala/org/apache/spark/sql/execution/datasources/GlutenWriterColumnarRules.scala (2 lines): - line 84: // TODO: support ctas in Spark3.4, see https://github.com/apache/spark/pull/39220 - line 85: // TODO: support dynamic partition and bucket write backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHIteratorApi.scala (2 lines): - line 127: // TODO: Support custom partition location - line 241: // TODO: SPARK-25083 remove the type erasure hack in data source scan backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala (2 lines): - line 55: * 2. TODO: support push down filter - line 56: * 3. Super TODO: support store offheap object directly gluten-core/src/main/scala/io/glutenproject/execution/SortMergeJoinExecTransformer.scala (2 lines): - line 214: // TODO: Support cross join with Cross Rel - line 215: // TODO: Support existence join backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHValidatorApi.scala (2 lines): - line 73: // TODO: Currently there are some fallback issues on CH backend when SparkPlan is - line 74: // TODO: SerializeFromObjectExec, ObjectHashAggregateExec and V2CommandExec. 
gluten-data/src/main/java/io/glutenproject/vectorized/ArrowColumnVector.java (2 lines): - line 210: // TODO: should be final after removing ArrayAccessor workaround - line 490: // TODO: Workaround if vector has all non-null values, see ARROW-1948 cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp (2 lines): - line 214: // TODO need to test - line 280: // TODO: get primary_key_names backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/MergeTreeFileFormatWriter.scala (2 lines): - line 116: // TODO: check whether it needs to use `convertEmptyToNullIfNeeded` to convert empty to null - line 194: // TODO: to optimize, bucket value is computed twice here backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ListenerApiImpl.scala (2 lines): - line 164: // FIXME: The following set instances twice in local mode? - line 171: // TODO shutdown implementation in velox to release resources backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHTransformerApi.scala (2 lines): - line 134: // TODO: set default to true when metrics could be collected - line 142: // TODO: consider compression or orc.compression in table options. 
gluten-data/src/main/java/io/glutenproject/vectorized/ArrowWritableColumnVector.java (2 lines): - line 751: // TODO: should be final after removing ArrayAccessor workaround - line 1155: // TODO: Workaround if vector has all non-null values, see ARROW-1948 gluten-core/src/main/scala/io/glutenproject/extension/ColumnarOverrides.scala (2 lines): - line 138: // FIXME: Filter push-down should be better done by Vanilla Spark's planner or by - line 568: // TODO: Add DynamicPartitionPruningHiveScanSuite.scala substrait/substrait-spark/src/main/scala/io/substrait/spark/logical/ToSubstraitRel.scala (2 lines): - line 115: * TODO: support [[Rollup]] and [[GroupingSets]] - line 306: // TODO: LocalRelation,Range=>Virtual Table,LogicalRelation(HadoopFsRelation)=>LocalFiles gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala (2 lines): - line 173: // FIXME Hongze 22/12/06 - line 215: // TODO categorize the APIs by driver's or executor's gluten-core/src/main/scala/org/apache/spark/sql/execution/ShuffledColumnarBatchRDD.scala (2 lines): - line 41: // TODO this check is based on assumptions of callers' behavior but is sufficient for now. - line 77: // TODO order by partition size. backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/utils/MergeTreePartsPartitionsUtil.scala (2 lines): - line 52: // TODO: remove `substring` - line 150: // TODO: remove `substring` backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/clickhouse/source/ClickHouseWriteBuilder.scala (2 lines): - line 81: // TODO: Get the config from WriteIntoDelta's txn. 
- line 100: // TODO: Push this to Apache Spark backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/commands/CreateClickHouseTableCommand.scala (2 lines): - line 110: // TODO: implement writing clickhouse data - line 155: // TODO: shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala (2 lines): - line 44: // FIXME the option currently controls both JVM and native validation against a Substrait plan. - line 632: // FIXME the option currently controls both JVM and native validation against a Substrait plan. cpp-ch/local-engine/Storages/Output/FileWriterWrappers.h (2 lines): - line 53: //TODO: EmptyFileReader and ConstColumnsFileReader ? - line 54: //TODO: to support complex types backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala (2 lines): - line 304: // FIXME Hongze 22/12/06 - line 321: // FIXME Hongze 22/12/06 cpp/velox/shuffle/VeloxShuffleWriter.cc (2 lines): - line 723: // TODO: maybe an estimated row is more reasonable - line 727: // TODO: maybe memory issue, copy many times gluten-iceberg/src/main/java/io/glutenproject/substrait/rel/IcebergLocalFilesNode.java (1 line): - line 51: // TODO: Add delete file support for MOR iceberg table gluten-core/src/main/resources/substrait/extensions/functions_comparison.yaml (1 line): - line 25: # TODO: add lt, gt, lte, gte, compare cpp/core/compute/ResultIterator.h (1 line): - line 28: // FIXME the code is tightly coupled with Velox plan execution. Should cleanup the abstraction for uses from gluten-data/src/main/java/io/glutenproject/memory/alloc/NativeMemoryAllocators.java (1 line): - line 31: * FIXME: to export the native APIs in a standard way cpp-ch/local-engine/Storages/Output/ORCOutputFormatFile.cpp (1 line): - line 41: // TODO: align all spark orc config with ch orc config cpp/velox/compute/VeloxRuntime.h (1 line): - line 65: // FIXME This is not thread-safe? backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala (1 line): - line 314: // TODO: remove this after pushdowning preprojection cpp-ch/local-engine/Parser/JoinRelParser.cpp (1 line): - line 201: /// TODO: make grace hash join be the default hash join algorithm. shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala (1 line): - line 52: * TODO: implement the read logic. cpp/core/shuffle/ShuffleReader.h (1 line): - line 50: // FIXME iterator should be unique_ptr or un-copyable singleton gluten-core/src/main/scala/io/glutenproject/extension/CommonSubexpressionEliminateRule.scala (1 line): - line 59: // TODO: CSE in Filter doesn't work for unknown reason, need to fix it later backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/table/ClickHouseTableV2.scala (1 line): - line 361: // TODO: Refresh cache after writing data. gluten-core/src/main/scala/io/glutenproject/execution/BasicScanExecTransformer.scala (1 line): - line 49: // TODO: Remove this expensive call when CH support scan custom partition location. backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala (1 line): - line 363: if aggregateExpression.mode == Partial => // FIXME: Any difference with the last branch?
cpp/core/jni/JniCommon.h (1 line): - line 156: // TODO: Move the static functions to namespace gluten gluten-iceberg/src/main/java/io/glutenproject/substrait/rel/IcebergLocalFilesBuilder.java (1 line): - line 24: // TODO: Add makeIcebergLocalFiles for MOR iceberg table tools/gluten-it/common/src/main/scala/org/apache/spark/sql/QueryRunner.scala (1 line): - line 176: // We have 50% chance to kill the task. FIXME make it configurable? backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHListenerApi.scala (1 line): - line 72: // FIXME: The following set instances twice in local mode? cpp-ch/local-engine/Common/CHUtil.cpp (1 line): - line 331: // TODO: May need to check the storage format version cpp-ch/local-engine/Join/StorageJoinFromReadBuffer.cpp (1 line): - line 103: /// TODO: check key columns gluten-core/src/main/scala/io/glutenproject/substrait/SubstraitContext.scala (1 line): - line 85: // FIXME Hongze 22/11/28 cpp/velox/substrait/SubstraitExtensionCollector.cc (1 line): - line 26: // TODO: Currently we treat all velox registry based function signatures as gluten-data/src/main/java/io/glutenproject/memory/arrowalloc/ArrowBufferAllocators.java (1 line): - line 38: // FIXME: Remove this then use contextInstance(name) instead gluten-data/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala (1 line): - line 70: // TODO: Time unit is not handled. gluten-core/src/main/java/io/glutenproject/substrait/rel/ReadRelNode.java (1 line): - line 63: // TODO: remove setDataSchema and setProperties shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala (1 line): - line 271: // TODO: if you move this into the closure it reverts to the default values. backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/source/ClickHouseScanBase.scala (1 line): - line 50: /** TODO: MergeTree DS V2 can not support partitions now. */ shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala (1 line): - line 278: // TODO: if you move this into the closure it reverts to the default values. gluten-data/src/main/java/io/glutenproject/columnarbatch/ColumnarBatches.java (1 line): - line 336: // FIXME: The check could be removed to adopt ownership-transfer semantic backends-velox/src/main/scala/io/glutenproject/execution/ShuffledHashJoinExecTransformer.scala (1 line): - line 189: // TODO: Support cross join with Cross Rel backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/velox/VeloxOrcWriterInjects.scala (1 line): - line 27: // TODO: implement it gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala (1 line): - line 463: // TODO ch support PrecisionLoss=true backends-clickhouse/src/main/java/io/glutenproject/vectorized/BlockOutputStream.java (1 line): - line 93: // FIXME: finalize cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp (1 line): - line 198: // TODO support group cpp-ch/local-engine/Storages/ch_parquet/arrow/column_reader.cc (1 line): - line 819: // TODO figure a way to set max_def_level_ to 0 backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v1/CHOrcWriterInjects.scala (1 line): - line 27: // TODO: implement it gluten-core/src/main/scala/io/glutenproject/extension/RewriteMultiChildrenCount.scala (1 line): - line 49: * TODO: Remove this rule when Velox support multi-children Count cpp-ch/local-engine/Storages/Output/WriteBufferBuilder.cpp (1 line): - line 100: //TODO: support azure and S3 gluten-core/src/main/scala/io/glutenproject/softaffinity/strategy/SoftAffinityStrategy.scala (1 line): - line 37: // TODO: try to use ConsistentHash cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp (1 line): - line 114: /// TODO: implement new hash function sparkCityHash64 like sparkXxHash64 to process null literal as column more gracefully.
substrait/substrait-spark/src/main/scala/io/substrait/spark/logical/ToLogicalPlan.scala (1 line): - line 132: // TODO: Support different join types here when join types are added to cross rel for BNLJ cpp/velox/compute/WholeStageResultIterator.cc (1 line): - line 440: // FIXME this uses process-wise off-heap memory which is not for task cpp/core/shuffle/Spill.cc (1 line): - line 55: // TODO: Add compression threshold. gluten-core/src/main/scala/io/glutenproject/expression/UnaryExpressionTransformer.scala (1 line): - line 206: // TODO: for user-specified seed, we need to pass partition index to native engine. shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala (1 line): - line 256: // TODO: to optimize, bucket value is computed twice here backends-velox/src/main/scala/io/glutenproject/execution/VeloxColumnarToRowExec.scala (1 line): - line 99: // TODO:: pass the jni jniWrapper and arrowSchema and serializeSchema method by broadcast gluten-core/src/main/scala/io/glutenproject/utils/SubstraitUtil.scala (1 line): - line 39: // TODO: Support existence join backends-clickhouse/src/main/delta-20/org/apache/spark/sql/execution/datasources/v1/clickhouse/commands/WriteMergeTreeToDelta.scala (1 line): - line 91: // TODO: replace the method below with `CharVarcharUtils.replaceCharWithVarchar`, when 3.3 is out. cpp/core/shuffle/LocalPartitionWriter.cc (1 line): - line 128: // TODO: Merging complex type is currently not supported. gluten-core/src/main/scala/org/apache/spark/sql/hive/HiveTableScanExecTransformer.scala (1 line): - line 74: // FIXME how does a hive table expose file paths? backends-velox/src/main/java/io/glutenproject/fs/OnHeapFileSystem.java (1 line): - line 53: // FIXME: This is rough. JVM heap can still be filled out by other threads gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala (1 line): - line 138: // TODO IdentityBroadcastMode not supported. Need to support BroadcastNestedLoopJoin first. backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/TransformerApiImpl.scala (1 line): - line 62: // TODO: IMPLEMENT SPECIAL PROCESS FOR VELOX BACKEND gluten-data/src/main/java/io/glutenproject/vectorized/ShuffleWriterJniWrapper.java (1 line): - line 181: * @param memLimit memory usage limit for the split operation FIXME setting a cap to pool / backends-clickhouse/src/main/delta-22/org/apache/spark/sql/execution/datasources/v1/clickhouse/commands/WriteMergeTreeToDelta.scala (1 line): - line 93: // TODO: replace the method below with `CharVarcharUtils.replaceCharWithVarchar`, when 3.3 is out. backends-velox/src/main/scala/org/apache/spark/sql/execution/datasources/VeloxWriteQueue.scala (1 line): - line 36: // TODO: This probably can be removed: Velox's Parquet writer already supports push-based write. gluten-core/src/main/scala/io/glutenproject/metrics/MetricsUpdater.scala (1 line): - line 26: * TODO: place it to some other where since it's used not only by whole stage facilities gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala (1 line): - line 150: .recyclePayload(p => ColumnarBatches.forceClose(p._2)) // FIXME why force close? gluten-core/src/main/scala/io/glutenproject/execution/WholeStageTransformer.scala (1 line): - line 135: // TODO: remove this work around after we make `RelNode#toProtobuf` idempotent gluten-data/src/main/java/io/glutenproject/columnarbatch/IndicatorVector.java (1 line): - line 68: // TODO use stronger restriction (IllegalStateException probably) gluten-core/src/main/scala/io/glutenproject/execution/BasicPhysicalOperatorTransformer.scala (1 line): - line 404: // TODO: For data lake format use pushedFilters in SupportsPushDownFilters backends-clickhouse/src/main/java/io/glutenproject/memory/alloc/CHNativeMemoryAllocators.java (1 line): - line 39: * FIXME: to export the native APIs in a standard way shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala (1 line): - line 276: // TODO: to optimize, bucket value is computed twice here cpp-ch/local-engine/Storages/ch_parquet/arrow/encoding.cc (1 line): - line 2963: // TODO: read corrupted files written with bug(PARQUET-246). last_value_ should be set backends-velox/src/main/scala/io/glutenproject/expression/ExpressionTransformer.scala (1 line): - line 141: // TODO: split function support limit arg gluten-data/src/main/java/io/glutenproject/memory/nmm/NativeMemoryManagers.java (1 line): - line 41: // TODO: Let all caller support spill. gluten-data/src/main/java/io/glutenproject/datasource/DatasourceJniWrapper.java (1 line): - line 30: // FIXME: move to module gluten-data? cpp/velox/substrait/SubstraitParser.cc (1 line): - line 184: // TODO Refactor using Bison. gluten-core/src/main/scala/io/glutenproject/utils/Iterators.scala (1 line): - line 161: class WrapperBuilder[A](in: Iterator[A]) { // FIXME how to make the ctor companion-private? cpp/velox/memory/VeloxMemoryManager.cc (1 line): - line 70: std::lock_guard l(mutex_); // FIXME: Do we have recursive locking for this mutex? gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala (1 line): - line 85: .recyclePayload(ColumnarBatches.forceClose) // FIXME why force close?
gluten-core/src/main/scala/io/glutenproject/extension/GlutenPlan.scala (1 line): - line 73: // FIXME: Use a validation-specific method to catch validation failures cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp (1 line): - line 616: //TODO: support online change config for cached per_bucket_clients gluten-core/src/main/scala/org/apache/spark/util/TaskResources.scala (1 line): - line 192: // TODO: gluten-core/src/main/scala/io/glutenproject/extension/RemoveNativeWriteFilesSortAndProject.scala (1 line): - line 111: // TODO: support bucket write gluten-core/src/main/resources/substrait/extensions/functions_string.yaml (1 line): - line 15: # TODO: add like, concat, substring, string specific cross type equalities. gluten-core/src/main/java/io/glutenproject/memory/memtarget/TreeMemoryTargets.java (1 line): - line 147: while (true) { // FIXME should we add retry limit? cpp-ch/local-engine/Storages/Output/ParquetOutputFormatFile.cpp (1 line): - line 49: // TODO: align all spark parquet config with ch parquet config cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc (1 line): - line 1123: // The supported aggregation functions. TODO: Remove this set when Presto aggregate functions in Velox are not