content/releases/spark-release-3-3-2.html (363 lines of code) (raw):
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>
Spark Release 3.3.2 | Apache Spark
</title>
<link href="/css/bootstrap.min.css" rel="stylesheet">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,400;0,500;0,700;1,400;1,500;1,700&amp;family=Courier+Prime:wght@400;700&amp;display=swap" rel="stylesheet">
<link href="/css/custom.css" rel="stylesheet">
<!-- Code highlighter CSS -->
<link href="/css/pygments-default.css" rel="stylesheet">
<link rel="icon" href="/favicon.ico" type="image/x-icon">
<!-- Matomo -->
<script>
// Matomo analytics bootstrap: queues tracker commands on the shared _paq array
// and then injects the Matomo tracker script with the async flag set.
// Reuse window._paq if it already exists; otherwise start a new command queue.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL from which both the tracking endpoint and the tracker script are derived.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
// Create a script element for matomo.js and insert it before the first
// script element already present in the document.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body class="global">
<nav class="navbar navbar-expand-lg navbar-dark p-0 px-4" style="background: #1D6890;">
<a class="navbar-brand" href="/">
<img src="/images/spark-logo-rev.svg" alt="Apache Spark" width="141" height="72">
</a>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarContent"
aria-controls="navbarContent" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse col-md-12 col-lg-auto pt-4" id="navbarContent">
<ul class="navbar-nav me-auto">
<li class="nav-item">
<a class="nav-link active" href="/downloads.html">Download</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="libraries" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Libraries
</a>
<ul class="dropdown-menu" aria-labelledby="libraries">
<li><a class="dropdown-item" href="/sql/">SQL and DataFrames</a></li>
<li><a class="dropdown-item" href="/spark-connect/">Spark Connect</a></li>
<li><a class="dropdown-item" href="/streaming/">Spark Streaming</a></li>
<li><a class="dropdown-item" href="/pandas-on-spark/">pandas on Spark</a></li>
<li><a class="dropdown-item" href="/mllib/">MLlib (machine learning)</a></li>
<li><a class="dropdown-item" href="/graphx/">GraphX (graph)</a></li>
<li>
<hr class="dropdown-divider">
</li>
<li><a class="dropdown-item" href="/third-party-projects.html">Third-Party Projects</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="documentation" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Documentation
</a>
<ul class="dropdown-menu" aria-labelledby="documentation">
<li><a class="dropdown-item" href="/docs/latest/">Latest Release</a></li>
<li><a class="dropdown-item" href="/documentation.html">Older Versions and Other Resources</a></li>
<li><a class="dropdown-item" href="/faq.html">Frequently Asked Questions</a></li>
</ul>
</li>
<li class="nav-item">
<a class="nav-link active" href="/examples.html">Examples</a>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="community" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Community
</a>
<ul class="dropdown-menu" aria-labelledby="community">
<li><a class="dropdown-item" href="/community.html">Mailing Lists &amp; Resources</a></li>
<li><a class="dropdown-item" href="/contributing.html">Contributing to Spark</a></li>
<li><a class="dropdown-item" href="/improvement-proposals.html">Improvement Proposals (SPIP)</a>
</li>
<li><a class="dropdown-item" href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a>
</li>
<li><a class="dropdown-item" href="/powered-by.html">Powered By</a></li>
<li><a class="dropdown-item" href="/committers.html">Project Committers</a></li>
<li><a class="dropdown-item" href="/history.html">Project History</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="developers" role="button" data-bs-toggle="dropdown"
aria-expanded="false">
Developers
</a>
<ul class="dropdown-menu" aria-labelledby="developers">
<li><a class="dropdown-item" href="/developer-tools.html">Useful Developer Tools</a></li>
<li><a class="dropdown-item" href="/versioning-policy.html">Versioning Policy</a></li>
<li><a class="dropdown-item" href="/release-process.html">Release Process</a></li>
<li><a class="dropdown-item" href="/security.html">Security</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="github" role="button" data-bs-toggle="dropdown" aria-expanded="false">
GitHub
</a>
<ul class="dropdown-menu" aria-labelledby="github">
<li><a class="dropdown-item" href="https://github.com/apache/spark">spark</a></li>
<li><a class="dropdown-item" href="https://github.com/apache/spark-connect-go">spark-connect-go</a></li>
<li><a class="dropdown-item" href="https://github.com/apache/spark-connect-swift">spark-connect-swift</a></li>
<li><a class="dropdown-item" href="https://github.com/apache/spark-docker">spark-docker</a></li>
<li><a class="dropdown-item" href="https://github.com/apache/spark-kubernetes-operator">spark-kubernetes-operator</a></li>
<li><a class="dropdown-item" href="https://github.com/apache/spark-website">spark-website</a></li>
</ul>
</li>
</ul>
<ul class="navbar-nav ml-auto">
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="#" id="apacheFoundation" role="button"
data-bs-toggle="dropdown" aria-expanded="false">
Apache Software Foundation
</a>
<ul class="dropdown-menu" aria-labelledby="apacheFoundation">
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Homepage</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/">License</a></li>
<li><a class="dropdown-item"
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/events/current-event">Event</a></li>
</ul>
</li>
</ul>
</div>
</nav>
<div class="container">
<div class="row mt-4">
<div class="col-12 col-md-9">
<h2>Spark Release 3.3.2</h2>
<p>Spark 3.3.2 is a maintenance release containing stability fixes. This release is based on the branch-3.3 maintenance branch of Spark. We strongly recommend that all 3.3 users upgrade to this stable release.</p>
<h3 id="notable-changes">Notable changes</h3>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38697">[SPARK-38697]</a>: Extend SparkSessionExtensions to inject rules into AQE Optimizer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40872">[SPARK-40872]</a>: Fallback to original shuffle block when a push-merged shuffle chunk is zero-size</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41388">[SPARK-41388]</a>: getReusablePVCs should ignore recently created PVCs in the previous batch</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42071">[SPARK-42071]</a>: Register scala.math.Ordering$Reverse to KryoSerializer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-32380">[SPARK-32380]</a>: sparksql cannot access hive table while data in hbase</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39404">[SPARK-39404]</a>: Unable to query _metadata in streaming if getBatch returns multiple logical nodes in the DataFrame</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40493">[SPARK-40493]</a>: Revert “[SPARK-33861][SQL] Simplify conditional in predicate”</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40588">[SPARK-40588]</a>: Sorting issue with partitioned-writing and AQE turned on</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40817">[SPARK-40817]</a>: Remote spark.jars URIs ignored for Spark on Kubernetes in cluster mode</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40819">[SPARK-40819]</a>: Parquet INT64 (TIMESTAMP(NANOS,true)) now throwing Illegal Parquet type instead of automatically converting to LongType</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40829">[SPARK-40829]</a>: STORED AS serde in CREATE TABLE LIKE view does not work</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40851">[SPARK-40851]</a>: TimestampFormatter behavior changed when using the latest Java 8/11/17</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40869">[SPARK-40869]</a>: KubernetesConf.getResourceNamePrefix creates invalid name prefixes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40874">[SPARK-40874]</a>: Fix broadcasts in Python UDFs when encryption is enabled</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40902">[SPARK-40902]</a>: Quick submission of drivers in tests to mesos scheduler results in dropping drivers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40918">[SPARK-40918]</a>: Mismatch between ParquetFileFormat and FileSourceScanExec in # columns for WSCG.isTooManyFields when using _metadata</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40924">[SPARK-40924]</a>: Unhex function works incorrectly when input has uneven number of symbols</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40932">[SPARK-40932]</a>: Barrier: messages for allGather will be overridden by the following barrier APIs</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40963">[SPARK-40963]</a>: ExtractGenerator sets incorrect nullability in new Project</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40987">[SPARK-40987]</a>: Avoid creating a directory when deleting a block, causing DAGScheduler to not work</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41035">[SPARK-41035]</a>: Incorrect results or NPE when a literal is reused across distinct aggregations</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41118">[SPARK-41118]</a>: to_number/try_to_number throws NullPointerException when format is null</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41144">[SPARK-41144]</a>: UnresolvedHint should not cause query failure</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41151">[SPARK-41151]</a>: Keep built-in file _metadata column nullable value consistent</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41154">[SPARK-41154]</a>: Incorrect relation caching for queries with time travel spec</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41162">[SPARK-41162]</a>: Anti-join must not be pushed below aggregation with ambiguous predicates</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41187">[SPARK-41187]</a>: [Core] LiveExecutor MemoryLeak in AppStatusListener when ExecutorLost happen</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41188">[SPARK-41188]</a>: Set executorEnv OMP_NUM_THREADS to be spark.task.cpus by default for spark executor JVM processes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41254">[SPARK-41254]</a>: YarnAllocator.rpIdToYarnResource map is not properly updated</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41327">[SPARK-41327]</a>: Fix SparkStatusTracker.getExecutorInfos by switch On/OffHeapStorageMemory info</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41339">[SPARK-41339]</a>: RocksDB state store WriteBatch doesn’t clean up native memory</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41350">[SPARK-41350]</a>: allow simple name access of using join hidden columns after subquery alias</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41365">[SPARK-41365]</a>: Stages UI page fails to load for proxy in some yarn versions</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41375">[SPARK-41375]</a>: Avoid empty latest KafkaSourceOffset</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41376">[SPARK-41376]</a>: Executor netty direct memory check should respect spark.shuffle.io.preferDirectBufs</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41379">[SPARK-41379]</a>: Inconsistency of spark session in DataFrame in user function for foreachBatch sink in PySpark</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41385">[SPARK-41385]</a>: Replace deprecated <code class="language-plaintext highlighter-rouge">.newInstance()</code> in K8s module</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41395">[SPARK-41395]</a>: InterpretedMutableProjection can corrupt unsafe buffer when used with decimal data</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41448">[SPARK-41448]</a>: Make consistent MR job IDs in FileBatchWriter and FileFormatWriter</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41458">[SPARK-41458]</a>: Correctly transform the SPI services for Yarn Shuffle Service</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41468">[SPARK-41468]</a>: Fix PlanExpression handling in EquivalentExpressions</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41522">[SPARK-41522]</a>: GA dependencies test failed</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41535">[SPARK-41535]</a>: InterpretedUnsafeProjection and InterpretedMutableProjection can corrupt unsafe buffer when used with calendar interval data</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41554">[SPARK-41554]</a>: Decimal.changePrecision produces ArrayIndexOutOfBoundsException</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41668">[SPARK-41668]</a>: DECODE function returns wrong results when passed NULL</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41732">[SPARK-41732]</a>: Session window: analysis rule “SessionWindowing” does not apply tree-pattern based pruning</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41989">[SPARK-41989]</a>: PYARROW_IGNORE_TIMEZONE warning can break application logging setup</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42084">[SPARK-42084]</a>: Avoid leaking the qualified-access-only restriction</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42090">[SPARK-42090]</a>: Introduce sasl retry count in RetryingBlockTransferor</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42134">[SPARK-42134]</a>: Fix getPartitionFiltersAndDataFilters() to handle filters without referenced attributes</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42157">[SPARK-42157]</a>: <code class="language-plaintext highlighter-rouge">spark.scheduler.mode=FAIR</code> should provide FAIR scheduler</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42176">[SPARK-42176]</a>: Cast boolean to timestamp fails with ClassCastException</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42188">[SPARK-42188]</a>: Force SBT protobuf version to match Maven on branch 3.2 and 3.3</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42201">[SPARK-42201]</a>: <code class="language-plaintext highlighter-rouge">build/sbt</code> should allow SBT_OPTS to override JVM memory setting</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42222">[SPARK-42222]</a>: Spark 3.3 Backport: SPARK-41344 Reading V2 datasource masks underlying error</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42259">[SPARK-42259]</a>: ResolveGroupingAnalytics should take care of Python UDAF</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42344">[SPARK-42344]</a>: The default size of the CONFIG_MAP_MAXSIZE should not be greater than 1048576</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42346">[SPARK-42346]</a>: distinct(count colname) with UNION ALL causes query analyzer bug</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38277">[SPARK-38277]</a>: Clear write batch after RocksDB state store’s commit</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40913">[SPARK-40913]</a>: Pin <code class="language-plaintext highlighter-rouge">pytest==7.1.3</code></li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41089">[SPARK-41089]</a>: Relocate Netty native arm64 libs</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41360">[SPARK-41360]</a>: Avoid BlockManager re-registration if the executor has been lost</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41476">[SPARK-41476]</a>: Prevent <code class="language-plaintext highlighter-rouge">README.md</code> from triggering CIs</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41541">[SPARK-41541]</a>: Fix wrong child call in SQLShuffleWriteMetricsReporter.decRecordsWritten()</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41962">[SPARK-41962]</a>: Update the import order of scala package in class SpecificParquetRecordReaderBase</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42230">[SPARK-42230]</a>: Improve <code class="language-plaintext highlighter-rouge">lint</code> job by skipping PySpark and SparkR docs if unchanged</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41863">[SPARK-41863]</a>: Skip <code class="language-plaintext highlighter-rouge">flake8</code> tests if the command is not available</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41864">[SPARK-41864]</a>: Fix mypy linter errors</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42110">[SPARK-42110]</a>: Reduce the number of repetition in ParquetDeltaEncodingSuite.<code class="language-plaintext highlighter-rouge">random data test</code></li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41415">[SPARK-41415]</a>: SASL Request Retries</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41538">[SPARK-41538]</a>: Metadata column should be appended at the end of project list</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40983">[SPARK-40983]</a>: Remove Hadoop requirements for zstd mention in Parquet compression codec</li>
<li>
<p><a href="https://issues.apache.org/jira/browse/SPARK-41185">[SPARK-41185]</a>: Remove ARM limitation for YuniKorn from docs</p>
</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-35542">[SPARK-35542]</a>: Fix: Bucketizer created for multiple columns with parameters splitsArray, inputCols and outputCols can not be loaded after saving it</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-36057">[SPARK-36057]</a>: SPIP: Support Customized Kubernetes Schedulers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38034">[SPARK-38034]</a>: Optimize TransposeWindow rule</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38404">[SPARK-38404]</a>: Improve CTE resolution when a nested CTE references an outer CTE</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38614">[SPARK-38614]</a>: Don’t push down limit through window that’s using percent_rank</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38717">[SPARK-38717]</a>: Handle Hive’s bucket spec case preserving behaviour</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-38796">[SPARK-38796]</a>: Update to_number and try_to_number functions to allow PR with positive numbers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39184">[SPARK-39184]</a>: Handle undersized result array in date and timestamp sequences</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39200">[SPARK-39200]</a>: Make Fallback Storage readFully on content</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39340">[SPARK-39340]</a>: DS v2 agg pushdown should allow dots in the name of top-level columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39355">[SPARK-39355]</a>: Single column uses quoted to construct UnresolvedAttribute</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39419">[SPARK-39419]</a>: Fix ArraySort to throw an exception when the comparator returns null</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39447">[SPARK-39447]</a>: Avoid AssertionError in AdaptiveSparkPlanExec.doExecuteBroadcast</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39476">[SPARK-39476]</a>: Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39548">[SPARK-39548]</a>: CreateView Command with a window clause query hit a wrong window definition not found issue</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39570">[SPARK-39570]</a>: Inline table should allow expressions with alias</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39614">[SPARK-39614]</a>: K8s pod name follows DNS Subdomain Names rule</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39633">[SPARK-39633]</a>: Support timestamp in seconds for TimeTravel using Dataframe options</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39647">[SPARK-39647]</a>: Register the executor with ESS before registering the BlockManager</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39650">[SPARK-39650]</a>: Fix incorrect value schema in streaming deduplication with backward compatibility</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39656">[SPARK-39656]</a>: Fix wrong namespace in DescribeNamespaceExec</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39657">[SPARK-39657]</a>: YARN AM client should call the non-static setTokensConf method</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39672">[SPARK-39672]</a>: Fix removing project before filter with correlated subquery</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39758">[SPARK-39758]</a>: Fix NPE from the regexp functions on invalid patterns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39775">[SPARK-39775]</a>: Disable validate default values when parsing Avro schemas</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39806">[SPARK-39806]</a>: Accessing _metadata on partitioned table can crash a query</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39833">[SPARK-39833]</a>: Disable Parquet column index in DSv1 to fix a correctness issue in the case of overlapping partition and data columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39835">[SPARK-39835]</a>: Fix EliminateSorts remove global sort below the local sort</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39839">[SPARK-39839]</a>: Handle special case of null variable-length Decimal with non-zero offsetAndSize in UnsafeRow structural integrity check</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39847">[SPARK-39847]</a>: Fix race condition in RocksDBLoader.loadLibrary() if caller thread is interrupted</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39857">[SPARK-39857]</a>: V2ExpressionBuilder uses the wrong LiteralValue data type for In predicate</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39867">[SPARK-39867]</a>: Global limit should not inherit OrderPreservingUnaryNode</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39887">[SPARK-39887]</a>: RemoveRedundantAliases should keep aliases that make the output of projection nodes unique</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39896">[SPARK-39896]</a>: UnwrapCastInBinaryComparison should work when the literal of In/InSet downcast failed</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39900">[SPARK-39900]</a>: Address partial or negated condition in binary format’s predicate pushdown</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39911">[SPARK-39911]</a>: Optimize global Sort to RepartitionByExpression</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39915">[SPARK-39915]</a>: Dataset.repartition(N) may not create N partitions Non-AQE part</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39915">[SPARK-39915]</a>: Ensure the output partitioning is user-specified in AQE</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39932">[SPARK-39932]</a>: WindowExec should clear the final partition buffer</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39951">[SPARK-39951]</a>: Update Parquet V2 columnar check for nested fields</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39952">[SPARK-39952]</a>: SaveIntoDataSourceCommand should recache result relation</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39962">[SPARK-39962]</a>: Apply projection when group attributes are empty</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-39976">[SPARK-39976]</a>: ArrayIntersect should handle null in left expression correctly</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40002">[SPARK-40002]</a>: Don’t push down limit through window using ntile</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40065">[SPARK-40065]</a>: Mount ConfigMap on executors with non-default profile as well</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40079">[SPARK-40079]</a>: Add Imputer inputCols validation for empty input case</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40089">[SPARK-40089]</a>: Fix sorting for some Decimal types</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40117">[SPARK-40117]</a>: Convert condition to java in DataFrameWriterV2.overwrite</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40121">[SPARK-40121]</a>: Initialize projection used for Python UDF</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40132">[SPARK-40132]</a>: Restore rawPredictionCol to MultilayerPerceptronClassifier.setParams</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40149">[SPARK-40149]</a>: Propagate metadata columns through Project</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40152">[SPARK-40152]</a>: Fix split_part codegen compilation issue</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40169">[SPARK-40169]</a>: Don’t pushdown Parquet filters with no reference to data schema</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40212">[SPARK-40212]</a>: SparkSQL castPartValue does not properly handle byte, short, or float</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40213">[SPARK-40213]</a>: Support ASCII value conversion for Latin-1 characters</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40218">[SPARK-40218]</a>: GROUPING SETS should preserve the grouping columns</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40228">[SPARK-40228]</a>: Do not simplify multiLike if child is not a cheap expression</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40247">[SPARK-40247]</a>: Fix BitSet equality check</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40280">[SPARK-40280]</a>: Add support for parquet push down for annotated int and long</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40297">[SPARK-40297]</a>: CTE outer reference nested in CTE main body cannot be resolved</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40362">[SPARK-40362]</a>: Fix BinaryComparison canonicalization</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40380">[SPARK-40380]</a>: Fix constant-folding of InvokeLike to avoid non-serializable literal embedded in the plan</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40385">[SPARK-40385]</a>: Fix interpreted path for companion object constructor</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40389">[SPARK-40389]</a>: Decimals can’t upcast as integral types if the cast can overflow</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40468">[SPARK-40468]</a>: Fix column pruning in CSV when _corrupt_record is selected</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40508">[SPARK-40508]</a>: Treat unknown partitioning as UnknownPartitioning</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40535">[SPARK-40535]</a>: Fix bug the buffer of AggregatingAccumulator will not be created if the input rows is empty</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40562">[SPARK-40562]</a>: Add <code class="language-plaintext highlighter-rouge">spark.sql.legacy.groupingIdWithAppendedUserGroupBy</code></li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40612">[SPARK-40612]</a>: Fixing the principal used for delegation token renewal on non-YARN resource managers</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40660">[SPARK-40660]</a>: Switch to XORShiftRandom to distribute elements</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40703">[SPARK-40703]</a>: Introduce shuffle on SinglePartition to improve parallelism</li>
</ul>
<h3 id="dependency-changes">Dependency Changes</h3>
<p>Although this is a maintenance release, we still upgraded some dependencies. They are:</p>
<ul>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40801">[SPARK-40801]</a>: Upgrade Apache Commons Text to 1.10</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-40886">[SPARK-40886]</a>: Bump Jackson Databind 2.13.4.2</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41030">[SPARK-41030]</a>: Upgrade Apache Ivy to 2.5.1</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41031">[SPARK-41031]</a>: Upgrade <code class="language-plaintext highlighter-rouge">org.tukaani:xz</code> to 1.9</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41202">[SPARK-41202]</a>: Update ORC to 1.7.7</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-41686">[SPARK-41686]</a>: Upgrade Apache Ivy to 2.5.1</li>
<li><a href="https://issues.apache.org/jira/browse/SPARK-42179">[SPARK-42179]</a>: Upgrade ORC to 1.7.8</li>
</ul>
<p>You can consult JIRA for the <a href="https://s.apache.org/spark-3.3.2">detailed changes</a>.</p>
<p>We would like to acknowledge all community members for contributing patches to this release.</p>
<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>
</div>
<div class="col-12 col-md-3">
<div class="news" style="margin-bottom: 20px;">
<h5>Latest News</h5>
<ul class="list-unstyled">
<li><a href="/news/spark-3-5-5-released.html">Spark 3.5.5 released</a>
<span class="small">(Feb 27, 2025)</span></li>
<li><a href="/news/spark-3-5-4-released.html">Spark 3.5.4 released</a>
<span class="small">(Dec 20, 2024)</span></li>
<li><a href="/news/spark-3-4-4-released.html">Spark 3.4.4 released</a>
<span class="small">(Oct 27, 2024)</span></li>
<li><a href="/news/spark-4.0.0-preview2.html">Preview release of Spark 4.0</a>
<span class="small">(Sep 26, 2024)</span></li>
</ul>
<p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
</div>
<div style="text-align:center; margin-bottom: 20px;">
<a href="https://www.apache.org/events/current-event.html">
<img src="https://www.apache.org/events/current-event-234x60.png" alt="Current Apache Software Foundation event" style="max-width: 100%;"/>
</a>
</div>
<div class="hidden-xs hidden-sm">
<a href="/downloads.html" class="btn btn-cta btn-lg d-grid" style="margin-bottom: 30px;">
Download Spark
</a>
<p style="font-size: 16px; font-weight: 500; color: #555;">
Built-in Libraries:
</p>
<ul class="list-none">
<li><a href="/sql/">SQL and DataFrames</a></li>
<li><a href="/streaming/">Spark Streaming</a></li>
<li><a href="/mllib/">MLlib (machine learning)</a></li>
<li><a href="/graphx/">GraphX (graph)</a></li>
</ul>
<a href="/third-party-projects.html">Third-Party Projects</a>
</div>
</div>
</div>
<footer class="small">
<hr>
Apache Spark, Spark, Apache, the Apache feather logo, and the Apache Spark project logo are either registered
trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
See guidance on use of Apache Spark <a href="/trademarks.html">trademarks</a>.
All other marks mentioned may be trademarks or registered trademarks of their respective owners.
Copyright © 2018 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.
</footer>
</div>
<script src="/js/jquery.js"></script>
<script src="/js/bootstrap.bundle.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>
</body>
</html>