Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/pr_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
os: [ubuntu-latest]
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
spark-version: ['3.5']
scala-version: ['2.12', '2.13']
is_push_event:
- ${{ github.event_name == 'push' }}
Expand Down Expand Up @@ -109,7 +109,7 @@ jobs:
os: [ubuntu-latest]
java_version: [8, 11, 17]
test-target: [java]
spark-version: ['3.3']
spark-version: ['3.3', '3.4']
scala-version: ['2.12', '2.13']
fail-fast: false
name: ${{ matrix.os }}/java ${{ matrix.java_version }}-spark-${{matrix.spark-version}}-scala-${{matrix.scala-version}}/${{ matrix.test-target }}
Expand All @@ -134,7 +134,7 @@ jobs:
os: [macos-13]
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
spark-version: ['3.4', '3.5']
scala-version: ['2.12', '2.13']
fail-fast: false
if: github.event_name == 'push'
Expand All @@ -161,7 +161,7 @@ jobs:
matrix:
java_version: [8, 11, 17]
test-target: [rust, java]
spark-version: ['3.4']
spark-version: ['3.4', '3.5']
scala-version: ['2.12', '2.13']
is_push_event:
- ${{ github.event_name == 'push' }}
Expand Down Expand Up @@ -247,7 +247,7 @@ jobs:
matrix:
java_version: [8, 17]
test-target: [java]
spark-version: ['3.3']
spark-version: ['3.3', '3.4']
scala-version: ['2.12', '2.13']
exclude:
- java_version: 8
Expand Down
1 change: 1 addition & 0 deletions common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ under the License.
<sources>
<source>src/main/${shims.majorVerSrc}</source>
<source>src/main/${shims.minorVerSrc}</source>
<source>src/main/${shims.pre35Src}</source>
</sources>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,13 @@ import org.apache.spark.sql.execution.datasources.PartitionedFile

object ShimBatchReader {

// TODO: remove after dropping Spark 3.3 support and directly call PartitionedFile
def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile =
classOf[PartitionedFile].getDeclaredConstructors
.map(c =>
c.getParameterCount match {
case 5 =>
c.newInstance(
partitionValues,
file,
Long.box(-1), // -1 means we read the entire file
Long.box(-1),
Array.empty[String])
case 7 =>
c.newInstance(
partitionValues,
c.getParameterTypes()(1)
.getConstructor(classOf[String])
.newInstance(file)
.asInstanceOf[AnyRef],
Long.box(-1), // -1 means we read the entire file
Long.box(-1),
Array.empty[String],
Long.box(0),
Long.box(0))
})
.head
.asInstanceOf[PartitionedFile]
PartitionedFile(
partitionValues,
file,
-1, // -1 means we read the entire file
-1,
Array.empty[String],
0,
0)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.comet.shims

import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile

object ShimBatchReader {

  /**
   * Builds a `PartitionedFile` that covers the whole file at `file`.
   *
   * The path is wrapped in a `SparkPath`, as required by this Spark version's
   * `PartitionedFile` constructor.
   * NOTE(review): the trailing 0, 0 arguments mirror the pre-3.5 shim's
   * defaults — confirm downstream readers do not rely on real values there.
   */
  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile = {
    val sparkPath = SparkPath.fromPathString(file)
    val entireFile = -1L // -1 means we read the entire file
    PartitionedFile(partitionValues, sparkPath, entireFile, entireFile, Array.empty[String], 0L, 0L)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.comet.shims

import org.apache.spark.paths.SparkPath
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.PartitionedFile

object ShimBatchReader {

  /**
   * Builds a `PartitionedFile` that covers the whole file at `file`.
   *
   * The path is wrapped in a `SparkPath`, and this Spark version's
   * `PartitionedFile` constructor takes an extra trailing map (passed empty).
   * NOTE(review): the trailing 0, 0 arguments mirror the other shims'
   * defaults — confirm downstream readers do not rely on real values there.
   */
  def newPartitionedFile(partitionValues: InternalRow, file: String): PartitionedFile = {
    val sparkPath = SparkPath.fromPathString(file)
    val entireFile = -1L // -1 means we read the entire file
    PartitionedFile(
      partitionValues,
      sparkPath,
      entireFile,
      entireFile,
      Array.empty[String],
      0L,
      0L,
      Map.empty)
  }
}
14 changes: 13 additions & 1 deletion docs/source/user-guide/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,19 @@ The following diagram illustrates the architecture of Comet:

## Current Status

The project is currently integrated into Apache Spark 3.3 and 3.4.
Comet currently supports the following versions of Apache Spark:

- 3.3.x
- 3.4.x

Experimental support is provided for the following versions of Apache Spark. It is intended for development and
testing use only, and should not yet be used in production.

- 3.5.x
- 4.0.0-preview1

Note that Comet may not fully work with proprietary forks of Apache Spark such as the Spark versions offered by
Cloud Service Providers.

## Feature Parity with Apache Spark

Expand Down
20 changes: 20 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,11 @@ under the License.
<argLine>-ea -Xmx4g -Xss4m ${extraJavaTestArgs}</argLine>
<additional.3_3.test.source>spark-3.3-plus</additional.3_3.test.source>
<additional.3_4.test.source>spark-3.4-plus</additional.3_4.test.source>
<additional.3_5.test.source>not-needed</additional.3_5.test.source>
<additional.pre35.test.source>spark-pre-3.5</additional.pre35.test.source>
<shims.majorVerSrc>spark-3.x</shims.majorVerSrc>
<shims.minorVerSrc>spark-3.4</shims.minorVerSrc>
<shims.pre35Src>spark-pre-3.5</shims.pre35Src>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shims.pre35Src and additional.pre35.test.source seem a bit cumbersome considering there's already shims.majorVerSrc and shims.minorVerSrc.

Do you think we should reuse the existing properties instead of introducing new ones? If so, I could help submit a PR to address that since this PR is already merged.

</properties>

<dependencyManagement>
Expand Down Expand Up @@ -547,6 +550,21 @@ under the License.
</properties>
</profile>

<profile>
<!-- FIXME: this is WIP. Tests may fail -->
<id>spark-3.5</id>
<properties>
<scala.version>2.12.18</scala.version>
<spark.version>3.5.1</spark.version>
<spark.version.short>3.5</spark.version.short>
<parquet.version>1.13.1</parquet.version>
<shims.minorVerSrc>spark-3.5</shims.minorVerSrc>
<shims.pre35Src>not-needed</shims.pre35Src>
<additional.pre35.test.source>not-needed</additional.pre35.test.source>
<additional.3_5.test.source>spark-3.5</additional.3_5.test.source>
</properties>
</profile>

<profile>
<!-- FIXME: this is WIP. Tests may fail https://github.com/apache/datafusion-comet/issues/551 -->
<id>spark-4.0</id>
Expand All @@ -561,6 +579,8 @@ under the License.
<slf4j.version>2.0.13</slf4j.version>
<shims.majorVerSrc>spark-4.0</shims.majorVerSrc>
<shims.minorVerSrc>not-needed-yet</shims.minorVerSrc>
<shims.pre35Src>not-needed</shims.pre35Src>
<additional.pre35.test.source>not-needed</additional.pre35.test.source>
<!-- Use jdk17 by default -->
<java.version>17</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
Expand Down
3 changes: 3 additions & 0 deletions spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ under the License.
<sources>
<source>src/test/${additional.3_3.test.source}</source>
<source>src/test/${additional.3_4.test.source}</source>
<source>src/test/${additional.3_5.test.source}</source>
<source>src/test/${additional.pre35.test.source}</source>
<source>src/test/${shims.majorVerSrc}</source>
<source>src/test/${shims.minorVerSrc}</source>
</sources>
Expand All @@ -267,6 +269,7 @@ under the License.
<sources>
<source>src/main/${shims.majorVerSrc}</source>
<source>src/main/${shims.minorVerSrc}</source>
<source>src/main/${shims.pre35Src}</source>
</sources>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,10 @@ object CometSparkSessionExtensions extends Logging {
org.apache.spark.SPARK_VERSION >= "3.4"
}

def isSpark35Plus: Boolean = {
org.apache.spark.SPARK_VERSION >= "3.5"
}

def isSpark40Plus: Boolean = {
org.apache.spark.SPARK_VERSION >= "4.0"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.comet.shims

import org.apache.comet.expressions.CometEvalMode
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.{DataType, TimestampNTZType}

/**
 * `CometExprShim` acts as a shim for parsing expressions whose APIs differ
 * across Spark versions.
 */
trait CometExprShim {

  /** Returns the child expression and the fail-on-error flag of an `Unhex` expression. */
  protected def unhexSerde(unhex: Unhex): (Expression, Expression) =
    (unhex.child, Literal(unhex.failOnError))

  /** Whether `dt` is Spark's timestamp-without-time-zone type. */
  protected def isTimestampNTZType(dt: DataType): Boolean = dt match {
    case _: TimestampNTZType => true
    case _ => false
  }

  /** Converts the native eval mode carried by a `Cast` into Comet's eval mode. */
  protected def evalMode(c: Cast): CometEvalMode.Value =
    CometEvalModeUtil.fromSparkEvalMode(c.evalMode)
}

/** Translates Spark's cast evaluation mode into Comet's equivalent. */
object CometEvalModeUtil {
  // Exhaustive over EvalMode's visible variants; a new upstream variant would
  // surface here as a MatchError / non-exhaustive-match warning.
  def fromSparkEvalMode(evalMode: EvalMode.Value): CometEvalMode.Value = evalMode match {
    case EvalMode.LEGACY => CometEvalMode.LEGACY
    case EvalMode.TRY => CometEvalMode.TRY
    case EvalMode.ANSI => CometEvalMode.ANSI
  }
}

36 changes: 36 additions & 0 deletions spark/src/main/spark-3.5/org/apache/comet/shims/ShimSQLConf.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.comet.shims

import org.apache.spark.sql.internal.LegacyBehaviorPolicy
import org.apache.spark.sql.internal.SQLConf

/** Shim over `SQLConf` accessors and constants whose names differ across Spark versions. */
trait ShimSQLConf {

  /**
   * Reads the Parquet string-predicate pushdown flag from `sqlConf`.
   *
   * Spark 3.4 renamed `parquetFilterPushDownStringStartWith` to
   * `parquetFilterPushDownStringPredicate`, so this shim calls the new name directly.
   */
  protected def getPushDownStringPredicate(sqlConf: SQLConf): Boolean =
    sqlConf.parquetFilterPushDownStringPredicate

  // Aliases so mixed-in code can reference the policies without importing
  // LegacyBehaviorPolicy itself — NOTE(review): presumably shimmed because its
  // location/type differs across Spark versions; confirm against other shims.
  protected val LEGACY = LegacyBehaviorPolicy.LEGACY
  protected val CORRECTED = LegacyBehaviorPolicy.CORRECTED
}
Loading