Skip to content

Commit b744182

Browse files
authored
Start using spark4-preview versions (#2159)
* Start using spark4-preview versions
* Allow to download preview versions
* Expect warnings in spark
* Disable local_sparklyr test for now
1 parent 2f1cf2a commit b744182

File tree

6 files changed

+27
-6
lines changed

6 files changed

+27
-6
lines changed

Diff for: CHANGELOG.md

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@
33
This changelog only contains breaking and/or significant changes manually introduced to this repository (using Pull Requests).
44
All image manifests can be found in [the wiki](https://github.com/jupyter/docker-stacks/wiki).
55

6+
## 2024-10-22
7+
8+
Affected: users of the `pyspark-notebook` and `all-spark-notebook` images
9+
10+
- **Breaking:** Start using Spark 4.0.0 preview versions ([#2159](https://github.com/jupyter/docker-stacks/pull/2159)).
11+
`sparklyr` doesn't seem to support Spark v4 yet when using Spark locally.
12+
13+
Reason: Spark v3 is not compatible with Python 3.12, and [the voting group has decided](https://github.com/jupyter/docker-stacks/pull/2072#issuecomment-2414123851) to switch to the Spark v4 preview version.
14+
615
## 2024-10-09
716

817
Affected: users building a custom set of images

Diff for: images/pyspark-notebook/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ USER ${NB_UID}
6363
RUN mamba install --yes \
6464
'grpcio-status' \
6565
'grpcio' \
66-
'pandas=2.0.3' \
66+
'pandas=2.2.2' \
6767
'pyarrow' && \
6868
mamba clean --all -f -y && \
6969
fix-permissions "${CONDA_DIR}" && \

Diff for: images/pyspark-notebook/setup_spark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def get_latest_spark_version() -> str:
3636
stable_versions = [
3737
ref.removeprefix("spark-").removesuffix("/")
3838
for ref in all_refs
39-
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
39+
if ref.startswith("spark-") and "incubating" not in ref
4040
]
4141

4242
# Compare versions semantically

Diff for: tests/all-spark-notebook/test_spark_notebooks.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
@pytest.mark.flaky(retries=3, delay=1)
1515
@pytest.mark.parametrize(
1616
"test_file",
17-
["issue_1168", "local_pyspark", "local_sparklyr", "local_sparkR"],
17+
["issue_1168", "local_pyspark", "local_sparkR"],
1818
)
1919
def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
2020
"""Check if Spark notebooks can be executed"""
@@ -31,10 +31,14 @@ def test_nbconvert(container: TrackedContainer, test_file: str) -> None:
3131
)
3232
logs = container.run_and_wait(
3333
timeout=60,
34+
no_warnings=False,
3435
volumes={str(host_data_dir): {"bind": cont_data_dir, "mode": "ro"}},
3536
tty=True,
3637
command=["bash", "-c", command],
3738
)
39+
warnings = TrackedContainer.get_warnings(logs)
40+
assert len(warnings) == 1
41+
assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
3842

3943
expected_file = f"{output_dir}/{test_file}.md"
4044
assert expected_file in logs, f"Expected file {expected_file} not generated"

Diff for: tests/pyspark-notebook/test_spark.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@
33
import logging
44

55
from tests.conftest import TrackedContainer
6-
from tests.run_command import run_command
76

87
LOGGER = logging.getLogger(__name__)
98

109

1110
def test_spark_shell(container: TrackedContainer) -> None:
1211
"""Checking if Spark (spark-shell) is running properly"""
13-
logs = run_command(container, 'spark-shell <<< "1+1"', timeout=60)
12+
logs = container.run_and_wait(
13+
timeout=60,
14+
no_warnings=False,
15+
tty=True,
16+
command=["bash", "-c", 'spark-shell <<< "1+1"'],
17+
)
18+
warnings = TrackedContainer.get_warnings(logs)
19+
assert len(warnings) == 1
20+
assert "Using incubator modules: jdk.incubator.vector" in warnings[0]
21+
1422
assert "res0: Int = 2" in logs, "spark-shell does not work"

Diff for: tests/pyspark-notebook/units/unit_pandas_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# Distributed under the terms of the Modified BSD License.
33
import pandas
44

5-
assert pandas.__version__ == "2.0.3"
5+
assert pandas.__version__ == "2.2.2"

0 commit comments

Comments
 (0)