From 23cbc88f92f76572588e31d3096ae77dc58719be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= Date: Mon, 13 Apr 2026 12:06:22 +0000 Subject: [PATCH] GH-3475: Fix parquet-vector compatibility with Java > 17 Replace the ByteBuffer-specific vector loads with local helpers that copy the required bytes and then call ByteVector.fromArray. This removes the dependency on JDK-specific ByteVector.fromByteBuffer entry points, which can fail with NoSuchMethodError on newer runtimes. Assisted-by: OpenCode:gpt-5.4 --- .github/workflows/vector-plugins.yml | 13 ++- .gitignore | 2 +- .../bitpacking/ByteBitPacking512VectorLE.java | 93 +++++++++++-------- 3 files changed, 64 insertions(+), 44 deletions(-) diff --git a/.github/workflows/vector-plugins.yml b/.github/workflows/vector-plugins.yml index cc57e97ffd..957d0f8777 100644 --- a/.github/workflows/vector-plugins.yml +++ b/.github/workflows/vector-plugins.yml @@ -26,7 +26,7 @@ jobs: strategy: fail-fast: false matrix: - java: [ '17' ] + java: [ '17', '21', '25' ] codes: [ 'uncompressed' ] name: Build Parquet with JDK ${{ matrix.java }} and ${{ matrix.codes }} @@ -46,7 +46,7 @@ jobs: run: | EXTRA_JAVA_TEST_ARGS=$(./mvnw help:evaluate -Dexpression=extraJavaTestArgs -q -DforceStdout) export MAVEN_OPTS="$MAVEN_OPTS $EXTRA_JAVA_TEST_ARGS" - ./mvnw install --batch-mode -Pvector-plugins -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true -Dmaven.buildNumber.skip=true -Djava.version=${{ matrix.java }} -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am + ./mvnw install --batch-mode -Pvector-plugins -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true -Dmaven.buildNumber.skip=true -Dspotless.check.skip=true -Djava.version=${{ matrix.java }} -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am - name: verify env: TEST_CODECS: ${{ matrix.codes }} @@ -54,4 +54,11 @@ jobs: run: | EXTRA_JAVA_TEST_ARGS=$(./mvnw help:evaluate 
-Dexpression=extraJavaTestArgs -q -DforceStdout) export MAVEN_OPTS="$MAVEN_OPTS $EXTRA_JAVA_TEST_ARGS" - ./mvnw verify --batch-mode -Pvector-plugins javadoc:javadoc -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am + # Spotless check uses palantir-java-format which relies on internal javac APIs + # that are not available on all JDK versions (e.g. JDK 25+). Since the formatting + # result is JDK-independent, running the check on JDK 17 alone is sufficient. + SPOTLESS_ARGS="" + if [ "${{ matrix.java }}" != "17" ]; then + SPOTLESS_ARGS="-Dspotless.check.skip=true" + fi + ./mvnw verify --batch-mode -Pvector-plugins javadoc:javadoc $SPOTLESS_ARGS -pl parquet-plugins/parquet-encoding-vector,parquet-plugins/parquet-plugins-benchmarks -am diff --git a/.gitignore b/.gitignore index 2fd06049ea..568aa2a323 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.class .project .classpath +.factorypath .settings target # Package Files # @@ -20,4 +21,3 @@ target/ mvn_install.log .vscode/* .DS_Store - diff --git a/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java b/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java index eb1690a4e1..13b32ca50e 100644 --- a/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java +++ b/parquet-plugins/parquet-encoding-vector/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPacking512VectorLE.java @@ -177,7 +177,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BYTE_SPECIES_64, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BYTE_SPECIES_64, in, 
inPos); ShortVector tempRes = byteVector .castShape(SHORT_SPECIES_512, 0) .reinterpretAsBytes() @@ -260,7 +260,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BYTE_SPECIES, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BYTE_SPECIES, in, inPos); ShortVector tempRes = byteVector .castShape(LONG_SPECIES, 0) .reinterpretAsBytes() @@ -377,9 +377,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B128, in, inPos, in.order()) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B128, in, inPos).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -466,7 +465,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(BSPECIES, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(BSPECIES, in, inPos); ShortVector tempRes = byteVector .castShape(ISPECIES, 0) .reinterpretAsBytes() @@ -582,9 +581,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, inp_mask).castShape(S512, 
0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) @@ -705,9 +703,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, inp_mask).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) @@ -827,9 +824,8 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B256, in, inPos, in.order(), inp_mask) - .castShape(S512, 0) - .reinterpretAsBytes(); + ByteVector byteVector = + fromByteBuffer(B256, in, inPos, inp_mask).castShape(S512, 0).reinterpretAsBytes(); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -914,7 +910,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); byteVector .castShape(ISPECIES, 0) .lanewise(VectorOperators.AND, 255) @@ -1004,7 +1000,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = 
byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1084,7 +1080,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1194,7 +1190,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1280,7 +1276,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order()); + ByteVector byteVector = fromByteBuffer(B512, in, inPos); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1388,7 +1384,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1512,7 +1508,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void 
unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1630,7 +1626,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsShorts() @@ -1703,7 +1699,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); ShortVector shortVector = byteVector.reinterpretAsShorts(); shortVector .castShape(I512, 0) @@ -1783,7 +1779,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -1866,7 +1862,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = 
ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -1944,7 +1940,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2022,7 +2018,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2102,7 +2098,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2182,7 +2178,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector 
.rearrange(perm_mask0) .reinterpretAsInts() @@ -2261,7 +2257,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2332,7 +2328,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector.rearrange(perm_mask0).reinterpretAsInts().lanewise(VectorOperators.AND, 16777215); tempRes1.intoArray(out, outPos, out_mask); @@ -2407,7 +2403,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2486,7 +2482,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2603,7 +2599,7 @@ public final void 
unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2718,7 +2714,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2832,7 +2828,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -2960,7 +2956,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -3089,7 +3085,7 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final 
int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector .rearrange(perm_mask0) .reinterpretAsInts() @@ -3175,13 +3171,30 @@ public final void unpackValuesUsingVector(final byte[] in, final int inPos, fina public final void unpackValuesUsingVector( final ByteBuffer in, final int inPos, final int[] out, final int outPos) { - ByteVector byteVector = ByteVector.fromByteBuffer(B512, in, inPos, in.order(), inp_mask); + ByteVector byteVector = fromByteBuffer(B512, in, inPos, inp_mask); IntVector tempRes1 = byteVector.rearrange(perm_mask0).reinterpretAsInts(); tempRes1.intoArray(out, outPos, out_mask); } } + private static ByteVector fromByteBuffer(VectorSpecies<Byte> species, ByteBuffer input, int inPos) { + return ByteVector.fromArray(species, readInputBytes(input, inPos, species.length()), 0); + } + + private static ByteVector fromByteBuffer( + VectorSpecies<Byte> species, ByteBuffer input, int inPos, VectorMask<Byte> mask) { + return ByteVector.fromArray(species, readInputBytes(input, inPos, mask.lastTrue() + 1), 0, mask); + } + + private static byte[] readInputBytes(ByteBuffer input, int inPos, int byteCount) { + byte[] bytes = new byte[byteCount]; + ByteBuffer source = input.duplicate(); + source.position(inPos); + source.get(bytes); + return bytes; + } + private static void notSupport() { throw new RuntimeException( "ByteBitPacking512VectorLE doesn't support the function, please use ByteBitPackingLE!");