From b6325c6c03ef02ff82ba361eb2b2a956d89ab300 Mon Sep 17 00:00:00 2001 From: Michael Wintermeyer Date: Tue, 28 Feb 2023 19:17:15 -0500 Subject: [PATCH 1/4] Do not set scheme if unset --- .../palantir/crypto2/hadoop/StandaloneEncryptedFileSystem.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystem.java b/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystem.java index b3b432d22..9489f26d6 100644 --- a/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystem.java +++ b/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystem.java @@ -164,6 +164,9 @@ private static Function setSchemeFunc(final String scheme) { private static Function setUriSchemeFunc(final String scheme) { return uri -> { + if (uri.getScheme() == null) { + return uri; + } try { return new URI( scheme, From 2defbb8eaa931b20b521de478788c1480a791281 Mon Sep 17 00:00:00 2001 From: svc-changelog Date: Wed, 1 Mar 2023 00:57:31 +0000 Subject: [PATCH 2/4] Add generated changelog entries --- changelog/@unreleased/pr-667.v2.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog/@unreleased/pr-667.v2.yml diff --git a/changelog/@unreleased/pr-667.v2.yml b/changelog/@unreleased/pr-667.v2.yml new file mode 100644 index 000000000..82682f8a4 --- /dev/null +++ b/changelog/@unreleased/pr-667.v2.yml @@ -0,0 +1,6 @@ +type: fix +fix: + description: StandaloneEncryptedFileSystem returns to its old behavior, of only + overriding the scheme when the uri uses a scheme + links: + - https://github.com/palantir/hadoop-crypto/pull/667 From 3ef5a2ba4b9bf70d229badcc93a451748e71a49e Mon Sep 17 00:00:00 2001 From: Michael Wintermeyer Date: Fri, 3 Mar 2023 15:05:38 -0500 Subject: [PATCH 3/4] add test for s3 checkPath validation --- .../hadoop/PathConvertingFileSystem.java | 4 +- .../StandaloneEncryptedFileSystemTest.java | 93 +++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/PathConvertingFileSystem.java b/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/PathConvertingFileSystem.java index 59c8cac3e..0b2f4e1d9 100644 --- a/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/PathConvertingFileSystem.java +++ b/hadoop-crypto/src/main/java/com/palantir/crypto2/hadoop/PathConvertingFileSystem.java @@ -16,6 +16,7 @@ package com.palantir.crypto2.hadoop; +import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.net.URI; import java.util.EnumSet; @@ -146,7 +147,8 @@ public FileChecksum getFileChecksum(Path path) throws IOException { return fs.getFileChecksum(to(path)); } - private Path to(Path path) { + @VisibleForTesting + Path to(Path path) { return toFunc.apply(path); } diff --git a/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java b/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java index 347b4f434..a70745f07 100644 --- a/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java +++ b/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.URI; +import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.security.KeyPair; @@ -35,6 +36,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.StringUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -296,6 +298,97 @@ public void testCopyFromLocalFile() throws IOException { assertThat(readBytes).containsExactly(DATA_BYTES); } + /** + * This validation logic for {@link S3MimicingFileSystem#checkPath} is copied exactly from + * {@link org.apache.hadoop.fs.s3a.S3AFileSystem} and {@link org.apache.hadoop.fs.s3native.S3xLoginHelper}. + */ + static class S3MimicingFileSystem extends DelegatingFileSystem { + protected S3MimicingFileSystem(FileSystem delegate) { + super(delegate); + } + + @Override + public void checkPath(Path path) { + checkPath(getConf(), getUri(), path, getDefaultPort()); + super.checkPath(path); + } + + @SuppressWarnings({"ReferenceEquality", "OperatorPrecedence"}) + private void checkPath(Configuration conf, URI fsUri, Path path, int defaultPort) { + URI pathUri = path.toUri(); + String thatScheme = pathUri.getScheme(); + if (thatScheme != null) { + URI thisUri = canonicalizeUri(fsUri, defaultPort); + String thisScheme = thisUri.getScheme(); + if (StringUtils.equalsIgnoreCase(thisScheme, thatScheme)) { + String thisHost = thisUri.getHost(); + String thatHost = pathUri.getHost(); + if (thatHost == null && thisHost != null) { + URI defaultUri = FileSystem.getDefaultUri(conf); + if (StringUtils.equalsIgnoreCase(thisScheme, defaultUri.getScheme())) { + pathUri = defaultUri; + } else { + pathUri = null; + } + } + + if (pathUri != null) { + pathUri = canonicalizeUri(pathUri, defaultPort); + thatHost = pathUri.getHost(); + if (thisHost == thatHost + || thisHost != null && StringUtils.equalsIgnoreCase(thisHost, thatHost)) { + return; + } + } + } + + throw new IllegalArgumentException("Wrong FS " + toString(pathUri) + " -expected " + fsUri); + } + } + + @SuppressWarnings("ParameterAssignment") + private URI canonicalizeUri(URI uri, int defaultPort) { + if (uri.getPort() == -1 && defaultPort > 0) { + try { + uri = new URI( + uri.getScheme(), + uri.getUserInfo(), + uri.getHost(), + defaultPort, + uri.getPath(), + uri.getQuery(), + uri.getFragment()); + } catch (URISyntaxException var3) { + throw new AssertionError("Valid URI became unparseable: " + uri); + } + } + + return uri; + } + + private String toString(URI pathUri) { + return pathUri != null + ? String.format("%s://%s/%s", pathUri.getScheme(), pathUri.getHost(), pathUri.getPath()) + : "(null URI)"; + } + } + + @Test + public void testS3WorksWhenNoDefaultFsSet() { + S3MimicingFileSystem s3MimicingEfs = new S3MimicingFileSystem(efs); + + // Convert the file path, because that normally happens by the time FileSystem calls checkPath internally + StandaloneEncryptedFileSystem standaloneEncryptedFileSystem = (StandaloneEncryptedFileSystem) efs; + EncryptedFileSystem encryptedFileSystem = + (EncryptedFileSystem) standaloneEncryptedFileSystem.getRawFileSystem(); + PathConvertingFileSystem pathConvertingFileSystem = + (PathConvertingFileSystem) encryptedFileSystem.getRawFileSystem(); + path = pathConvertingFileSystem.to(path); + + // Actually test that S3's logic to check the path should work. Most methods check this validation first. + s3MimicingEfs.checkPath(path); + } + private static Configuration getBaseConf() { Configuration conf = new Configuration(); conf.set("fs.efile.impl", StandaloneEncryptedFileSystem.class.getCanonicalName()); From 06e2db14b28350c0d466cd0c653a88cfec41f351 Mon Sep 17 00:00:00 2001 From: Michael Wintermeyer Date: Fri, 3 Mar 2023 15:51:58 -0500 Subject: [PATCH 4/4] simpler unit test --- .../StandaloneEncryptedFileSystemTest.java | 88 ++----------------- 1 file changed, 7 insertions(+), 81 deletions(-) diff --git a/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java b/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java index a70745f07..cfb1a4e87 100644 --- a/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java +++ b/hadoop-crypto/src/test/java/com/palantir/crypto2/hadoop/StandaloneEncryptedFileSystemTest.java @@ -25,7 +25,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.URI; -import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.security.KeyPair; @@ -36,7 +35,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.StringUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -299,94 +297,22 @@ public void testCopyFromLocalFile() throws IOException { } /** - * This validation logic for {@link S3MimicingFileSystem#checkPath} is copied exactly from - * {@link org.apache.hadoop.fs.s3a.S3AFileSystem} and {@link org.apache.hadoop.fs.s3native.S3xLoginHelper}. + * Preserving paths with no scheme present is helpful to bypass validation in + * {@link org.apache.hadoop.fs.s3native.S3xLoginHelper#checkPath} when using S3A. */ - static class S3MimicingFileSystem extends DelegatingFileSystem { - protected S3MimicingFileSystem(FileSystem delegate) { - super(delegate); - } - - @Override - public void checkPath(Path path) { - checkPath(getConf(), getUri(), path, getDefaultPort()); - super.checkPath(path); - } - - @SuppressWarnings({"ReferenceEquality", "OperatorPrecedence"}) - private void checkPath(Configuration conf, URI fsUri, Path path, int defaultPort) { - URI pathUri = path.toUri(); - String thatScheme = pathUri.getScheme(); - if (thatScheme != null) { - URI thisUri = canonicalizeUri(fsUri, defaultPort); - String thisScheme = thisUri.getScheme(); - if (StringUtils.equalsIgnoreCase(thisScheme, thatScheme)) { - String thisHost = thisUri.getHost(); - String thatHost = pathUri.getHost(); - if (thatHost == null && thisHost != null) { - URI defaultUri = FileSystem.getDefaultUri(conf); - if (StringUtils.equalsIgnoreCase(thisScheme, defaultUri.getScheme())) { - pathUri = defaultUri; - } else { - pathUri = null; - } - } - - if (pathUri != null) { - pathUri = canonicalizeUri(pathUri, defaultPort); - thatHost = pathUri.getHost(); - if (thisHost == thatHost - || thisHost != null && StringUtils.equalsIgnoreCase(thisHost, thatHost)) { - return; - } - } - } - - throw new IllegalArgumentException("Wrong FS " + toString(pathUri) + " -expected " + fsUri); - } - } - - @SuppressWarnings("ParameterAssignment") - private URI canonicalizeUri(URI uri, int defaultPort) { - if (uri.getPort() == -1 && defaultPort > 0) { - try { - uri = new URI( - uri.getScheme(), - uri.getUserInfo(), - uri.getHost(), - defaultPort, - uri.getPath(), - uri.getQuery(), - uri.getFragment()); - } catch (URISyntaxException var3) { - throw new AssertionError("Valid URI became unparseable: " + uri); - } - } - - return uri; - } - - private String toString(URI pathUri) { - return pathUri != null - ? String.format("%s://%s/%s", pathUri.getScheme(), pathUri.getHost(), pathUri.getPath()) - : "(null URI)"; - } - } - @Test - public void testS3WorksWhenNoDefaultFsSet() { - S3MimicingFileSystem s3MimicingEfs = new S3MimicingFileSystem(efs); - + public void testNoScheme() { // Convert the file path, because that normally happens by the time FileSystem calls checkPath internally StandaloneEncryptedFileSystem standaloneEncryptedFileSystem = (StandaloneEncryptedFileSystem) efs; EncryptedFileSystem encryptedFileSystem = (EncryptedFileSystem) standaloneEncryptedFileSystem.getRawFileSystem(); PathConvertingFileSystem pathConvertingFileSystem = (PathConvertingFileSystem) encryptedFileSystem.getRawFileSystem(); - path = pathConvertingFileSystem.to(path); + Path convertedPath = pathConvertingFileSystem.to(path); - // Actually test that S3's logic to check the path should work. Most methods check this validation first. - s3MimicingEfs.checkPath(path); + // Just like the original path, the converted path should not have a scheme + assertThat(path.toUri().getScheme()).isNull(); + assertThat(convertedPath.toUri().getScheme()).isNull(); } private static Configuration getBaseConf() {