Skip to content

Commit 8ebbbc6

Browse files
author
Haohui Mai
committed
HDFS-8036. Use snapshot path as source when using snapshot diff report in DistCp. Contributed by Jing Zhao.
1 parent cf33bc1 commit 8ebbbc6

File tree

4 files changed

+63
-4
lines changed

4 files changed

+63
-4
lines changed

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,9 @@ Release 2.7.0 - UNRELEASED
944944
HDFS-7748. Separate ECN flags from the Status in the DataTransferPipelineAck.
945945
(Anu Engineer and Haohui Mai via wheat9)
946946

947+
HDFS-8036. Use snapshot path as source when using snapshot diff report in
948+
DistCp. (Jing Zhao via wheat9)
949+
947950
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
948951

949952
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.fs.FileSystem;
2323
import org.apache.hadoop.fs.Path;
2424
import org.apache.hadoop.hdfs.DistributedFileSystem;
25+
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
2526
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
2627

2728
import java.io.IOException;
@@ -86,6 +87,22 @@ static boolean sync(DistCpOptions inputOptions, Configuration conf)
8687
} finally {
8788
deleteTargetTmpDir(targetFs, tmpDir);
8889
// TODO: since we have tmp directory, we can support "undo" with failures
90+
// set the source path using the snapshot path
91+
inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
92+
inputOptions.getToSnapshot())));
93+
}
94+
}
95+
96+
private static String getSnapshotName(String name) {
97+
return Path.CUR_DIR.equals(name) ? "" : name;
98+
}
99+
100+
private static Path getSourceSnapshotPath(Path sourceDir, String snapshotName) {
101+
if (Path.CUR_DIR.equals(snapshotName)) {
102+
return sourceDir;
103+
} else {
104+
return new Path(sourceDir,
105+
HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + snapshotName);
89106
}
90107
}
91108

@@ -136,8 +153,10 @@ private static boolean checkNoChange(DistCpOptions inputOptions,
136153
static DiffInfo[] getDiffs(DistCpOptions inputOptions,
137154
DistributedFileSystem fs, Path sourceDir, Path targetDir) {
138155
try {
156+
final String from = getSnapshotName(inputOptions.getFromSnapshot());
157+
final String to = getSnapshotName(inputOptions.getToSnapshot());
139158
SnapshotDiffReport sourceDiff = fs.getSnapshotDiffReport(sourceDir,
140-
inputOptions.getFromSnapshot(), inputOptions.getToSnapshot());
159+
from, to);
141160
return DiffInfo.getDiffs(sourceDiff, targetDir);
142161
} catch (IOException e) {
143162
DistCp.LOG.warn("Failed to compute snapshot diff on " + sourceDir, e);

hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,7 @@ public void commitJob(JobContext jobContext) throws IOException {
9090
}
9191

9292
try {
93-
if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)
94-
&& !(conf.getBoolean(DistCpConstants.CONF_LABEL_DIFF, false))) {
93+
if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
9594
deleteMissing(conf);
9695
} else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
9796
commitData(conf);

hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.hadoop.hdfs.DistributedFileSystem;
2525
import org.apache.hadoop.hdfs.HdfsConfiguration;
2626
import org.apache.hadoop.hdfs.MiniDFSCluster;
27+
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
2728
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
2829
import org.apache.hadoop.io.IOUtils;
2930
import org.apache.hadoop.io.SequenceFile;
@@ -97,6 +98,8 @@ public void testFallback() throws Exception {
9798
dfs.createSnapshot(source, "s2");
9899
dfs.createSnapshot(target, "s1");
99100
Assert.assertTrue(DistCpSync.sync(options, conf));
101+
// reset source paths in options
102+
options.setSourcePaths(Arrays.asList(source));
100103

101104
// changes have been made in target
102105
final Path subTarget = new Path(target, "sub");
@@ -183,9 +186,21 @@ public void testSync() throws Exception {
183186
changeData(source);
184187
dfs.createSnapshot(source, "s2");
185188

189+
// before sync, make some further changes on source. this should not affect
190+
// the later distcp since we're copying (s2-s1) to target
191+
final Path toDelete = new Path(source, "foo/d1/foo/f1");
192+
dfs.delete(toDelete, true);
193+
final Path newdir = new Path(source, "foo/d1/foo/newdir");
194+
dfs.mkdirs(newdir);
195+
186196
// do the sync
187197
Assert.assertTrue(DistCpSync.sync(options, conf));
188198

199+
// make sure the source path has been updated to the snapshot path
200+
final Path spath = new Path(source,
201+
HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
202+
Assert.assertEquals(spath, options.getSourcePaths().get(0));
203+
189204
// build copy listing
190205
final Path listingPath = new Path("/tmp/META/fileList.seq");
191206
CopyListing listing = new GlobbedCopyListing(conf, new Credentials());
@@ -209,7 +224,7 @@ public void testSync() throws Exception {
209224
.getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
210225

211226
// verify the source and target now has the same structure
212-
verifyCopy(dfs.getFileStatus(source), dfs.getFileStatus(target), false);
227+
verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
213228
}
214229

215230
private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
@@ -248,6 +263,29 @@ private void verifyCopy(FileStatus s, FileStatus t, boolean compareName)
248263
}
249264
}
250265

266+
/**
267+
* Similar test with testSync, but the "to" snapshot is specified as "."
268+
* @throws Exception
269+
*/
270+
@Test
271+
public void testSyncWithCurrent() throws Exception {
272+
options.setUseDiff(true, "s1", ".");
273+
initData(source);
274+
initData(target);
275+
dfs.allowSnapshot(source);
276+
dfs.allowSnapshot(target);
277+
dfs.createSnapshot(source, "s1");
278+
dfs.createSnapshot(target, "s1");
279+
280+
// make changes under source
281+
changeData(source);
282+
283+
// do the sync
284+
Assert.assertTrue(DistCpSync.sync(options, conf));
285+
// make sure the source path is still unchanged
286+
Assert.assertEquals(source, options.getSourcePaths().get(0));
287+
}
288+
251289
private void initData2(Path dir) throws Exception {
252290
final Path test = new Path(dir, "test");
253291
final Path foo = new Path(dir, "foo");

0 commit comments

Comments
 (0)