Commit 988bc17

HADOOP-10674. Improve PureJavaCrc32 performance and use java.util.zip.CRC32 for Java 7 and above.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1605239 13f79535-47bb-0310-9956-ffa450edef68
1 parent 2eb913b commit 988bc17

8 files changed: 186 additions & 110 deletions


hadoop-common-project/hadoop-common/CHANGES.txt

Lines changed: 3 additions & 0 deletions
@@ -472,6 +472,9 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10747. Support configurable retries on SASL connection failures in
     RPC client. (cnauroth)
 
+    HADOOP-10674. Improve PureJavaCrc32 performance and use java.util.zip.CRC32
+    for Java 7 and above. (szetszwo)
+
   OPTIMIZATIONS
 
   BUG FIXES

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java

Lines changed: 7 additions & 6 deletions
@@ -18,16 +18,18 @@
 
 package org.apache.hadoop.fs;
 
-import java.io.*;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
 import java.nio.channels.ClosedChannelException;
 import java.util.Arrays;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.PureJavaCrc32;
 
 /****************************************************************
  * Abstract Checksumed FileSystem.
@@ -147,7 +149,7 @@ public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
       if (!Arrays.equals(version, CHECKSUM_VERSION))
         throw new IOException("Not a checksum file: "+sumFile);
       this.bytesPerSum = sums.readInt();
-      set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
+      set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum, 4);
     } catch (FileNotFoundException e) {         // quietly ignore
       set(fs.verifyChecksum, null, 1, 0);
     } catch (IOException e) {                   // loudly ignore
@@ -259,8 +261,7 @@ private static class FSDataBoundedInputStream extends FSDataInputStream {
     private Path file;
     private long fileLen = -1L;
 
-    FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in)
-      throws IOException {
+    FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in) {
       super(in);
       this.fs = fs;
       this.file = file;
@@ -379,7 +380,7 @@ public ChecksumFSOutputSummer(ChecksumFileSystem fs,
                                   long blockSize,
                                   Progressable progress)
       throws IOException {
-      super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
+      super(DataChecksum.newCrc32(), fs.getBytesPerSum(), 4);
       int bytesPerSum = fs.getBytesPerSum();
       this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize,
                                                 replication, blockSize, progress);

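The swap from new PureJavaCrc32() to DataChecksum.newCrc32() works at these call sites because both implementations satisfy the java.util.zip.Checksum interface, which is all the checker and summer code relies on. A minimal standalone sketch (not part of the commit; the class and helper names are made up) of driving any Checksum the same way:

import java.util.zip.CRC32;
import java.util.zip.Checksum;

// Sketch only: any Checksum implementation is interchangeable behind
// reset()/update()/getValue(), which is what the ChecksumFileSystem code uses.
public class ChecksumSketch {
  static long checksumOf(Checksum sum, byte[] data) {
    sum.reset();                      // clear any previous state
    sum.update(data, 0, data.length); // feed the whole buffer
    return sum.getValue();            // unsigned 32-bit CRC, widened to long
  }

  public static void main(String[] args) {
    byte[] data = "hello, checksum".getBytes();
    System.out.println(Long.toHexString(checksumOf(new CRC32(), data)));
  }
}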
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java

Lines changed: 6 additions & 4 deletions
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.fs;
 
-import java.io.*;
+import java.io.EOFException;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.net.URISyntaxException;
 import java.nio.channels.ClosedChannelException;
 import java.util.ArrayList;
@@ -31,8 +33,8 @@
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.PureJavaCrc32;
 
 /**
  * Abstract Checksumed Fs.
@@ -139,7 +141,7 @@ public ChecksumFSInputChecker(ChecksumFs fs, Path file, int bufferSize)
         throw new IOException("Not a checksum file: "+sumFile);
       }
       this.bytesPerSum = sums.readInt();
-      set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
+      set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum, 4);
     } catch (FileNotFoundException e) {         // quietly ignore
       set(fs.verifyChecksum, null, 1, 0);
     } catch (IOException e) {                   // loudly ignore
@@ -335,7 +337,7 @@ public ChecksumFSOutputSummer(final ChecksumFs fs, final Path file,
       final short replication, final long blockSize,
       final Progressable progress, final ChecksumOpt checksumOpt,
       final boolean createParent) throws IOException {
-      super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
+      super(DataChecksum.newCrc32(), fs.getBytesPerSum(), 4);
 
       // checksumOpt is passed down to the raw fs. Unless it implements
       // checksum impelemts internally, checksumOpt will be ignored.

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java

Lines changed: 3 additions & 2 deletions
@@ -19,12 +19,13 @@
 package org.apache.hadoop.io.compress.zlib;
 
 import java.io.IOException;
+import java.util.zip.Checksum;
 import java.util.zip.DataFormatException;
 import java.util.zip.Inflater;
 
-import org.apache.hadoop.util.PureJavaCrc32;
 import org.apache.hadoop.io.compress.Decompressor;
 import org.apache.hadoop.io.compress.DoNotPool;
+import org.apache.hadoop.util.DataChecksum;
 
 /**
  * A {@link Decompressor} based on the popular gzip compressed file format.
@@ -54,7 +55,7 @@ public class BuiltInGzipDecompressor implements Decompressor {
   private int headerBytesRead = 0;
   private int trailerBytesRead = 0;
   private int numExtraFieldBytesRemaining = -1;
-  private PureJavaCrc32 crc = new PureJavaCrc32();
+  private Checksum crc = DataChecksum.newCrc32();
   private boolean hasExtraField = false;
   private boolean hasFilename = false;
   private boolean hasComment = false;

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

Lines changed: 9 additions & 1 deletion
@@ -22,6 +22,7 @@
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.zip.CRC32;
 import java.util.zip.Checksum;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -72,6 +73,13 @@ public static Type valueOf(int id) {
     }
   }
 
+  /**
+   * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm
+   * is chosen depending on the platform.
+   */
+  public static Checksum newCrc32() {
+    return Shell.isJava7OrAbove()? new CRC32(): new PureJavaCrc32();
+  }
 
   public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
     if ( bytesPerChecksum <= 0 ) {
@@ -82,7 +90,7 @@ public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
     case NULL :
       return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
     case CRC32 :
-      return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
+      return new DataChecksum(type, newCrc32(), bytesPerChecksum );
     case CRC32C:
      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
    default:

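The new factory returns the JDK's java.util.zip.CRC32 on Java 7 and above, where it is backed by native code, and falls back to PureJavaCrc32 on older runtimes. A standalone sketch of the same selection pattern, under the assumption that a plain java.specification.version check stands in for Hadoop's Shell.isJava7OrAbove() and that org.apache.hadoop.util.PureJavaCrc32 is on the classpath:

import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.hadoop.util.PureJavaCrc32;

// Sketch only: approximates DataChecksum.newCrc32(). The real code calls
// Shell.isJava7OrAbove(); parsing java.specification.version here is an
// assumption made so the snippet stands alone.
public class Crc32Factory {
  static boolean isJava7OrAbove() {
    String v = System.getProperty("java.specification.version", "1.6");
    return Double.parseDouble(v) >= 1.7;  // handles "1.6"/"1.7"/"1.8" and later "9"+
  }

  public static Checksum newCrc32() {
    // Java 7+ ships a natively backed CRC32; older JVMs get the pure-Java table version.
    return isJava7OrAbove() ? new CRC32() : new PureJavaCrc32();
  }
}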
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java

Lines changed: 21 additions & 37 deletions
@@ -57,38 +57,31 @@ public void reset() {
   }
 
   @Override
-  public void update(byte[] b, int off, int len) {
+  public void update(final byte[] b, final int offset, final int len) {
     int localCrc = crc;
 
-    while(len > 7) {
-      final int c0 =(b[off+0] ^ localCrc) & 0xff;
-      final int c1 =(b[off+1] ^ (localCrc >>>= 8)) & 0xff;
-      final int c2 =(b[off+2] ^ (localCrc >>>= 8)) & 0xff;
-      final int c3 =(b[off+3] ^ (localCrc >>>= 8)) & 0xff;
-      localCrc = (T[T8_7_start + c0] ^ T[T8_6_start + c1])
-          ^ (T[T8_5_start + c2] ^ T[T8_4_start + c3]);
+    final int remainder = len & 0x7;
+    int i = offset;
+    for(final int end = offset + len - remainder; i < end; i += 8) {
+      final int x = localCrc
+          ^ ((((b[i  ] << 24) >>> 24) + ((b[i+1] << 24) >>> 16))
+          +  (((b[i+2] << 24) >>> 8 ) +  (b[i+3] << 24)));
 
-      final int c4 = b[off+4] & 0xff;
-      final int c5 = b[off+5] & 0xff;
-      final int c6 = b[off+6] & 0xff;
-      final int c7 = b[off+7] & 0xff;
-
-      localCrc ^= (T[T8_3_start + c4] ^ T[T8_2_start + c5])
-          ^ (T[T8_1_start + c6] ^ T[T8_0_start + c7]);
-
-      off += 8;
-      len -= 8;
+      localCrc = ((T[((x << 24) >>> 24) + 0x700] ^ T[((x << 16) >>> 24) + 0x600])
+          ^ (T[((x << 8) >>> 24) + 0x500] ^ T[ (x >>> 24) + 0x400]))
+          ^ ((T[((b[i+4] << 24) >>> 24) + 0x300] ^ T[((b[i+5] << 24) >>> 24) + 0x200])
+          ^ (T[((b[i+6] << 24) >>> 24) + 0x100] ^ T[((b[i+7] << 24) >>> 24)]));
     }
 
     /* loop unroll - duff's device style */
-    switch(len) {
-      case 7: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 6: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 5: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 4: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 3: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 2: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
-      case 1: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)];
+    switch(remainder) {
+      case 7: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 6: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 5: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 4: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 3: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 2: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
+      case 1: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24];
      default:
        /* nothing */
    }
@@ -99,24 +92,15 @@ public void update(byte[] b, int off, int len) {
 
   @Override
   final public void update(int b) {
-    crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)];
+    crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)];
   }
 
   /*
    * CRC-32 lookup tables generated by the polynomial 0xEDB88320.
    * See also TestPureJavaCrc32.Table.
   */
-  private static final int T8_0_start = 0*256;
-  private static final int T8_1_start = 1*256;
-  private static final int T8_2_start = 2*256;
-  private static final int T8_3_start = 3*256;
-  private static final int T8_4_start = 4*256;
-  private static final int T8_5_start = 5*256;
-  private static final int T8_6_start = 6*256;
-  private static final int T8_7_start = 7*256;
-
   private static final int[] T = new int[] {
-    /* T8_0 */
+    /* T8_0 */
     0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
     0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
     0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,

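The rewritten update(byte[], int, int) folds the eight per-byte lookup tables into one array addressed by fixed offsets (0x000 through 0x700), replaces "& 0xff" masking with the equivalent "(x << 24) >>> 24" pattern, and packs four input bytes into a single int before the lookups, while the Duff's-device tail handles the remaining len & 0x7 bytes. Since PureJavaCrc32 and java.util.zip.CRC32 compute the same 0xEDB88320 polynomial, the rework can be sanity-checked against the JDK. Below is a hedged cross-check sketch; the class name is made up and this is not the commit's own TestPureJavaCrc32 changes:

import java.util.Random;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.hadoop.util.PureJavaCrc32;

// Sketch of a cross-check, not the commit's test. Lengths 0..64 exercise the
// empty case, the 8-byte main loop, and every remainder handled by the
// Duff's-device switch.
public class Crc32CrossCheck {
  public static void main(String[] args) {
    Random random = new Random(0);
    for (int len = 0; len <= 64; len++) {
      byte[] data = new byte[len];
      random.nextBytes(data);

      Checksum jdk = new CRC32();
      Checksum pure = new PureJavaCrc32();
      jdk.update(data, 0, len);
      pure.update(data, 0, len);

      if (jdk.getValue() != pure.getValue()) {
        throw new AssertionError("CRC mismatch at len=" + len);
      }
    }
    System.out.println("PureJavaCrc32 matches java.util.zip.CRC32");
  }
}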