Skip to content

Commit 33e4ae9

Browse files
committed
Merge branch 'bc/sha-256'
Add sha-256 hash and plug it through the code to allow building Git with the "NewHash".

* bc/sha-256:
  hash: add an SHA-256 implementation using OpenSSL
  sha256: add an SHA-256 implementation using libgcrypt
  Add a base implementation of SHA-256 support
  commit-graph: convert to using the_hash_algo
  t/helper: add a test helper to compute hash speed
  sha1-file: add a constant for hash block size
  t: make the sha1 test-tool helper generic
  t: add basic tests for our SHA-1 implementation
  cache: make hashcmp and hasheq work with larger hashes
  hex: introduce functions to print arbitrary hashes
  sha1-file: provide functions to look up hash algorithms
  sha1-file: rename algorithm to "sha1"
2 parents 5d3635d + 4b4e291 commit 33e4ae9

File tree

16 files changed

+642
-94
lines changed

16 files changed

+642
-94
lines changed

Makefile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,12 @@ all::
186186
# in one call to the platform's SHA1_Update(). e.g. APPLE_COMMON_CRYPTO
187187
# wants 'SHA1_MAX_BLOCK_SIZE=1024L*1024L*1024L' defined.
188188
#
189+
# Define BLK_SHA256 to use the built-in SHA-256 routines.
190+
#
191+
# Define GCRYPT_SHA256 to use the SHA-256 routines in libgcrypt.
192+
#
193+
# Define OPENSSL_SHA256 to use the SHA-256 routines in OpenSSL.
194+
#
189195
# Define NEEDS_CRYPTO_WITH_SSL if you need -lcrypto when using -lssl (Darwin).
190196
#
191197
# Define NEEDS_SSL_WITH_CRYPTO if you need -lssl when using -lcrypto (Darwin).
@@ -724,7 +730,9 @@ TEST_BUILTINS_OBJS += test-dump-split-index.o
724730
TEST_BUILTINS_OBJS += test-dump-untracked-cache.o
725731
TEST_BUILTINS_OBJS += test-example-decorate.o
726732
TEST_BUILTINS_OBJS += test-genrandom.o
733+
TEST_BUILTINS_OBJS += test-hash.o
727734
TEST_BUILTINS_OBJS += test-hashmap.o
735+
TEST_BUILTINS_OBJS += test-hash-speed.o
728736
TEST_BUILTINS_OBJS += test-index-version.o
729737
TEST_BUILTINS_OBJS += test-json-writer.o
730738
TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
@@ -747,6 +755,7 @@ TEST_BUILTINS_OBJS += test-run-command.o
747755
TEST_BUILTINS_OBJS += test-scrap-cache-tree.o
748756
TEST_BUILTINS_OBJS += test-sha1.o
749757
TEST_BUILTINS_OBJS += test-sha1-array.o
758+
TEST_BUILTINS_OBJS += test-sha256.o
750759
TEST_BUILTINS_OBJS += test-sigchain.o
751760
TEST_BUILTINS_OBJS += test-strcmp-offset.o
752761
TEST_BUILTINS_OBJS += test-string-list.o
@@ -1646,6 +1655,19 @@ endif
16461655
endif
16471656
endif
16481657

1658+
ifdef OPENSSL_SHA256
1659+
EXTLIBS += $(LIB_4_CRYPTO)
1660+
BASIC_CFLAGS += -DSHA256_OPENSSL
1661+
else
1662+
ifdef GCRYPT_SHA256
1663+
BASIC_CFLAGS += -DSHA256_GCRYPT
1664+
EXTLIBS += -lgcrypt
1665+
else
1666+
LIB_OBJS += sha256/block/sha256.o
1667+
BASIC_CFLAGS += -DSHA256_BLK
1668+
endif
1669+
endif
1670+
16491671
ifdef SHA1_MAX_BLOCK_SIZE
16501672
LIB_OBJS += compat/sha1-chunked.o
16511673
BASIC_CFLAGS += -DSHA1_MAX_BLOCK_SIZE="$(SHA1_MAX_BLOCK_SIZE)"

cache.h

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,20 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
4545
/* The length in bytes and in hex digits of an object name (SHA-1 value). */
4646
#define GIT_SHA1_RAWSZ 20
4747
#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
48+
/* The block size of SHA-1. */
49+
#define GIT_SHA1_BLKSZ 64
50+
51+
/* The length in bytes and in hex digits of an object name (SHA-256 value). */
52+
#define GIT_SHA256_RAWSZ 32
53+
#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
54+
/* The block size of SHA-256. */
55+
#define GIT_SHA256_BLKSZ 64
4856

4957
/* The length in byte and in hex digits of the largest possible hash value. */
50-
#define GIT_MAX_RAWSZ GIT_SHA1_RAWSZ
51-
#define GIT_MAX_HEXSZ GIT_SHA1_HEXSZ
58+
#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
59+
#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
60+
/* The largest possible block size for any supported hash. */
61+
#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
5262

5363
struct object_id {
5464
unsigned char hash[GIT_MAX_RAWSZ];
@@ -1028,16 +1038,12 @@ extern const struct object_id null_oid;
10281038
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
10291039
{
10301040
/*
1031-
* This is a temporary optimization hack. By asserting the size here,
1032-
* we let the compiler know that it's always going to be 20, which lets
1033-
* it turn this fixed-size memcmp into a few inline instructions.
1034-
*
1035-
* This will need to be extended or ripped out when we learn about
1036-
* hashes of different sizes.
1041+
* Teach the compiler that there are only two possibilities of hash size
1042+
* here, so that it can optimize for this case as much as possible.
10371043
*/
1038-
if (the_hash_algo->rawsz != 20)
1039-
BUG("hash size not yet supported by hashcmp");
1040-
return memcmp(sha1, sha2, the_hash_algo->rawsz);
1044+
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
1045+
return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
1046+
return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
10411047
}
10421048

10431049
static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
@@ -1047,7 +1053,13 @@ static inline int oidcmp(const struct object_id *oid1, const struct object_id *o
10471053

10481054
static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
10491055
{
1050-
return !hashcmp(sha1, sha2);
1056+
/*
1057+
* We write this here instead of deferring to hashcmp so that the
1058+
* compiler can properly inline it and avoid calling memcmp.
1059+
*/
1060+
if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
1061+
return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
1062+
return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
10511063
}
10521064

10531065
static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
@@ -1365,20 +1377,23 @@ extern int get_oid_hex(const char *hex, struct object_id *sha1);
13651377
extern int hex_to_bytes(unsigned char *binary, const char *hex, size_t len);
13661378

13671379
/*
1368-
* Convert a binary sha1 to its hex equivalent. The `_r` variant is reentrant,
1380+
* Convert a binary hash to its hex equivalent. The `_r` variant is reentrant,
13691381
* and writes the NUL-terminated output to the buffer `out`, which must be at
1370-
* least `GIT_SHA1_HEXSZ + 1` bytes, and returns a pointer to out for
1382+
* least `GIT_MAX_HEXSZ + 1` bytes, and returns a pointer to out for
13711383
* convenience.
13721384
*
13731385
* The non-`_r` variant returns a static buffer, but uses a ring of 4
13741386
* buffers, making it safe to make multiple calls for a single statement, like:
13751387
*
13761388
* printf("%s -> %s", sha1_to_hex(one), sha1_to_hex(two));
13771389
*/
1378-
extern char *sha1_to_hex_r(char *out, const unsigned char *sha1);
1379-
extern char *oid_to_hex_r(char *out, const struct object_id *oid);
1380-
extern char *sha1_to_hex(const unsigned char *sha1); /* static buffer result! */
1381-
extern char *oid_to_hex(const struct object_id *oid); /* same static buffer as sha1_to_hex */
1390+
char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash, const struct git_hash_algo *);
1391+
char *sha1_to_hex_r(char *out, const unsigned char *sha1);
1392+
char *oid_to_hex_r(char *out, const struct object_id *oid);
1393+
char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *); /* static buffer result! */
1394+
char *sha1_to_hex(const unsigned char *sha1); /* same static buffer */
1395+
char *hash_to_hex(const unsigned char *hash); /* same static buffer */
1396+
char *oid_to_hex(const struct object_id *oid); /* same static buffer */
13821397

13831398
/*
13841399
* Parse a 40-character hexadecimal object ID starting from hex, updating the

commit-graph.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,11 @@
2323
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
2424
#define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
2525

26-
#define GRAPH_DATA_WIDTH 36
26+
#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
2727

2828
#define GRAPH_VERSION_1 0x1
2929
#define GRAPH_VERSION GRAPH_VERSION_1
3030

31-
#define GRAPH_OID_VERSION_SHA1 1
32-
#define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
33-
#define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
34-
#define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
35-
3631
#define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
3732
#define GRAPH_EDGE_LAST_MASK 0x7fffffff
3833
#define GRAPH_PARENT_NONE 0x70000000
@@ -43,13 +38,18 @@
4338
#define GRAPH_FANOUT_SIZE (4 * 256)
4439
#define GRAPH_CHUNKLOOKUP_WIDTH 12
4540
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
46-
+ GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
41+
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
4742

4843
char *get_commit_graph_filename(const char *obj_dir)
4944
{
5045
return xstrfmt("%s/info/commit-graph", obj_dir);
5146
}
5247

48+
static uint8_t oid_version(void)
49+
{
50+
return 1;
51+
}
52+
5353
static struct commit_graph *alloc_commit_graph(void)
5454
{
5555
struct commit_graph *g = xcalloc(1, sizeof(*g));
@@ -124,15 +124,15 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
124124
}
125125

126126
hash_version = *(unsigned char*)(data + 5);
127-
if (hash_version != GRAPH_OID_VERSION) {
127+
if (hash_version != oid_version()) {
128128
error(_("hash version %X does not match version %X"),
129-
hash_version, GRAPH_OID_VERSION);
129+
hash_version, oid_version());
130130
goto cleanup_fail;
131131
}
132132

133133
graph = alloc_commit_graph();
134134

135-
graph->hash_len = GRAPH_OID_LEN;
135+
graph->hash_len = the_hash_algo->rawsz;
136136
graph->num_chunks = *(unsigned char*)(data + 6);
137137
graph->graph_fd = fd;
138138
graph->data = graph_map;
@@ -148,7 +148,7 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
148148

149149
chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
150150

151-
if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
151+
if (chunk_offset > graph_size - the_hash_algo->rawsz) {
152152
error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
153153
(uint32_t)chunk_offset);
154154
goto cleanup_fail;
@@ -773,6 +773,7 @@ void write_commit_graph(const char *obj_dir,
773773
int num_extra_edges;
774774
struct commit_list *parent;
775775
struct progress *progress = NULL;
776+
const unsigned hashsz = the_hash_algo->rawsz;
776777

777778
if (!commit_graph_compatible(the_repository))
778779
return;
@@ -918,7 +919,7 @@ void write_commit_graph(const char *obj_dir,
918919
hashwrite_be32(f, GRAPH_SIGNATURE);
919920

920921
hashwrite_u8(f, GRAPH_VERSION);
921-
hashwrite_u8(f, GRAPH_OID_VERSION);
922+
hashwrite_u8(f, oid_version());
922923
hashwrite_u8(f, num_chunks);
923924
hashwrite_u8(f, 0); /* unused padding byte */
924925

@@ -933,8 +934,8 @@ void write_commit_graph(const char *obj_dir,
933934

934935
chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
935936
chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
936-
chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.nr;
937-
chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.nr;
937+
chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr;
938+
chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr;
938939
chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
939940

940941
for (i = 0; i <= num_chunks; i++) {
@@ -947,8 +948,8 @@ void write_commit_graph(const char *obj_dir,
947948
}
948949

949950
write_graph_chunk_fanout(f, commits.list, commits.nr);
950-
write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
951-
write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
951+
write_graph_chunk_oids(f, hashsz, commits.list, commits.nr);
952+
write_graph_chunk_data(f, hashsz, commits.list, commits.nr);
952953
write_graph_chunk_large_edges(f, commits.list, commits.nr);
953954

954955
close_commit_graph(the_repository);

hash.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515
#include "block-sha1/sha1.h"
1616
#endif
1717

18+
#if defined(SHA256_GCRYPT)
19+
#include "sha256/gcrypt.h"
20+
#elif defined(SHA256_OPENSSL)
21+
#include <openssl/sha.h>
22+
#else
23+
#include "sha256/block/sha256.h"
24+
#endif
25+
1826
#ifndef platform_SHA_CTX
1927
/*
2028
* platform's underlying implementation of SHA-1; could be OpenSSL,
@@ -34,6 +42,18 @@
3442
#define git_SHA1_Update platform_SHA1_Update
3543
#define git_SHA1_Final platform_SHA1_Final
3644

45+
#ifndef platform_SHA256_CTX
46+
#define platform_SHA256_CTX SHA256_CTX
47+
#define platform_SHA256_Init SHA256_Init
48+
#define platform_SHA256_Update SHA256_Update
49+
#define platform_SHA256_Final SHA256_Final
50+
#endif
51+
52+
#define git_SHA256_CTX platform_SHA256_CTX
53+
#define git_SHA256_Init platform_SHA256_Init
54+
#define git_SHA256_Update platform_SHA256_Update
55+
#define git_SHA256_Final platform_SHA256_Final
56+
3757
#ifdef SHA1_MAX_BLOCK_SIZE
3858
#include "compat/sha1-chunked.h"
3959
#undef git_SHA1_Update
@@ -52,12 +72,15 @@
5272
#define GIT_HASH_UNKNOWN 0
5373
/* SHA-1 */
5474
#define GIT_HASH_SHA1 1
75+
/* SHA-256 */
76+
#define GIT_HASH_SHA256 2
5577
/* Number of algorithms supported (including unknown). */
56-
#define GIT_HASH_NALGOS (GIT_HASH_SHA1 + 1)
78+
#define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
5779

5880
/* A suitably aligned type for stack allocations of hash contexts. */
5981
union git_hash_ctx {
6082
git_SHA_CTX sha1;
83+
git_SHA256_CTX sha256;
6184
};
6285
typedef union git_hash_ctx git_hash_ctx;
6386

@@ -81,6 +104,9 @@ struct git_hash_algo {
81104
/* The length of the hash in hex characters. */
82105
size_t hexsz;
83106

107+
/* The block size of the hash. */
108+
size_t blksz;
109+
84110
/* The hash initialization function. */
85111
git_hash_init_fn init_fn;
86112

@@ -98,4 +124,17 @@ struct git_hash_algo {
98124
};
99125
extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS];
100126

127+
/*
128+
* Return a GIT_HASH_* constant based on the name. Returns GIT_HASH_UNKNOWN if
129+
* the name doesn't match a known algorithm.
130+
*/
131+
int hash_algo_by_name(const char *name);
132+
/* Identical, except based on the format ID. */
133+
int hash_algo_by_id(uint32_t format_id);
134+
/* Identical, except for a pointer to struct git_hash_algo. */
135+
static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
136+
{
137+
return p - hash_algos;
138+
}
139+
101140
#endif

hex.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,15 @@ int parse_oid_hex(const char *hex, struct object_id *oid, const char **end)
7373
return ret;
7474
}
7575

76-
char *sha1_to_hex_r(char *buffer, const unsigned char *sha1)
76+
char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash,
77+
const struct git_hash_algo *algop)
7778
{
7879
static const char hex[] = "0123456789abcdef";
7980
char *buf = buffer;
8081
int i;
8182

82-
for (i = 0; i < the_hash_algo->rawsz; i++) {
83-
unsigned int val = *sha1++;
83+
for (i = 0; i < algop->rawsz; i++) {
84+
unsigned int val = *hash++;
8485
*buf++ = hex[val >> 4];
8586
*buf++ = hex[val & 0xf];
8687
}
@@ -89,20 +90,35 @@ char *sha1_to_hex_r(char *buffer, const unsigned char *sha1)
8990
return buffer;
9091
}
9192

93+
char *sha1_to_hex_r(char *buffer, const unsigned char *sha1)
94+
{
95+
return hash_to_hex_algop_r(buffer, sha1, &hash_algos[GIT_HASH_SHA1]);
96+
}
97+
9298
char *oid_to_hex_r(char *buffer, const struct object_id *oid)
9399
{
94-
return sha1_to_hex_r(buffer, oid->hash);
100+
return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo);
95101
}
96102

97-
char *sha1_to_hex(const unsigned char *sha1)
103+
char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
98104
{
99105
static int bufno;
100106
static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
101107
bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
102-
return sha1_to_hex_r(hexbuffer[bufno], sha1);
108+
return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
109+
}
110+
111+
char *sha1_to_hex(const unsigned char *sha1)
112+
{
113+
return hash_to_hex_algop(sha1, &hash_algos[GIT_HASH_SHA1]);
114+
}
115+
116+
char *hash_to_hex(const unsigned char *hash)
117+
{
118+
return hash_to_hex_algop(hash, the_hash_algo);
103119
}
104120

105121
char *oid_to_hex(const struct object_id *oid)
106122
{
107-
return sha1_to_hex(oid->hash);
123+
return hash_to_hex_algop(oid->hash, the_hash_algo);
108124
}

0 commit comments

Comments
 (0)