From 480cdd9f815d1d78caf98f22ed94ef4f58e46d0c Mon Sep 17 00:00:00 2001
From: Christopher Haster
Date: Sat, 14 Nov 2020 09:32:34 -0600
Subject: [PATCH 1/5] Fixed incorrect modulus in lfs_alloc_reset

Taking the offset modulo block_size was clearly a typo; it should be modulo
block_count. Interestingly, the later modulo operations during alloc
calculations prevent this from breaking anything, but as gtaska notes it
could still skew the wear-leveling distribution.

Found by guiserle and gtaska
---
 lfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lfs.c b/lfs.c
index eb832fa0..7f99ccb9 100644
--- a/lfs.c
+++ b/lfs.c
@@ -459,7 +459,7 @@ static void lfs_alloc_ack(lfs_t *lfs) {
 // Invalidate the lookahead buffer. This is done during mounting and
 // failed traversals
 static void lfs_alloc_reset(lfs_t *lfs) {
-    lfs->free.off = lfs->seed % lfs->cfg->block_size;
+    lfs->free.off = lfs->seed % lfs->cfg->block_count;
     lfs->free.size = 0;
     lfs->free.i = 0;
     lfs_alloc_ack(lfs);

From 1ae4b36f2a717cd52601d10de6337f97bf4bcf50 Mon Sep 17 00:00:00 2001
From: Christopher Haster
Date: Fri, 20 Nov 2020 00:18:13 -0600
Subject: [PATCH 2/5] Removed unnecessary randomization of offsets in lfs_alloc_reset

On first read, randomizing the allocator's offset may seem appropriate for
lfs_alloc_reset. However, it ends up using the filesystem-fed pseudorandom
seed in situations it wasn't designed for.

As noted by gtaska, the combination of using xors for feeding the seed and
multiple traversals of the same CRCs can cause the seed to flip to zero with
concerning frequency.

Removed the randomization from lfs_alloc_reset, leaving it only in lfs_mount.

Found by gtaska
---
 lfs.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/lfs.c b/lfs.c
index 7f99ccb9..cc8ca381 100644
--- a/lfs.c
+++ b/lfs.c
@@ -452,14 +452,16 @@ static int lfs_alloc_lookahead(void *p, lfs_block_t block) {
     return 0;
 }
 
+// indicate allocated blocks have been committed into the filesystem, this
+// is to prevent blocks from being garbage collected in the middle of a
+// commit operation
 static void lfs_alloc_ack(lfs_t *lfs) {
     lfs->free.ack = lfs->cfg->block_count;
 }
 
-// Invalidate the lookahead buffer. This is done during mounting and
-// failed traversals
-static void lfs_alloc_reset(lfs_t *lfs) {
-    lfs->free.off = lfs->seed % lfs->cfg->block_count;
+// drop the lookahead buffer, this is done during mounting and failed
+// traversals in order to avoid invalid lookahead state
+static void lfs_alloc_drop(lfs_t *lfs) {
     lfs->free.size = 0;
     lfs->free.i = 0;
     lfs_alloc_ack(lfs);
@@ -505,7 +507,7 @@ static int lfs_alloc(lfs_t *lfs, lfs_block_t *block) {
         memset(lfs->free.buffer, 0, lfs->cfg->lookahead_size);
         int err = lfs_fs_traverseraw(lfs, lfs_alloc_lookahead, lfs, true);
         if (err) {
-            lfs_alloc_reset(lfs);
+            lfs_alloc_drop(lfs);
             return err;
         }
     }
@@ -3797,8 +3799,10 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) {
     lfs->gstate.tag += !lfs_tag_isvalid(lfs->gstate.tag);
     lfs->gdisk = lfs->gstate;
 
-    // setup free lookahead
-    lfs_alloc_reset(lfs);
+    // setup free lookahead, to distribute allocations uniformly across
+    // boots, we start the allocator at a random location
+    lfs->free.off = lfs->seed % lfs->cfg->block_count;
+    lfs_alloc_drop(lfs);
 
     LFS_TRACE("lfs_mount -> %d", 0);
     return 0;

From f215027fd420468f793743ef8729701f25554b34 Mon Sep 17 00:00:00 2001
From: Christopher Haster
Date: Fri, 20 Nov 2020 00:38:41 -0600
Subject: [PATCH 3/5] Switched to CRC as seed collection function instead of xor

As noted by gtaska, we are sitting on a better hash-combining function than
xor: CRC. Previous issues with xor were solvable, but relying on xor for
this isn't really worth the risk when we already have a CRC function readily
available.

To quote a study found by gtaska:
https://michiel.buddingh.eu/distribution-of-hash-values

> CRC32 seems to score really well, but its graph is skewed by the results
> of Dataset 5 (binary numbers), which may or may not be too synthetic to
> be considered a fair benchmark. But even if you substract the results
> from that test, it does not fare significantly worse than other,
> cryptographic hash functions.

(A short standalone sketch contrasting the two collection functions follows
this patch's diff.)
---
 lfs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lfs.c b/lfs.c
index cc8ca381..45210640 100644
--- a/lfs.c
+++ b/lfs.c
@@ -872,8 +872,10 @@ static lfs_stag_t lfs_dir_fetchmatch(lfs_t *lfs,
                 ptag ^= (lfs_tag_t)(lfs_tag_chunk(tag) & 1U) << 31;
 
                 // toss our crc into the filesystem seed for
-                // pseudorandom numbers
-                lfs->seed ^= crc;
+                // pseudorandom numbers, note we use another crc here
+                // as a collection function because it is sufficiently
+                // random and convenient
+                lfs->seed = lfs_crc(lfs->seed, &crc, sizeof(crc));
 
                 // update with what's found so far
                 besttag = tempbesttag;
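To make the xor-vs-CRC concern above concrete, here is a minimal standalone
sketch, not littlefs code: it feeds the same two CRCs into the seed twice,
the way repeated traversals of the same metadata would. The crc32() helper
is a generic reflected CRC-32 written for the sketch and only mimics the
general behavior of lfs_crc; the CRC values themselves are made up.

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    // generic reflected CRC-32 (polynomial 0x04c11db7), illustrative only
    static uint32_t crc32(uint32_t crc, const void *buffer, size_t size) {
        const uint8_t *data = buffer;
        for (size_t i = 0; i < size; i++) {
            crc ^= data[i];
            for (int j = 0; j < 8; j++) {
                crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
            }
        }
        return crc;
    }

    int main(void) {
        // pretend these are commit CRCs collected from two metadata blocks
        uint32_t crcs[2] = {0x12345678, 0x9abcdef0};

        // xor collection: a second traversal of the same blocks cancels
        // the first one out, leaving the seed exactly where it started
        uint32_t xor_seed = 0;
        for (int pass = 0; pass < 2; pass++) {
            for (int i = 0; i < 2; i++) {
                xor_seed ^= crcs[i];
            }
        }

        // crc collection: repeated contributions keep getting mixed in
        // instead of cancelling out
        uint32_t crc_seed = 0;
        for (int pass = 0; pass < 2; pass++) {
            for (int i = 0; i < 2; i++) {
                crc_seed = crc32(crc_seed, &crcs[i], sizeof(crcs[i]));
            }
        }

        printf("xor seed after two traversals: %08x\n", (unsigned)xor_seed);
        printf("crc seed after two traversals: %08x\n", (unsigned)crc_seed);
        return 0;
    }

With xor, any even number of identical contributions is invisible, which is
what lets the seed collapse back to zero; folding through a CRC makes the
result depend on the count and order of contributions as well.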
From 0ea2871e2472c779e686f0af76e45927cb20b72d Mon Sep 17 00:00:00 2001
From: Christopher Haster
Date: Sun, 22 Nov 2020 15:05:22 -0600
Subject: [PATCH 4/5] Fixed typo in scripts/readtree.py

Not sure how this went unnoticed; I guess this is the first bug that needed
in-depth inspection after a last-minute argument cleanup in the debug
scripts.
---
 scripts/readtree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/readtree.py b/scripts/readtree.py
index 36135ab4..be514e3b 100755
--- a/scripts/readtree.py
+++ b/scripts/readtree.py
@@ -106,7 +106,7 @@ def main(args):
     struct.unpack('

Date: Sun, 22 Nov 2020 15:07:16 -0600
Subject: [PATCH 5/5] Fixed single unchecked bit during commit verification

This bug was exposed by the bad-block tests due to changes to block
allocation, but could have been hit before these changes.

In flash, when blocks fail, they don't fail in a predictable manner. To
account for this, the bad-block tests check a number of failure behaviors.
The interesting one here is "LFS_TESTBD_BADBLOCK_ERASENOOP", in which bad
blocks cannot be erased or programmed, and are stuck with the data written
at the time the blocks go bad.

This is actually a pretty realistic failure behavior, since flash needs a
large voltage to force electrons into or out of the floating gates. Though
realistically, such a failure would likely corrupt the data a bit, not
leave the underlying data perfectly intact.

LFS_TESTBD_BADBLOCK_ERASENOOP is rather interesting to test for because it
means bad blocks can end up with perfectly valid CRCs after a failed write,
confusing littlefs.

---

In this case, we had the perfect series of operations such that a test was
repeatedly writing the same sequence of metadata commits to the same block,
which eventually went bad, leaving the block stuck with metadata that
occurs later in the sequence.

What this means is that after the first commit, the metadata block
contained both the first and second commits, even though the loop in the
test hadn't reached that point yet.

      expected               actual
    .----------.           .----------.
    | commit 1 |           | commit 1 |
    | crc 1    |           | crc 1    |
    |          |           | commit 2 | <-- (from previous iteration)
    |          |           | crc 2    |
    '----------'           '----------'

To protect against this, littlefs normally compares the written CRC against
the expected CRC, but because this was the exact same data it was about to
write, the CRCs ended up the same.

Ah! But doesn't littlefs also encode the state of the next page, to keep
track of whether or not the next page has been erased? Wouldn't that change
between iterations? It does! In a single bit in the CRC-tag. But thanks to
some incorrect logic attempting to avoid an extra condition in the loop for
writing out padding commits, the CRC that littlefs checked against was the
CRC taken immediately before the "is-next-page-erased" bit is included.

Changing the verification check to use the same CRC that is used to verify
commits on fetch solves this problem.
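As a minimal illustration of why that one bit is enough, here is a
standalone sketch, not littlefs code: the commit bytes themselves checksum
identically on both sides, so only a verification CRC that also covers the
tag carrying the erase-state bit can tell the two layouts apart. The
crc32() helper and the tag values are made up for the sketch; the real
littlefs tag encoding differs.

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    // generic reflected CRC-32 (polynomial 0x04c11db7), illustrative only
    static uint32_t crc32(uint32_t crc, const void *buffer, size_t size) {
        const uint8_t *data = buffer;
        for (size_t i = 0; i < size; i++) {
            crc ^= data[i];
            for (int j = 0; j < 8; j++) {
                crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
            }
        }
        return crc;
    }

    int main(void) {
        // the same first commit, byte for byte, in both the intended block
        // and the stale erase-noop block
        uint8_t commit[12] = "commit one!";

        // buggy check (simplified): crc taken before the erase-state bit
        // is folded into the crc tag -- identical on both sides
        uint32_t expected = crc32(0xffffffff, commit, sizeof(commit));
        uint32_t actual   = crc32(0xffffffff, commit, sizeof(commit));
        printf("without erase bit: %08x vs %08x (match, stale data missed)\n",
                (unsigned)expected, (unsigned)actual);

        // fetch-style check: also fold in the crc tag; the two tags differ
        // only in a (made-up) bit recording whether the next region is
        // erased, which is exactly what differs between the two layouts
        uint32_t tag_next_erased = 0x50000001; // hypothetical tag values
        uint32_t tag_next_used   = 0x50000000;
        expected = crc32(expected, &tag_next_erased, sizeof(tag_next_erased));
        actual   = crc32(actual, &tag_next_used, sizeof(tag_next_used));
        printf("with erase bit:    %08x vs %08x (mismatch, stale data caught)\n",
                (unsigned)expected, (unsigned)actual);
        return 0;
    }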
---
 lfs.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/lfs.c b/lfs.c
index 45210640..019db1f0 100644
--- a/lfs.c
+++ b/lfs.c
@@ -1265,12 +1265,13 @@ static int lfs_dir_commitattr(lfs_t *lfs, struct lfs_commit *commit,
 }
 
 static int lfs_dir_commitcrc(lfs_t *lfs, struct lfs_commit *commit) {
-    const lfs_off_t off1 = commit->off;
-    const uint32_t crc1 = commit->crc;
     // align to program units
-    const lfs_off_t end = lfs_alignup(off1 + 2*sizeof(uint32_t),
+    const lfs_off_t end = lfs_alignup(commit->off + 2*sizeof(uint32_t),
             lfs->cfg->prog_size);
 
+    lfs_off_t off1 = 0;
+    uint32_t crc1 = 0;
+
     // create crc tags to fill up remainder of commit, note that
     // padding is not crced, which lets fetches skip padding but
     // makes committing a bit more complicated
@@ -1306,6 +1307,12 @@ static int lfs_dir_commitcrc(lfs_t *lfs, struct lfs_commit *commit) {
             return err;
         }
 
+        // keep track of non-padding checksum to verify
+        if (off1 == 0) {
+            off1 = commit->off + sizeof(uint32_t);
+            crc1 = commit->crc;
+        }
+
         commit->off += sizeof(tag)+lfs_tag_size(tag);
         commit->ptag = tag ^ ((lfs_tag_t)reset << 31);
         commit->crc = 0xffffffff; // reset crc for next "commit"
@@ -1319,7 +1326,7 @@ static int lfs_dir_commitcrc(lfs_t *lfs, struct lfs_commit *commit) {
     // successful commit, check checksums to make sure
     lfs_off_t off = commit->begin;
-    lfs_off_t noff = off1 + sizeof(uint32_t);
+    lfs_off_t noff = off1;
     while (off < end) {
         uint32_t crc = 0xffffffff;
         for (lfs_off_t i = off; i < noff+sizeof(uint32_t); i++) {