-
Notifications
You must be signed in to change notification settings - Fork 66
Fix index corruption on reindex #170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
cfb761b
aee231c
db23e9e
4f1f412
e89c18e
9df92fc
cf3713e
e9d6ce2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| [package] | ||
| name = "parity-db" | ||
| version = "0.4.2" | ||
| version = "0.4.3" | ||
| authors = ["Parity Technologies <[email protected]>"] | ||
| edition = "2021" | ||
| license = "MIT OR Apache-2.0" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -359,8 +359,10 @@ impl HashColumn { | |
| if let Some(table) = IndexTable::open_existing(path, id)? { | ||
| if top.is_none() { | ||
| stats = table.load_stats()?; | ||
| log::trace!(target: "parity-db", "Opened main index {}", table.id); | ||
| top = Some(table); | ||
| } else { | ||
| log::trace!(target: "parity-db", "Opened stale index {}", table.id); | ||
| reindexing.push_front(table); | ||
| } | ||
| } | ||
|
|
@@ -409,24 +411,25 @@ impl HashColumn { | |
|
|
||
| fn write_reindex_plan_locked<'a, 'b>( | ||
| &self, | ||
| tables: RwLockUpgradableReadGuard<'a, Tables>, | ||
| reindex: RwLockUpgradableReadGuard<'b, Reindex>, | ||
| mut tables: RwLockUpgradableReadGuard<'a, Tables>, | ||
| mut reindex: RwLockUpgradableReadGuard<'b, Reindex>, | ||
| key: &Key, | ||
| address: Address, | ||
| log: &mut LogWriter, | ||
| ) -> Result<PlanOutcome> { | ||
| if Self::search_index(key, &tables.index, &tables, log)?.is_some() { | ||
| if Self::contains_partial_key_with_address(key, address, &tables.index, log)? { | ||
| log::trace!(target: "parity-db", "{}: Skipped reindex entry {} when reindexing", tables.index.id, hex(key)); | ||
| return Ok(PlanOutcome::Skipped) | ||
| } | ||
| match tables.index.write_insert_plan(key, address, None, log)? { | ||
| PlanOutcome::NeedReindex => { | ||
| log::debug!(target: "parity-db", "{}: Index chunk full {} when reindexing", tables.index.id, hex(key)); | ||
| let (tables, reindex) = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| self.write_reindex_plan_locked(tables, reindex, key, address, log)?; | ||
| Ok(PlanOutcome::NeedReindex) | ||
| }, | ||
| _ => Ok(PlanOutcome::Written), | ||
| let mut outcome = PlanOutcome::Written; | ||
| while let PlanOutcome::NeedReindex = | ||
| tables.index.write_insert_plan(key, address, None, log)? | ||
| { | ||
| log::info!(target: "parity-db", "{}: Index chunk full {} when reindexing", tables.index.id, hex(key)); | ||
| (tables, reindex) = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| outcome = PlanOutcome::NeedReindex; | ||
| } | ||
| Ok(outcome) | ||
| } | ||
|
|
||
| fn search_index<'a>( | ||
|
|
@@ -455,6 +458,25 @@ impl HashColumn { | |
| Ok(None) | ||
| } | ||
|
|
||
| fn contains_partial_key_with_address<'a>( | ||
| key: &Key, | ||
| address: Address, | ||
| index: &'a IndexTable, | ||
| log: &LogWriter, | ||
| ) -> Result<bool> { | ||
| let (mut existing_entry, mut sub_index) = index.get(key, 0, log)?; | ||
| while !existing_entry.is_empty() { | ||
| let existing_address = existing_entry.address(index.id.index_bits()); | ||
| if existing_address == address { | ||
| return Ok(true) | ||
| } | ||
| let (next_entry, next_index) = index.get(key, sub_index + 1, log)?; | ||
| existing_entry = next_entry; | ||
| sub_index = next_index; | ||
| } | ||
| Ok(false) | ||
| } | ||
|
|
||
| fn search_all_indexes<'a>( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes me think |
||
| key: &Key, | ||
| tables: &'a Tables, | ||
|
|
@@ -548,8 +570,8 @@ impl HashColumn { | |
|
|
||
| fn write_plan_new<'a, 'b>( | ||
| &self, | ||
| tables: RwLockUpgradableReadGuard<'a, Tables>, | ||
| reindex: RwLockUpgradableReadGuard<'b, Reindex>, | ||
| mut tables: RwLockUpgradableReadGuard<'a, Tables>, | ||
| mut reindex: RwLockUpgradableReadGuard<'b, Reindex>, | ||
| key: &Key, | ||
| value: &[u8], | ||
| log: &mut LogWriter, | ||
|
|
@@ -567,15 +589,15 @@ impl HashColumn { | |
| log, | ||
| stats, | ||
| )?; | ||
| match tables.index.write_insert_plan(key, address, None, log)? { | ||
| PlanOutcome::NeedReindex => { | ||
| log::debug!(target: "parity-db", "{}: Index chunk full {}", tables.index.id, hex(key)); | ||
| let (tables, reindex) = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| let (_, t, r) = self.write_plan_new(tables, reindex, key, value, log)?; | ||
| Ok((PlanOutcome::NeedReindex, t, r)) | ||
| }, | ||
| _ => Ok((PlanOutcome::Written, tables, reindex)), | ||
| let mut outcome = PlanOutcome::Written; | ||
| while let PlanOutcome::NeedReindex = | ||
| tables.index.write_insert_plan(key, address, None, log)? | ||
| { | ||
| log::debug!(target: "parity-db", "{}: Index chunk full {}", tables.index.id, hex(key)); | ||
| (tables, reindex) = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| outcome = PlanOutcome::NeedReindex; | ||
| } | ||
| Ok((outcome, tables, reindex)) | ||
| } | ||
|
|
||
| pub fn enact_plan(&self, action: LogAction, log: &mut LogReader) -> Result<()> { | ||
|
|
@@ -620,14 +642,20 @@ impl HashColumn { | |
| } else if let Some(table) = reindex.queue.iter().find(|r| r.id == record.table) { | ||
| table.validate_plan(record.index, log)?; | ||
| } else { | ||
| if record.table.index_bits() < tables.index.id.index_bits() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember deriving ord the other day when looking at this, just realize it may have been plain wrong (logic to ordering between different index may just be bad). |
||
| // Insertion into a previously dropped index. | ||
| log::warn!( target: "parity-db", "Index {} is too old. Current is {}", record.table, tables.index.id); | ||
| return Err(Error::Corruption("Unexpected log index id".to_string())) | ||
| } | ||
| // Re-launch previously started reindex | ||
| // TODO: add explicit log records for reindexing events. | ||
| log::warn!( | ||
| target: "parity-db", | ||
| "Missing table {}, starting reindex", | ||
| record.table, | ||
| ); | ||
| let _lock = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| let lock = Self::trigger_reindex(tables, reindex, self.path.as_path()); | ||
| std::mem::drop(lock); | ||
| return self.validate_plan(LogAction::InsertIndex(record), log) | ||
| } | ||
| }, | ||
|
|
@@ -723,7 +751,7 @@ impl HashColumn { | |
| }); | ||
| f(state).unwrap_or(false) | ||
| })?; | ||
| log::debug!( target: "parity-db", "{}: Done Iterating table {}", source.id, table.id); | ||
| log::debug!( target: "parity-db", "{}: Done iterating table {}", source.id, table.id); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1086,4 +1114,12 @@ impl Column { | |
| Column::Tree(_column) => Ok(()), | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| pub fn index_bits(&self) -> Option<u8> { | ||
| match self { | ||
| Column::Hash(column) => Some(column.tables.read().index.id.index_bits()), | ||
| Column::Tree(_column) => None, | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -461,6 +461,7 @@ impl DbInner { | |
| } | ||
|
|
||
| fn start_reindex(&self, record_id: u64) { | ||
| log::trace!(target: "parity-db", "Scheduled reindex at record {}", record_id); | ||
| self.next_reindex.store(record_id, Ordering::SeqCst); | ||
| } | ||
|
|
||
|
|
@@ -528,7 +529,7 @@ impl DbInner { | |
| if let Some(mut reader) = reader { | ||
| log::debug!( | ||
| target: "parity-db", | ||
| "Enacting log {}", | ||
| "Enacting log record {}", | ||
| reader.record_id(), | ||
| ); | ||
| if validation_mode { | ||
|
|
@@ -907,18 +908,23 @@ impl Db { | |
| }) | ||
| } | ||
|
|
||
| /// Get a value in a specified column by key. Returns `None` if the key does not exist. | ||
| pub fn get(&self, col: ColId, key: &[u8]) -> Result<Option<Value>> { | ||
| self.inner.get(col, key) | ||
| } | ||
|
|
||
| /// Get value size by key. Returns `None` if the key does not exist. | ||
| pub fn get_size(&self, col: ColId, key: &[u8]) -> Result<Option<u32>> { | ||
| self.inner.get_size(col, key) | ||
| } | ||
|
|
||
| /// Iterate over all ordered key-value pairs. Only supported for columns configured with | ||
| /// `btree_indexed`. | ||
| pub fn iter(&self, col: ColId) -> Result<BTreeIterator> { | ||
| self.inner.btree_iter(col) | ||
| } | ||
|
|
||
| /// Commit a set of changes to the database. | ||
| pub fn commit<I, K>(&self, tx: I) -> Result<()> | ||
| where | ||
| I: IntoIterator<Item = (ColId, K, Option<Value>)>, | ||
|
|
@@ -927,6 +933,7 @@ impl Db { | |
| self.inner.commit(tx) | ||
| } | ||
|
|
||
| /// Commit a set of changes to the database. | ||
| pub fn commit_changes<I>(&self, tx: I) -> Result<()> | ||
| where | ||
| I: IntoIterator<Item = (ColId, Operation<Vec<u8>, Vec<u8>>)>, | ||
|
|
@@ -938,15 +945,14 @@ impl Db { | |
| self.inner.commit_raw(commit) | ||
| } | ||
|
|
||
| /// Returns the number of columns in the database. | ||
| pub fn num_columns(&self) -> u8 { | ||
| self.inner.columns.len() as u8 | ||
| } | ||
|
|
||
| pub(crate) fn iter_column_while( | ||
| &self, | ||
| c: ColId, | ||
| f: impl FnMut(IterState) -> bool, | ||
| ) -> Result<()> { | ||
| /// Iterate a column and call a function for each value. Iteration order is unspecified. Note | ||
| /// that for hash columns the key is the hash of the original key. | ||
|
||
| pub fn iter_column_while(&self, c: ColId, f: impl FnMut(IterState) -> bool) -> Result<()> { | ||
| self.inner.iter_column_while(c, f) | ||
| } | ||
|
|
||
|
|
@@ -1018,6 +1024,7 @@ impl Db { | |
| self.inner.clear_stats(column) | ||
| } | ||
|
|
||
| /// Print database contents in text form to stderr. | ||
| pub fn dump(&self, check_param: check::CheckOptions) -> Result<()> { | ||
| if let Some(col) = check_param.column { | ||
| self.inner.columns[col as usize].dump(&self.inner.log, &check_param, col)?; | ||
|
|
@@ -1029,6 +1036,7 @@ impl Db { | |
| Ok(()) | ||
| } | ||
|
|
||
| /// Get database statistics. | ||
| pub fn stats(&self) -> StatSummary { | ||
| self.inner.stats() | ||
| } | ||
|
|
@@ -2424,4 +2432,61 @@ mod tests { | |
| assert!(db.get(0, &[0]).unwrap().is_some()); | ||
| } | ||
| } | ||
|
|
||
| #[cfg(feature = "instrumentation")] | ||
| #[test] | ||
| fn test_continue_reindex() { | ||
| let _ = env_logger::try_init(); | ||
| let tmp = tempdir().unwrap(); | ||
| let mut options = Options::with_columns(tmp.path(), 1); | ||
| options.columns[0].preimage = true; | ||
| options.columns[0].uniform = true; | ||
| options.always_flush = true; | ||
| options.with_background_thread = false; | ||
| options.salt = Some(Default::default()); | ||
|
|
||
| { | ||
| // Force a reindex by committing more than 64 values with the same 16 bit prefix | ||
| let db = Db::open_or_create(&options).unwrap(); | ||
| let commit: Vec<_> = (0..65u32) | ||
| .map(|index| { | ||
| let mut key = [0u8; 32]; | ||
| key[2] = (index as u8) << 1; | ||
| (0, key.to_vec(), Some(vec![index as u8])) | ||
| }) | ||
| .collect(); | ||
| db.commit(commit).unwrap(); | ||
|
|
||
| db.process_commits().unwrap(); | ||
| db.flush_logs().unwrap(); | ||
| db.enact_logs().unwrap(); | ||
| // i16 now contains 64 values and i17 contains a single value that did not fit | ||
|
|
||
| // Simulate interrupted reindex by processing it first and then restoring the old index | ||
| // file. Make a copy of the index file first. | ||
| std::fs::copy(tmp.path().join("index_00_16"), tmp.path().join("index_00_16.bak")) | ||
| .unwrap(); | ||
| db.process_reindex().unwrap(); | ||
| db.flush_logs().unwrap(); | ||
| db.enact_logs().unwrap(); | ||
| db.clean_logs().unwrap(); | ||
| std::fs::rename(tmp.path().join("index_00_16.bak"), tmp.path().join("index_00_16")) | ||
| .unwrap(); | ||
| } | ||
|
|
||
| // Reopen the database which should load the reindex. | ||
| { | ||
| let db = Db::open(&options).unwrap(); | ||
| db.process_reindex().unwrap(); | ||
| let mut entries = 0; | ||
| db.iter_column_while(0, |_| { | ||
| entries += 1; | ||
| true | ||
| }) | ||
| .unwrap(); | ||
|
|
||
| assert_eq!(entries, 65); | ||
| assert_eq!(db.inner.columns[0].index_bits(), Some(17)); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to be sure.
Looping is just in case there is some concurrent reindexing leading to multiple reindex triggers (this could happen, but is probably rare in practice).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, it should never happen realistically. Insertion into an empty index should always be successful. This code had a small bug: an entry in the value table was created twice if a reindex was triggered.