Skip to content

Commit 6e134d7

Browse files
committed
Loro source of truth
1 parent e4a4abd commit 6e134d7

12 files changed

Lines changed: 582 additions & 65 deletions

File tree

AGENTS.md

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,28 @@ Atomic Server is a graph database with real-time sync, built on **Loro CRDT** fo
8484
1. Commit arrives at `/commit`
8585
2. `apply_changes()` imports `loroUpdate` into resource's LoroDoc
8686
3. `import_update_with_diff()` computes add/remove atoms for search indexing
87-
4. `loro_value_to_atomic_value()` materializes Loro values to Atomic `Value` types
87+
4. `loro_value_to_atomic_value_tagged()` materializes Loro values to Atomic `Value` types, using the `datatypes` map
8888
5. Loro snapshot stored alongside PropVals for future merges
8989

9090
### Loro value serialization in the Map
9191

92-
- Strings, numbers, booleans → stored directly
93-
- `ResourceArray` → JSON string `["url1", "url2"]`
94-
- `AtomicUrl` → plain string
95-
- `loro_value_to_atomic_value()` parses back: strings starting with `[` → ResourceArray, `{` → NestedResource
92+
The LoroDoc has two sibling root maps:
93+
94+
- **`properties`** — `property URL → value`. Loro primitives stored directly
95+
(strings, numbers, booleans); arrays as native `LoroList`s; objects as JSON strings.
96+
- **`datatypes`** — sparse `property URL → tag`, recording the datatype only
97+
where a bare primitive is ambiguous in a load-bearing way. Tags: `atomicUrl`,
98+
`resourceArray`, `jsonArray`, `json`, `resource`. Scalars and plain/cosmetic
99+
strings carry no entry. Written by `set_property` (Rust) and
100+
`Resource.writeDatatypeTags` at sign time (TS).
101+
102+
Materialization prefers the tag: `loro_value_to_atomic_value_tagged()` recovers
103+
the exact `Value` variant from it. Untagged values fall back to the
104+
`loro_value_to_atomic_value()` heuristic (URL-shaped strings → `AtomicUrl`,
105+
`{...}` → `NestedResource`), kept for legacy / not-yet-tagged docs. Cosmetic
106+
datatypes (`markdown`/`slug`/`date`/`uri`, `timestamp`) are deliberately not
107+
tagged — they collapse to `string`/`integer`; the Property's `datatype` stays
108+
authoritative. See `planning/loro-source-of-truth.md`.
96109

97110
### Critical: always build on existing state
98111

browser/lib/src/datatypes.test.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { describe, it } from 'vitest';
22

3-
import { Datatype, urls, validateDatatype } from './index.js';
3+
import { Datatype, datatypeTag, urls, validateDatatype } from './index.js';
44

55
describe('Datatypes', () => {
66
it('throws errors when datatypes dont match values', async ({ expect }) => {
@@ -65,3 +65,29 @@ describe('Datatypes', () => {
6565
expect(() => validateDatatype(int, Datatype.RESOURCEARRAY)).to.throw();
6666
});
6767
});
68+
69+
describe('datatypeTag', () => {
70+
it('tags load-bearing datatypes and collapses the rest', ({ expect }) => {
71+
// Load-bearing: references and arrays get a tag.
72+
expect(datatypeTag(Datatype.ATOMIC_URL, 'https://example.com/x')).toBe(
73+
'atomicUrl',
74+
);
75+
expect(datatypeTag(Datatype.RESOURCEARRAY, [])).toBe('resourceArray');
76+
expect(datatypeTag(Datatype.RESOURCEARRAY, ['https://example.com/x'])).toBe(
77+
'resourceArray',
78+
);
79+
expect(datatypeTag(Datatype.JSON, '{"a":1}')).toBe('json');
80+
81+
// A nested resource (object stored as a JSON string under an atomicURL
82+
// property) stays untagged — the server heuristic handles `{...}`.
83+
expect(datatypeTag(Datatype.ATOMIC_URL, '{"a":1}')).toBeUndefined();
84+
85+
// Cosmetic / scalar datatypes collapse — no tag.
86+
expect(datatypeTag(Datatype.STRING, 'hello')).toBeUndefined();
87+
expect(datatypeTag(Datatype.MARKDOWN, '# heading')).toBeUndefined();
88+
expect(datatypeTag(Datatype.SLUG, 'a-slug')).toBeUndefined();
89+
expect(datatypeTag(Datatype.DATE, '2026-05-21')).toBeUndefined();
90+
expect(datatypeTag(Datatype.INTEGER, 5)).toBeUndefined();
91+
expect(datatypeTag(Datatype.BOOLEAN, true)).toBeUndefined();
92+
});
93+
});

browser/lib/src/datatypes.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,37 @@ export const datatypeFromUrl = (url: string): Datatype => {
4242
return Datatype.UNKNOWN;
4343
};
4444

45+
/**
46+
* The sibling `datatypes` Loro-map tag for a property, mirroring the Rust
47+
* `datatype_tag` (`lib/src/loro.rs`). Lets the server materialize a value to
48+
* the exact `Value` variant instead of guessing from the primitive.
49+
*
50+
* Only the load-bearing reference / array distinctions are tagged; cosmetic
51+
* datatypes (string/markdown/slug/date/uri) and scalars collapse and carry no
52+
* tag. A nested resource (an object stored as a JSON string under an
53+
* `atomicURL` property) is left untagged for the server's heuristic — see
54+
* `planning/loro-source-of-truth.md`.
55+
*
56+
* `loroValue` is the value as stored in the Loro `properties` map.
57+
*/
58+
export const datatypeTag = (
59+
datatype: string,
60+
loroValue: unknown,
61+
): string | undefined => {
62+
switch (datatype) {
63+
case Datatype.ATOMIC_URL:
64+
return typeof loroValue === 'string' && !loroValue.startsWith('{')
65+
? 'atomicUrl'
66+
: undefined;
67+
case Datatype.RESOURCEARRAY:
68+
return 'resourceArray';
69+
case Datatype.JSON:
70+
return 'json';
71+
default:
72+
return undefined;
73+
}
74+
};
75+
4576
const slug_regex = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
4677
// https://stackoverflow.com/a/22061879/2502163
4778
const dateStringRegex = /^\d{4}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])$/;

browser/lib/src/resource.ts

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import {
1212
commitIdOf,
1313
commitToJsonADObject,
1414
} from './commit.js';
15-
import { validateDatatype } from './datatypes.js';
15+
import { validateDatatype, datatypeTag } from './datatypes.js';
1616
import { isUnauthorized } from './error.js';
1717
import { commits } from './ontologies/commits.js';
1818
import { core } from './ontologies/core.js';
@@ -537,6 +537,45 @@ export class Resource<C extends OptionalClass = any> {
537537
this._cache = nextCache;
538538
}
539539

540+
/**
541+
* Phase 1 (loro-source-of-truth): populate the sibling `datatypes` Loro map
542+
* so the server recovers reference / array `Value` variants exactly instead
543+
* of guessing. The map is sparse — only load-bearing datatypes get a tag;
544+
* see {@link datatypeTag}. Idempotent: re-signing rewrites nothing.
545+
*
546+
* Cache-only — never triggers a fetch. A property whose definition is not
547+
* already cached is left untagged; the server then falls back to its
548+
* materialization heuristic, exactly as before this map existed. Properties
549+
* edited via `set()` with validation are always cached by the time we sign.
550+
*/
551+
private writeDatatypeTags(): void {
552+
const doc = this._loroDoc;
553+
554+
if (!doc) {
555+
return;
556+
}
557+
558+
const props = doc.getMap('properties').toJSON() as Record<string, unknown>;
559+
const datatypesMap = doc.getMap('datatypes');
560+
561+
for (const [prop, loroValue] of Object.entries(props)) {
562+
const datatype = this.store?.resources
563+
.get(prop)
564+
?.get(core.properties.datatype)
565+
?.toString();
566+
567+
if (datatype === undefined) {
568+
continue;
569+
}
570+
571+
const tag = datatypeTag(datatype, loroValue);
572+
573+
if (tag !== undefined && datatypesMap.get(prop) !== tag) {
574+
datatypesMap.set(prop, tag);
575+
}
576+
}
577+
}
578+
540579
private resetLoroState(): void {
541580
this._loroDoc = undefined;
542581
this._loroMap = undefined;
@@ -1396,11 +1435,7 @@ export class Resource<C extends OptionalClass = any> {
13961435
list = map.setContainer(propUrl, new LoroList());
13971436
}
13981437

1399-
if (
1400-
item !== null &&
1401-
typeof item === 'object' &&
1402-
!Array.isArray(item)
1403-
) {
1438+
if (item !== null && typeof item === 'object' && !Array.isArray(item)) {
14041439
const itemMap = list.pushContainer(new LoroMap());
14051440
this.writeJsonToLoroMap(itemMap, item as JSONObject);
14061441
} else {
@@ -1438,10 +1473,7 @@ export class Resource<C extends OptionalClass = any> {
14381473
}
14391474
}
14401475

1441-
private writeJsonToLoroList(
1442-
list: LoroList,
1443-
arr: JSONValue[],
1444-
): void {
1476+
private writeJsonToLoroList(list: LoroList, arr: JSONValue[]): void {
14451477
const { LoroList, LoroMap } = LoroLoader.Loro;
14461478

14471479
for (const item of arr) {
@@ -1501,6 +1533,12 @@ export class Resource<C extends OptionalClass = any> {
15011533
this.rebuildCacheFromLoro();
15021534
this._cacheDirty = false;
15031535

1536+
// Phase 1 (loro-source-of-truth): stamp the sibling `datatypes` map so
1537+
// the server materializes references/arrays exactly. Runs here — after
1538+
// every property is in the doc, before the snapshot export below — so it
1539+
// covers props set via `set()` and via cache hydration alike.
1540+
this.writeDatatypeTags();
1541+
15041542
// Chain: use last locally-signed commit, or the server-known lastCommit.
15051543
if (this._lastLocalSignature) {
15061544
// Construct the full commit URL that the server will use. This ensures

lib/src/db.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,16 +1483,28 @@ impl Db {
14831483
Ok(())
14841484
}
14851485

1486-
/// Recursively removes a resource and its children from the database
1486+
/// Recursively removes a resource and its children from the database.
1487+
/// `removed` collects the `pure_id()` of every deleted subject so the
1488+
/// caller can tombstone them after the transaction is applied.
14871489
async fn recursive_remove(
14881490
&self,
14891491
subject: &Subject,
14901492
transaction: &mut Transaction,
1493+
removed: &mut Vec<String>,
14911494
) -> AtomicResult<()> {
1492-
let subject_str = subject.to_string();
1495+
// Key by `pure_id()` — that is how resources and Loro snapshots are
1496+
// stored (`add_resource_tx`, `apply_commit`). Looking up by the raw
1497+
// `to_string()` (which may carry `?drive=` params) would miss the
1498+
// row entirely for DID subjects with a drive hint.
1499+
let subject_str = subject.pure_id();
14931500
if let Ok(found) = self.get_propvals(&subject_str) {
14941501
let resource = Resource::from_propvals(found, subject.clone());
14951502
transaction.push(Operation::remove_resource(&subject_str));
1503+
// Remove the Loro snapshot in the same transaction. Without this
1504+
// the snapshot is orphaned in `Tree::LoroSnapshots` and leaks
1505+
// forever — only the WS/Iroh DESTROY path cleaned it before.
1506+
transaction.push(Operation::remove_loro_snapshot(&subject_str));
1507+
removed.push(subject_str.clone());
14961508
let mut children = resource.get_children(self).await?;
14971509
for child in children.iter_mut() {
14981510
// Notify subscribers so clients evict the cascade-deleted
@@ -1505,7 +1517,8 @@ impl Db {
15051517
source_id: None,
15061518
});
15071519
// Because the function is async we need to box it to use recursion.
1508-
Box::pin(self.recursive_remove(child.get_subject(), transaction)).await?;
1520+
Box::pin(self.recursive_remove(child.get_subject(), transaction, removed))
1521+
.await?;
15091522
}
15101523
for (prop, val) in resource.get_propvals() {
15111524
let remove_atom = crate::Atom::new(subject.clone(), prop.clone(), val.clone());
@@ -2319,8 +2332,15 @@ impl Storelike for Db {
23192332
#[instrument(skip_all)]
23202333
async fn remove_resource(&self, subject: &Subject) -> AtomicResult<()> {
23212334
let mut transaction = Transaction::new();
2322-
self.recursive_remove(subject, &mut transaction).await?;
2335+
let mut removed = Vec::new();
2336+
self.recursive_remove(subject, &mut transaction, &mut removed)
2337+
.await?;
23232338
self.apply_transaction(&mut transaction)?;
2339+
// Tombstone every removed subject so bulk sync (Iroh / WS `SYNC`)
2340+
// does not resurrect them from a peer that still holds a stale copy.
2341+
for s in &removed {
2342+
crate::sync::tombstones::record_tombstone(self, s);
2343+
}
23242344
// TODO: deletion sync — should create a signed destroy commit
23252345
// and push it through the normal commit pipeline, not a raw DESTROY frame.
23262346
Ok(())

lib/src/db/test.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,3 +1342,96 @@ async fn loro_non_property_container_survives_commit_roundtrip() {
13421342
(this is what the WS GET handler serves to a second viewer)",
13431343
);
13441344
}
1345+
1346+
/// A deleted resource must not leave its Loro snapshot orphaned in
1347+
/// `Tree::LoroSnapshots`, and the subject must be tombstoned so bulk sync
1348+
/// does not resurrect it. Regression test for Phase 0 of the
1349+
/// `loro-source-of-truth` plan.
1350+
#[tokio::test]
1351+
#[timeout(30000)]
1352+
async fn remove_resource_deletes_loro_snapshot() {
1353+
let store = Db::init_temp("orphan_snapshot").await.unwrap();
1354+
let drive = store.create_drive("test-drive").await.unwrap();
1355+
let did = store
1356+
.create_resource(
1357+
"https://atomicdata.dev/classes/Property",
1358+
&drive,
1359+
"age",
1360+
None,
1361+
)
1362+
.await
1363+
.unwrap();
1364+
let subject = Subject::from_raw(&did, store.get_base_domain().as_deref());
1365+
let pure_id = subject.pure_id();
1366+
1367+
assert!(
1368+
store
1369+
.kv
1370+
.get(Tree::LoroSnapshots, pure_id.as_bytes())
1371+
.unwrap()
1372+
.is_some(),
1373+
"a Loro snapshot should be persisted for a freshly created resource"
1374+
);
1375+
1376+
store.remove_resource(&subject).await.unwrap();
1377+
1378+
assert!(
1379+
store
1380+
.kv
1381+
.get(Tree::LoroSnapshots, pure_id.as_bytes())
1382+
.unwrap()
1383+
.is_none(),
1384+
"Loro snapshot was orphaned after remove_resource"
1385+
);
1386+
assert!(
1387+
crate::sync::tombstones::is_tombstoned(&store, &pure_id),
1388+
"removed subject should be tombstoned to prevent sync resurrection"
1389+
);
1390+
}
1391+
1392+
/// Deleting via a subject that carries a `?drive=` hint must still remove the
1393+
/// snapshot — it is keyed by `pure_id()`. Regression test for the mis-keyed
1394+
/// `apply_destroy` snapshot removal.
1395+
#[tokio::test]
1396+
#[timeout(30000)]
1397+
async fn remove_resource_with_drive_hint_subject_deletes_snapshot() {
1398+
let store = Db::init_temp("orphan_snapshot_hint").await.unwrap();
1399+
let drive = store.create_drive("test-drive").await.unwrap();
1400+
let did = store
1401+
.create_resource(
1402+
"https://atomicdata.dev/classes/Property",
1403+
&drive,
1404+
"age",
1405+
None,
1406+
)
1407+
.await
1408+
.unwrap();
1409+
let subject = Subject::from_raw(&did, store.get_base_domain().as_deref());
1410+
let pure_id = subject.pure_id();
1411+
assert!(
1412+
store
1413+
.kv
1414+
.get(Tree::LoroSnapshots, pure_id.as_bytes())
1415+
.unwrap()
1416+
.is_some()
1417+
);
1418+
1419+
// Delete via a subject carrying a `?drive=` hint. The snapshot is keyed by
1420+
// pure_id(); the old raw-subject key would miss it.
1421+
let hinted = subject.clone().set_drive_hint(drive.clone());
1422+
assert_ne!(
1423+
hinted.to_string(),
1424+
pure_id,
1425+
"drive hint should make to_string() differ from pure_id()"
1426+
);
1427+
store.remove_resource(&hinted).await.unwrap();
1428+
1429+
assert!(
1430+
store
1431+
.kv
1432+
.get(Tree::LoroSnapshots, pure_id.as_bytes())
1433+
.unwrap()
1434+
.is_none(),
1435+
"snapshot orphaned when deleting via a drive-hinted subject"
1436+
);
1437+
}

lib/src/db/trees.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,18 @@ impl Operation {
121121
val: None,
122122
}
123123
}
124+
125+
/// Remove a resource's Loro snapshot. `pure_id` must be the
126+
/// [`crate::Subject::pure_id`] form — that is the key snapshots are
127+
/// written under (see `apply_commit` / `add_resource_opts`).
128+
pub fn remove_loro_snapshot(pure_id: &str) -> Self {
129+
Operation {
130+
tree: Tree::LoroSnapshots,
131+
method: Method::Delete,
132+
key: pure_id.as_bytes().to_vec(),
133+
val: None,
134+
}
135+
}
124136
}
125137

126138
/// A set of [Operation]s that should be executed atomically by the database.

0 commit comments

Comments
 (0)