Skip to content

Commit 2bd8878

Browse files
patrickwoodhead authored and rvagg committed
feat: add traversal/walk function
Closes: #118 Basic traversal functionality for deterministic DAG walking with no repeat block visits and support for block skipping. User supplies a block loader, which can be used to watch the block ordering of the walk.
1 parent 77c2577 commit 2bd8878

File tree

4 files changed

+241
-0
lines changed

4 files changed

+241
-0
lines changed

README.md

+37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* [Multibase Encoders / Decoders / Codecs](#multibase-encoders--decoders--codecs)
66
* [Multicodec Encoders / Decoders / Codecs](#multicodec-encoders--decoders--codecs)
77
* [Multihash Hashers](#multihash-hashers)
8+
* [Traversal](#traversal)
89
* [Legacy interface](#legacy-interface)
910
* [Implementations](#implementations)
1011
* [Multibase codecs](#multibase-codecs)
@@ -137,6 +138,42 @@ CID.create(1, json.code, hash)
137138
//> CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)
138139
```
139140

141+
### Traversal
142+
143+
This library contains higher-order functions for traversing graphs of data easily.
144+
145+
`walk()` walks through the links in each block of a DAG, calling a user-supplied loader function for each one, in depth-first order with no duplicate block visits. The loader should return a `Block` object and can be used to inspect and collect the block ordering of a full DAG walk. The loader should `throw` on error and return `null` if a block should be skipped by `walk()`.
146+
147+
```js
148+
import { walk } from 'multiformats/traversal'
import * as Block from 'multiformats/block'
import * as codec from 'multiformats/codecs/json'
import { sha256 as hasher } from 'multiformats/hashes/sha2'

// build a DAG (a single block for this simple example)
const value = { hello: 'world' }
const block = await Block.encode({ value, codec, hasher })
const { cid } = block
console.log(cid)
//> CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)

// create a loader function that also collects the CIDs of blocks in
// their traversal order; `visited` is the array the CIDs are appended to
const load = (visited) => async (cid) => {
  // fetch a block using its cid
  // e.g.: const block = await fetchBlockByCID(cid)
  visited.push(cid)
  // this example DAG has a single block, so it is returned for every request
  return block
}

// collect the CIDs of the blocks in this DAG starting from the root `cid`
const cids = []
await walk({ cid, load: load(cids) })

console.log(cids)
//> [CID(bagaaierasords4njcts6vs7qvdjfcvgnume4hqohf65zsfguprqphs3icwea)]
175+
```
176+
140177
## Legacy interface
141178

142179
[`blockcodec-to-ipld-format`](https://github.com/ipld/js-blockcodec-to-ipld-format) converts a multiformats [`BlockCodec`](https://github.com/multiformats/js-multiformats/blob/master/src/codecs/interface.ts#L21) into an

package.json

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
"./block": {
4949
"import": "./src/block.js"
5050
},
51+
"./traversal": {
52+
"import": "./src/traversal.js"
53+
},
5154
"./bases/identity": {
5255
"import": "./src/bases/identity.js"
5356
},
@@ -96,6 +99,7 @@
9699
}
97100
},
98101
"devDependencies": {
102+
"@ipld/dag-pb": "^2.1.14",
99103
"@types/node": "^16.7.10",
100104
"@typescript-eslint/eslint-plugin": "^4.30.0",
101105
"@typescript-eslint/parser": "^4.30.0",

src/traversal.js

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { base58btc } from 'multiformats/bases/base58'
2+
3+
/**
4+
* @typedef {import('./cid.js').CID} CID
5+
*/
6+
7+
/**
8+
* @template T
9+
* @typedef {import('./block.js').Block<T>} Block
10+
*/
11+
12+
/**
 * Walk a DAG depth-first starting at `cid`, calling `load` once for every
 * CID that has not been visited yet and then recursing, in order, into the
 * links of each block that `load` returns.
 *
 * CIDs are tracked by their base58btc string form. The `seen` set, when
 * supplied, is used (and added to) directly, so a caller can pre-populate it
 * to exclude CIDs or inspect it after the walk.
 *
 * @template T
 * @param {Object} options
 * @param {CID} options.cid - CID to start walking from
 * @param {(cid: CID) => Promise<Block<T>|null>} options.load - loader called
 * for each unvisited CID; resolve to `null` to skip a block (its links are
 * not followed), throw (or reject) to abort the walk
 * @param {Set<string>|null} [options.seen] - base58btc strings of CIDs that
 * have already been visited; a new set is created when omitted
 * @returns {Promise<void>}
 */
const walk = async ({ cid, load, seen }) => {
  // share one set across the whole recursion (and with the caller, if given)
  const visited = seen || new Set()
  const key = cid.toString(base58btc)
  if (visited.has(key)) {
    return
  }

  const block = await load(cid)
  // marked only after `load` resolves: a CID whose load rejects is not recorded
  visited.add(key)

  if (block === null) { // the loader signals with `null` that we should skip this block
    return
  }

  // links are followed one at a time so the visit order is deterministic;
  // the second element of each `links()` entry is the linked CID
  for (const [, link] of block.links()) {
    await walk({ cid: link, load, seen: visited })
  }
}
37+
38+
export { walk }

test/test-traversal.js

+162
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/* globals describe, it */
2+
import * as codec from 'multiformats/codecs/json'
3+
import * as dagPB from '@ipld/dag-pb'
4+
import { sha256 as hasher } from 'multiformats/hashes/sha2'
5+
import * as main from 'multiformats/block'
6+
import { walk } from 'multiformats/traversal'
7+
import { deepStrictEqual as same } from 'assert'
8+
9+
const test = it
10+
const { createLink, createNode } = dagPB
11+
12+
describe('traversal', () => {
  // NOTE: the suite callback is deliberately synchronous. Mocha ignores a
  // promise returned from `describe`, so tests declared after an `await`
  // inside it would not be registered as part of this suite. All async
  // fixture creation therefore happens lazily inside the tests via `setup()`.
  describe('walk', () => {
    // `Uint8Array.from(string)` without a map function converts each character
    // with ToNumber, producing zero bytes for letters; map through the char
    // code instead (the fixture strings are plain ASCII).
    const toBytes = (text) => Uint8Array.from(text, (char) => char.charCodeAt(0))

    // encode a node holding `text` as data and `links` as its links
    const encodeNode = (text, links) => {
      const value = createNode(toBytes(text), links)
      return main.encode({ value, codec, hasher })
    }

    // Forming the following DAG for testing
    //       A
    //      / \
    //     B   C
    //    / \ / \
    //   D  D D  E
    const buildFixtures = async () => {
      const blockE = await encodeNode('string E qacdswa', [])
      const blockD = await encodeNode('string D zasa', [])
      const blockC = await encodeNode('string C zxc', [
        createLink('link1', 100, blockD.cid),
        createLink('link2', 100, blockE.cid)
      ])
      const blockB = await encodeNode('string B lpokjiasd', [
        createLink('link1', 100, blockD.cid),
        createLink('link2', 100, blockD.cid)
      ])
      const blockA = await encodeNode('string A qwertcfdgshaa', [
        createLink('link1', 100, blockB.cid),
        createLink('link2', 100, blockC.cid)
      ])

      const known = [blockE, blockD, blockC, blockB, blockA]

      // resolves to the fixture block with a matching CID, or `null` for any
      // other CID (which tells `walk()` to skip it)
      const load = async (cid) => known.find((block) => cid.equals(block.cid)) || null

      return {
        cidA: blockA.cid,
        cidB: blockB.cid,
        cidC: blockC.cid,
        cidD: blockD.cid,
        cidE: blockE.cid,
        load
      }
    }

    // fixtures are built once, on first use, and shared by every test
    let fixtures = null
    const setup = () => {
      if (fixtures === null) {
        fixtures = buildFixtures()
      }
      return fixtures
    }

    // wraps a loader so that the string form of every requested CID is
    // appended to `arr`, in call order
    const loadWrapper = (load, arr = []) => (cid) => {
      arr.push(cid.toString())
      return load(cid)
    }

    test('block with no links', async () => {
      // Test Case 1
      // Input DAG
      //    D
      //
      // Expect load to be called with D
      const { cidD, load } = await setup()
      const expectedCallArray = [cidD.toString()]
      const callArray = []

      await walk({ cid: cidD, load: loadWrapper(load, callArray) })

      // compare whole arrays so that unexpected extra loads fail the test
      same(callArray, expectedCallArray)
    })

    test('block with links', async () => {
      // Test Case 2
      // Input
      //     C
      //    / \
      //   D   E
      //
      // Expect load to be called with C, then D, then E
      const { cidC, cidD, cidE, load } = await setup()
      const expectedCallArray = [cidC.toString(), cidD.toString(), cidE.toString()]
      const callArray = []

      await walk({ cid: cidC, load: loadWrapper(load, callArray) })

      same(callArray, expectedCallArray)
    })

    test('block with matching links', async () => {
      // Test Case 3
      // Input
      //     B
      //    / \
      //   D   D
      //
      // Expect load to be called with B, then D
      const { cidB, cidD, load } = await setup()
      const expectedCallArray = [cidB.toString(), cidD.toString()]
      const callArray = []

      await walk({ cid: cidB, load: loadWrapper(load, callArray) })

      same(callArray, expectedCallArray)
    })

    test('depth first with duplicated block', async () => {
      // Test Case 4
      // Input
      //       A
      //      / \
      //     B   C
      //    / \ / \
      //   D  D D  E
      //
      // Expect load to be called with A, then B, then D, then C, then E
      const { cidA, cidB, cidC, cidD, cidE, load } = await setup()
      const expectedCallArray = [
        cidA.toString(),
        cidB.toString(),
        cidD.toString(),
        cidC.toString(),
        cidE.toString()
      ]
      const callArray = []

      await walk({ cid: cidA, load: loadWrapper(load, callArray) })

      same(callArray, expectedCallArray)
    })

    test('null return', async () => {
      // a block that is not part of the fixture DAG: `load` resolves to `null`
      // for it, so it is requested once and nothing else is loaded
      const { load } = await setup()
      const block = await encodeNode('test', [])
      const cid = block.cid
      const expectedCallArray = [cid.toString()]
      const callArray = []

      await walk({ cid, load: loadWrapper(load, callArray) })

      same(callArray, expectedCallArray)
    })
  })
})

0 commit comments

Comments
 (0)