Skip to content

Commit 5f1a116

Browse files
committed
Add linear time implementation of append.
1 parent 9b903bf commit 5f1a116

File tree

3 files changed

+293
-21
lines changed

3 files changed

+293
-21
lines changed

src/libcollections/btree/map.rs

+86-3
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,13 @@ pub struct OccupiedEntry<'a, K:'a, V:'a> {
148148
stack: stack::SearchStack<'a, K, V, node::handle::KV, node::handle::LeafOrInternal>,
149149
}
150150

151+
struct MergeIter<K, V, I: Iterator<Item=(K, V)>> {
152+
left: I,
153+
right: I,
154+
left_cur: Option<(K, V)>,
155+
right_cur: Option<(K, V)>,
156+
}
157+
151158
impl<K: Ord, V> BTreeMap<K, V> {
152159
/// Makes a new empty BTreeMap with a reasonable choice for B.
153160
#[stable(feature = "rust1", since = "1.0.0")]
@@ -496,10 +503,36 @@ impl<K: Ord, V> BTreeMap<K, V> {
496503
#[unstable(feature = "append",
497504
reason = "recently added as part of collections reform 2")]
498505
pub fn append(&mut self, other: &mut Self) {
499-
let b = other.b;
500-
for (key, value) in mem::replace(other, BTreeMap::with_b(b)) {
501-
self.insert(key, value);
506+
// Do we have to append anything at all?
507+
if other.len() == 0 {
508+
return;
502509
}
510+
511+
// If the values of `b` of `self` and `other` are equal and `self` is empty, swapping the two
512+
// maps is all that is needed: `self` takes over every entry and `other` is left empty.
513+
if self.len() == 0 && self.b == other.b {
514+
mem::swap(self, other);
// Nothing is left to merge, so return instead of falling through and rebuilding the
// freshly swapped tree from scratch.
return;
515+
}
516+
517+
// First, we merge `self` and `other` into a sorted sequence in linear time.
518+
let self_b = self.b;
519+
let other_b = other.b;
520+
let mut self_iter = mem::replace(self, BTreeMap::with_b(self_b)).into_iter();
521+
let mut other_iter = mem::replace(other, BTreeMap::with_b(other_b)).into_iter();
522+
let self_cur = self_iter.next();
523+
let other_cur = other_iter.next();
524+
525+
// Second, we build a tree from the sorted sequence in linear time.
526+
let (length, depth, root) = Node::from_sorted_iter(MergeIter {
527+
left: self_iter,
528+
right: other_iter,
529+
left_cur: self_cur,
530+
right_cur: other_cur,
531+
}, self_b);
532+
533+
self.length = length;
534+
self.depth = depth;
535+
self.root = root.unwrap(); // `unwrap` won't panic because length can't be zero.
503536
}
504537

505538
/// Splits the map into two at the given key,
@@ -644,6 +677,56 @@ impl<'a, K, V> IntoIterator for &'a mut BTreeMap<K, V> {
644677
}
645678
}
646679

680+
// Helper enum for MergeIter
681+
enum MergeResult {
682+
Left,
683+
Right,
684+
Both,
685+
None,
686+
}
687+
688+
impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
689+
type Item = (K, V);
690+
691+
fn next(&mut self) -> Option<(K, V)> {
692+
let res = match (&self.left_cur, &self.right_cur) {
693+
(&Some((ref left_key, _)), &Some((ref right_key, _))) => {
694+
match left_key.cmp(right_key) {
695+
Ordering::Less => MergeResult::Left,
696+
Ordering::Equal => MergeResult::Both,
697+
Ordering::Greater => MergeResult::Right,
698+
}
699+
},
700+
(&Some(_), &None) => MergeResult::Left,
701+
(&None, &Some(_)) => MergeResult::Right,
702+
(&None, &None) => MergeResult::None,
703+
};
704+
705+
// Check which element comes first and only advance the corresponding iterator.
706+
// If two keys are equal, take the value from `right`.
707+
match res {
708+
MergeResult::Left => {
709+
let ret = self.left_cur.take();
710+
self.left_cur = self.left.next();
711+
ret
712+
},
713+
MergeResult::Right => {
714+
let ret = self.right_cur.take();
715+
self.right_cur = self.right.next();
716+
ret
717+
},
718+
MergeResult::Both => {
719+
let ret = self.right_cur.take();
720+
self.left_cur = self.left.next();
721+
self.right_cur = self.right.next();
722+
ret
723+
},
724+
MergeResult::None => None,
725+
}
726+
}
727+
}
728+
729+
647730
/// A helper enum useful for deciding whether to continue a loop since we can't
648731
/// return from a closure
649732
enum Continuation<A, B> {

src/libcollections/btree/node.rs

+169
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use core::ptr::Unique;
2727
use core::{slice, mem, ptr, cmp, raw};
2828
use alloc::heap::{self, EMPTY};
2929

30+
use vec::Vec;
3031
use borrow::Borrow;
3132

3233
/// Represents the result of an Insertion: either the item fit, or the node had to split
@@ -904,6 +905,17 @@ impl<'a, K: 'a, V: 'a, NodeType> Handle<&'a mut Node<K, V>, handle::KV, NodeType
904905
marker: PhantomData,
905906
}
906907
}
908+
909+
/// Convert this handle into one pointing at the edge immediately to the right of the key/value
910+
/// pair pointed-to by this handle. This is useful because it returns a reference with larger
911+
/// lifetime than `right_edge`.
912+
pub fn into_right_edge(self) -> Handle<&'a mut Node<K, V>, handle::Edge, NodeType> {
913+
Handle {
914+
node: &mut *self.node,
915+
index: self.index + 1,
916+
marker: PhantomData,
917+
}
918+
}
907919
}
908920

909921
impl<'a, K: 'a, V: 'a, NodeRef: Deref<Target=Node<K, V>> + 'a, NodeType> Handle<NodeRef, handle::KV,
@@ -1230,6 +1242,163 @@ impl<K, V> Node<K, V> {
12301242
}
12311243
}
12321244

1245+
impl<K, V> Node<K, V> {
1246+
pub fn from_sorted_iter<I>(iter: I, b: usize) -> (usize, usize, Option<Node<K, V>>)
1247+
where I: Iterator<Item=(K, V)> {
1248+
let capacity = capacity_from_b(b);
1249+
let minimum = min_load_from_capacity(capacity);
1250+
1251+
// Holds the current level.
1252+
let mut num_level = 0;
1253+
// Needed to count the number of key-value pairs in `iter`.
1254+
let mut length = 0;
1255+
// `levels` contains the current node on every level, going from the leaves level to the
1256+
// root level.
1257+
let mut levels: Vec<Option<Node<K, V>>> = Vec::new();
1258+
1259+
// Iterate through all key-value pairs, pushing them into nodes of appropriate size at the
1260+
// right level.
1261+
for (key, value) in iter {
1262+
// Always go down to a leaf after inserting an element into an internal node.
1263+
if num_level > 0 {
1264+
num_level = 0;
1265+
}
1266+
1267+
loop {
1268+
// If we are in an internal node, extract node from the level below to insert it as
1269+
// edge on the level above; `unsafe` is needed for unchecked access.
1270+
let new_edge = unsafe {
1271+
if num_level > 0 {
1272+
levels.get_unchecked_mut(num_level - 1).take()
1273+
} else {
1274+
None
1275+
}
1276+
};
1277+
1278+
// Get current node on current level.
1279+
// If we are past the top-most level, insert a new level.
1280+
if num_level == levels.len() {
1281+
levels.push(None);
1282+
}
1283+
// If there is no node on this level, create a new node. `unsafe`
1284+
// is needed for unchecked access.
1285+
let level = unsafe { levels.get_unchecked_mut(num_level) };
1286+
if level.is_none() {
1287+
*level = if num_level == 0 {
1288+
Some(Node::new_leaf(capacity))
1289+
} else {
1290+
// `unsafe` is needed for `new_internal`.
1291+
unsafe {
1292+
Some(Node::new_internal(capacity))
1293+
}
1294+
};
1295+
}
1296+
let node = level.as_mut().unwrap();
1297+
1298+
// Insert edge from the level below; `unsafe` is needed for `push_edge`.
1299+
if let Some(edge) = new_edge {
1300+
unsafe {
1301+
node.push_edge(edge);
1302+
}
1303+
}
1304+
1305+
// If node is already full, we have to go up one level before we can insert the
1306+
// key-value pair.
1307+
if !node.is_full() {
1308+
// Insert key-value pair into node; `unsafe` is needed for `push_kv`.
1309+
unsafe {
1310+
node.push_kv(key, value);
1311+
}
1312+
break;
1313+
}
1314+
num_level += 1;
1315+
}
1316+
1317+
length += 1;
1318+
}
1319+
1320+
// Fix "right edge" of the tree.
1321+
if levels.len() > 1 {
1322+
1323+
num_level = 0;
1324+
while num_level < levels.len() - 1 {
1325+
// Extract node from this level or create a new one if there isn't any. `unsafe` is
1326+
// needed for unchecked access and `new_internal`.
1327+
let edge = unsafe {
1328+
match levels.get_unchecked_mut(num_level).take() {
1329+
Some(n) => n,
1330+
None => {
1331+
if num_level == 0 {
1332+
Node::new_leaf(capacity)
1333+
} else {
1334+
Node::new_internal(capacity)
1335+
}
1336+
},
1337+
}
1338+
};
1339+
1340+
// Go to the level above.
1341+
num_level += 1;
1342+
1343+
// Get node on this level, create one if there isn't any; `unsafe` is needed for
1344+
// unchecked access.
1345+
let level = unsafe { levels.get_unchecked_mut(num_level) };
1346+
if level.is_none() {
1347+
// `unsafe` is needed for `new_internal`.
1348+
unsafe {
1349+
*level = Some(Node::new_internal(capacity));
1350+
}
1351+
}
1352+
let mut node = level.as_mut().unwrap();
1353+
1354+
// Insert `edge` as new edge in `node`; `unsafe` is needed for `push_edge`.
1355+
unsafe {
1356+
node.push_edge(edge);
1357+
}
1358+
}
1359+
1360+
// Start at the root and steal to fix underfull nodes on the "right edge" of the tree.
1361+
let root_index = levels.len() - 1;
1362+
let mut node = unsafe { levels.get_unchecked_mut(root_index).as_mut().unwrap() };
1363+
1364+
loop {
1365+
let mut temp_node = node;
1366+
let index = temp_node.len() - 1;
1367+
let mut handle = match temp_node.kv_handle(index).force() {
1368+
ForceResult::Internal(h) => h,
1369+
ForceResult::Leaf(_) => break,
1370+
};
1371+
1372+
// Check if we need to steal, i.e. is the length of the right edge less than
1373+
// `minimum`?
1374+
let right_len = handle.right_edge().node().len();
1375+
if right_len < minimum {
1376+
// Steal!
1377+
let num_steals = minimum - right_len;
1378+
for _ in 0..num_steals {
1379+
// `unsafe` is needed for stealing.
1380+
unsafe {
1381+
handle.steal_rightward();
1382+
}
1383+
}
1384+
}
1385+
1386+
// Go down the right edge.
1387+
node = handle.into_right_edge().into_edge_mut();
1388+
}
1389+
}
1390+
1391+
// Get root node from `levels`.
1392+
let root = match levels.pop() {
1393+
Some(option) => option,
1394+
_ => None,
1395+
};
1396+
1397+
// Return (length, depth, root_node).
1398+
(length, levels.len(), root)
1399+
}
1400+
}
1401+
12331402
// Private implementation details
12341403
impl<K, V> Node<K, V> {
12351404
/// Node is full, so split it into two nodes, and yield the middle-most key-value pair

src/libcollectionstest/btree/map.rs

+38-18
Original file line numberDiff line numberDiff line change
@@ -294,30 +294,50 @@ fn test_extend_ref() {
294294
assert_eq!(a[&3], "three");
295295
}
296296

297-
#[test]
298-
fn test_append() {
299-
let mut a = BTreeMap::new();
300-
a.insert(1, "a");
301-
a.insert(2, "b");
302-
a.insert(3, "c");
297+
macro_rules! create_append_test {
298+
($name:ident, $len:expr) => {
299+
#[test]
300+
fn $name() {
301+
let mut a = BTreeMap::with_b(6);
302+
for i in 0..8 {
303+
a.insert(i, i);
304+
}
303305

304-
let mut b = BTreeMap::new();
305-
b.insert(3, "d"); // Overwrite element from a
306-
b.insert(4, "e");
307-
b.insert(5, "f");
306+
let mut b = BTreeMap::with_b(6);
307+
for i in 5..$len {
308+
b.insert(i, 2*i);
309+
}
308310

309-
a.append(&mut b);
311+
a.append(&mut b);
310312

311-
assert_eq!(a.len(), 5);
312-
assert_eq!(b.len(), 0);
313+
assert_eq!(a.len(), $len);
314+
assert_eq!(b.len(), 0);
313315

314-
assert_eq!(a[&1], "a");
315-
assert_eq!(a[&2], "b");
316-
assert_eq!(a[&3], "d");
317-
assert_eq!(a[&4], "e");
318-
assert_eq!(a[&5], "f");
316+
for i in 0..$len {
317+
if i < 5 {
318+
assert_eq!(a[&i], i);
319+
} else {
320+
assert_eq!(a[&i], 2*i);
321+
}
322+
}
323+
}
324+
};
319325
}
320326

327+
// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
328+
// Single node.
329+
create_append_test!(test_append_9, 9);
330+
// Two leaves that don't need fixing.
331+
create_append_test!(test_append_17, 17);
332+
// Two leaves where the second one ends up underfull and needs stealing at the end.
333+
create_append_test!(test_append_14, 14);
334+
// Two leaves where the first one isn't full; finish insertion at root.
335+
create_append_test!(test_append_12, 12);
336+
// Three levels; finish insertion at root.
337+
create_append_test!(test_append_144, 144);
338+
// Three levels; finish insertion at leaf without a node on the second level.
339+
create_append_test!(test_append_145, 145);
340+
321341
#[test]
322342
fn test_split_off() {
323343
// Split empty map

0 commit comments

Comments
 (0)