Skip to content

Commit

Permalink
Optimize paginated getChildren: return children names instead of Path…
Browse files Browse the repository at this point in the history
…WithStat
  • Loading branch information
huizhilu committed Nov 20, 2020
1 parent 4ee53c4 commit 37adb17
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 165 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,18 @@
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import org.apache.zookeeper.data.PathWithStat;

/**
* Iterator over children nodes of a given path.
* <p>
* Note: the final collection of children may not be strongly consistent with the server.
* If there are concurrent writes to the children during iteration, the final collection could
* miss some children or contain some duplicate children.
*
* @see ZooKeeper#getAllChildrenPaginated(String, boolean)
*/
class ChildrenBatchIterator implements RemoteIterator<String> {
class ChildrenBatchIterator implements RemoteIterator<PathWithStat> {

private final ZooKeeper zooKeeper;
private final String path;
private final Watcher watcher;
private final int batchSize;
private final LinkedList<String> childrenQueue;
private final PaginationNextPage nextPage;
private final LinkedList<PathWithStat> childrenQueue;
private long nextBatchMinZxid;
private int nextBatchZxidOffset;

Expand All @@ -53,44 +47,55 @@ class ChildrenBatchIterator implements RemoteIterator<String> {
this.nextBatchMinZxid = minZxid;

this.childrenQueue = new LinkedList<>();
this.nextPage = new PaginationNextPage();

batchGetChildren();
List<PathWithStat> firstChildrenBatch = zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset);
childrenQueue.addAll(firstChildrenBatch);

updateOffsetsForNextBatch(firstChildrenBatch);
}

@Override
public boolean hasNext() {

// next() never lets childrenQueue empty unless we iterated over all children
return !childrenQueue.isEmpty();
}

@Override
public String next() throws KeeperException, InterruptedException {
public PathWithStat next() throws KeeperException, InterruptedException, NoSuchElementException {

if (!hasNext()) {
throw new NoSuchElementException("No more children");
}

// If we're down to the last element, backfill before returning it
if (childrenQueue.size() == 1 && nextBatchMinZxid != ZooDefs.GetChildrenPaginated.lastPageMinCzxid
&& nextBatchZxidOffset != ZooDefs.GetChildrenPaginated.lastPageMinCzxidOffset) {
batchGetChildren();
if (childrenQueue.size() == 1) {

List<PathWithStat> childrenBatch = zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset);
childrenQueue.addAll(childrenBatch);

updateOffsetsForNextBatch(childrenBatch);
}

return childrenQueue.pop();
PathWithStat returnChildren = childrenQueue.pop();

return returnChildren;
}

/**
* Prepare minZxid and zxidOffset for the next batch request
* Prepare minZxid and zkidOffset for the next batch request based on the children returned in the current
*/
private void updateOffsetsForNextBatch() {
nextBatchMinZxid = nextPage.getMinCzxid();
nextBatchZxidOffset = nextPage.getMinCzxidOffset();
}
private void updateOffsetsForNextBatch(List<PathWithStat> children) {

private void batchGetChildren() throws KeeperException, InterruptedException {
List<String> childrenBatch =
zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset, nextPage);
childrenQueue.addAll(childrenBatch);
updateOffsetsForNextBatch();
for (PathWithStat child : children) {
long childZxid = child.getStat().getCzxid();

if (nextBatchMinZxid == childZxid) {
++nextBatchZxidOffset;
} else {
nextBatchZxidOffset = 1;
nextBatchMinZxid = childZxid;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,16 @@ public interface RemoteIterator<E> {

/**
* Returns true if the iterator has more elements.
*
* @return true if the iterator has more elements, false otherwise.
*/
boolean hasNext();

/**
* Returns the next element in the iteration.
*
* @return the next element in the iteration.
* @throws InterruptedException if the thread is interrupted
* @throws KeeperException if an error is encountered server-side
* @throws NoSuchElementException if the iteration has no more elements
*/
E next() throws InterruptedException, KeeperException;
E next() throws InterruptedException, KeeperException, NoSuchElementException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@
package org.apache.zookeeper.server.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.jute.BinaryInputArchive;
import org.apache.jute.BinaryOutputArchive;
import org.apache.jute.InputArchive;
import org.apache.jute.OutputArchive;
import org.apache.jute.Record;
Expand Down Expand Up @@ -186,17 +184,4 @@ public static byte[] serializeRequest(Request request) {
return data;
}

/**
* Serializes a {@link Record} into a byte array.
*
* @param record the {@link Record} to be serialized
* @return a new byte array
* @throws IOException if there is an error during serialization
*/
public static byte[] serializeRecord(Record record) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(ZooKeeperServer.intBufferStartingSizeBytes);
BinaryOutputArchive bos = BinaryOutputArchive.getArchive(baos);
bos.writeRecord(record, null);
return baos.toByteArray();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.apache.zookeeper.ZKTestCase;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.common.PathTrie;
import org.apache.zookeeper.data.PathWithStat;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.metrics.MetricsUtils;
import org.apache.zookeeper.txn.CreateTxn;
Expand Down Expand Up @@ -326,50 +327,53 @@ public void getChildrenPaginated() throws NodeExistsException, NoNodeException {

// Asking from a negative for 5 nodes should return the 5, and not set the watch
int curWatchCount = dt.getWatchCount();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, null);
List<PathWithStat> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0);
assertEquals(5, result.size());
assertEquals("The watch not should have been set", curWatchCount, dt.getWatchCount());
// Verify that the list is sorted
String before = "";
for (final String path : result) {
assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before),
for (final PathWithStat s : result) {
final String path = s.getPath();
assertTrue(String.format("The next path (%s) should be > previons (%s)", path, before),
path.compareTo(before) > 0);
before = path;
}

// Asking from a negative would give me all children, and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0);
assertEquals(countNodes, result.size());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
// Verify that the list is sorted
before = "";
for (final String path : result) {
assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before),
for (final PathWithStat s : result) {
final String path = s.getPath();
assertTrue(String.format("The next path (%s) should be > previons (%s)", path, before),
path.compareTo(before) > 0);
before = path;
}

// Asking from the last one should return only one node
// Asking from the last one should return only onde node
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0);
assertEquals(1, result.size());
assertEquals("test-" + (countNodes - 1), result.get(0));
assertEquals("test-" + (countNodes - 1), result.get(0).getPath());
assertEquals(firstCzxId + countNodes - 1, result.get(0).getStat().getMzxid());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking from the last created node+1 should return an empty list and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0);
assertTrue("The result should be an empty list", result.isEmpty());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking from -1 for one node should return two, and NOT set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0);
assertEquals("No watch should be set", curWatchCount, dt.getWatchCount());
assertEquals("We only return up to ", 1, result.size());
// Check that we ordered correctly
assertEquals("test-0", result.get(0));
assertEquals("test-0", result.get(0).getPath());
}

@Test(timeout = 60000)
Expand Down Expand Up @@ -398,58 +402,59 @@ public void getChildrenPaginatedWithOffset() throws NodeExistsException, NoNodeE

// Asking from a negative would give me all children, and set the watch
int curWatchCount = dt.getWatchCount();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, null);
List<PathWithStat> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0);
assertEquals(allNodes, result.size());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
// Verify that the list is sorted
String before = "";
for (final String path : result) {
assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before),
for (final PathWithStat s : result) {
final String path = s.getPath();
assertTrue(String.format("The next path (%s) should be > previons (%s)", path, before),
path.compareTo(before) > 0);
before = path;
}

// Asking with offset minCzxId below childrenCzxId should not skip anything, regardless of offset
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3);
assertEquals(2, result.size());
assertEquals("test-1", result.get(0));
assertEquals("test-2", result.get(1));
assertEquals("test-1", result.get(0).getPath());
assertEquals("test-2", result.get(1).getPath());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking with offset 5 should skip nodes 1, 2, 3, 4, 5
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5);
assertEquals(2, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals("test-6", result.get(0).getPath());
assertEquals("test-7", result.get(1).getPath());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking with offset 5 for more nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid)
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5);

assertEquals(5, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals("test-8", result.get(2));
assertEquals("test-9", result.get(3));
assertEquals("test-6", result.get(0).getPath());
assertEquals("test-7", result.get(1).getPath());
assertEquals("test-8", result.get(2).getPath());
assertEquals("test-9", result.get(3).getPath());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking with offset 5 for fewer nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid)
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5);

assertEquals(4, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals("test-8", result.get(2));
assertEquals("test-9", result.get(3));
assertEquals("test-6", result.get(0).getPath());
assertEquals("test-7", result.get(1).getPath());
assertEquals("test-8", result.get(2).getPath());
assertEquals("test-9", result.get(3).getPath());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking from the last created node+1 should return an empty list and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0);
assertTrue("The result should be an empty list", result.isEmpty());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
}
Expand All @@ -464,7 +469,7 @@ public void getChildrenPaginatedEmpty() throws NodeExistsException, NoNodeExcept

// Asking from a negative would give me all children, and set the watch
int curWatchCount = dt.getWatchCount();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0, null);
List<PathWithStat> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0);
assertTrue("The result should be empty", result.isEmpty());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
}
Expand Down
Loading

0 comments on commit 37adb17

Please sign in to comment.