diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/ChildrenBatchIterator.java b/zookeeper-server/src/main/java/org/apache/zookeeper/ChildrenBatchIterator.java index f7a21d57134..e5624c92975 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/ChildrenBatchIterator.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/ChildrenBatchIterator.java @@ -21,24 +21,18 @@ import java.util.LinkedList; import java.util.List; import java.util.NoSuchElementException; +import org.apache.zookeeper.data.PathWithStat; /** * Iterator over children nodes of a given path. - *

- * Note: the final collection of children may not be strongly consistent with the server. - * If there are concurrent writes to the children during iteration, the final collection could - * miss some children or contain some duplicate children. - * - * @see ZooKeeper#getAllChildrenPaginated(String, boolean) */ -class ChildrenBatchIterator implements RemoteIterator { +class ChildrenBatchIterator implements RemoteIterator { private final ZooKeeper zooKeeper; private final String path; private final Watcher watcher; private final int batchSize; - private final LinkedList childrenQueue; - private final PaginationNextPage nextPage; + private final LinkedList childrenQueue; private long nextBatchMinZxid; private int nextBatchZxidOffset; @@ -53,44 +47,55 @@ class ChildrenBatchIterator implements RemoteIterator { this.nextBatchMinZxid = minZxid; this.childrenQueue = new LinkedList<>(); - this.nextPage = new PaginationNextPage(); - batchGetChildren(); + List firstChildrenBatch = zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset); + childrenQueue.addAll(firstChildrenBatch); + + updateOffsetsForNextBatch(firstChildrenBatch); } @Override public boolean hasNext() { + // next() never lets childrenQueue become empty unless we have iterated over all children return !childrenQueue.isEmpty(); } @Override - public String next() throws KeeperException, InterruptedException { + public PathWithStat next() throws KeeperException, InterruptedException, NoSuchElementException { + if (!hasNext()) { throw new NoSuchElementException("No more children"); } // If we're down to the last element, backfill before returning it - if (childrenQueue.size() == 1 && nextBatchMinZxid != ZooDefs.GetChildrenPaginated.lastPageMinCzxid - && nextBatchZxidOffset != ZooDefs.GetChildrenPaginated.lastPageMinCzxidOffset) { - batchGetChildren(); + if (childrenQueue.size() == 1) { + + List childrenBatch = zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset); + childrenQueue.addAll(childrenBatch); + + updateOffsetsForNextBatch(childrenBatch); } - return childrenQueue.pop(); + PathWithStat returnChildren = childrenQueue.pop(); + + return returnChildren; } /** - * Prepare minZxid and zxidOffset for the next batch request + * Prepare minZxid and zxidOffset for the next batch request based on the children returned in the current batch */ - private void updateOffsetsForNextBatch() { - nextBatchMinZxid = nextPage.getMinCzxid(); - nextBatchZxidOffset = nextPage.getMinCzxidOffset(); - } + private void updateOffsetsForNextBatch(List children) { - private void batchGetChildren() throws KeeperException, InterruptedException { - List childrenBatch = - zooKeeper.getChildren(path, watcher, batchSize, nextBatchMinZxid, nextBatchZxidOffset, nextPage); - childrenQueue.addAll(childrenBatch); - updateOffsetsForNextBatch(); + for (PathWithStat child : children) { + long childZxid = child.getStat().getCzxid(); + + if (nextBatchMinZxid == childZxid) { + ++nextBatchZxidOffset; + } else { + nextBatchZxidOffset = 1; + nextBatchMinZxid = childZxid; + } + } } } \ No newline at end of file diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/RemoteIterator.java b/zookeeper-server/src/main/java/org/apache/zookeeper/RemoteIterator.java index af25b71b748..2517786f92f 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/RemoteIterator.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/RemoteIterator.java @@ -27,18 +27,16 @@ public interface RemoteIterator { /** * Returns true if the 
iterator has more elements. - * * @return true if the iterator has more elements, false otherwise. */ boolean hasNext(); /** * Returns the next element in the iteration. - * * @return the next element in the iteration. * @throws InterruptedException if the thread is interrupted * @throws KeeperException if an error is encountered server-side * @throws NoSuchElementException if the iteration has no more elements */ - E next() throws InterruptedException, KeeperException; + E next() throws InterruptedException, KeeperException, NoSuchElementException; } \ No newline at end of file diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java index 90e2bd5a649..d748d156015 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java @@ -33,10 +33,10 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.PriorityQueue; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; -import org.apache.jute.BinaryInputArchive; import org.apache.jute.InputArchive; import org.apache.jute.OutputArchive; import org.apache.jute.Record; @@ -64,9 +64,6 @@ import org.apache.zookeeper.data.PathWithStat; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.data.StatPersisted; -import org.apache.zookeeper.proto.GetChildrenPaginatedResponse; -import org.apache.zookeeper.proto.ReplyHeader; -import org.apache.zookeeper.server.util.SerializeUtils; import org.apache.zookeeper.server.watch.IWatchManager; import org.apache.zookeeper.server.watch.WatchManagerFactory; import org.apache.zookeeper.server.watch.WatcherMode; @@ -180,18 +177,6 @@ public class DataTree { // to align and compare between servers. public static final int DIGEST_LOG_INTERVAL = 128; - // Constants used to calculate response packet length for the paginated children list. - // packetLength = (childNameLength + PAGINATION_PACKET_CHILD_EXTRA_BYTES) * numChildren - // + PAGINATION_PACKET_BASE_BYTES - private static final int PAGINATION_PACKET_BASE_BYTES; - private static final int PAGINATION_PACKET_CHILD_EXTRA_BYTES; - - static { - PAGINATION_PACKET_BASE_BYTES = getPaginationPacketLength(Collections.emptyList()); - PAGINATION_PACKET_CHILD_EXTRA_BYTES = - getPaginationPacketLength(Collections.singletonList("")) - PAGINATION_PACKET_BASE_BYTES; - } - // If this is not null, we are actively looking for a target zxid that we // want to validate the digest for private ZxidDigest digestFromLoadedSnapshot; @@ -277,19 +262,6 @@ private static long getNodeSize(String path, byte[] data) { return (path == null ? 0 : path.length()) + (data == null ? 0 : data.length); } - private static int getPaginationPacketLength(List children) { - try { - Record record = new GetChildrenPaginatedResponse(children, new Stat(), 0, 0); - ReplyHeader header = new ReplyHeader(); - byte[] recordBytes = SerializeUtils.serializeRecord(record); - byte[] headerBytes = SerializeUtils.serializeRecord(header); - return recordBytes.length + headerBytes.length; - } catch (IOException e) { - LOG.warn("Unexpected exception. 
Destruction averted.", e); - return 0; - } - } - public long cachedApproximateDataSize() { return nodeDataSize.get(); } @@ -340,9 +312,6 @@ public DataTree() { LOG.error("Unexpected exception when creating WatchManager, exiting abnormally", e); ServiceUtils.requestSystemExit(ExitCode.UNEXPECTED_ERROR.getValue()); } - - LOG.info("Pagination packet length formula constants: child extra = {} bytes, base = {} bytes", - PAGINATION_PACKET_CHILD_EXTRA_BYTES, PAGINATION_PACKET_BASE_BYTES); } /** @@ -840,9 +809,9 @@ public int compare(PathWithStat left, PathWithStat right) { * Produces a paginated list of the children of a given path * @param path path of node node to list * @param stat stat of the node to list - * @param watcher an optional watcher to attach to the node. The watcher is added only once - * when reaching the end of pagination - * @param maxReturned maximum number of children to return. + * @param watcher an optional watcher to attach to the node. The watcher is added only once when reaching the end of + * pagination + * @param maxReturned maximum number of children to return. Return one more than this number to indicate truncation * @param minCzxId only return children whose creation zxid equal or greater than minCzxId * @param czxIdOffset how many children with zxid == minCzxId to skip (as returned in previous pages) * @param nextPage info to be used for the next page call @@ -850,157 +819,66 @@ public int compare(PathWithStat left, PathWithStat right) { * @throws NoNodeException if the path does not exist */ public List getPaginatedChildren(String path, Stat stat, Watcher watcher, int maxReturned, - long minCzxId, int czxIdOffset, PaginationNextPage nextPage) + long minCzxId, long czxIdOffset, PaginationNextPage nextPage) throws NoNodeException { DataNode n = nodes.get(path); if (n == null) { throw new KeeperException.NoNodeException(); } - - if (maxReturned == Integer.MAX_VALUE && minCzxId <= 0 && czxIdOffset <= 0) { - // This request is to fetch all children. Check if all children can be returned in a page. - Set allChildren; - boolean isBelowMaxBuffer = false; - - // Need to lock the parent node for the whole block between reading children list and adding watch - synchronized (n) { - if (stat != null) { - n.copyStat(stat); - } - allChildren = n.getChildren(); - if (isPacketLengthBelowMaxBuffer(computeChildrenPacketLength(allChildren))) { - isBelowMaxBuffer = true; - if (watcher != null) { - childWatches.addWatch(path, watcher); - } - } + synchronized (n) { + if (stat != null) { + n.copyStat(stat); } - - if (isBelowMaxBuffer) { - // If all children can be returned in the first page, just return them without sorting. 
- int bytes = allChildren.stream().mapToInt(String::length).sum(); - updateReadStat(path, bytes); - if (nextPage != null) { - nextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); - nextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageMinCzxidOffset); + PriorityQueue childrenQueue; + Set actualChildren = n.getChildren(); + if (actualChildren == null) { + childrenQueue = new PriorityQueue(1); + } else { + childrenQueue = new PriorityQueue(maxReturned + 1, staticNodeCreationComparator); + for (String child : actualChildren) { + DataNode childNode = nodes.get(path + "/" + child); + if (null != childNode) { + final long czxId = childNode.stat.getCzxid(); + + if (czxId < minCzxId) { + // Filter out nodes that are below minCzxId + continue; + } + + Stat childStat = new Stat(); + childNode.copyStat(childStat); + + // Cannot discard before having sorted and removed offset + childrenQueue.add(new PathWithStat(child, childStat)); + } } - return new ArrayList<>(allChildren); } - } - - int index = 0; - int bytes = 0; - List targetChildren = new ArrayList(); - List paginatedChildren = new ArrayList(); - // Need to lock the parent node for the whole block between reading children list and adding watch - synchronized (n) { - buildChildrenPathWithStat(n, path, stat, minCzxId, targetChildren); - - targetChildren.sort(staticNodeCreationComparator); - - // Go over the ordered list of children and skip the first czxIdOffset - // that have czxid equal to minCzxId, if any - while (index < targetChildren.size() && index < czxIdOffset) { - if (targetChildren.get(index).getStat().getCzxid() > minCzxId) { + // Go over the ordered list of children and skip the first czxIdOffset that have czxid equal to minCzxId, if any + int skipped = 0; + while (!childrenQueue.isEmpty() && skipped < czxIdOffset) { + PathWithStat head = childrenQueue.peek(); + if (head.getStat().getCzxid() > minCzxId) { // We moved past the minCzxId, no point in looking further break; + } else { + childrenQueue.poll(); + ++skipped; } - index++; } - // Return as list preserving older-to-newer order - // Add children up to maxReturned and just below the max network buffer - for (int packetLength = PAGINATION_PACKET_BASE_BYTES; - index < targetChildren.size() && paginatedChildren.size() < maxReturned; - index++) { - String child = targetChildren.get(index).getPath(); - packetLength += child.length() + PAGINATION_PACKET_CHILD_EXTRA_BYTES; - if (!isPacketLengthBelowMaxBuffer(packetLength)) { - // Stop adding more children to ensure packet is below max buffer - break; - } - paginatedChildren.add(child); - bytes += child.length(); + // Return as list preserving newer-to-older order + LinkedList result = new LinkedList(); + while (!childrenQueue.isEmpty() && result.size() < maxReturned) { + result.addLast(childrenQueue.poll().getPath()); } - // Decrement index as it was incremented once before exiting the for-loop. 
- index--; - - if (index == targetChildren.size() - 1 && watcher != null) { - // All children are added so this is the last page, set the watch + // This is the last page, set the watch + if (childrenQueue.isEmpty()) { childWatches.addWatch(path, watcher); } + return result; } - - updateNextPage(nextPage, targetChildren, index); - updateReadStat(path, bytes); - - return paginatedChildren; - } - - private boolean isPacketLengthBelowMaxBuffer(int packetLength) { - return packetLength < BinaryInputArchive.maxBuffer; - } - - private void buildChildrenPathWithStat(DataNode n, String path, Stat stat, long minCzxId, - List targetChildren) { - synchronized (n) { - if (stat != null) { - n.copyStat(stat); - } - for (String child : n.getChildren()) { - DataNode childNode = nodes.get(path + "/" + child); - if (null != childNode) { - final long czxId = childNode.stat.getCzxid(); - - if (czxId < minCzxId) { - // Filter out nodes that are below minCzxId - continue; - } - - Stat childStat = new Stat(); - childNode.copyStat(childStat); - - // Cannot discard before having sorted and removed offset - targetChildren.add(new PathWithStat(child, childStat)); - } - } - } - } - - /* - * totalLength = (PAGINATION_PACKET_CHILD_EXTRA_BYTES + childLength) * numChildren + PAGINATION_PACKET_BASE_BYTES - */ - private int computeChildrenPacketLength(Collection children) { - int length = children.stream().mapToInt(child -> PAGINATION_PACKET_CHILD_EXTRA_BYTES + child.length()).sum(); - return length + PAGINATION_PACKET_BASE_BYTES; - } - - private void updateNextPage(PaginationNextPage nextPage, List children, int lastAddedIndex) { - if (nextPage == null) { - return; - } - if (lastAddedIndex == children.size() - 1) { - // All children are added, so this is the last page - nextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); - nextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageMinCzxidOffset); - return; - } - - // Find the minCzxidOffset next next page by searching the index (startIndex) of czxid - // that is not equal to current czxid. 
// minCzxidOffset of next page = lastAddedIndex - startIndex - long lastCzxid = children.get(lastAddedIndex).getStat().getCzxid(); - int startIndex = lastAddedIndex; - while (startIndex >= 0) { - if (children.get(startIndex).getStat().getCzxid() != lastCzxid) { - break; - } - startIndex--; - } - nextPage.setMinCzxid(lastCzxid); - nextPage.setMinCzxidOffset(lastAddedIndex - startIndex); } public Stat setACL(String path, List acl, int version) throws KeeperException.NoNodeException { diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/SerializeUtils.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/SerializeUtils.java index 85f294c44a1..fcc5c8f4b70 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/SerializeUtils.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/util/SerializeUtils.java @@ -19,14 +19,12 @@ package org.apache.zookeeper.server.util; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import org.apache.jute.BinaryInputArchive; -import org.apache.jute.BinaryOutputArchive; import org.apache.jute.InputArchive; import org.apache.jute.OutputArchive; import org.apache.jute.Record; @@ -186,17 +184,4 @@ public static byte[] serializeRequest(Request request) { return data; } - /** - * Serializes a {@link Record} into a byte array. - * - * @param record the {@link Record} to be serialized - * @return a new byte array - * @throws IOException if there is an error during serialization - */ - public static byte[] serializeRecord(Record record) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(ZooKeeperServer.intBufferStartingSizeBytes); - BinaryOutputArchive bos = BinaryOutputArchive.getArchive(baos); - bos.writeRecord(record, null); - return baos.toByteArray(); - } } diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java index 9e87c7a35b5..f2d353db571 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java @@ -50,6 +50,7 @@ import org.apache.zookeeper.ZKTestCase; import org.apache.zookeeper.ZooDefs; import org.apache.zookeeper.common.PathTrie; +import org.apache.zookeeper.data.PathWithStat; import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.metrics.MetricsUtils; import org.apache.zookeeper.txn.CreateTxn; @@ -326,50 +327,53 @@ public void getChildrenPaginated() throws NodeExistsException, NoNodeException { // Asking from a negative for 5 nodes should return the 5, and not set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, null); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0); assertEquals(5, result.size()); assertEquals("The watch not should have been set", curWatchCount, dt.getWatchCount()); // Verify that the list is sorted String before = ""; - for (final String path : result) { - assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), + for (final PathWithStat s : result) { + final String path = s.getPath(); + assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), 
path.compareTo(before) > 0); before = path; } // Asking from a negative would give me all children, and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0); assertEquals(countNodes, result.size()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted before = ""; - for (final String path : result) { - assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), + for (final PathWithStat s : result) { + final String path = s.getPath(); + assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), path.compareTo(before) > 0); before = path; } - // Asking from the last one should return only one node + // Asking from the last one should return only one node curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0); assertEquals(1, result.size()); - assertEquals("test-" + (countNodes - 1), result.get(0)); + assertEquals("test-" + (countNodes - 1), result.get(0).getPath()); + assertEquals(firstCzxId + countNodes - 1, result.get(0).getStat().getMzxid()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0); assertTrue("The result should be an empty list", result.isEmpty()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from -1 for one node should return two, and NOT set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0); assertEquals("No watch should be set", curWatchCount, dt.getWatchCount()); assertEquals("We only return up to ", 1, result.size()); // Check that we ordered correctly - assertEquals("test-0", result.get(0)); + assertEquals("test-0", result.get(0).getPath()); } @Test(timeout = 60000) @@ -398,58 +402,59 @@ public void getChildrenPaginatedWithOffset() throws NodeExistsException, NoNodeE // Asking from a negative would give me all children, and set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, null); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0); assertEquals(allNodes, result.size()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted String before = ""; - for (final String path : result) { - assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), + for (final PathWithStat s : result) { + final String path = s.getPath(); + assertTrue(String.format("The next path (%s) should be > previous (%s)", path, before), path.compareTo(before) > 0); before = path; } // Asking with offset minCzxId below childrenCzxId 
should not skip anything, regardless of offset curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3); assertEquals(2, result.size()); - assertEquals("test-1", result.get(0)); - assertEquals("test-2", result.get(1)); + assertEquals("test-1", result.get(0).getPath()); + assertEquals("test-2", result.get(1).getPath()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 should skip nodes 1, 2, 3, 4, 5 curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5); assertEquals(2, result.size()); - assertEquals("test-6", result.get(0)); - assertEquals("test-7", result.get(1)); + assertEquals("test-6", result.get(0).getPath()); + assertEquals("test-7", result.get(1).getPath()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 for more nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5); assertEquals(5, result.size()); - assertEquals("test-6", result.get(0)); - assertEquals("test-7", result.get(1)); - assertEquals("test-8", result.get(2)); - assertEquals("test-9", result.get(3)); + assertEquals("test-6", result.get(0).getPath()); + assertEquals("test-7", result.get(1).getPath()); + assertEquals("test-8", result.get(2).getPath()); + assertEquals("test-9", result.get(3).getPath()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking with offset 5 for fewer nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5); assertEquals(4, result.size()); - assertEquals("test-6", result.get(0)); - assertEquals("test-7", result.get(1)); - assertEquals("test-8", result.get(2)); - assertEquals("test-9", result.get(3)); + assertEquals("test-6", result.get(0).getPath()); + assertEquals("test-7", result.get(1).getPath()); + assertEquals("test-8", result.get(2).getPath()); + assertEquals("test-9", result.get(3).getPath()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0); assertTrue("The result should be an empty list", result.isEmpty()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); } @@ -464,7 +469,7 @@ public void getChildrenPaginatedEmpty() throws NodeExistsException, NoNodeExcept // Asking from a negative would give me all children, and set the watch int curWatchCount = dt.getWatchCount(); - List result = 
dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0, null); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0); assertTrue("The result should be empty", result.isEmpty()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); } diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/GetChildrenPaginatedTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/GetChildrenPaginatedTest.java index 06cf970cae5..b9edd72504f 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/GetChildrenPaginatedTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/GetChildrenPaginatedTest.java @@ -20,14 +20,11 @@ import static org.junit.Assert.fail; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Random; -import java.util.Set; import java.util.UUID; -import org.apache.jute.BinaryInputArchive; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.OpResult; @@ -37,6 +34,7 @@ import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.PathWithStat; import org.apache.zookeeper.data.Stat; import org.junit.Assert; import org.junit.Test; @@ -80,22 +78,30 @@ public void testPagination() throws Exception { } long minCzxId = -1; - Set readChildrenMetadata = new HashSet(); + Map readChildrenMetadata = new HashMap(); final int pageSize = 3; - RemoteIterator it = zk.getChildrenIterator(basePath, null, pageSize, minCzxId); + RemoteIterator it = zk.getChildrenIterator(basePath, null, pageSize, minCzxId); while (it.hasNext()) { - final String nodePath = it.next(); + PathWithStat pathWithStat = it.next(); - LOG.info("Read: " + nodePath); - readChildrenMetadata.add(nodePath); + final String nodePath = pathWithStat.getPath(); + final Stat nodeStat = pathWithStat.getStat(); + + LOG.info("Read: " + nodePath + " czxid: " + nodeStat.getCzxid()); + readChildrenMetadata.put(nodePath, nodeStat); + + Assert.assertTrue(nodeStat.getCzxid() > minCzxId); + minCzxId = nodeStat.getCzxid(); - Assert.assertTrue(createdChildrenMetadata.get(nodePath).getCzxid() > minCzxId); - minCzxId = createdChildrenMetadata.get(nodePath).getCzxid(); } - Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata); + Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata.keySet()); + + for (String child : createdChildrenMetadata.keySet()) { + Assert.assertEquals(createdChildrenMetadata.get(child), readChildrenMetadata.get(child)); + } } @Test(timeout = 30000) @@ -106,21 +112,28 @@ public void testPaginationIterator() throws Exception { Map createdChildrenMetadata = createChildren(basePath, random.nextInt(50) + 1, 0); - Set readChildrenMetadata = new HashSet(); + Map readChildrenMetadata = new HashMap(); final int batchSize = random.nextInt(3) + 1; - RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); + RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); while (childrenIterator.hasNext()) { - String nodePath = childrenIterator.next(); + PathWithStat child = childrenIterator.next(); - LOG.info("Read: " + nodePath); - readChildrenMetadata.add(nodePath); + final String nodePath = child.getPath(); + final Stat nodeStat = child.getStat(); + + LOG.info("Read: " 
+ nodePath + " czxid: " + nodeStat.getCzxid()); + readChildrenMetadata.put(nodePath, nodeStat); } - Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata); + Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata.keySet()); + + for (String child : createdChildrenMetadata.keySet()) { + Assert.assertEquals(createdChildrenMetadata.get(child), readChildrenMetadata.get(child)); + } } /* @@ -140,11 +153,11 @@ public void testPaginationWithServerDown() throws Exception { Map createdChildrenMetadata = createChildren(basePath, random.nextInt(15) + 10, 0); - Set readChildrenMetadata = new HashSet(); + Map readChildrenMetadata = new HashMap(); final int batchSize = random.nextInt(3) + 1; - RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); + RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); boolean serverDown = false; @@ -164,7 +177,7 @@ public void testPaginationWithServerDown() throws Exception { } } - String child = null; + PathWithStat child = null; boolean exception = false; try { @@ -178,12 +191,19 @@ public void testPaginationWithServerDown() throws Exception { // next() returned (either more elements in current batch or server is up) Assert.assertNotNull(child); - LOG.info("Read: " + child); - readChildrenMetadata.add(child); + final String nodePath = child.getPath(); + final Stat nodeStat = child.getStat(); + + LOG.info("Read: " + nodePath + " czxid: " + nodeStat.getCzxid()); + readChildrenMetadata.put(nodePath, nodeStat); } } - Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata); + Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata.keySet()); + + for (String child : createdChildrenMetadata.keySet()) { + Assert.assertEquals(createdChildrenMetadata.get(child), readChildrenMetadata.get(child)); + } } @@ -213,7 +233,7 @@ public void testPaginationWatch() throws Exception { FireOnlyOnceWatcher fireOnlyOnceWatcher = new FireOnlyOnceWatcher(); - RemoteIterator it = zk.getChildrenIterator(basePath, fireOnlyOnceWatcher, pageSize, minCzxId); + RemoteIterator it = zk.getChildrenIterator(basePath, fireOnlyOnceWatcher, pageSize, minCzxId); int childrenIndex = 0; @@ -221,9 +241,16 @@ public void testPaginationWatch() throws Exception { ++childrenIndex; - final String nodePath = it.next(); + PathWithStat pathWithStat = it.next(); + + final String nodePath = pathWithStat.getPath(); LOG.info("Read: " + nodePath); + final Stat nodeStat = pathWithStat.getStat(); + + Assert.assertTrue(nodeStat.getCzxid() > minCzxId); + minCzxId = nodeStat.getCzxid(); + // Create more children before pagination is completed -- should NOT trigger watch if (childrenIndex < 6) { String childPath = basePath + "/" + "before-pagination-" + childrenIndex; @@ -233,7 +260,7 @@ public void testPaginationWatch() throws Exception { // Modify the first child of each page. 
// This should not trigger additional watches or create duplicates in the set of children returned if (childrenIndex % pageSize == 0) { - zk.setData(basePath + "/" + nodePath, new byte[3], -1); + zk.setData(basePath + "/" + pathWithStat.getPath(), new byte[3], -1); } synchronized (fireOnlyOnceWatcher) { @@ -278,7 +305,7 @@ public void testPaginationWithNoChildren() throws Exception { final int batchSize = 10; - RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); + RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); Assert.assertFalse(childrenIterator.hasNext()); @@ -333,77 +360,24 @@ public void testPaginationWithMulti() throws Exception { LOG.info("Created: " + childPath + " zkId: " + stat.getCzxid()); } - Set readChildrenMetadata = new HashSet(); + Map readChildrenMetadata = new HashMap(); - RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); + RemoteIterator childrenIterator = zk.getChildrenIterator(basePath, null, batchSize, -1); while (childrenIterator.hasNext()) { - String children = childrenIterator.next(); + PathWithStat children = childrenIterator.next(); - LOG.info("Read: " + children); - readChildrenMetadata.add(children); + LOG.info("Read: " + children.getPath() + " zkId: " + children.getStat().getCzxid()); + readChildrenMetadata.put(children.getPath(), children.getStat()); } Assert.assertEquals(numChildren, readChildrenMetadata.size()); - Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata); - } + Assert.assertEquals(createdChildrenMetadata.keySet(), readChildrenMetadata.keySet()); - /* - * Tests if all children can be fetched in one page, the children are - * in the same order(no sorting by czxid) as the non-paginated result. - */ - @Test(timeout = 60000) - public void testGetAllChildrenPaginatedOnePage() throws KeeperException, InterruptedException { - final String basePath = "/testPagination-" + UUID.randomUUID().toString(); - createChildren(basePath, 100, 0); - - List expected = zk.getChildren(basePath, false); - List actual = zk.getAllChildrenPaginated(basePath, false); - - Assert.assertEquals(expected, actual); - } - - /* - * Tests if all children's packet exceeds jute.maxbuffer, it can still successfully fetch them. - * The packet length computation formula is also tested through this test. Otherwise, it'll - * fail to return all children with the paginated API. - */ - @Test(timeout = 60000) - public void testGetAllChildrenPaginatedMultiPages() throws InterruptedException, KeeperException { - // Get the number of children that would definitely exceed 1 MB. 
- int numChildren = BinaryInputArchive.maxBuffer / UUID.randomUUID().toString().length() + 1; - final String basePath = "/testPagination-" + UUID.randomUUID().toString(); - - zk.create(basePath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - - Set expectedChildren = new HashSet<>(); - - for (int i = 0; i < numChildren; i += 1000) { - Transaction transaction = zk.transaction(); - for (int j = i; j < i + 1000 && j < numChildren; j++) { - String child = UUID.randomUUID().toString(); - String childPath = basePath + "/" + child; - transaction.create(childPath, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - expectedChildren.add(child); - } - transaction.commit(); + for (String child : createdChildrenMetadata.keySet()) { + Assert.assertEquals(createdChildrenMetadata.get(child), readChildrenMetadata.get(child)); } - - try { - zk.getChildren(basePath, false); - Assert.fail("Should not succeed to get children because packet length is out of range"); - } catch (KeeperException.ConnectionLossException expected) { - // ConnectionLossException is expected because packet length exceeds jute.maxbuffer - } - - // Paginated API can successfully fetch all the children with pagination. - // If ConnectionLossException is thrown from this method, it possibly means - // the packet length computing formula in DataTree#getPaginatedChildren needs modification. - List actualChildren = zk.getAllChildrenPaginated(basePath, false); - - Assert.assertEquals(numChildren, actualChildren.size()); - Assert.assertEquals(expectedChildren, new HashSet<>(actualChildren)); } } \ No newline at end of file
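
For reference, a minimal usage sketch of the client-side API this patch reverts to, assuming the signatures exercised in GetChildrenPaginatedTest (getChildrenIterator(path, watcher, batchSize, minCzxid) returning RemoteIterator<PathWithStat>); the connect string, session timeout, target path, and batch size below are placeholders, not part of the patch:

    import org.apache.zookeeper.RemoteIterator;
    import org.apache.zookeeper.ZooKeeper;
    import org.apache.zookeeper.data.PathWithStat;

    public class PaginatedChildrenSketch {
        public static void main(String[] args) throws Exception {
            // Placeholder connection settings; any reachable ensemble works.
            ZooKeeper zk = new ZooKeeper("localhost:2181", 30000, null);
            try {
                // Fetch children of "/app" in batches of 100, starting from minCzxid = -1 (i.e. all children).
                RemoteIterator<PathWithStat> it = zk.getChildrenIterator("/app", null, 100, -1);
                while (it.hasNext()) {
                    PathWithStat child = it.next();
                    // Each element carries the child name plus its Stat; czxid drives the paging order.
                    System.out.println(child.getPath() + " czxid=" + child.getStat().getCzxid());
                }
            } finally {
                zk.close();
            }
        }
    }

As the ChildrenBatchIterator comments above describe, the iterator backfills its queue when only one element remains, so hasNext() can answer from local state and only next() ever performs a server round trip.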