Skip to content

Commit 6cf9dd9

Browse files
committed
DataHandle: fix findString(..) and support handles with unknown length
The findString(..) method used to require the exact length of the handle, but that length can be unknown (e.g., for compressed handles). findString(..) no longer depends on the handle's length; it now reads until the underlying stream is exhausted.
1 parent 6c094f7 commit 6cf9dd9

File tree

1 file changed

+27
-29
lines changed

1 file changed

+27
-29
lines changed

Diff for: src/main/java/org/scijava/io/handle/DataHandle.java

+27-29
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
* %%
1010
* Redistribution and use in source and binary forms, with or without
1111
* modification, are permitted provided that the following conditions are met:
12-
*
12+
*
1313
* 1. Redistributions of source code must retain the above copyright notice,
1414
* this list of conditions and the following disclaimer.
1515
* 2. Redistributions in binary form must reproduce the above copyright notice,
1616
* this list of conditions and the following disclaimer in the documentation
1717
* and/or other materials provided with the distribution.
18-
*
18+
*
1919
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2020
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2121
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -48,7 +48,7 @@
4848
/**
4949
* A <em>data handle</em> is a plugin which provides both streaming and random
5050
* access to bytes at a {@link Location} (e.g., files or arrays).
51-
*
51+
*
5252
* @author Curtis Rueden
5353
* @see DataHandleInputStream
5454
* @see DataHandleOutputStream
@@ -58,7 +58,7 @@ public interface DataHandle<L extends Location> extends WrapperPlugin<L>,
5858
{
5959

6060
public enum ByteOrder {
61-
LITTLE_ENDIAN, BIG_ENDIAN
61+
LITTLE_ENDIAN, BIG_ENDIAN
6262
}
6363

6464
/** Default block size to use when searching through the stream. */
@@ -75,15 +75,15 @@ public enum ByteOrder {
7575

7676
/**
7777
* Tests whether this handle's location actually exists at the source.
78-
*
78+
*
7979
* @return True if the location exists; false if not.
8080
* @throws IOException If something goes wrong with the existence check.
8181
*/
8282
boolean exists() throws IOException;
8383

8484
/**
8585
* Gets the last modified timestamp of the location.
86-
*
86+
*
8787
* @return The last modified timestamp, or null if the handle does not support
8888
* this feature or if the location does not exist.
8989
* @throws IOException If something goes wrong with the last modified check.
@@ -107,7 +107,7 @@ default Date lastModified() throws IOException {
107107
* the checksum is still the same, the contents are highly likely to be
108108
* unchanged.
109109
* </p>
110-
*
110+
*
111111
* @return The checksum, or null if the handle does not support this feature.
112112
* @throws IOException If something goes wrong when accessing the checksum.
113113
*/
@@ -126,22 +126,22 @@ default String checksum() throws IOException {
126126

127127
/**
128128
* Returns the length of the data in bytes.
129-
*
129+
*
130130
* @return The length, or -1 if the length is unknown.
131131
*/
132132
long length() throws IOException;
133133

134134
/**
135135
* Sets the new length of the handle.
136-
*
136+
*
137137
* @param length New length.
138138
* @throws IOException If there is an error changing the handle's length.
139139
*/
140140
void setLength(long length) throws IOException;
141141

142142
/**
143-
* Gets the number of bytes which can be read from, or written to, the
144-
* data handle, bounded by the specified number of bytes.
143+
* Gets the number of bytes which can be read from, or written to, the data
144+
* handle, bounded by the specified number of bytes.
145145
* <p>
146146
* In the case of reading, attempting to read the returned number of bytes is
147147
* guaranteed not to throw {@link EOFException}. However, be aware that the
@@ -161,8 +161,8 @@ default String checksum() throws IOException {
161161
* </p>
162162
*
163163
* @param count Desired number of bytes to read/write.
164-
* @return The actual number of bytes which could be read/written,
165-
* which might be less than the requested value.
164+
* @return The actual number of bytes which could be read/written, which might
165+
* be less than the requested value.
166166
* @throws IOException If something goes wrong with the check.
167167
*/
168168
default long available(final long count) throws IOException {
@@ -172,7 +172,7 @@ default long available(final long count) throws IOException {
172172

173173
/**
174174
* Ensures that the handle has sufficient bytes available to read.
175-
*
175+
*
176176
* @param count Number of bytes to read.
177177
* @see #available(long)
178178
* @throws EOFException If there are insufficient bytes available.
@@ -187,7 +187,7 @@ default void ensureReadable(final long count) throws IOException {
187187
/**
188188
* Ensures that the handle has the correct length to be written to, and
189189
* extends it as required.
190-
*
190+
*
191191
* @param count Number of bytes to write.
192192
* @return {@code true} if the handle's length was sufficient, or
193193
* {@code false} if the handle's length required an extension.
@@ -271,7 +271,7 @@ default String readString(final int n) throws IOException {
271271

272272
/**
273273
* Reads a string ending with one of the characters in the given string.
274-
*
274+
*
275275
* @see #findString(String...)
276276
*/
277277
default String readString(final String lastChars) throws IOException {
@@ -285,7 +285,7 @@ default String readString(final String lastChars) throws IOException {
285285

286286
/**
287287
* Reads a string ending with one of the given terminating substrings.
288-
*
288+
*
289289
* @param terminators The strings for which to search.
290290
* @return The string from the initial position through the end of the
291291
* terminating sequence, or through the end of the stream if no
@@ -350,10 +350,7 @@ default String findString(final boolean saveString, final int blockSize,
350350
final StringBuilder out = new StringBuilder();
351351
final long startPos = offset();
352352
long bytesDropped = 0;
353-
final long inputLen = length();
354-
long maxLen = inputLen - startPos;
355-
final boolean tooLong = saveString && maxLen > MAX_SEARCH_SIZE;
356-
if (tooLong) maxLen = MAX_SEARCH_SIZE;
353+
final long maxLen = saveString ? MAX_SEARCH_SIZE : Long.MAX_VALUE;
357354
boolean match = false;
358355
int maxTermLen = 0;
359356
for (final String term : terminators) {
@@ -366,7 +363,10 @@ default String findString(final boolean saveString, final int blockSize,
366363
new DataHandleInputStream<>(this), getEncoding());
367364
final char[] buf = new char[blockSize];
368365
long loc = 0;
369-
while (loc < maxLen && offset() < length() - 1) {
366+
int r = 0;
367+
368+
// NB: we need at least 2 bytes to read a char
369+
while (loc < maxLen && ((r = in.read(buf, 0, blockSize)) > 1)) {
370370
// if we're not saving the string, drop any old, unnecessary output
371371
if (!saveString) {
372372
final int outLen = out.length();
@@ -378,16 +378,12 @@ default String findString(final boolean saveString, final int blockSize,
378378
bytesDropped += dropIndex;
379379
}
380380
}
381-
382-
// read block from stream
383-
final int r = in.read(buf, 0, blockSize);
384-
if (r <= 0) throw new IOException("Cannot read from stream: " + r);
385-
386381
// append block to output
387382
out.append(buf, 0, r);
388383

389384
// check output, returning smallest possible string
390-
int min = Integer.MAX_VALUE, tagLen = 0;
385+
int min = Integer.MAX_VALUE;
386+
int tagLen = 0;
391387
for (final String t : terminators) {
392388
final int len = t.length();
393389
final int start = (int) (loc - bytesDropped - len);
@@ -415,7 +411,9 @@ default String findString(final boolean saveString, final int blockSize,
415411
}
416412

417413
// no match
418-
if (tooLong) throw new IOException("Maximum search length reached.");
414+
if (loc > MAX_SEARCH_SIZE) {
415+
throw new IOException("Maximum search length reached.");
416+
}
419417
return saveString ? out.toString() : null;
420418
}
421419

0 commit comments

Comments
 (0)