[Iceberg] Add manifest file caching for HMS-based deployments #24481
base: master
New file: HdfsCachedInputFile.java

@@ -0,0 +1,119 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.iceberg;

import com.facebook.airlift.log.Logger;
import org.apache.iceberg.io.ByteBufferInputStream;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.SeekableInputStream;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.io.IOUtil.readRemaining;

public class HdfsCachedInputFile
        implements InputFile
{
    private static final Logger LOG = Logger.get(HdfsCachedInputFile.class);

    private final InputFile delegate;
    private final ManifestFileCacheKey cacheKey;
    private final ManifestFileCache cache;

    public HdfsCachedInputFile(InputFile delegate, ManifestFileCacheKey cacheKey, ManifestFileCache cache)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
        this.cacheKey = requireNonNull(cacheKey, "cacheKey is null");
        this.cache = requireNonNull(cache, "cache is null");
    }

    @Override
    public long getLength()
    {
        ManifestFileCachedContent cachedContent = cache.getIfPresent(cacheKey);
        if (cachedContent != null) {
            return cachedContent.getLength();
        }
        return delegate.getLength();
    }

    @Override
    public SeekableInputStream newStream()
    {
        ManifestFileCachedContent cachedContent = cache.getIfPresent(cacheKey);
        if (cachedContent != null) {
            return ByteBufferInputStream.wrap(cachedContent.getData());
        }

        long fileLength = delegate.getLength();
        if (fileLength <= cache.getMaxFileLength() && cache.isEnabled()) {
            try {
                ManifestFileCachedContent content = readFully(delegate, fileLength, cache.getBufferChunkSize());
                cache.put(cacheKey, content);
                cache.recordFileSize(content.getLength());
                return ByteBufferInputStream.wrap(content.getData());
            }
            catch (IOException e) {
                LOG.warn("Failed to cache input file. Falling back to direct read.", e);
            }
        }

        return delegate.newStream();
    }

    @Override
    public String location()
    {
        return delegate.location();
    }

    @Override
    public boolean exists()
    {
        return cache.getIfPresent(cacheKey) != null || delegate.exists();
    }

    private static ManifestFileCachedContent readFully(InputFile input, long fileLength, long chunkSize)
            throws IOException
    {
        try (SeekableInputStream stream = input.newStream()) {
            long totalBytesToRead = fileLength;
            List<ByteBuffer> buffers = new ArrayList<>(
                    ((int) (fileLength / chunkSize)) +
                    (fileLength % chunkSize == 0 ? 0 : 1));

            while (totalBytesToRead > 0) {
                int bytesToRead = (int) Math.min(chunkSize, totalBytesToRead);
                byte[] buf = new byte[bytesToRead];
                int bytesRead = readRemaining(stream, buf, 0, bytesToRead);
                totalBytesToRead -= bytesRead;

                if (bytesRead < bytesToRead) {
                    throw new IOException(
                            String.format(
                                    "Failed to read %d bytes: %d bytes in stream",
                                    fileLength, fileLength - totalBytesToRead));
                }
                else {
                    buffers.add(ByteBuffer.wrap(buf));
                }
            }
            return new ManifestFileCachedContent(buffers, fileLength);
        }
    }
}
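To make the wrapper's intended use concrete, here is a hedged usage sketch. The diff does not show how ManifestFileCacheKey or the shared ManifestFileCache are constructed, so fileIO, manifestPath, cacheKeyFor, and manifestFileCache below are illustrative assumptions; only HdfsCachedInputFile's own API comes from the code above.

// Illustrative only: how a table-operations layer might wrap a manifest InputFile.
// `fileIO`, `manifestPath`, `cacheKeyFor`, and `manifestFileCache` are assumed names.
InputFile raw = fileIO.newInputFile(manifestPath);
InputFile manifest = new HdfsCachedInputFile(raw, cacheKeyFor(manifestPath), manifestFileCache);

// The first newStream() populates the cache when the file fits under the configured
// max content length; later calls are served from the cached ByteBuffers.
try (SeekableInputStream in = manifest.newStream()) {
    // read Avro-encoded manifest entries from `in`
}

The design point is that callers keep programming against Iceberg's InputFile interface; caching stays transparent, and any cache failure falls back to a direct read of the delegate.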
Changes to IcebergConfig.java:
@@ -67,10 +67,11 @@ public class IcebergConfig

     private EnumSet<ColumnStatisticType> hiveStatisticsMergeFlags = EnumSet.noneOf(ColumnStatisticType.class);
     private String fileIOImpl = HadoopFileIO.class.getName();
-    private boolean manifestCachingEnabled;
+    private boolean manifestCachingEnabled = true;
Review comment on this line: Is this intended?

Author reply: Yes, this is intentional. Performance is significantly worse with caching disabled, and I don't think there are any known downsides to enabling it by default other than an increased memory footprint.
     private long maxManifestCacheSize = IO_MANIFEST_CACHE_MAX_TOTAL_BYTES_DEFAULT;
     private long manifestCacheExpireDuration = IO_MANIFEST_CACHE_EXPIRATION_INTERVAL_MS_DEFAULT;
     private long manifestCacheMaxContentLength = IO_MANIFEST_CACHE_MAX_CONTENT_LENGTH_DEFAULT;
+    private DataSize manifestCacheMaxChunkSize = succinctDataSize(2, MEGABYTE);
     private int splitManagerThreads = Runtime.getRuntime().availableProcessors();
     private DataSize maxStatisticsFileCacheSize = succinctDataSize(256, MEGABYTE);

@@ -348,6 +349,20 @@ public IcebergConfig setManifestCacheMaxContentLength(long manifestCacheMaxContentLength)
         return this;
     }

+    public DataSize getManifestCacheMaxChunkSize()
+    {
+        return manifestCacheMaxChunkSize;
+    }
+
+    @Min(1024)
+    @Config("iceberg.io.manifest.cache.max-chunk-size")
+    @ConfigDescription("Maximum length of a buffer used to cache manifest file content. Only applicable to HIVE catalog.")
+    public IcebergConfig setManifestCacheMaxChunkSize(DataSize manifestCacheMaxChunkSize)
+    {
+        this.manifestCacheMaxChunkSize = manifestCacheMaxChunkSize;
+        return this;
+    }
+
     @Min(0)
     public int getSplitManagerThreads()
     {
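For operators, the new knob would be set in the Iceberg catalog properties file. A hedged example follows: the max-chunk-size property name is taken from the @Config annotation above, while the cache-enabled flag name is an assumption modeled on the existing manifest-cache settings and is not shown in this hunk.

# etc/catalog/iceberg.properties (illustrative)
# Assumed flag name; the enable/disable property is not part of this hunk.
iceberg.io.manifest.cache-enabled=true
# From the @Config annotation above; validated to be at least 1024 bytes, default 2MB.
iceberg.io.manifest.cache.max-chunk-size=4MB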
Review comment: Should we just use Caffeine as the caching library, since iceberg-core already brings it in? It appears to have better performance and is recommended by the Guava team too.
Author reply: I had the same thought. Caching performance would likely improve as well, because eviction decisions in Caffeine use global weight rather than per-segment weight as in Guava. However, most of the Presto codebase uses Guava caches. Since Caffeine and Guava caches are different types, Caffeine would not be compatible with the current infrastructure such as the CacheStatsMBean object. Additionally, we use Guava's SimpleForwardingCache, which is not available in Caffeine, so I would have to roll my own. Not a terrible amount of effort, but I think there's enough work there to push it into a separate PR.
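If that follow-up happens, one low-effort bridge is Caffeine's Guava-compatibility adapter (the com.github.ben-manes.caffeine:guava artifact), which exposes a Caffeine cache behind Guava's com.google.common.cache.Cache interface, so existing wrappers such as CacheStatsMBean could stay unchanged. A sketch under that assumption, not code from this PR; the weight budget and weigher are illustrative:

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.guava.CaffeinatedGuava;
import com.google.common.cache.Cache;

// Build with Caffeine (global-weight eviction), then expose it as a Guava Cache.
Caffeine<ManifestFileCacheKey, ManifestFileCachedContent> builder = Caffeine.newBuilder()
        .maximumWeight(128L * 1024 * 1024)  // illustrative 128MB budget
        .weigher((ManifestFileCacheKey key, ManifestFileCachedContent value) ->
                (int) Math.min(value.getLength(), Integer.MAX_VALUE))
        .recordStats();
Cache<ManifestFileCacheKey, ManifestFileCachedContent> guavaView = CaffeinatedGuava.build(builder);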