Skip to content

Commit

Permalink
Changes to enable update of H2 to version 2.2.x
Browse files Browse the repository at this point in the history
Update H2 version 1.4.200 -> 2.2.224
Update Flyway version 8.2.1 -> 10.6.0 to support H2 update
Add new ProtobufDataType
Update ClinVarDataType, AlleleKeyDataType and AllelePropertiesDataType to extend ProtobufDataType
Delete static data from exomiser-spring-boot-autoconfigure/test/resources
Add new TestDataDirectories class to dynamically create test data for the exomiser-spring-boot-autoconfigure module to allow for simpler updating of H2
  • Loading branch information
julesjacobsen committed Jan 31, 2024
1 parent 65a8556 commit 1ae7df8
Show file tree
Hide file tree
Showing 39 changed files with 410 additions and 225 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,23 @@

package org.monarchinitiative.exomiser.core.genome.dao.serialisers;

import com.google.protobuf.InvalidProtocolBufferException;
import org.h2.mvstore.DataUtils;
import org.h2.mvstore.WriteBuffer;
import com.google.protobuf.Parser;
import org.h2.mvstore.type.DataType;
import org.h2.util.Utils;
import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleKey;

import java.nio.ByteBuffer;

/**
* Specialised {@link DataType} for (de)serialising {@link AlleleKey} objects into and out of
* the {@link org.h2.mvstore.MVStore}.
*
* @author Jules Jacobsen <[email protected]>
*/
public class AlleleKeyDataType implements DataType {
public class AlleleKeyDataType extends ProtobufDataType<AlleleKey> {

public static final AlleleKeyDataType INSTANCE = new AlleleKeyDataType();

private AlleleKeyDataType() {
}

/**
* Sorts variants by their natural genomic ordering. Keys are compared by chromosome number, then by
* chromosome position, then by reference sequence and finally by alternative sequence.
Expand All @@ -47,59 +45,26 @@ public class AlleleKeyDataType implements DataType {
* @return comparator score consistent with equals.
*/
@Override
public int compare(Object a, Object b) {
AlleleKey keyA = (AlleleKey) a;
AlleleKey keyB = (AlleleKey) b;

if (keyA.getChr() != keyB.getChr()) {
return Integer.compare(keyA.getChr(), keyB.getChr());
public int compare(AlleleKey a, AlleleKey b) {
if (a.getChr() != b.getChr()) {
return Integer.compare(a.getChr(), b.getChr());
}
if (keyA.getPosition() != keyB.getPosition()) {
return Integer.compare(keyA.getPosition(), keyB.getPosition());
if (a.getPosition() != b.getPosition()) {
return Integer.compare(a.getPosition(), b.getPosition());
}
if (!keyA.getRef().equals(keyB.getRef())) {
return keyA.getRef().compareTo(keyB.getRef());
if (!a.getRef().equals(b.getRef())) {
return a.getRef().compareTo(b.getRef());
}
return keyA.getAlt().compareTo(keyB.getAlt());
return a.getAlt().compareTo(b.getAlt());
}

@Override
public int getMemory(Object obj) {
AlleleKey key = (AlleleKey) obj;
return key.getSerializedSize();
public Parser<AlleleKey> messageParser() {
return AlleleKey.parser();
}

@Override
public void read(ByteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
obj[i] = read(buff);
}
}

@Override
public void write(WriteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
write(buff, obj[i]);
}
public AlleleKey[] createStorage(int size) {
return new AlleleKey[size];
}

@Override
public AlleleKey read(ByteBuffer buff) {
int len = DataUtils.readVarInt(buff);
byte[] data = Utils.newBytes(len);
buff.get(data);
try {
return AlleleKey.parseFrom(data);
} catch (InvalidProtocolBufferException e) {
throw new InvalidAlleleProtoException(e);
}
}

@Override
public void write(WriteBuffer buff, Object obj) {
AlleleKey key = (AlleleKey) obj;
byte[] data = key.toByteArray();
buff.putVarInt(data.length).put(data);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,66 +20,38 @@

package org.monarchinitiative.exomiser.core.genome.dao.serialisers;

import com.google.protobuf.InvalidProtocolBufferException;
import org.h2.mvstore.DataUtils;
import org.h2.mvstore.WriteBuffer;
import com.google.protobuf.Parser;
import org.h2.mvstore.type.DataType;
import org.h2.util.Utils;
import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleProperties;

import java.nio.ByteBuffer;

/**
* Specialised {@link DataType} for (de)serialising {@link AlleleProperties} objects into and out of
* the {@link org.h2.mvstore.MVStore}.
*
* @author Jules Jacobsen <[email protected]>
*/
public class AllelePropertiesDataType implements DataType {
public class AllelePropertiesDataType extends ProtobufDataType<AlleleProperties> {

public static final AllelePropertiesDataType INSTANCE = new AllelePropertiesDataType();

@Override
public int compare(Object a, Object b) {
return -1;
}

@Override
public int getMemory(Object obj) {
AlleleProperties props = (AlleleProperties) obj;
return props.getSerializedSize();
}

@Override
public void read(ByteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
obj[i] = read(buff);
}
private AllelePropertiesDataType() {
}

@Override
public AlleleProperties read(ByteBuffer buff) {
int len = DataUtils.readVarInt(buff);
byte[] data = Utils.newBytes(len);
buff.get(data);
try {
return AlleleProperties.parseFrom(data);
} catch (InvalidProtocolBufferException e) {
throw new InvalidAlleleProtoException(e);
public int compare(AlleleProperties a, AlleleProperties b) {
if (a.equals(b)) {
return 0;
}
throw new UnsupportedOperationException("Unable to compare " + a + " with " + b);
}

@Override
public void write(WriteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
write(buff, obj[i]);
}
public AlleleProperties[] createStorage(int size) {
return new AlleleProperties[size];
}

@Override
public void write(WriteBuffer buff, Object obj) {
AlleleProperties props = (AlleleProperties) obj;
byte[] data = props.toByteArray();
buff.putVarInt(data.length).put(data);
public Parser<AlleleProperties> messageParser() {
return AlleleProperties.parser();
}
}
Original file line number Diff line number Diff line change
@@ -1,60 +1,28 @@
package org.monarchinitiative.exomiser.core.genome.dao.serialisers;

import com.google.protobuf.InvalidProtocolBufferException;
import org.h2.mvstore.DataUtils;
import org.h2.mvstore.WriteBuffer;
import org.h2.mvstore.type.DataType;
import org.h2.util.Utils;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import com.google.protobuf.Parser;
import org.monarchinitiative.exomiser.core.proto.AlleleProto.ClinVar;

import java.nio.ByteBuffer;

public class ClinVarDataType extends ProtobufDataType<ClinVar> {

public enum ClinVarDataType implements DataType {

INSTANCE;

@Override
public int compare(Object a, Object b) {
return -1;
}

@Override
public int getMemory(Object obj) {
AlleleProto.ClinVar clinVar = (AlleleProto.ClinVar) obj;
return clinVar.getSerializedSize();
}

@Override
public void read(ByteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
obj[i] = read(buff);
}
}
public static final ClinVarDataType INSTANCE = new ClinVarDataType();

@Override
public AlleleProto.ClinVar read(ByteBuffer buff) {
int len = DataUtils.readVarInt(buff);
byte[] data = Utils.newBytes(len);
buff.get(data);
try {
return AlleleProto.ClinVar.parseFrom(data);
} catch (InvalidProtocolBufferException e) {
throw new InvalidAlleleProtoException(e);
public int compare(ClinVar a, ClinVar b) {
if (a.equals(b)) {
return 0;
}
throw new UnsupportedOperationException("Unable to compare " + a + " with " + b);
}

@Override
public void write(WriteBuffer buff, Object[] obj, int len, boolean key) {
for (int i = 0; i < len; i++) {
write(buff, obj[i]);
}
public ClinVar[] createStorage(int size) {
return new ClinVar[size];
}

@Override
public void write(WriteBuffer buff, Object obj) {
AlleleProto.ClinVar clinVar = (AlleleProto.ClinVar) obj;
byte[] data = clinVar.toByteArray();
buff.putVarInt(data.length).put(data);
public Parser<ClinVar> messageParser() {
return ClinVar.parser();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.monarchinitiative.exomiser.core.genome.dao.serialisers;

import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Message;
import com.google.protobuf.Parser;
import org.h2.mvstore.DataUtils;
import org.h2.mvstore.WriteBuffer;
import org.h2.mvstore.type.BasicDataType;
import org.h2.util.Utils;

import java.nio.ByteBuffer;

/**
 * Base {@link BasicDataType} for protobuf {@link Message} types. Each value is stored as a
 * variable-length integer holding the byte count, followed by the message's serialised bytes.
 * <p>
 * Subclasses provide the protobuf {@link Parser} for their message type via
 * {@link #messageParser()}.
 *
 * @param <T> the protobuf message type handled by this data type
 */
public abstract class ProtobufDataType<T extends Message> extends BasicDataType<T> {

    /**
     * Supplies the parser used by {@link #read(ByteBuffer)} to rebuild a message from its bytes.
     *
     * @return the protobuf parser for messages of type {@code T}
     */
    public abstract Parser<T> messageParser();

    /**
     * Reports the memory estimate for a message as its protobuf serialised size.
     *
     * @param obj the message to measure
     * @return the serialised size of {@code obj} in bytes
     */
    @Override
    public int getMemory(T obj) {
        return obj.getSerializedSize();
    }

    /**
     * Reads one length-prefixed message from the buffer.
     *
     * @param buff the buffer positioned at the start of the length prefix
     * @return the parsed message
     * @throws InvalidAlleleProtoException if the bytes cannot be parsed as a {@code T} message
     */
    @Override
    public T read(ByteBuffer buff) {
        // The var-int prefix says how many bytes belong to this message.
        final int length = DataUtils.readVarInt(buff);
        final byte[] serialised = Utils.newBytes(length);
        buff.get(serialised);
        try {
            return messageParser().parseFrom(serialised);
        } catch (InvalidProtocolBufferException e) {
            throw new InvalidAlleleProtoException(e);
        }
    }

    /**
     * Writes the message as a var-int byte count followed by its serialised bytes.
     *
     * @param buff the buffer to append to
     * @param obj  the message to serialise
     */
    @Override
    public void write(WriteBuffer buff, T obj) {
        final byte[] serialised = obj.toByteArray();
        buff.putVarInt(serialised.length);
        buff.put(serialised);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -240,18 +240,18 @@ void manualDataExplorer() {
// https://mart.ensembl.org/info/genome/genebuild/canonical.html (see also vitt). Ideally the Jannovar Annotations
// should be sorted before being converted to TranscriptAnnotations. This isn't an issue if MANE only
// transcripts are being used as these are the only ones available to report on.
GenomicVariant genomicVariant = parseVariant("10-123256215-T-G"); // 10-123256215-T-G
GenomicVariant genomicVariant = parseVariant("10-123256215-T-ACG"); // 10-123256215-T-G hg38:10-121496701-T-G

System.out.println("Searching for: " + toBroad(genomicVariant));
// encode as VariantKey (https://doi.org/10.1101/473744) == 8 bytes fixed size (long);
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(genomicVariant);
// encode as VariantKey (https://doi.org/10.1101/473744) == 8 bytes fixed size (long);
System.out.println("AlleleKey size (bytes): " + alleleKey.getSerializedSize()); // SNP = 13 bytes, 11 bases = 23
System.out.println();
AlleleProto.AlleleProperties alleleProperties = allelePropertiesDao.getAlleleProperties(alleleKey, GenomeAssembly.HG19);

System.out.println(clinVarDao.getClinVarData(genomicVariant));
Map<GenomicVariant, ClinVarData> clinVarRecordsOverlappingInterval = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
clinVarRecordsOverlappingInterval.forEach((variant, clinVarData) -> {System.out.println(toBroad(variant) + " : " + clinVarData);});
clinVarRecordsOverlappingInterval.forEach((variant, clinVarData) -> System.out.println(toBroad(variant) + " : " + clinVarData));
System.out.println(AlleleProtoAdaptor.toFrequencyData(alleleProperties));
PathogenicityData pathogenicityData = AlleleProtoAdaptor.toPathogenicityData(alleleProperties);
System.out.println(pathogenicityData.getPredictedPathogenicityScores());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
package org.monarchinitiative.exomiser.core.genome.dao;

import org.h2.mvstore.MVStore;
import org.h2.mvstore.db.SpatialKey;
import org.h2.mvstore.rtree.MVRTreeMap;
import org.h2.mvstore.rtree.SpatialKey;
import org.h2.mvstore.rtree.Spatial;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
Expand All @@ -40,7 +41,6 @@
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit.jupiter.SpringExtension;

import java.util.Iterator;
import java.util.List;

import static org.hamcrest.MatcherAssert.assertThat;
Expand Down Expand Up @@ -219,12 +219,11 @@ void mvStoreRtree() {
System.out.println("margin: " + margin);
System.out.println("Searching chr" + region.contigId() + " from " + (region.start() - margin) + " to " + (region
.end() + margin));
// iterate over the intersecting keys
Iterator<SpatialKey> it =
MVRTreeMap.RTreeCursor<SvFrequencyDao.SvResult> it =
// r.findContainedKeys(new SpatialKey(0, 0f, 9f, 3f, 6f));
r.findIntersectingKeys(new SpatialKey(0, boundaryCalculator.startMin(), boundaryCalculator.endMax(), region.contigId(), region.contigId()));
while (it.hasNext()) {
SpatialKey k = it.next();
Spatial k = it.next();
SvFrequencyDao.SvResult svResult = r.get(k);
System.out.println(k + ": " + svResult + ", simJ=" + SvDaoUtil.jaccard(region, svResult));
}
Expand Down
Loading

0 comments on commit 1ae7df8

Please sign in to comment.