Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cellbase-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>5.8.4</version>
<version>5.8.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,14 +411,14 @@ private void loadVariationData() throws NoSuchMethodException, InterruptedExcept
if (dbSnpFilePath.toFile().exists()) {
if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) {
logger.info("Loading dbSNP file '{}'", dbSnpFilePath);
loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease);
loadRunner.load(dbSnpFilePath, SNP_DATA, dataRelease);

// Create index
createIndex(SNP_COLLECTION_NAME);
createIndex(SNP_DATA);

// Update release (collection and sources)
List<Path> sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME));
dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources);
dataReleaseManager.update(dataRelease, SNP_DATA, EtlCommons.VARIATION_DATA, sources);
} else {
logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME,
variationPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public ValidationCommandExecutor(AdminCliOptionsParser.ValidationCommandOptions
}

@Override
public void execute() {
public void execute() throws CellBaseException {
checkFilesExist();

VariantAnnotationCalculator variantAnnotationCalculator;
Expand Down
2 changes: 1 addition & 1 deletion cellbase-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>5.8.4</version>
<version>5.8.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion cellbase-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>5.8.4</version>
<version>5.8.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion cellbase-lib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.opencb.cellbase</groupId>
<artifactId>cellbase</artifactId>
<version>5.8.4</version>
<version>5.8.5-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class EtlCommons {
public static final String HOMO_SAPIENS_NAME ="Homo sapiens";

public static final String GENOME_DATA = "genome";
public static final String GENOME_SEQUENCE_DATA = "genome_sequence";
public static final String GENE_DATA = "gene";
public static final String REFSEQ_DATA = "refseq";
public static final String GENE_DISEASE_ASSOCIATION_DATA = "gene_disease_association";
Expand Down Expand Up @@ -69,7 +70,7 @@ public class EtlCommons {
public static final String DBSNP_FILE = "GCF_000001405.40.gz";
public static final String DBSNP_NAME = "dbSNP";
public static final String DBSNP_VERSION_FILENAME = DBSNP_NAME + "Version.json";
public static final String SNP_COLLECTION_NAME = "snp";
public static final String SNP_DATA = "snp";

public static final String STRUCTURAL_VARIANTS_DATA = "svs";
public static final String REPEATS_DATA = "repeats";
Expand All @@ -79,6 +80,8 @@ public class EtlCommons {
public static final String DOID_FILE = "doid.obo";
public static final String PFM_DATA = "regulatory_pfm";

public static final String REGULATORY_REGION_DATA = "regulatory_region";

// Build specific data options
public static final String GENOME_INFO_DATA = "genome_info";
public static final String DISGENET_DATA = "disgenet";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,76 +16,25 @@

package org.opencb.cellbase.lib.impl.core;

import org.apache.commons.collections4.CollectionUtils;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.lib.impl.core.singleton.DataReleaseSingleton;
import org.opencb.commons.datastore.mongodb.MongoDBCollection;
import org.opencb.commons.datastore.mongodb.MongoDataStore;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class CellBaseDBAdaptor extends MongoDBAdaptor {

protected List<DataRelease> dataReleases;
protected Map<Integer, MongoDBCollection> mongoDBCollectionByRelease;

public static final String DATA_RELEASE_SEPARATOR = "__v";

public static String buildCollectionName(String data, int release) {
String name = data + DATA_RELEASE_SEPARATOR + release;
return name;
}

public Map<Integer, MongoDBCollection> buildCollectionByReleaseMap(String data) {
Map<Integer, MongoDBCollection> collectionMap = new HashMap<>();
if (CollectionUtils.isNotEmpty(dataReleases)) {
for (DataRelease dataRelease : dataReleases) {
if (dataRelease.getCollections().containsKey(data)) {
String collectionName = dataRelease.getCollections().get(data);
collectionMap.put(dataRelease.getRelease(), mongoDataStore.getCollection(collectionName));
}
}
} else {
// For backward compatibility (i.e., in case data_release collection is missing)
collectionMap.put(0, mongoDataStore.getCollection(data));
}

return collectionMap;
}

public MongoDBCollection getCollectionByRelease(Map<Integer, MongoDBCollection> collectionMap, Integer dataRelease)
throws CellBaseException {
int release = dataRelease == null ? 0 : dataRelease;
if (!collectionMap.containsKey(release)) {
// If the data release is invalid, throw an exception
String msg = "Data not found in release " + release + ". " + collectionMap.toString();
logger.error(msg);
throw new CellBaseException(msg);
}
return collectionMap.get(release);
}

public CellBaseDBAdaptor(MongoDataStore mongoDataStore) {
super(mongoDataStore);
this.dataReleases = new ReleaseMongoDBAdaptor(mongoDataStore).getAll().getResults();
}

@Override
public String toString() {
final StringBuilder sb = new StringBuilder("CellBaseDBAdaptor{");
sb.append("dataRelease=").append(dataReleases);
sb.append('}');
return sb.toString();
}

public List<DataRelease> getDataReleases() {
return dataReleases;
public static String buildCollectionName(String data, int release) {
String name = data + DATA_RELEASE_SEPARATOR + release;
return name;
}

public CellBaseDBAdaptor setDataReleases(List<DataRelease> dataReleases) {
this.dataReleases = dataReleases;
return this;
public MongoDBCollection getMongoDBCollection(String data, int release) throws CellBaseException {
return DataReleaseSingleton.getInstance().getMongoDBCollection(mongoDataStore.getDatabaseName(), data, release);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.util.function.Consumer;

import static org.opencb.cellbase.core.ParamConstants.DATA_RELEASE_PARAM;
import static org.opencb.cellbase.lib.EtlCommons.CLINICAL_VARIANTS_DATA;

/**
* Created by fjlopez on 06/12/16.
Expand All @@ -65,14 +66,6 @@ public ClinicalMongoDBAdaptor(MongoDataStore mongoDataStore, GenomeManager genom
super(mongoDataStore);

this.genomeManager = genomeManager;

init();
}

private void init() {
logger.debug("ClinicalMongoDBAdaptor: in 'constructor'");

mongoDBCollectionByRelease = buildCollectionByReleaseMap("clinical_variants");
}

public CellBaseDataResult<Variant> next(Query query, QueryOptions options) {
Expand Down Expand Up @@ -103,16 +96,14 @@ public CellBaseDataResult getIntervalFrequencies(Query query, int intervalSize,
public CellBaseDataResult<Long> count(Query query) throws CellBaseException {
Bson bson = parseQuery(query);

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease,
(Integer) query.getOrDefault(DATA_RELEASE_PARAM, 0));
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getInt(DATA_RELEASE_PARAM));
return new CellBaseDataResult<>(mongoDBCollection.count(bson));
}

public CellBaseDataResult distinct(Query query, String field) throws CellBaseException {
Bson bson = parseQuery(query);

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease,
(Integer) query.getOrDefault(DATA_RELEASE_PARAM, 0));
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getInt(DATA_RELEASE_PARAM));
return new CellBaseDataResult<>(mongoDBCollection.distinct(field, bson));
}

Expand All @@ -128,8 +119,7 @@ public CellBaseDataResult<Variant> get(Query query, QueryOptions options) throws
logger.debug("query: {}", bson.toBsonDocument().toJson());
logger.debug("queryOptions: {}", options.toJson());

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease,
(Integer) query.getOrDefault(DATA_RELEASE_PARAM, 0));
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getInt(DATA_RELEASE_PARAM));
return new CellBaseDataResult<>(mongoDBCollection.find(bson, null, Variant.class, parsedOptions));
}

Expand All @@ -140,8 +130,7 @@ public CellBaseDataResult nativeGet(Query query, QueryOptions options) throws Ce
logger.debug("query: {}", bson.toBsonDocument().toJson());
logger.debug("queryOptions: {}", options.toJson());

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease,
(Integer) query.getOrDefault(DATA_RELEASE_PARAM, 0));
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getInt(DATA_RELEASE_PARAM));
return new CellBaseDataResult<>(mongoDBCollection.find(bson, parsedOptions));
}

Expand All @@ -152,8 +141,7 @@ public Iterator<Variant> iterator(Query query, QueryOptions options) {
public Iterator nativeIterator(Query query, QueryOptions options) throws CellBaseException {
Bson bson = parseQuery(query);

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease,
(Integer) query.getOrDefault(DATA_RELEASE_PARAM, 0));
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getInt(DATA_RELEASE_PARAM));
return mongoDBCollection.nativeQuery().find(bson, options);
}

Expand Down Expand Up @@ -355,7 +343,7 @@ private CellBaseDataResult getClinvarPhenotypeGeneRelations(QueryOptions queryOp
fields.put("associatedGenes", 1);
pipeline.add(new Document("$project", fields));

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, dataRelease);
return executeAggregation2("", pipeline, queryOptions, mongoDBCollection);

}
Expand All @@ -377,7 +365,7 @@ private CellBaseDataResult getGwasPhenotypeGeneRelations(QueryOptions queryOptio
fields.put("associatedGenes", 1);
pipeline.add(new Document("$project", fields));

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, dataRelease);
return executeAggregation2("", pipeline, queryOptions, mongoDBCollection);
}

Expand Down Expand Up @@ -466,7 +454,7 @@ public CellBaseIterator iterator(ClinicalVariantQuery query) throws CellBaseExce
Bson projection = getProjection(query);
GenericDocumentComplexConverter<Variant> converter = new GenericDocumentComplexConverter<>(Variant.class);

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease());
MongoDBCollection mongoDBCollection = getMongoDBCollection(CLINICAL_VARIANTS_DATA, query.getDataRelease());
MongoDBIterator<Variant> iterator = mongoDBCollection.iterator(null, bson, projection, converter, queryOptions);
return new CellBaseMongoDBIterator<>(iterator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.opencb.cellbase.core.api.query.ProjectionQueryOptions;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.cellbase.lib.MongoDBCollectionConfiguration;
import org.opencb.cellbase.lib.iterator.CellBaseIterator;
import org.opencb.cellbase.lib.iterator.CellBaseMongoDBIterator;
Expand All @@ -43,13 +44,15 @@
import java.util.*;
import java.util.regex.Pattern;

import static org.opencb.cellbase.lib.EtlCommons.GENE_DATA;

/**
* Created by imedina on 25/11/15.
*/
public class GeneMongoDBAdaptor extends CellBaseDBAdaptor implements CellBaseCoreDBAdaptor<GeneQuery, Gene> {

private static final Set<String> CONSTRAINT_NAMES = new HashSet<>();
private Map<Integer, MongoDBCollection> refseqCollectionByRelease;
// private Map<Integer, MongoDBCollection> refseqCollectionByRelease;

private static final GenericDocumentComplexConverter<Gene> CONVERTER;

Expand All @@ -66,15 +69,15 @@ public class GeneMongoDBAdaptor extends CellBaseDBAdaptor implements CellBaseCor
public GeneMongoDBAdaptor(MongoDataStore mongoDataStore) {
super(mongoDataStore);

this.init();
// this.init();
}

private void init() {
mongoDBCollectionByRelease = buildCollectionByReleaseMap("gene");
refseqCollectionByRelease = buildCollectionByReleaseMap("refseq");

logger.debug("GeneMongoDBAdaptor initialised");
}
// private void init() {
// mongoDBCollectionByRelease = buildCollectionByReleaseMap("gene");
// refseqCollectionByRelease = buildCollectionByReleaseMap("refseq");
//
// logger.debug("GeneMongoDBAdaptor initialised");
// }

@Override
public CellBaseDataResult<Gene> aggregationStats(GeneQuery query) {
Expand All @@ -97,10 +100,10 @@ public List<CellBaseDataResult<Gene>> info(List<String> ids, ProjectionQueryOpti
orBsonList.add(Filters.eq("name", id));
Bson query = Filters.or(orBsonList);
if (StringUtils.isEmpty(source) || ParamConstants.QueryParams.ENSEMBL.key().equalsIgnoreCase(source)) {
MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, dataRelease);
results.add(new CellBaseDataResult<>(mongoDBCollection.find(query, projection, CONVERTER, new QueryOptions())));
} else {
MongoDBCollection mongoDBCollection = getCollectionByRelease(refseqCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(EtlCommons.REFSEQ_DATA, dataRelease);
results.add(new CellBaseDataResult<>(mongoDBCollection.find(query, projection, CONVERTER, new QueryOptions())));
}
}
Expand All @@ -115,10 +118,10 @@ public CellBaseIterator<Gene> iterator(GeneQuery query) throws CellBaseException
MongoDBIterator<Gene> iterator;
if (query.getSource() != null && !query.getSource().isEmpty() && ParamConstants.QueryParams.REFSEQ.key()
.equalsIgnoreCase(query.getSource().get(0))) {
MongoDBCollection mongoDBCollection = getCollectionByRelease(refseqCollectionByRelease, query.getDataRelease());
MongoDBCollection mongoDBCollection = getMongoDBCollection(EtlCommons.REFSEQ_DATA, query.getDataRelease());
iterator = mongoDBCollection.iterator(null, bson, projection, CONVERTER, queryOptions);
} else {
MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, query.getDataRelease());
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, query.getDataRelease());
iterator = mongoDBCollection.iterator(null, bson, projection, CONVERTER, queryOptions);
}
return new CellBaseMongoDBIterator<>(iterator);
Expand All @@ -127,15 +130,15 @@ public CellBaseIterator<Gene> iterator(GeneQuery query) throws CellBaseException
@Override
public CellBaseDataResult<String> distinct(GeneQuery geneQuery) throws CellBaseException {
Bson bsonDocument = parseQuery(geneQuery);
MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, geneQuery.getDataRelease());
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, geneQuery.getDataRelease());
return new CellBaseDataResult<>(mongoDBCollection.distinct(geneQuery.getFacet(), bsonDocument, String.class));
}

@Override
public CellBaseDataResult<Gene> groupBy(GeneQuery geneQuery) throws CellBaseException {
Bson bsonQuery = parseQuery(geneQuery);
logger.info("geneQuery: {}", bsonQuery.toBsonDocument().toJson());
MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, geneQuery.getDataRelease());
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, geneQuery.getDataRelease());
return groupBy(bsonQuery, geneQuery, "name", mongoDBCollection);
}

Expand All @@ -157,7 +160,7 @@ public CellBaseDataResult<Gene> startsWith(String id, QueryOptions options, int
projection = Projections.exclude("transcripts", "annotation");
}
}
MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, dataRelease);
return new CellBaseDataResult<>(mongoDBCollection.find(regex, projection, CONVERTER, options));
}

Expand Down Expand Up @@ -355,7 +358,7 @@ public CellBaseDataResult<TranscriptTfbs> getTfbs(String geneId, QueryOptions qu
List<Bson> pipeline = unwindAndMatchTranscripts(query, queryOptions);
GenericDocumentComplexConverter<TranscriptTfbs> converter = new GenericDocumentComplexConverter<>(TranscriptTfbs.class);

MongoDBCollection mongoDBCollection = getCollectionByRelease(mongoDBCollectionByRelease, dataRelease);
MongoDBCollection mongoDBCollection = getMongoDBCollection(GENE_DATA, dataRelease);
MongoDBIterator<TranscriptTfbs> iterator = mongoDBCollection.iterator(pipeline, converter, queryOptions);

List<TranscriptTfbs> tfbs = new ArrayList<>();
Expand Down
Loading