diff --git a/src/python/dm/common/mongodb/api/datasetMongoDbApi.py b/src/python/dm/common/mongodb/api/datasetMongoDbApi.py
new file mode 100755
index 0000000000000000000000000000000000000000..f89413746b2130231fc9b0150baa221aa4b73348
--- /dev/null
+++ b/src/python/dm/common/mongodb/api/datasetMongoDbApi.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+
+import copy
+import types
+import re
+
+from dm.common.objects.dmObject import DmObject
+from dm.common.exceptions.dmException import DmException
+from dm.common.mongodb.api.dmMongoDbApi import DmMongoDbApi
+from dm.common.mongodb.impl.datasetCollection import DatasetCollection
+from dm.common.mongodb.api.fileMongoDbApi import FileMongoDbApi
+from dm.common.mongodb.api.dmMongoDbApi import DmMongoDbApi
+from dm.common.objects.datasetMetadata import DatasetMetadata
+
+class DatasetMongoDbApi(DmMongoDbApi):
+    """MongoDB API for dataset metadata, backed by DatasetCollection."""
+
+    # Keys that convertStringsToRegex() copies through unchanged.
+    SYSTEM_KEY_LIST = ['_id', '_datasetName', '_experimentName']
+    # Keyword arguments read by getExperimentDatasetFiles().
+    IGNORE_CASE_KEY = '_ignoreCase'
+    USE_DATASET_NAME_KEY = '_useDatasetName'
+    # Must differ from USE_DATASET_NAME_KEY so the two options are independent.
+    USE_EXPERIMENT_NAME_KEY = '_useExperimentName'
+
+    def __init__(self):
+        """Set up the dataset collection and the file API used for file lookups."""
+        DmMongoDbApi.__init__(self)
+        self.datasetCollection = DatasetCollection(self.dbClient)
+        self.fileMongoDbApi = FileMongoDbApi()
+
+    @classmethod
+    def convertStringsToRegex(cls, dict, ignoreCase=True):
+        """
+        Return a shallow copy of dict with string values compiled to regexes.
+
+        Entries whose key is in SYSTEM_KEY_LIST, and entries whose value is
+        not a string, are copied unchanged. The input is not modified.
+
+        :param dict: dictionary to convert
+        :param ignoreCase: compile patterns with re.IGNORECASE when true
+        :return: converted copy of the dictionary
+        :raises re.error: if a string value is not a valid pattern
+        """
+        flags = 0
+        if ignoreCase:
+            flags = re.IGNORECASE
+        dict2 = copy.copy(dict)
+        for (key, value) in dict.items():
+            if key in cls.SYSTEM_KEY_LIST:
+                continue
+            # types.StringTypes covers str and unicode, including subclasses.
+            if isinstance(value, types.StringTypes):
+                cls.getLogger().debug('Converting to regex: %s for key %s' % (value,key))
+                dict2[key] = re.compile(value, flags)
+        return dict2
+
+    @DmMongoDbApi.executeDbCall
+    def addExperimentDataset(self, datasetInfo, **kwargs):
+        """Add a dataset by its unique keys; return it as DatasetMetadata."""
+        datasetInfo2 = self.getMongoDict(datasetInfo)
+        dbDatasetMetadata = self.datasetCollection.addByUniqueKeys(datasetInfo2)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getDatasets(self, queryDict={}, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """Return the datasets matching queryDict as DatasetMetadata objects."""
+        queryDict2 = self.getMongoDict(queryDict)
+        return self.listToDmObjects(self.datasetCollection.findByQueryDict(queryDict2, returnFieldDict), DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getDatasetById(self, id, **kwargs):
+        """Look up a dataset by id; return it as DatasetMetadata."""
+        dbDatasetMetadata = self.datasetCollection.findById(id)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDataset(self, experimentName, datasetName, **kwargs):
+        """Look up a dataset by experiment and dataset name."""
+        queryDict = { '_datasetName' : datasetName, '_experimentName' : experimentName }
+        dbDatasetMetadata = self.datasetCollection.findByUniqueKeys(queryDict)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDatasets(self, experimentName, queryDict={}, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """Return the datasets of one experiment that match queryDict."""
+        # Work on a copy so neither the caller's dict nor the shared default is modified.
+        queryDict2 = copy.copy(queryDict)
+        queryDict2['_experimentName'] = experimentName
+        return self.getDatasets(queryDict2, returnFieldDict)
+
+    @DmMongoDbApi.executeDbCall
+    def updateDatasetById(self, datasetInfo, **kwargs):
+        """Update a dataset identified by id; return it as DatasetMetadata."""
+        datasetInfo2 = self.getMongoDict(datasetInfo)
+        dbDatasetMetadata = self.datasetCollection.updateById(datasetInfo2)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def updateExperimentDataset(self, datasetInfo, **kwargs):
+        """Update a dataset identified by its unique keys."""
+        datasetInfo2 = self.getMongoDict(datasetInfo)
+        dbDatasetMetadata = self.datasetCollection.updateByUniqueKeys(datasetInfo2)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def updateOrAddExperimentDataset(self, datasetInfo, **kwargs):
+        """Update a dataset identified by its unique keys, or add it."""
+        datasetInfo2 = self.getMongoDict(datasetInfo)
+        dbDatasetMetadata = self.datasetCollection.updateOrAddByUniqueKeys(datasetInfo2)
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDatasetFiles(self, experimentName, datasetName, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """
+        Find the experiment files that match a dataset's metadata.
+
+        The dataset's dictionary representation, without its id, is converted
+        by convertStringsToRegex() and used as the query passed to
+        FileMongoDbApi.getExperimentFiles().
+
+        Keyword arguments read here:
+          _useDatasetName: keep datasetName in the query (default False)
+          _useExperimentName: keep experimentName in the query (default False)
+          _ignoreCase: compile patterns case-insensitively (default True)
+        """
+        self.getLogger().debug('Looking for dataset %s (experiment %s)' % (datasetName, experimentName))
+        datasetDict = self.getExperimentDataset(experimentName, datasetName).getDictRep(DmObject.ALL_KEYS)
+        del datasetDict['id']
+        useDatasetName = kwargs.get(self.USE_DATASET_NAME_KEY, False)
+        if not useDatasetName:
+            del datasetDict['datasetName']
+        useExperimentName = kwargs.get(self.USE_EXPERIMENT_NAME_KEY, False)
+        if not useExperimentName:
+            del datasetDict['experimentName']
+        self.getLogger().debug('Converting dataset dict to regex patterns')
+        ignoreCase = kwargs.get(self.IGNORE_CASE_KEY, True)
+        queryDict = self.convertStringsToRegex(datasetDict, ignoreCase)
+        return self.fileMongoDbApi.getExperimentFiles(experimentName, queryDict, returnFieldDict)
+
+#######################################################################
+# Testing.
+if __name__ == '__main__':
+    api = DatasetMongoDbApi()
+
+    import time
+    t = long(time.time())
+    datasetName = 'd-%s' % t
+    experimentName = 'exp-01'
+    datasetInfo = {'datasetName' : datasetName, 'intKey' : 1, 'doubleKey' : 2.0, 'stringKey' : 'myString' , 'experimentName' : experimentName}
+    dataset = api.updateOrAddExperimentDataset(datasetInfo)
+    print '\nADDED DATASET\n', dataset
+
+    datasets = api.getDatasets()
+    # Print the list fetched above rather than querying a second time.
+    print '\nDATASETS: \n', datasets
+    for dataset in datasets:
+        print dataset.getDictRep()
+
+    #import re
+    #regex = re.compile("file0.*", re.IGNORECASE)
+    datasetName = 'dataset1'
+    datasetInfo = {'datasetName' : datasetName, 'experimentName' : experimentName, 'fileName' : 'file0.*'}
+    dataset = api.updateOrAddExperimentDataset(datasetInfo)
+    print '\nUPDATED DATASET\n', dataset
+
+    print '\nDATASET FILES\n', api.getExperimentDatasetFiles(experimentName, datasetName)
diff --git a/src/python/dm/common/objects/datasetMetadata.py b/src/python/dm/common/objects/datasetMetadata.py
new file mode 100755
index 0000000000000000000000000000000000000000..2711f8ed125f8ca132ff9ae1a6d243a62453c85e
--- /dev/null
+++ b/src/python/dm/common/objects/datasetMetadata.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+from dmObject import DmObject
+
+class DatasetMetadata(DmObject):
+    """DmObject subclass representing a dataset record."""
+
+    DEFAULT_KEY_LIST = [ 'id', 'datasetName', 'experimentName' ]
+
+    def __init__(self, dict):
+        """Initialize from a dictionary, which is handed to DmObject.__init__()."""
+        DmObject.__init__(self, dict)