From 14d2513c13f986e3b0d21ca42102ec03dd76a5eb Mon Sep 17 00:00:00 2001
From: Sinisa Veseli <sveseli@aps.anl.gov>
Date: Tue, 7 Jul 2015 15:35:36 +0000
Subject: [PATCH] added dataset mongodb API class

---
 .../common/mongodb/api/datasetMongoDbApi.py   | 132 ++++++++++++++++++
 .../dm/common/objects/datasetMetadata.py      |  11 ++
 2 files changed, 143 insertions(+)
 create mode 100755 src/python/dm/common/mongodb/api/datasetMongoDbApi.py
 create mode 100755 src/python/dm/common/objects/datasetMetadata.py

diff --git a/src/python/dm/common/mongodb/api/datasetMongoDbApi.py b/src/python/dm/common/mongodb/api/datasetMongoDbApi.py
new file mode 100755
index 00000000..f8941374
--- /dev/null
+++ b/src/python/dm/common/mongodb/api/datasetMongoDbApi.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+
+import copy
+import types
+import re
+
+from dm.common.objects.dmObject import DmObject
+from dm.common.exceptions.dmException import DmException
+from dm.common.mongodb.api.dmMongoDbApi import DmMongoDbApi
+from dm.common.mongodb.impl.datasetCollection import DatasetCollection
+from dm.common.mongodb.api.fileMongoDbApi import FileMongoDbApi
+from dm.common.mongodb.api.dmMongoDbApi import DmMongoDbApi
+from dm.common.objects.datasetMetadata import DatasetMetadata
+
+class DatasetMongoDbApi(DmMongoDbApi):
+    """API for dataset metadata stored in MongoDB, including dataset file lookup."""
+
+    SYSTEM_KEY_LIST = ['_id', '_datasetName', '_experimentName']
+    IGNORE_CASE_KEY = '_ignoreCase'
+    USE_DATASET_NAME_KEY = '_useDatasetName'
+    USE_EXPERIMENT_NAME_KEY = '_useExperimentName'
+
+    def __init__(self):
+        DmMongoDbApi.__init__(self)
+        self.datasetCollection = DatasetCollection(self.dbClient)
+        self.fileMongoDbApi = FileMongoDbApi()
+
+    @classmethod
+    def convertStringsToRegex(cls, dict, ignoreCase=True):
+        """Return a shallow copy of dict with its string values compiled to regex."""
+        flags = re.IGNORECASE if ignoreCase else 0
+        dict2 = copy.copy(dict)
+        for (key, value) in dict2.items():
+            # System keys and non-string values are copied unchanged.
+            if key not in cls.SYSTEM_KEY_LIST and isinstance(value, types.StringTypes):
+                cls.getLogger().debug('Converting to regex: %s for key %s' % (value,key))
+                dict2[key] = re.compile(value, flags)
+        return dict2
+
+    @DmMongoDbApi.executeDbCall
+    def addExperimentDataset(self, datasetInfo, **kwargs):
+        """Add a dataset via addByUniqueKeys and return it as DatasetMetadata."""
+        dbDatasetMetadata = self.datasetCollection.addByUniqueKeys(self.getMongoDict(datasetInfo))
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getDatasets(self, queryDict={}, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """Return DatasetMetadata objects for all datasets matching queryDict."""
+        queryDict2 = self.getMongoDict(queryDict)
+        return self.listToDmObjects(self.datasetCollection.findByQueryDict(queryDict2, returnFieldDict), DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getDatasetById(self, id, **kwargs):
+        """Return the dataset with the given id as DatasetMetadata."""
+        return self.toDmObject(self.datasetCollection.findById(id), DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDataset(self, experimentName, datasetName, **kwargs):
+        """Return the dataset identified by experiment name and dataset name."""
+        queryDict = { '_datasetName' : datasetName, '_experimentName' : experimentName }
+        return self.toDmObject(self.datasetCollection.findByUniqueKeys(queryDict), DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDatasets(self, experimentName, queryDict={}, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """Return datasets matching queryDict, restricted to the given experiment."""
+        queryDict2 = copy.copy(queryDict)  # copy so the caller's dict is not modified
+        queryDict2['_experimentName'] = experimentName
+        return self.getDatasets(queryDict2, returnFieldDict)
+
+    @DmMongoDbApi.executeDbCall
+    def updateDatasetById(self, datasetInfo, **kwargs):
+        """Update a dataset via updateById and return it as DatasetMetadata."""
+        dbDatasetMetadata = self.datasetCollection.updateById(self.getMongoDict(datasetInfo))
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def updateExperimentDataset(self, datasetInfo, **kwargs):
+        """Update a dataset via updateByUniqueKeys and return it as DatasetMetadata."""
+        dbDatasetMetadata = self.datasetCollection.updateByUniqueKeys(self.getMongoDict(datasetInfo))
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def updateOrAddExperimentDataset(self, datasetInfo, **kwargs):
+        """Update or add a dataset via updateOrAddByUniqueKeys; return DatasetMetadata."""
+        dbDatasetMetadata = self.datasetCollection.updateOrAddByUniqueKeys(self.getMongoDict(datasetInfo))
+        return self.toDmObject(dbDatasetMetadata, DatasetMetadata)
+
+    @DmMongoDbApi.executeDbCall
+    def getExperimentDatasetFiles(self, experimentName, datasetName, returnFieldDict=DatasetCollection.ALL_FIELDS_DICT, **kwargs):
+        """Return experiment files queried with the dataset's metadata as regex patterns."""
+        self.getLogger().debug('Looking for dataset %s (experiment %s)' % (datasetName, experimentName))
+        datasetDict = self.getExperimentDataset(experimentName, datasetName).getDictRep(DmObject.ALL_KEYS)
+        del datasetDict['id']
+        # Dataset and experiment names are part of the file query only on request.
+        if not kwargs.get(self.USE_DATASET_NAME_KEY, False):
+            del datasetDict['datasetName']
+        if not kwargs.get(self.USE_EXPERIMENT_NAME_KEY, False):
+            del datasetDict['experimentName']
+        self.getLogger().debug('Converting dataset dict to regex patterns')
+        ignoreCase = kwargs.get(self.IGNORE_CASE_KEY, True)
+        queryDict = self.convertStringsToRegex(datasetDict, ignoreCase)
+        return self.fileMongoDbApi.getExperimentFiles(experimentName, queryDict, returnFieldDict)
+
+#######################################################################
+# Testing.
+if __name__ == '__main__':
+    api = DatasetMongoDbApi()
+
+    import time
+    t = long(time.time())  # timestamp gives each run a distinct dataset name
+    datasetName = 'd-%s' % t
+    experimentName = 'exp-01'
+    datasetInfo = {'datasetName' : datasetName, 'intKey' : 1, 'doubleKey' : 2.0, 'stringKey' : 'myString' , 'experimentName' : experimentName}
+    dataset = api.updateOrAddExperimentDataset(datasetInfo)
+    print '\nADDED DATASET\n', dataset
+
+    datasets = api.getDatasets()
+    print '\nDATASETS: \n', datasets
+    for dataset in datasets:
+        print dataset.getDictRep()
+
+    # getExperimentDatasetFiles() compiles string metadata values to regex
+    # patterns, so the 'fileName' value below is used as a pattern in the query.
+    datasetName = 'dataset1'
+    datasetInfo = {'datasetName' : datasetName, 'experimentName' : experimentName, 'fileName' : 'file0.*'}
+    dataset = api.updateOrAddExperimentDataset(datasetInfo)
+    print '\nUPDATED DATASET\n', dataset
+
+    print '\nDATASET FILES\n', api.getExperimentDatasetFiles(experimentName, datasetName)
+
+
+
diff --git a/src/python/dm/common/objects/datasetMetadata.py b/src/python/dm/common/objects/datasetMetadata.py
new file mode 100755
index 00000000..2711f8ed
--- /dev/null
+++ b/src/python/dm/common/objects/datasetMetadata.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from dmObject import DmObject
+
+class DatasetMetadata(DmObject):
+    """DmObject subclass that DatasetMongoDbApi uses to return dataset records."""
+    DEFAULT_KEY_LIST = [ 'id', 'datasetName', 'experimentName' ]  # presumably the keys DmObject reports by default -- confirm in DmObject
+
+    def __init__(self, dict):
+        DmObject.__init__(self, dict)
+
-- 
GitLab