Skip to content
Snippets Groups Projects
Commit fb90bd07 authored by sveseli
Browse files

limit metadata retrieval to 500000 files

parent 36bfce38
No related branches found
No related tags found
No related merge requests found
...@@ -2,11 +2,14 @@ ...@@ -2,11 +2,14 @@
import cherrypy import cherrypy
import json import json
from dm.common.constants import dmTypes
from dm.common.utility.encoder import Encoder from dm.common.utility.encoder import Encoder
from dm.common.objects.dmObject import DmObject from dm.common.objects.dmObject import DmObject
from dm.common.objects.fileMetadata import FileMetadata
from dm.common.utility.dictUtility import DictUtility from dm.common.utility.dictUtility import DictUtility
from dm.common.service.dmSessionController import DmSessionController from dm.common.service.dmSessionController import DmSessionController
from dm.cat_web_service.service.impl.fileSessionControllerImpl import FileSessionControllerImpl from dm.cat_web_service.service.impl.fileSessionControllerImpl import FileSessionControllerImpl
from dm.common.mongodb.api.fileMongoDbApi import FileMongoDbApi
class FileSessionController(DmSessionController): class FileSessionController(DmSessionController):
...@@ -126,11 +129,22 @@ class FileSessionController(DmSessionController): ...@@ -126,11 +129,22 @@ class FileSessionController(DmSessionController):
skip = int(kwargs.get('skip', 0)) skip = int(kwargs.get('skip', 0))
limit = int(kwargs.get('limit', 0)) limit = int(kwargs.get('limit', 0))
keyList = DmObject.ALL_KEYS keyList = DmObject.ALL_KEYS
returnFieldDict = {}
encodedKeyList = kwargs.get('keyList') encodedKeyList = kwargs.get('keyList')
if encodedKeyList: if encodedKeyList:
keyList = json.loads(Encoder.decode(encodedKeyList)) keyList = json.loads(Encoder.decode(encodedKeyList))
if type(keyList) == dmTypes.DM_STRING_TYPE or type(keyList) == dmTypes.DM_UNICODE_TYPE:
return self.listToJson(self.fileSessionControllerImpl.getExperimentFiles(experimentName, queryDict=queryDict, skip=skip, limit=limit), keyList=keyList) if keyList == DmObject.ALL_KEYS:
returnFieldDict = FileMongoDbApi.ALL_FIELDS_DICT
elif keyList == DmObject.DEFAULT_KEYS:
keyList = FileMetadata.DEFAULT_KEY_LIST
else:
keyList = keyList.split(',')
if isinstance(keyList, list):
returnFieldDict['_id'] = True
for key in keyList:
returnFieldDict[key] = True
return self.listToJson(self.fileSessionControllerImpl.getExperimentFiles(experimentName, queryDict=queryDict, returnFieldDict=returnFieldDict, skip=skip, limit=limit), keyList=keyList)
@cherrypy.expose @cherrypy.expose
@DmSessionController.require(DmSessionController.canManageStation()) @DmSessionController.require(DmSessionController.canManageStation())
......
...@@ -15,6 +15,7 @@ from dm.common.objects.asynchronousOperation import AsynchronousOperation ...@@ -15,6 +15,7 @@ from dm.common.objects.asynchronousOperation import AsynchronousOperation
from dm.common.utility.asynchronousOperationTracker import AsynchronousOperationTracker from dm.common.utility.asynchronousOperationTracker import AsynchronousOperationTracker
from dm.common.utility.statsTracker import StatsTracker from dm.common.utility.statsTracker import StatsTracker
from dm.common.exceptions.objectNotFound import ObjectNotFound from dm.common.exceptions.objectNotFound import ObjectNotFound
from dm.common.exceptions.invalidRequest import InvalidRequest
from .fileCollectionStatsTracker import FileCollectionStatsTracker from .fileCollectionStatsTracker import FileCollectionStatsTracker
...@@ -22,11 +23,13 @@ class FileSessionControllerImpl(DmObjectManager): ...@@ -22,11 +23,13 @@ class FileSessionControllerImpl(DmObjectManager):
""" File controller implementation class. """ """ File controller implementation class. """
UPDATE_TIMER_DELAY_IN_SECONDS = 1 UPDATE_TIMER_DELAY_IN_SECONDS = 1
MAX_FILE_RETRIEVAL_COUNT = 500000
def __init__(self):
    """ Create the Mongo DB file API, the async operation tracker and the file collection stats tracker. """
    DmObjectManager.__init__(self)
    self.fileMongoDbApi = FileMongoDbApi()
    self.asyncOpTracker = AsynchronousOperationTracker()
    # StatsTracker is a singleton
    self.statsTracker = FileCollectionStatsTracker()
def addExperimentFile(self, fileInfo): def addExperimentFile(self, fileInfo):
...@@ -61,8 +64,23 @@ class FileSessionControllerImpl(DmObjectManager): ...@@ -61,8 +64,23 @@ class FileSessionControllerImpl(DmObjectManager):
raise ObjectNotFound('Asynchronous update operation id %s not found.' % id) raise ObjectNotFound('Asynchronous update operation id %s not found.' % id)
return asyncOp return asyncOp
def getExperimentFiles(self, experimentName, queryDict, returnFieldDict=FileMongoDbApi.ALL_FIELDS_DICT, skip=0, limit=0):
    """
    Retrieve metadata of experiment files that satisfy the given query.

    The number of matching files is counted first; the request is rejected
    when the number of files that would be returned after applying skip and
    limit exceeds MAX_FILE_RETRIEVAL_COUNT.

    Arguments:
        experimentName: name of the experiment whose files are queried
        queryDict: query passed to the Mongo DB file API
        returnFieldDict: fields to return; a false value (None, empty dict)
            falls back to FileMongoDbApi.ALL_FIELDS_DICT
        skip: number of matching files to skip (values <= 0 are ignored here)
        limit: maximum number of files to return (values <= 0 mean no limit here)

    Returns result of FileMongoDbApi.getExperimentFiles().

    Raises InvalidRequest if the requested retrieval count is too large.
    """
    self.logger.debug('Retrieving experiment files for %s with return field dict: %s' % (experimentName, str(returnFieldDict)))
    fileCount = self.fileMongoDbApi.getExperimentFileCount(experimentName, queryDict).get('count', 0)
    self.logger.debug('Experiment %s has %s files that satisfy query dict: %s' % (experimentName, fileCount, str(queryDict)))
    retrievalCount = fileCount
    if skip > 0:
        # Skipping past the last match leaves nothing to retrieve; do not
        # let the count go negative.
        retrievalCount = max(retrievalCount - skip, 0)
    if limit > 0 and retrievalCount > limit:
        retrievalCount = limit
    self.logger.debug('Requested retrieval count for experiment %s: %s' % (experimentName, retrievalCount))
    if retrievalCount > self.MAX_FILE_RETRIEVAL_COUNT:
        raise InvalidRequest('Requested retrieval of %s files exceeds maximum allowed %s files.' % (retrievalCount, self.MAX_FILE_RETRIEVAL_COUNT))
    if not returnFieldDict:
        returnFieldDict = FileMongoDbApi.ALL_FIELDS_DICT
    experimentFiles = self.fileMongoDbApi.getExperimentFiles(experimentName, queryDict=queryDict, returnFieldDict=returnFieldDict, skip=skip, limit=limit)
    self.logger.debug('Found %s experiment files for %s' % (len(experimentFiles), experimentName))
    return experimentFiles
def getExperimentFileById(self, experimentName, id):
    """ Return result of the Mongo DB file API lookup for the given experiment name and file id. """
    return self.fileMongoDbApi.getExperimentFileById(experimentName, id)
...@@ -71,10 +89,13 @@ class FileSessionControllerImpl(DmObjectManager): ...@@ -71,10 +89,13 @@ class FileSessionControllerImpl(DmObjectManager):
return self.fileMongoDbApi.getExperimentFile(experimentName, fileName) return self.fileMongoDbApi.getExperimentFile(experimentName, fileName)
def deleteExperimentFile(self, experimentName, experimentFilePath):
    """
    Delete experiment file identified by its path.

    The experiment is removed from the stats tracker before the delete
    call is made. Returns result of the Mongo DB file API delete call.
    """
    self.statsTracker.remove(experimentName)
    return self.fileMongoDbApi.deleteExperimentFile(experimentName, experimentFilePath)
def deleteExperimentFileById(self, experimentName, id):
    """
    Delete experiment file identified by its id.

    The experiment is removed from the stats tracker before the delete
    call is made; a removal placed after the return statement would never
    run. Returns result of the Mongo DB file API delete call.
    """
    self.statsTracker.remove(experimentName)
    return self.fileMongoDbApi.deleteExperimentFileById(experimentName, id)
def getExperimentFileCollectionStats(self, experimentName, queryDict): def getExperimentFileCollectionStats(self, experimentName, queryDict):
lastFile = self.fileMongoDbApi.getLastAddedExperimentFile(experimentName) lastFile = self.fileMongoDbApi.getLastAddedExperimentFile(experimentName)
......
...@@ -7,6 +7,7 @@ from dm.common.exceptions.dmException import DmException ...@@ -7,6 +7,7 @@ from dm.common.exceptions.dmException import DmException
from dm.common.constants import dmTypes from dm.common.constants import dmTypes
from dm.common.utility.loggingManager import LoggingManager from dm.common.utility.loggingManager import LoggingManager
from dm.common.mongodb.impl.mongoDbManager import MongoDbManager from dm.common.mongodb.impl.mongoDbManager import MongoDbManager
from dm.common.mongodb.impl.dmMongoCollection import DmMongoCollection
class DmMongoDbApi: class DmMongoDbApi:
""" Base Mongo DB API class. """ """ Base Mongo DB API class. """
...@@ -14,6 +15,7 @@ class DmMongoDbApi: ...@@ -14,6 +15,7 @@ class DmMongoDbApi:
SYSTEM_KEY_LIST = ['_id'] SYSTEM_KEY_LIST = ['_id']
REGEX_IGNORE_KEY_LIST = ['_id'] REGEX_IGNORE_KEY_LIST = ['_id']
REGEX_IGNORE_CASE_KEY = '_ignoreCase' REGEX_IGNORE_CASE_KEY = '_ignoreCase'
ALL_FIELDS_DICT = DmMongoCollection.ALL_FIELDS_DICT
def __init__(self): def __init__(self):
self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__) self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
......
...@@ -56,6 +56,12 @@ class FileMongoDbApi(DmMongoDbApi): ...@@ -56,6 +56,12 @@ class FileMongoDbApi(DmMongoDbApi):
dbFileMetadata = self.getFileCollection(experimentName).addByUniqueKeys(fileInfo2) dbFileMetadata = self.getFileCollection(experimentName).addByUniqueKeys(fileInfo2)
return self.toDmObject(dbFileMetadata, FileMetadata) return self.toDmObject(dbFileMetadata, FileMetadata)
@DmMongoDbApi.executeDbCall
def getExperimentFileCount(self, experimentName, queryDict={}):
    """
    Count files of the given experiment that match queryDict.

    The experiment name is checked and the query is converted with
    getMongoDict() before it is handed to the file collection.
    Returns result of the collection's getItemCount() call.
    """
    self.checkExperimentName(experimentName)
    queryDict2 = self.getMongoDict(queryDict)
    return self.getFileCollection(experimentName).getItemCount(queryDict2)
@DmMongoDbApi.executeDbCall
def getExperimentFiles(self, experimentName, queryDict={}, returnFieldDict=FileCollection.ALL_FIELDS_DICT, skip=0, limit=0, **kwargs):
    """
    Retrieve experiment files matching queryDict as FileMetadata objects.

    Arguments:
        experimentName: name of the experiment whose files are queried
        queryDict: query used to select files
        returnFieldDict: fields to return for each file; a false value
            falls back to FileCollection.ALL_FIELDS_DICT
        skip, limit, kwargs: passed through to the underlying file query
    """
    # Pass the caller's field selection through to the query. It used to be
    # replaced unconditionally with ALL_FIELDS_DICT, which made the
    # returnFieldDict argument a no-op.
    if not returnFieldDict:
        returnFieldDict = FileCollection.ALL_FIELDS_DICT
    dbFiles = self.__getExperimentFiles(experimentName, queryDict, returnFieldDict, skip, limit, **kwargs)
    return self.listToDmObjects(dbFiles, FileMetadata)
...@@ -143,12 +149,17 @@ class FileMongoDbApi(DmMongoDbApi): ...@@ -143,12 +149,17 @@ class FileMongoDbApi(DmMongoDbApi):
@DmMongoDbApi.executeDbCall @DmMongoDbApi.executeDbCall
def getFileCollectionStats(self, experimentName, queryDict={}, returnFieldDict=FileCollection.ALL_FIELDS_DICT, skip=0, limit=0, **kwargs): def getFileCollectionStats(self, experimentName, queryDict={}, returnFieldDict=FileCollection.ALL_FIELDS_DICT, skip=0, limit=0, **kwargs):
dbFiles = self.__getExperimentFiles(experimentName, queryDict, FileCollection.ALL_FIELDS_DICT, skip, limit, **kwargs) self.logger.debug('Retrieving file collection stats for %s' % experimentName)
statsDict = {'fileSize' : True}
#dbFiles = self.__getExperimentFiles(experimentName, queryDict, FileCollection.ALL_FIELDS_DICT, skip, limit, **kwargs)
dbFiles = self.__getExperimentFiles(experimentName, queryDict, statsDict, skip, limit, **kwargs)
self.logger.debug('File collection for %s has %s files' % (experimentName, len(dbFiles)))
tracker = StatsTracker('fileSize') tracker = StatsTracker('fileSize')
for f in dbFiles: for f in dbFiles:
fileSize = f.get('fileSize') fileSize = f.get('fileSize')
tracker.addValue(fileSize) tracker.addValue(fileSize)
fileSizeStats = tracker.getStats() fileSizeStats = tracker.getStats()
self.logger.debug('Finished retrieving file collection for %s' % (experimentName))
return FileCollectionStats({ return FileCollectionStats({
'experimentName' : experimentName, 'experimentName' : experimentName,
'queryDict' : queryDict, 'queryDict' : queryDict,
......
...@@ -62,6 +62,9 @@ class DmMongoClient(object): ...@@ -62,6 +62,9 @@ class DmMongoClient(object):
def deleteOne(self, collectionName, criteria={}):
    """ Call delete_one(criteria) on the named collection and return its result. """
    collection = self.db.get_collection(collectionName)
    return collection.delete_one(criteria)
def count(self, collectionName, query={}):
    """
    Return number of documents in the named collection that match query.

    Uses count_documents() when the driver's collection class provides it
    and falls back to the legacy count() call otherwise.
    """
    collection = self.db.get_collection(collectionName)
    # Collection.count() is deprecated since PyMongo 3.7 and removed in 4.0.
    # The check is made on the class rather than the instance because
    # attribute access on a PyMongo collection instance falls back to
    # sub-collection lookup, so an instance-level check would always pass.
    if hasattr(type(collection), 'count_documents'):
        return collection.count_documents(query)
    return collection.count(query)
def renameCollection(self, collectionName, newCollectionName): def renameCollection(self, collectionName, newCollectionName):
try: try:
return self.db.get_collection(collectionName).rename(newCollectionName) return self.db.get_collection(collectionName).rename(newCollectionName)
......
...@@ -296,10 +296,14 @@ class DmMongoCollection(object): ...@@ -296,10 +296,14 @@ class DmMongoCollection(object):
dbObjectDict = self.findByKeys(self.UNIQUE_KEYS_LIST, queryDict) dbObjectDict = self.findByKeys(self.UNIQUE_KEYS_LIST, queryDict)
return self.__deleteDbObject(dbObjectDict) return self.__deleteDbObject(dbObjectDict)
def getItemCount(self, queryDict={}):
    """ Return {'count' : n}, where n is the DB client's count for this collection and queryDict. """
    count = self.dbClient.count(self.collectionName, queryDict)
    return {'count' : count}
####################################################################### #######################################################################
# Testing # Testing
if __name__ == '__main__': if __name__ == '__main__':
from .dmMongoClient import DmMongoClient from dmMongoClient import DmMongoClient
from bson.objectid import ObjectId from bson.objectid import ObjectId
mongo = DmMongoClient('dm') mongo = DmMongoClient('dm')
fileCollectionImpl = DmMongoCollection('files', mongo) fileCollectionImpl = DmMongoCollection('files', mongo)
...@@ -319,4 +323,5 @@ if __name__ == '__main__': ...@@ -319,4 +323,5 @@ if __name__ == '__main__':
#print fileCollectionImpl.updateByKeys(['name', 'experiment'], objectDict) #print fileCollectionImpl.updateByKeys(['name', 'experiment'], objectDict)
print('COUNT: %s' % fileCollectionImpl.getItemCount())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment