Skip to content
Snippets Groups Projects
Commit a82e0f42 authored by sveseli's avatar sveseli
Browse files

adding support for collection statistics

parent 9fbdf537
No related branches found
No related tags found
No related merge requests found
#!/bin/sh
# Run command
#
# Launcher for getFileCollectionStatsCli.py.
#
# If DM_ROOT_DIR is not set, the script changes into its own directory and
# sources ../setup.sh relative to it. It then sources dm_command_setup.sh and
# runs the Python CLI with DM_COMMAND_ARGS.
#
# Expansions are quoted so that paths containing whitespace do not break the
# tests, and "." is used instead of "source" because the interpreter is
# /bin/sh, where "source" is not guaranteed to exist.
if [ -z "$DM_ROOT_DIR" ]; then
    cd "$(dirname "$0")" && myDir=$(pwd)
    setupFile="$myDir/../setup.sh"
    if [ ! -f "$setupFile" ]; then
        echo "Cannot find setup file: $setupFile"
        exit 1
    fi
    . "$setupFile" > /dev/null
fi
# No slash in the name, so this file is looked up via PATH.
. dm_command_setup.sh
# eval is kept as in the original invocation; DM_COMMAND_ARGS is read here and
# presumably prepared by dm_command_setup.sh (not visible here).
eval "$DM_ROOT_DIR/src/python/dm/cat_web_service/cli/getFileCollectionStatsCli.py $DM_COMMAND_ARGS"
......@@ -17,5 +17,5 @@ DM_CAT_WEB_SERVICE_HOST=DM_HOSTNAME
DM_CAT_WEB_SERVICE_PORT=44436
DM_PROC_WEB_SERVICE_HOST=DM_HOSTNAME
DM_PROC_WEB_SERVICE_PORT=55536
DM_SOFTWARE_VERSION=1.6
DM_SOFTWARE_VERSION=1.7
......@@ -9,6 +9,7 @@ from dm.common.exceptions.dmException import DmException
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.objects.fileMetadata import FileMetadata
from dm.common.objects.collectionMetadata import CollectionMetadata
from dm.common.objects.fileCollectionStats import FileCollectionStats
from dm.common.objects.asynchronousOperation import AsynchronousOperation
from catRestApi import CatRestApi
......@@ -117,6 +118,15 @@ class FileCatApi(CatRestApi):
responseData = self.sendSessionRequest(url=url, method='GET')
return FileMetadata(responseData)
@CatRestApi.execute
def getExperimentFileCollectionStats(self, experimentName, queryDict=None):
    """Request file collection statistics for an experiment.

    Sends a GET request to the ``fileCollectionStatsByExperiment`` route,
    passing the query dictionary as an encoded JSON string in the
    ``queryDict`` URL parameter.

    Args:
        experimentName: name of the experiment; must be non-empty.
        queryDict: optional dictionary serialized into the request; an empty
            dictionary is sent when omitted.

    Returns:
        FileCollectionStats built from the service response.

    Raises:
        InvalidRequest: if experimentName is empty or None.
    """
    if not experimentName:
        raise InvalidRequest('Invalid experiment name provided.')
    # A None default replaces the shared mutable {} default; callers that
    # omit the argument still send an empty query.
    if queryDict is None:
        queryDict = {}
    url = '%s/fileCollectionStatsByExperiment/%s' % (self.getContextRoot(), experimentName)
    url += '?queryDict=%s' % (Encoder.encode(json.dumps(queryDict)))
    responseData = self.sendSessionRequest(url=url, method='GET')
    return FileCollectionStats(responseData)
@CatRestApi.execute
def renameExperimentFileCollection(self, experimentName, newExperimentName):
if not experimentName:
......
#!/usr/bin/env python
from dm.cat_web_service.api.fileCatApi import FileCatApi
from dm.common.exceptions.invalidRequest import InvalidRequest
from catWebServiceSessionCli import CatWebServiceSessionCli
class GetFileCollectionStatsCli(CatWebServiceSessionCli):
    """Command line tool that prints file collection statistics for an experiment."""

    def __init__(self):
        CatWebServiceSessionCli.__init__(self, validArgCount=self.ANY_NUMBER_OF_POSITIONAL_ARGS)
        self.addOption('', '--experiment', dest='experimentName', help='Experiment name.')

    def checkArgs(self):
        """Raise InvalidRequest unless the --experiment option was given."""
        if self.options.experimentName is not None:
            return
        raise InvalidRequest('Experiment name must be provided.')

    def getExperimentName(self):
        """Return the value of the --experiment option."""
        return self.options.experimentName

    def runCommand(self):
        """Parse arguments, query the catalog service and print the result."""
        usageText = """
dm-get-file-collection-stats --experiment=EXPERIMENTNAME
[key1:value1, key2:value2, ...]
Description:
Retrieve statistic for experiment file collection from the metadata catalog.
Only those files that match provided key/value metadata pairs will be included.
If no specific metadata key/values are requested, all experiment files will be
taken into account.
"""
        self.parseArgs(usage=usageText)
        self.checkArgs()
        catApi = FileCatApi(
            self.getLoginUsername(),
            self.getLoginPassword(),
            self.getServiceHost(),
            self.getServicePort(),
            self.getServiceProtocol())
        # Positional arguments become the query dictionary sent to the service.
        metadataQuery = self.splitArgsIntoDict()
        stats = catApi.getExperimentFileCollectionStats(self.getExperimentName(), metadataQuery)
        # Single-argument print(...) behaves the same as the print statement.
        print(stats.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Script entry point: build the CLI object and run it.
if __name__ == '__main__':
    GetFileCollectionStatsCli().run()
......@@ -99,6 +99,15 @@ class FileRouteDescriptor:
'method' : ['GET']
},
# Get experiment collection stats
{
'name' : 'getExperimentFileCollectionStats',
'path' : '%s/fileCollectionStatsByExperiment/:(experimentName)' % contextRoot,
'controller' : fileSessionController,
'action' : 'getExperimentFileCollectionStats',
'method' : ['GET']
},
# Rename experiment file collections
{
'name' : 'renameExperimentFileCollection',
......
......@@ -148,6 +148,19 @@ class FileSessionController(DmSessionController):
response = self.fileSessionControllerImpl.getExperimentFile(experimentName, experimentFilePath).getFullJsonRep()
return response
@cherrypy.expose
@DmSessionController.require(DmSessionController.canManageStation())
@DmSessionController.execute
def getExperimentFileCollectionStats(self, experimentName, **kwargs):
    """Handle a request for an experiment's file collection statistics.

    The optional ``queryDict`` request parameter is decoded and parsed as
    JSON; when it is absent or empty an empty dictionary is used instead.
    Returns the full JSON representation of the object produced by the
    controller implementation. Raises InvalidRequest when experimentName is
    empty or None.
    """
    if not experimentName:
        raise InvalidRequest('Invalid experiment name provided.')
    encodedQuery = kwargs.get('queryDict')
    filters = json.loads(Encoder.decode(encodedQuery)) if encodedQuery else {}
    collectionStats = self.fileSessionControllerImpl.getExperimentFileCollectionStats(experimentName, queryDict=filters)
    return collectionStats.getFullJsonRep()
@cherrypy.expose
@DmSessionController.require(DmSessionController.canManageStation())
@DmSessionController.execute
......
......@@ -8,10 +8,12 @@ import threading
import uuid
from dm.common.constants import dmProcessingStatus
from dm.common.objects.dmObject import DmObject
from dm.common.objects.fileCollectionStats import FileCollectionStats
from dm.common.objects.dmObjectManager import DmObjectManager
from dm.common.mongodb.api.fileMongoDbApi import FileMongoDbApi
from dm.common.objects.asynchronousOperation import AsynchronousOperation
from dm.common.utility.asynchronousOperationTracker import AsynchronousOperationTracker
from dm.common.utility.statsTracker import StatsTracker
from dm.common.exceptions.objectNotFound import ObjectNotFound
class FileSessionControllerImpl(DmObjectManager):
......@@ -65,6 +67,23 @@ class FileSessionControllerImpl(DmObjectManager):
def getExperimentFile(self, experimentName, fileName):
    """Return the result of the file MongoDB API lookup for one experiment file."""
    experimentFile = self.fileMongoDbApi.getExperimentFile(experimentName, fileName)
    return experimentFile
def getExperimentFileCollectionStats(self, experimentName, queryDict):
    """Compute file size statistics for an experiment's files.

    Retrieves the experiment files matching queryDict from the file MongoDB
    API, feeds each file's 'fileSize' value into a StatsTracker and packs
    the resulting numbers into a FileCollectionStats object.

    Args:
        experimentName: name of the experiment whose files are examined.
        queryDict: query passed through to getExperimentFiles() and echoed
            back in the result.

    Returns:
        FileCollectionStats with keys experimentName, queryDict, nFiles,
        averageFileSize, minFileSize, maxFileSize and collectionSize.
    """
    files = self.fileMongoDbApi.getExperimentFiles(experimentName, queryDict=queryDict)
    tracker = StatsTracker('fileSize')
    for f in files:
        # StatsTracker.addValue() ignores None, so files without a
        # 'fileSize' entry do not contribute to any of the numbers below
        # (including nFiles).
        tracker.addValue(f.get('fileSize'))
    # Fix: the stats object must come from the tracker; the previous code
    # read fileSizeStats before assigning it and then queried the tracker,
    # which has no get() method.
    # NOTE(review): assumes StatsData (a DmObject) supports dict-style
    # get() -- confirm against DmObject.
    fileSizeStats = tracker.getStats()
    return FileCollectionStats({
        'experimentName' : experimentName,
        'queryDict' : queryDict,
        'nFiles' : fileSizeStats.get('n'),
        'averageFileSize' : fileSizeStats.get('average'),
        'minFileSize' : fileSizeStats.get('min'),
        'maxFileSize' : fileSizeStats.get('max'),
        'collectionSize' : fileSizeStats.get('sum'),
    })
def renameExperimentFileCollection(self, experimentName, newExperimentName):
fileCollection = self.fileMongoDbApi.renameExperimentFileCollection(experimentName, newExperimentName)
self.logger.debug('Renamed file collection for experiment %s: %s' % (experimentName, fileCollection))
......
#!/usr/bin/env python
from dmObject import DmObject
class FileCollectionStats(DmObject):
    """DmObject holding summary statistics for an experiment file collection."""

    # Default key list for this object type; how it is consumed is defined
    # by DmObject (not visible here).
    DEFAULT_KEY_LIST = [
        'experimentName',
        'nFiles',
        'averageFileSize',
        'minFileSize',
        'maxFileSize',
        'collectionSize',
    ]

    def __init__(self, dict):
        # Initialization is delegated entirely to the base class.
        DmObject.__init__(self, dict)
#!/usr/bin/env python
from dmObject import DmObject
class StatsData(DmObject):
    """DmObject holding a snapshot of accumulated statistics."""

    # Default key list for this object type; how it is consumed is defined
    # by DmObject (not visible here).
    DEFAULT_KEY_LIST = [
        'name',
        'n',
        'average',
        'sigma',
        'last',
        'min',
        'max',
    ]

    def __init__(self, dict):
        # Initialization is delegated entirely to the base class.
        DmObject.__init__(self, dict)
#!/usr/bin/env python
import datetime
import math
from dm.common.objects.statsData import StatsData
class StatsTracker:
    """Accumulates running statistics (count, sum, min, max, ...) of a value.

    Values are added one at a time with addValue(); getStats() returns a
    StatsData snapshot including the average and standard deviation.
    """

    def __init__(self, name, units=None, trackTime=False):
        """Create an empty tracker.

        Args:
            name: label reported in the statistics.
            units: optional units label reported in the statistics.
            trackTime: when true, values added without an explicit timestamp
                are stamped with the current local time.
        """
        self.name = name
        self.units = units
        self.trackTime = trackTime
        self.reset()

    def reset(self):
        """Discard all accumulated values."""
        self.n = 0
        self.sum = 0.0
        self.sum2 = 0.0
        self.last = None
        self.min = None
        self.max = None
        self.lastTimestamp = None

    def addValue(self, value, timestamp=None):
        """Add one value to the statistics; None values are ignored.

        Args:
            value: numeric value to accumulate.
            timestamp: optional timestamp recorded as lastTimestamp; when
                not given and trackTime is set, the current time is used.
        """
        if value is None:
            return
        self.n += 1
        self.last = value
        if timestamp:
            self.lastTimestamp = timestamp
        elif self.trackTime:
            self.lastTimestamp = datetime.datetime.now()
        self.sum += value
        self.sum2 += value*value
        # Compare against None explicitly: a truthiness test would treat a
        # recorded extreme of 0 as "unset" and overwrite it with any value.
        if self.min is None or self.min > value:
            self.min = value
        if self.max is None or self.max < value:
            self.max = value

    def getStats(self):
        """Return a StatsData snapshot of the accumulated statistics.

        average and sigma are None when no values have been added.
        """
        average = None
        sigma = None
        if self.n:
            average = self.sum/self.n
            # Rounding can push the variance slightly below zero (e.g. for
            # identical values), which would make math.sqrt() raise.
            variance = self.sum2/self.n-average*average
            sigma = math.sqrt(max(variance, 0.0))
        return StatsData({
            'name' : self.name,
            'units' : self.units,
            'n' : self.n,
            'average' : average,
            'sigma' : sigma,
            'sum' : self.sum,
            'min' : self.min,
            'max' : self.max,
            'last' : self.last,
            'lastTimestamp' : self.lastTimestamp,
        })
#######################################################################
# Testing: feed a few sample values through a tracker and print the result.
if __name__ == '__main__':
    tracker = StatsTracker('fileSize', 'B', trackTime=True)
    for sample in (10, 50, 150):
        tracker.addValue(sample)
    # Single-argument print(...) behaves the same as the print statement.
    print(tracker.getStats())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment