Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • DM/dm-docs
  • hammonds/dm-docs
  • hparraga/dm-docs
3 results
Show changes
Showing
with 1480 additions and 0 deletions
#!/usr/bin/env python
from daqWebServiceSessionCli import DaqWebServiceSessionCli
from dm.common.constants import dmProcessingStatus
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from dm.common.exceptions.invalidRequest import InvalidRequest
class ListUploadsCli(DaqWebServiceSessionCli):
    """ Command line tool that prints the uploads returned by the DAQ web service. """

    def __init__(self):
        """ Register the --status option on top of the session CLI options. """
        DaqWebServiceSessionCli.__init__(self, validArgCount=self.ANY_NUMBER_OF_POSITIONAL_ARGS)
        defaultStatus = dmProcessingStatus.DM_PROCESSING_STATUS_ANY
        statusHelp = 'Processing status, must be one of %s (default: %s).' % (dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST, defaultStatus)
        self.addOption('', '--status', dest='status', default=defaultStatus, help=statusHelp)

    def checkArgs(self):
        """ Raise InvalidRequest unless the requested status is an allowed one. """
        if self.options.status in dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST:
            return
        raise InvalidRequest('Processing status must be one of %s.' % dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST)

    def getStatus(self):
        """ Return the processing status selected on the command line. """
        return self.options.status

    def runCommand(self):
        """ Parse arguments, query the service and print one line per upload. """
        usage = """
dm-list-uploads [--status=STATUS]
Description:
Retrieves all known uploads.
"""
        self.parseArgs(usage=usage)
        self.checkArgs()
        api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
        for uploadInfo in api.listUploads(self.getStatus()):
            # Single-argument print call behaves the same as the print statement.
            print(uploadInfo.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Run command.
if __name__ == '__main__':
    # Script entry point: build the command object and let it run.
    cli = ListUploadsCli()
    cli.run()
#!/usr/bin/env python
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from dm.common.exceptions.invalidRequest import InvalidRequest
from daqWebServiceSessionCli import DaqWebServiceSessionCli
class StartDaqCli(DaqWebServiceSessionCli):
    """ Command line tool that asks the DAQ web service to start a DAQ. """

    def __init__(self):
        """ Register the experiment, data directory and hidden-file options. """
        DaqWebServiceSessionCli.__init__(self, validArgCount=self.ANY_NUMBER_OF_POSITIONAL_ARGS)
        self.addOption('', '--experiment', dest='experimentName', help='Experiment name.')
        dataDirectoryHelp = 'Experiment data directory. If specified string does not already contain file server URL, value of the %s environment variable will be prepended to it.' % self.DM_FILE_SERVER_URL_ENV_VAR
        self.addOption('', '--data-directory', dest='dataDirectory', help=dataDirectoryHelp)
        self.addOption('', '--process-hidden', dest='processHidden', action='store_true', default=False, help='Process hidden source files.')

    def checkArgs(self):
        """ Raise InvalidRequest if a mandatory option was not given. """
        options = self.options
        if options.experimentName is None:
            raise InvalidRequest('Experiment name must be provided.')
        if options.dataDirectory is None:
            raise InvalidRequest('Experiment data directory must be provided.')

    def updateDaqInfoFromOptions(self, daqInfo):
        """ Copy option-driven flags into the given daqInfo dictionary. """
        if not self.options.processHidden:
            return
        daqInfo['processHiddenFiles'] = True

    def runCommand(self):
        """ Parse arguments, start the DAQ and print the returned DAQ info. """
        usage = """
dm-start-daq --experiment=EXPERIMENTNAME --data-directory=DATADIRECTORY
[--process-hidden]
[key1:value1, key2:value2, ...]
Description:
Starts DAQ for a given experiment. Specified data directory will be
monitored for data files. All provided key/value pairs will be passed to
file processing plugins.
"""
        self.parseArgs(usage=usage)
        self.checkArgs()
        api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
        # Positional key:value arguments form the base of the DAQ info.
        requestedDaqInfo = self.splitArgsIntoDict()
        self.updateDaqInfoFromOptions(requestedDaqInfo)
        startedDaqInfo = api.startDaq(self.getExperimentName(), self.getDataDirectory(), daqInfo=requestedDaqInfo)
        print(startedDaqInfo.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Run command.
if __name__ == '__main__':
    # Script entry point: build the command object and let it run.
    cli = StartDaqCli()
    cli.run()
#!/usr/bin/env python
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from dm.common.exceptions.invalidRequest import InvalidRequest
from daqWebServiceSessionCli import DaqWebServiceSessionCli
class StopDaqCli(DaqWebServiceSessionCli):
    """ Command line tool that asks the DAQ web service to stop a DAQ. """

    def __init__(self):
        """ Register the experiment and data directory options. """
        DaqWebServiceSessionCli.__init__(self)
        self.addOption('', '--experiment', dest='experimentName', help='Experiment name.')
        dataDirectoryHelp = 'Experiment data directory. If specified string does not already contain file server URL, value of the %s environment variable will be prepended to it.' % self.DM_FILE_SERVER_URL_ENV_VAR
        self.addOption('', '--data-directory', dest='dataDirectory', help=dataDirectoryHelp)

    def checkArgs(self):
        """ Raise InvalidRequest if a mandatory option was not given. """
        options = self.options
        if options.experimentName is None:
            raise InvalidRequest('Experiment name must be provided.')
        if options.dataDirectory is None:
            raise InvalidRequest('Experiment data directory must be provided.')

    def runCommand(self):
        """ Parse arguments, stop the DAQ and print the returned DAQ info. """
        usage = """
dm-stop-daq --experiment=EXPERIMENTNAME --data-directory=DATADIRECTORY
Description:
Stop DAQ for a given experiment and data directory.
"""
        self.parseArgs(usage=usage)
        self.checkArgs()
        api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
        stoppedDaqInfo = api.stopDaq(self.getExperimentName(), self.getDataDirectory())
        print(stoppedDaqInfo.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Run command.
if __name__ == '__main__':
    # Script entry point: build the command object and let it run.
    cli = StopDaqCli()
    cli.run()
#!/usr/bin/env python
from daqWebServiceSessionCli import DaqWebServiceSessionCli
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from dm.common.exceptions.invalidRequest import InvalidRequest
class StopUploadCli(DaqWebServiceSessionCli):
    """ Command line tool that asks the DAQ web service to stop an upload. """

    def __init__(self):
        """ Register the --id option on top of the session CLI options. """
        DaqWebServiceSessionCli.__init__(self, validArgCount=self.ANY_NUMBER_OF_POSITIONAL_ARGS)
        self.addOption('', '--id', dest='id', help='Upload id.')

    def checkArgs(self):
        """ Raise InvalidRequest if no upload id was given. """
        if self.options.id is not None:
            return
        raise InvalidRequest('Upload id must be provided.')

    def getId(self):
        """ Return the upload id given on the command line. """
        return self.options.id

    def runCommand(self):
        """ Parse arguments, stop the upload and print the returned upload info. """
        usage = """
dm-stop-upload --id=ID
Description:
Aborts specified data upload.
"""
        self.parseArgs(usage=usage)
        self.checkArgs()
        api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
        stoppedUploadInfo = api.stopUpload(self.getId())
        print(stoppedUploadInfo.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Run command.
if __name__ == '__main__':
    # Script entry point: build the command object and let it run.
    cli = StopUploadCli()
    cli.run()
#!/usr/bin/env python
from daqWebServiceSessionCli import DaqWebServiceSessionCli
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.constants import dmProcessingMode
class UploadCli(DaqWebServiceSessionCli):
    """ Command line tool that schedules a data upload through the DAQ web service. """

    def __init__(self):
        """ Register all upload related command line options. """
        DaqWebServiceSessionCli.__init__(self, validArgCount=self.ANY_NUMBER_OF_POSITIONAL_ARGS)
        self.addOption('', '--experiment', dest='experimentName', help='Experiment name.')
        dataDirectoryHelp = 'Experiment data directory. If specified string does not already contain file server URL, value of the %s environment variable will be prepended to it.' % self.DM_FILE_SERVER_URL_ENV_VAR
        self.addOption('', '--data-directory', dest='dataDirectory', help=dataDirectoryHelp)
        self.addOption('', '--reprocess', dest='reprocess', action='store_true', default=False, help='Reprocess source files that are already in storage, even if they have not been modified.')
        self.addOption('', '--process-hidden', dest='processHidden', action='store_true', default=False, help='Process hidden source files.')
        filesMode = dmProcessingMode.DM_PROCESSING_MODE_FILES
        processingModeHelp = 'Processing mode can be one of %s (default: %s). In the "%s" mode files are processed individually, while in the "%s" mode processing plugins work on directories (if possible).' % (dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST, filesMode, filesMode, dmProcessingMode.DM_PROCESSING_MODE_DIRECTORY)
        self.addOption('', '--processing-mode', dest='processingMode', default=filesMode, help=processingModeHelp)
        self.addOption('', '--skip-plugins', dest='skipPlugins', help='Comma-separated list of plugins which should not process the given directory.')

    def checkArgs(self):
        """ Raise InvalidRequest on a missing mandatory option or unknown processing mode. """
        options = self.options
        if options.experimentName is None:
            raise InvalidRequest('Experiment name must be provided.')
        if options.dataDirectory is None:
            raise InvalidRequest('Experiment data directory must be provided.')
        if options.processingMode in dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST:
            return
        raise InvalidRequest('Processing mode must be one of %s.' % dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST)

    def updateDaqInfoFromOptions(self, daqInfo):
        """ Copy option-driven settings into the given daqInfo dictionary. """
        options = self.options
        if options.reprocess:
            daqInfo['reprocessFiles'] = True
        if options.processHidden:
            daqInfo['processHiddenFiles'] = True
        if options.skipPlugins:
            daqInfo['skipPlugins'] = options.skipPlugins
        # The processing mode is always passed along, even when left at its default.
        daqInfo['processingMode'] = options.processingMode

    def runCommand(self):
        """ Parse arguments, request the upload and print the returned upload info. """
        usage = """
dm-upload --experiment=EXPERIMENTNAME --data-directory=DATADIRECTORY
[--reprocess]
[--process-hidden]
[--processing-mode=PROCESSINGMODE]
[--skip-plugins=SKIPPLUGINS]
[key1:value1, key2:value2, ...]
Description:
Schedules data upload for a given experiment. All existing files in the
specified directory will be uploaded to storage. Relative directory
structure will be preserved. All provided key/value pairs will be passed
to file processing plugins.
"""
        self.parseArgs(usage=usage)
        self.checkArgs()
        api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
        # Positional key:value arguments form the base of the DAQ info.
        requestedDaqInfo = self.splitArgsIntoDict()
        self.updateDaqInfoFromOptions(requestedDaqInfo)
        scheduledUploadInfo = api.upload(self.getExperimentName(), self.getDataDirectory(), daqInfo=requestedDaqInfo)
        print(scheduledUploadInfo.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat()))
#######################################################################
# Run command.
if __name__ == '__main__':
    # Script entry point: build the command object and let it run.
    cli = UploadCli()
    cli.run()
#!/usr/bin/env python
#
# DM DAQ Web Service
#
from dm.common.service.dmRestWebServiceBase import DmRestWebServiceBase
from dm.common.utility.dmModuleManager import DmModuleManager
from dm.common.utility.configurationManager import ConfigurationManager
from dm.common.processing.fileProcessingManager import FileProcessingManager
from dm.daq_web_service.service.impl.daqProcessingCompleteNotificationPlugin import DaqProcessingCompleteNotificationPlugin
from dm.daq_web_service.service.impl.fileSystemObserver import FileSystemObserver
from daqWebServiceRouteMapper import DaqWebServiceRouteMapper
class DaqWebService(DmRestWebServiceBase):
    """ DAQ REST web service wired to DaqWebServiceRouteMapper. """

    def __init__(self):
        DmRestWebServiceBase.__init__(self, DaqWebServiceRouteMapper)

    def initDmModules(self):
        """ Register the file system observer and file processing manager modules. """
        self.logger.debug('Initializing dm modules')

        # Add modules that will be started, in this order.
        moduleManager = DmModuleManager.getInstance()
        for moduleClass in (FileSystemObserver, FileProcessingManager):
            moduleManager.addModule(moduleClass.getInstance())

        # Required processing plugin (currently disabled)
        #notificationPlugin = DaqProcessingCompleteNotificationPlugin()
        #FileProcessingManager.getInstance().appendFileProcessor(notificationPlugin)
        self.logger.debug('Initialized dm modules')

    def getDefaultServerHost(self):
        """ Return the service host held by the configuration manager. """
        configurationManager = ConfigurationManager.getInstance()
        return configurationManager.getServiceHost()

    def getDefaultServerPort(self):
        """ Return the service port held by the configuration manager. """
        configurationManager = ConfigurationManager.getInstance()
        return configurationManager.getServicePort()
####################################################################
# Run service
if __name__ == '__main__':
    # The service name is set before the service object is created.
    ConfigurationManager.getInstance().setServiceName('daq-web-service')
    service = DaqWebService()
    service.run()
#!/usr/bin/env python
#
# Route mapper for DM DAQ web service.
#
import sys
import os
import cherrypy
from dm.common.utility.loggingManager import LoggingManager
from dm.common.utility.configurationManager import ConfigurationManager
from dm.common.service.loginRouteDescriptor import LoginRouteDescriptor
from experimentRouteDescriptor import ExperimentRouteDescriptor
class DaqWebServiceRouteMapper:
    """ Builds the CherryPy routes dispatcher for the DAQ web service. """

    @classmethod
    def setupRoutes(cls):
        """ Setup RESTFul routes and return the configured dispatcher. """
        logger = LoggingManager.getInstance().getLogger(cls.__name__)
        contextRoot = ConfigurationManager.getInstance().getContextRoot()
        logger.debug('Using context root: %s' % contextRoot)

        # Login routes come first, followed by the experiment routes.
        routes = LoginRouteDescriptor.getRoutes()
        routes += ExperimentRouteDescriptor.getRoutes()

        # Register every route description with the dispatcher.
        dispatcher = cherrypy.dispatch.RoutesDispatcher()
        for route in routes:
            logger.debug('Connecting route: %s' % route)
            dispatcher.connect(
                route['name'],
                route['path'],
                action=route['action'],
                controller=route['controller'],
                conditions={'method': route['method']})
        return dispatcher
#!/usr/bin/env python
#
# Experiment route descriptor.
#
from dm.common.utility.configurationManager import ConfigurationManager
from experimentSessionController import ExperimentSessionController
class ExperimentRouteDescriptor:
    """ Describes the experiment related routes of the DAQ web service. """

    @classmethod
    def getRoutes(cls):
        """ Return a list of route dictionaries (name, path, controller, action, method). """
        contextRoot = ConfigurationManager.getInstance().getContextRoot()

        # Static instances shared between different routes
        experimentSessionController = ExperimentSessionController()

        # (route name, path template, HTTP method); the controller action
        # carries the same name as the route in every entry.
        routeTable = [
            # Start experiment daq
            ('startDaq', '%s/experimentsByName/:(experimentName)/startDaq/:(dataDirectory)', 'POST'),
            # Stop experiment daq
            ('stopDaq', '%s/experimentsByName/:(experimentName)/stopDaq/:(dataDirectory)', 'POST'),
            # Get daq info
            ('getDaqInfo', '%s/experimentDaqs/:(id)', 'GET'),
            # List DAQs
            ('listDaqs', '%s/experimentDaqsByStatus/:(status)', 'GET'),
            # Upload experiment data
            ('upload', '%s/experimentsByName/:(experimentName)/upload/:(dataDirectory)', 'POST'),
            # Get upload info
            ('getUploadInfo', '%s/experimentUploads/:(id)', 'GET'),
            # List uploads
            ('listUploads', '%s/experimentUploadsByStatus/:(status)', 'GET'),
            # Stop upload
            ('stopUpload', '%s/experimentUploads/stopUpload/:(id)', 'POST'),
            # Get processing plugins
            ('getProcessingPlugins', '%s/processingPlugins', 'GET'),
        ]

        # Define routes.
        routes = []
        for routeName, pathTemplate, httpMethod in routeTable:
            routes.append({
                'name' : routeName,
                'path' : pathTemplate % contextRoot,
                'controller' : experimentSessionController,
                'action' : routeName,
                'method' : [httpMethod]
            })
        return routes
#!/usr/bin/env python
import cherrypy
import json
import os
from dm.common.constants import dmProcessingMode
from dm.common.constants import dmProcessingStatus
from dm.common.service.dmSessionController import DmSessionController
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.exceptions.authorizationError import AuthorizationError
from dm.common.utility.encoder import Encoder
from dm.common.utility.dictUtility import DictUtility
from dm.daq_web_service.service.impl.experimentSessionControllerImpl import ExperimentSessionControllerImpl
class ExperimentSessionController(DmSessionController):
    """
    CherryPy controller exposing DAQ and upload operations.

    Each exposed method validates and decodes its request parameters and
    then delegates to ExperimentSessionControllerImpl.
    """

    def __init__(self):
        DmSessionController.__init__(self)
        self.experimentSessionControllerImpl = ExperimentSessionControllerImpl()

    @staticmethod
    def _decodeRequiredArgument(encodedValue, errorMessage):
        """ Decode a mandatory encoded parameter; raise InvalidRequest with errorMessage if it is empty. """
        if not encodedValue:
            raise InvalidRequest(errorMessage)
        return Encoder.decode(encodedValue)

    @staticmethod
    def _isAbsoluteLocation(dataDirectory):
        """ Return True if the directory starts with '/' or contains a '://' URL separator. """
        return dataDirectory.startswith('/') or '://' in dataDirectory

    @staticmethod
    def _decodeDaqInfo(kwargs):
        """
        Return the decoded 'daqInfo' request parameter as a dictionary.

        A missing or empty parameter yields an empty dictionary, so that
        startDaq and upload treat an empty value the same way.
        """
        encodedDaqInfo = kwargs.get('daqInfo')
        if not encodedDaqInfo:
            return {}
        return json.loads(Encoder.decode(encodedDaqInfo))

    @staticmethod
    def _resolveStatus(status):
        """ Default an empty status to ANY and raise InvalidRequest for an unknown one. """
        if not status:
            status = dmProcessingStatus.DM_PROCESSING_STATUS_ANY
        if status not in dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST:
            raise InvalidRequest('Invalid processing status "%s". Status must be one of %s.' % (status,dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST))
        return status

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def startDaq(self, experimentName, dataDirectory, **kwargs):
        """ Start a DAQ for the given experiment and data directory; returns its full JSON representation. """
        experimentName = self._decodeRequiredArgument(experimentName, 'Missing experiment name.')
        dataDirectory = self._decodeRequiredArgument(dataDirectory, 'Missing data directory.')
        if not self._isAbsoluteLocation(dataDirectory):
            raise InvalidRequest('Data directory must be an absolute path.')
        daqInfo = self._decodeDaqInfo(kwargs)
        response = self.experimentSessionControllerImpl.startDaq(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        self.logger.debug('Started DAQ: %s' % response)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def stopDaq(self, experimentName, dataDirectory, **kwargs):
        """ Stop the DAQ for the given experiment and data directory; returns its full JSON representation. """
        experimentName = self._decodeRequiredArgument(experimentName, 'Missing experiment name.')
        dataDirectory = self._decodeRequiredArgument(dataDirectory, 'Missing data directory.')
        response = self.experimentSessionControllerImpl.stopDaq(experimentName, dataDirectory).getFullJsonRep()
        self.logger.debug('Stopped DAQ: %s' % response)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getDaqInfo(self, id, **kwargs):
        """ Return the full JSON representation of the DAQ with the given id. """
        response = self.experimentSessionControllerImpl.getDaqInfo(id).getFullJsonRep()
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def listDaqs(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_ANY, **kwargs):
        """ Return the JSON list of DAQs that match the given processing status. """
        status = self._resolveStatus(status)
        return self.listToJson(self.experimentSessionControllerImpl.listDaqs(status))

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def upload(self, experimentName, dataDirectory, **kwargs):
        """
        Schedule an upload for the given experiment and data directory.

        The 'processingMode' key is removed from the decoded daqInfo and
        selects between file-by-file and directory uploads.
        """
        experimentName = self._decodeRequiredArgument(experimentName, 'Missing experiment name.')
        dataDirectory = self._decodeRequiredArgument(dataDirectory, 'Missing data directory.')
        if not self._isAbsoluteLocation(dataDirectory):
            raise InvalidRequest('Data directory must be an absolute path: %s' % dataDirectory)
        daqInfo = self._decodeDaqInfo(kwargs)
        processingMode = DictUtility.getAndRemoveKey(daqInfo, 'processingMode', dmProcessingMode.DM_PROCESSING_MODE_FILES)
        if processingMode not in dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST:
            raise InvalidRequest('Allowed processing modes: %s' % dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST)
        if processingMode == dmProcessingMode.DM_PROCESSING_MODE_FILES:
            response = self.experimentSessionControllerImpl.uploadFiles(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        else:
            response = self.experimentSessionControllerImpl.uploadDirectory(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        self.logger.debug('Returning upload info for directory %s' % dataDirectory)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getUploadInfo(self, id, **kwargs):
        """ Return the full JSON representation of the upload with the given id. """
        response = self.experimentSessionControllerImpl.getUploadInfo(id).getFullJsonRep()
        self.logger.debug('Returning info for upload id %s' % id)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def listUploads(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_ANY, **kwargs):
        """ Return the JSON list of uploads that match the given processing status. """
        status = self._resolveStatus(status)
        return self.listToJson(self.experimentSessionControllerImpl.listUploads(status))

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def stopUpload(self, id, **kwargs):
        """ Stop the upload with the given id; returns its full JSON representation. """
        response = self.experimentSessionControllerImpl.stopUpload(id).getFullJsonRep()
        self.logger.debug('Stopped upload id %s' % id)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getProcessingPlugins(self, **kwargs):
        """ Return the JSON list of processing plugins reported by the implementation. """
        return self.listToJson(self.experimentSessionControllerImpl.getProcessingPlugins())
#!/usr/bin/env python
import os
from dm.common.utility.loggingManager import LoggingManager
from dm.common.processing.plugins.fileProcessor import FileProcessor
from uploadTracker import UploadTracker
from daqTracker import DaqTracker
class DaqProcessingCompleteNotificationPlugin(FileProcessor):
    """ File processor that marks a file as processed in its upload or DAQ tracking record. """

    def __init__(self):
        FileProcessor.__init__(self)
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)

    def processFile(self, fileInfo):
        """
        Flag fileInfo's file path as processed in the tracked upload/DAQ info.

        The tracked object is looked up by 'uploadId' and by the 'id' in
        'daqInfo'; if both are present the DAQ lookup result is used.
        Nothing happens when no tracked object is found.
        """
        filePath = fileInfo.get('filePath')
        uploadId = fileInfo.get('uploadId')
        daqId = fileInfo.get('daqInfo', {}).get('id')
        trackedInfo = None
        if uploadId is not None:
            self.logger.debug('Upload id for file %s: %s' %(filePath, uploadId))
            trackedInfo = UploadTracker.getInstance().get(uploadId)
        if daqId is not None:
            self.logger.debug('Daq id for file %s: %s' %(filePath, daqId))
            trackedInfo = DaqTracker.getInstance().get(daqId)
        if trackedInfo is None:
            return

        fileDict = trackedInfo.get('fileDict', {})
        trackedFileInfo = fileDict.get(filePath)
        if trackedFileInfo:
            trackedFileInfo['processed'] = True
        else:
            self.logger.error('%s object does not have file path %s' %(trackedInfo, filePath))
        # Status is refreshed whether or not the file entry was found.
        trackedInfo.updateStatus()
#######################################################################
# Testing.
if __name__ == '__main__':
    # No standalone test is implemented for this module.
    pass
#!/usr/bin/env python
import os
import uuid
import time
from dm.common.constants import dmProcessingStatus
from dm.common.objects.daqInfo import DaqInfo
from dm.common.utility.objectTracker import ObjectTracker
from dm.common.utility.timeUtility import TimeUtility
from dm.common.exceptions.objectAlreadyExists import ObjectAlreadyExists
from dm.common.exceptions.objectNotFound import ObjectNotFound
class DaqTracker(ObjectTracker):
    """
    Tracks DaqInfo objects by id and keeps a dictionary of currently
    active DAQs keyed by experiment name and normalized data directory.
    """

    # Cache configuration
    objectClass = DaqInfo
    cacheSize = 100

    def __init__(self, *args, **kwargs):
        # NOTE(review): args and kwargs are forwarded as two positional values
        # rather than unpacked; confirm against ObjectTracker.__init__ before
        # changing this call.
        ObjectTracker.__init__(self, args, kwargs)
        self.activeDaqDict = {}

    @staticmethod
    def _getActiveDaqKey(experimentName, dataDirectory):
        """ Build the activeDaqDict key from the experiment name and the normalized directory. """
        return experimentName + os.path.normpath(dataDirectory)

    def startDaq(self, experiment, dataDirectory, daqInfo=None):
        """
        Register a new running DAQ and return its DaqInfo object.

        The passed daqInfo dictionary (a fresh one if omitted) is filled
        with the id, experiment and storage keys before it is wrapped
        into a DaqInfo object.

        Raises ObjectAlreadyExists if a DAQ is already active for the
        same experiment name and normalized data directory.
        """
        # A fresh dictionary per call: the keys set below must not leak
        # between calls through a shared default argument.
        if daqInfo is None:
            daqInfo = {}

        # Prevent second daq to be started in the same directory
        experimentName = experiment.get('name')
        activeDaqKey = self._getActiveDaqKey(experimentName, dataDirectory)
        if self.activeDaqDict.get(activeDaqKey):
            raise ObjectAlreadyExists('DAQ is already active for experiment %s in data directory %s.' % (experimentName, dataDirectory))

        daqId = str(uuid.uuid4())
        daqInfo['id'] = daqId
        daqInfo['experimentName'] = experimentName
        daqInfo['storageDirectory'] = experiment.get('storageDirectory')
        daqInfo['storageHost'] = experiment.get('storageHost')
        daqInfo['storageUrl'] = experiment.get('storageUrl')
        daqInfo['dataDirectory'] = dataDirectory

        # Create DaqInfo object with keys that we want to save with file
        # metadata, and add other keys later
        daqInfo2 = DaqInfo(daqInfo)
        daqInfo2['nFiles'] = 0
        daqInfo2['nProcessedFiles'] = 0
        daqInfo2['nProcessingErrors'] = 0
        daqInfo2['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
        startTime = time.time()
        daqInfo2['startTime'] = startTime
        daqInfo2['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)

        self.activeDaqDict[activeDaqKey] = daqInfo2
        self.put(daqId, daqInfo2)
        return daqInfo2

    def stopDaq(self, experiment, dataDirectory):
        """
        Record the end time of the active DAQ, remove it from the active
        dictionary and return it.

        Raises ObjectNotFound if no DAQ is active for the experiment name
        and normalized data directory.
        """
        experimentName = experiment.get('name')
        activeDaqKey = self._getActiveDaqKey(experimentName, dataDirectory)
        daqInfo = self.activeDaqDict.get(activeDaqKey)
        if not daqInfo:
            raise ObjectNotFound('DAQ is not active for experiment %s in data directory %s.' % (experimentName, dataDirectory))
        endTime = time.time()
        daqInfo['endTime'] = endTime
        daqInfo['endTimestamp'] = TimeUtility.formatLocalTimestamp(endTime)
        daqInfo.updateStatus()
        del self.activeDaqDict[activeDaqKey]
        return daqInfo

    def getDaqInfo(self, id):
        """ Return the tracked object stored under the given id. """
        return self.get(id)

    def updateDaqInfos(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING):
        """ Call updateStatus() on every tracked DAQ whose status equals the given one. """
        for daqInfo in self.getAll():
            if daqInfo.get('status', '') == status:
                daqInfo.updateStatus()

    def getDaqInfos(self, status=None):
        """ Return all tracked DAQs, or only those with the given status (None/ANY means all). """
        daqInfoList = self.getAll()
        if status is None or status == dmProcessingStatus.DM_PROCESSING_STATUS_ANY:
            return daqInfoList
        return [daqInfo for daqInfo in daqInfoList if daqInfo.get('status', '') == status]

    def getDaqInfoByExperimentAndDataDirectory(self, experiment, dataDirectory):
        """ Return the active DAQ for the experiment and directory, or None. """
        activeDaqKey = self._getActiveDaqKey(experiment.get('name'), dataDirectory)
        return self.activeDaqDict.get(activeDaqKey)
####################################################################
# Testing
if __name__ == '__main__':
    # Manual smoke test: start, inspect and stop a DAQ, then start another
    # one using a differently written form of the same directory.
    tracker = DaqTracker.getInstance()
    print(tracker)
    experiment = {'name' : 'e1', 'owner' : 'sv'}
    dataDirectory = 'ftp://wolf:2811/data/e1'
    daqInfo = tracker.startDaq(experiment, dataDirectory)
    daqId = daqInfo['id']
    print('DAQ ID:  %s' % (daqId,))
    print('DAQ INFO:  %s' % (tracker.getDaqInfo(daqId),))
    print('DAQS:  %s' % (tracker.getDaqInfos(),))
    print('REMOVED DAQ:  %s' % (tracker.stopDaq(experiment, dataDirectory),))
    dataDirectory = 'ftp:///wolf:2811///data/e1'
    # startDaq() returns the DaqInfo object, so the id is read from it
    # instead of printing the whole object under the "DAQ ID" label.
    daqInfo = tracker.startDaq(experiment, dataDirectory)
    daqId = daqInfo['id']
    print('DAQ ID:  %s' % (daqId,))
#!/usr/bin/env python
import os
import glob
from watchdog.events import FileSystemEventHandler
from dm.common.utility.loggingManager import LoggingManager
class DmFileSystemEventHandler(FileSystemEventHandler):
    """
    Watchdog event handler that reports created/modified files to the
    file system observer it was constructed with.
    """

    def __init__(self, fileSystemObserver, dataDirectory, experiment):
        FileSystemEventHandler.__init__(self)
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
        self.fileSystemObserver = fileSystemObserver
        self.dataDirectory = dataDirectory
        self.experiment = experiment

    def dispatch(self, event):
        """ Delegate to the base class dispatcher. """
        FileSystemEventHandler.dispatch(self, event)

    def on_any_event(self, event):
        """ Log every event. """
        FileSystemEventHandler.on_any_event(self, event)
        self.logger.debug('File system any_event event: %s' % (event.__dict__))

    def on_created(self, event):
        """ Log the created event and process it. """
        FileSystemEventHandler.on_created(self, event)
        self.logger.debug('File system created event: %s' % (event.__dict__))
        self.processEvent(event)

    def on_moved(self, event):
        """ Log the moved event; it is not processed further. """
        FileSystemEventHandler.on_moved(self, event)
        self.logger.debug('File system moved event: %s' % (event.__dict__))

    def on_deleted(self, event):
        """ Log the deleted event; it is not processed further. """
        FileSystemEventHandler.on_deleted(self, event)
        self.logger.debug('File system deleted event: %s' % (event.__dict__))

    def on_modified(self, event):
        """ Log the modified event and process it. """
        FileSystemEventHandler.on_modified(self, event)
        self.logger.debug('File system directory modified event: %s' % (event.__dict__))
        self.processEvent(event)

    def processEvent(self, event):
        """
        Report the file behind an event to the file system observer.

        For file events the event source path is reported. For directory
        events the directory entries are sorted by ctime, newest first,
        and the first regular file is reported.
        """
        if not event.is_directory:
            filePath = event.src_path
            self.logger.debug('Processing file event: %s' % (event.__dict__))
            self.fileSystemObserver.fileUpdated(filePath, self.dataDirectory, self.experiment)
            return

        try:
            directoryEntries = glob.glob(os.path.join(event.src_path,'*'))
            self.logger.debug('Processing directory event: %s , src path: %s' % (event.__dict__, event.src_path))
            # Sorting an empty list is a no-op, so no explicit emptiness check is needed.
            for candidatePath in sorted(directoryEntries, key=os.path.getctime, reverse=True):
                if not os.path.isfile(candidatePath):
                    continue
                self.logger.debug('Latest file: %s' % (candidatePath))
                self.fileSystemObserver.fileUpdated(candidatePath, self.dataDirectory, self.experiment)
                return
            self.logger.debug('No new files found in %s' % (event.src_path))
        except Exception as ex:
            self.logger.error('Exception occured when searching for file in directory %s: %s' % (event.__dict__, ex))
####################################################################
# Testing
if __name__ == '__main__':
    # Manual test: watch a directory (first argument, default '.') with a
    # polling observer until interrupted from the keyboard.
    import sys
    import time
    import logging
    from watchdog.observers import Observer
    from watchdog.observers.polling import PollingObserver
    from watchdog.observers.api import ObservedWatch
    from watchdog.observers.api import EventQueue
    from watchdog.observers.api import EventEmitter
    from watchdog.events import LoggingEventHandler

    class _PrintingFileSystemObserver(object):
        """ Minimal stand-in for the file system observer; prints reported files. """
        def fileUpdated(self, filePath, dataDirectory, experiment):
            print('FILE UPDATED:  %s' % (filePath,))

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    path = sys.argv[1] if len(sys.argv) > 1 else '.'
    # The handler constructor requires an observer, a data directory and an
    # experiment; calling it without arguments raises TypeError.
    eventHandler = DmFileSystemEventHandler(_PrintingFileSystemObserver(), path, None)

    observer = PollingObserver()
    observedWatch = observer.schedule(eventHandler, path, recursive=True)
    print('OBSERVED WATCH:  %s' % (observedWatch,))

    #observer.add_handler_for_watch(eventHandler2, observedWatch)
    #observer._clear_emitters()
    print(observer.emitters)
    observer.start()
    try:
        while True:
            time.sleep(1)
            print(time.time())
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
#!/usr/bin/env python
import os
import copy
from dm.common.utility.loggingManager import LoggingManager
from dm.common.processing.plugins.fileProcessor import FileProcessor
from dm.ds_web_service.api.dsRestApiFactory import DsRestApiFactory
class DsProcessFileNotificationPlugin(FileProcessor):
    """ File processor that forwards file and directory information to the DS file REST API. """

    def __init__(self, dependsOn=None):
        # A fresh list per instance avoids sharing one default list object
        # between all plugin instances.
        if dependsOn is None:
            dependsOn = []
        FileProcessor.__init__(self, dependsOn=dependsOn)
        self.dsFileApi = DsRestApiFactory.getFileRestApi()
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)

    def processFile(self, fileInfo):
        """ Send a reduced copy of fileInfo (path, experiment, md5 sum if set, daqInfo) to the DS service. """
        experimentFilePath = fileInfo.get('experimentFilePath')
        experimentName = fileInfo.get('experimentName')
        daqInfo = fileInfo.get('daqInfo', {})
        md5Sum = fileInfo.get('md5Sum')
        self.logger.debug('Processing file %s for experiment %s' % (experimentFilePath, experimentName))

        # Prepare dictionary for processing. Only send needed data.
        fileInfo2 = {}
        fileInfo2['experimentFilePath'] = experimentFilePath
        fileInfo2['experimentName'] = experimentName
        if md5Sum:
            fileInfo2['md5Sum'] = md5Sum
        fileInfo2['daqInfo'] = daqInfo
        self.logger.debug('File info sent to DS service: %s' % (str(fileInfo2)))
        self.dsFileApi.processFile(experimentFilePath, experimentName, fileInfo2)

    def processDirectory(self, directoryInfo):
        """
        Send a reduced directory description to the DS service.

        The experiment name is read from directoryInfo['uploadInfo'] and
        the experiment directory path is sent as an empty string.
        """
        uploadInfo = directoryInfo.get('uploadInfo')
        experimentName = uploadInfo.get('experimentName')
        experimentDirectoryPath = ''
        daqInfo = directoryInfo.get('daqInfo')

        # Build the outgoing dictionary without touching the caller's
        # directoryInfo; the path belongs in the dictionary that is sent.
        directoryInfo2 = {}
        directoryInfo2['experimentDirectoryPath'] = experimentDirectoryPath
        directoryInfo2['experimentName'] = experimentName
        directoryInfo2['daqInfo'] = daqInfo
        self.logger.debug('Directory info sent to DS service: %s' % (str(directoryInfo2)))
        self.dsFileApi.processDirectory(experimentDirectoryPath, experimentName, directoryInfo2)
#######################################################################
# Testing.
if __name__ == '__main__':
    # No standalone test is implemented for this module.
    pass
#!/usr/bin/env python
#
# Implementation for experiment session controller.
#
import os
import time
import uuid
import copy
import threading
from dm.common.constants import dmProcessingStatus
from dm.common.objects.experiment import Experiment
from dm.common.objects.dmObjectManager import DmObjectManager
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.exceptions.objectNotFound import ObjectNotFound
from dm.common.objects.observedFile import ObservedFile
from dm.common.objects.uploadInfo import UploadInfo
from dm.common.objects.pluginInfo import PluginInfo
from dm.common.objects.directoryUploadInfo import DirectoryUploadInfo
from dm.common.processing.fileProcessingManager import FileProcessingManager
from dm.common.utility.fileUtility import FileUtility
from dm.common.utility.timeUtility import TimeUtility
from dm.common.utility.dictUtility import DictUtility
from dm.ds_web_service.api.dsRestApiFactory import DsRestApiFactory
from experimentTracker import ExperimentTracker
from uploadTracker import UploadTracker
from daqTracker import DaqTracker
from fileSystemObserver import FileSystemObserver
class ExperimentSessionControllerImpl(DmObjectManager):
""" Experiment session controller implementation class. """
# Delay of the timers started by uploadFiles() and uploadDirectory().
UPLOAD_DELAY_IN_SECONDS = 1.0
# Number of submitted-but-not-completed files at which
# prepareUploadFiles() stops submitting more.
UPLOAD_CHUNK_SIZE_IN_FILES = 100
# Sleep interval of prepareUploadFiles() while it waits for the
# backlog of submitted files to shrink.
UPLOAD_CHUNK_REFRESH_IN_SECONDS = 10.0
# Sleep interval of processUploadDirectory() between checks of the
# processors it depends on.
DIRECTORY_UPLOAD_PROCESSING_WAIT_IN_SECONDS = 15.0
def __init__(self):
DmObjectManager.__init__(self)
# DS experiment API, used by the methods below to look up experiments by name.
self.dsExperimentApi = DsRestApiFactory.getExperimentRestApi()
def startDaq(self, experimentName, dataDirectory, daqInfo):
    """
    Start a DAQ for the given experiment and data directory.

    The data directory is created through the file system observer, the
    DAQ is registered with the DAQ tracker, and the directory is then
    put under observation.

    :raises InvalidRequest: if the experiment has no storage directory
    :returns: DAQ info returned by the DAQ tracker
    """
    FileSystemObserver.getInstance().createDirectory(dataDirectory)
    experiment = self.dsExperimentApi.getExperimentByName(experimentName)
    if experiment.get('storageDirectory') is None:
        raise InvalidRequest('Experiment %s has not been started.' % experimentName)

    startedDaqInfo = DaqTracker.getInstance().startDaq(experiment, dataDirectory, daqInfo)
    FileSystemObserver.getInstance().startObservingPath(dataDirectory, experiment)
    return startedDaqInfo
def stopDaq(self, experimentName, dataDirectory):
    """
    Stop the DAQ for the given experiment and data directory.

    The DAQ is stopped in the DAQ tracker first, then the directory is
    removed from observation and the DAQ status is refreshed.

    :returns: DAQ info returned by the DAQ tracker
    """
    experiment = self.dsExperimentApi.getExperimentByName(experimentName)
    stoppedDaqInfo = DaqTracker.getInstance().stopDaq(experiment, dataDirectory)
    FileSystemObserver.getInstance().stopObservingPath(dataDirectory, experiment)
    stoppedDaqInfo.updateStatus()
    return stoppedDaqInfo
def getDaqInfo(self, id):
    """
    Return the DAQ info with the given id after refreshing its status.

    :raises ObjectNotFound: if the DAQ tracker has no such DAQ
    """
    daqInfo = DaqTracker.getInstance().getDaqInfo(id)
    if daqInfo:
        daqInfo.updateStatus()
        return daqInfo
    raise ObjectNotFound('Daq id %s not found.' % id)
def listDaqs(self, status):
    """ Return the DAQ infos that the DAQ tracker reports for the given status. """
    return DaqTracker.getInstance().getDaqInfos(status)
def uploadFiles(self, experimentName, dataDirectory, daqInfo):
    """
    Start an upload of the files found in a data directory.

    The upload is registered with the upload tracker and a timer is
    started that calls prepareUploadFiles() after
    UPLOAD_DELAY_IN_SECONDS; this method returns without waiting for it.

    :param experimentName: name of the experiment, looked up via the DS
        experiment API
    :param dataDirectory: directory whose files should be uploaded
    :param daqInfo: dictionary of upload options; modified in place
        (experiment/storage keys and 'uploadId' are added, 'skipPlugins'
        is removed)
    :returns: upload info object, in pending state
    :raises InvalidRequest: if the experiment has no storage directory,
        or if no processing plugin remains after applying 'skipPlugins'
    """
    experiment = self.dsExperimentApi.getExperimentByName(experimentName)
    UploadTracker.getInstance().checkForActiveUpload(experiment, dataDirectory)
    experiment['daqInfo'] = daqInfo
    storageDirectory = experiment.get('storageDirectory')
    if storageDirectory is None:
        raise InvalidRequest('Experiment %s has not been started.' % experimentName)
    storageHost = experiment.get('storageHost')
    storageUrl = experiment.get('storageUrl')

    fileProcessingManager = FileProcessingManager.getInstance()
    uploadId = str(uuid.uuid4())
    self.logger.debug('Starting upload id %s' % uploadId)

    uploadInfo = UploadInfo(daqInfo)
    uploadInfo['id'] = uploadId
    uploadInfo['experimentName'] = experimentName
    uploadInfo['storageDirectory'] = storageDirectory
    uploadInfo['storageHost'] = storageHost
    uploadInfo['storageUrl'] = storageUrl
    uploadInfo['dataDirectory'] = dataDirectory
    uploadInfo['nProcessedFiles'] = 0
    uploadInfo['nProcessingErrors'] = 0
    startTime = time.time()
    uploadInfo['startTime'] = startTime
    # The key previously carried a trailing space ('startTimestamp ').
    uploadInfo['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)

    daqInfo['experimentName'] = experimentName
    daqInfo['storageDirectory'] = storageDirectory
    daqInfo['storageHost'] = storageHost
    daqInfo['storageUrl'] = storageUrl
    daqInfo['dataDirectory'] = dataDirectory
    daqInfo['uploadId'] = uploadId

    # 'skipPlugins' arrives as a comma-separated string of plugin names.
    skipPlugins = DictUtility.getAndRemoveKey(daqInfo, 'skipPlugins', '')
    if skipPlugins:
        skipPlugins = skipPlugins.split(',')
        uploadInfo['skipPlugins'] = skipPlugins
    else:
        skipPlugins = []

    # Check that there is at least one processor that can process files
    processorList = []
    for processorKey in fileProcessingManager.fileProcessorKeyList:
        processor = fileProcessingManager.fileProcessorDict.get(processorKey)
        if processor.name not in skipPlugins:
            processorList.append(processor)
    if not processorList:
        raise InvalidRequest('There are no plugins that can process files for upload in directory %s.' % dataDirectory)

    UploadTracker.getInstance().startUpload(uploadId, uploadInfo)
    uploadInfo['nFiles'] = 0
    uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_PENDING
    self.logger.debug('Starting upload timer for %s' % dataDirectory)
    timer = threading.Timer(self.UPLOAD_DELAY_IN_SECONDS, self.prepareUploadFiles, args=[uploadInfo, daqInfo, experiment])
    timer.start()
    return uploadInfo
def prepareUploadFiles(self, uploadInfo, daqInfo, experiment):
"""
Collect the files of an upload and submit them for processing.

Started by uploadFiles() through a threading.Timer. Files are
retrieved for the upload's data directory, filtered by the file
processing manager, and then handed to it one by one. Submission is
paused while UPLOAD_CHUNK_SIZE_IN_FILES or more submitted files have
not yet completed, and stops when the upload status becomes aborting.
Failures while collecting files mark the upload as failed.
"""
uploadId = uploadInfo.get('id')
self.logger.debug('Preparing upload id: %s' % uploadId)
dataDirectory = uploadInfo.get('dataDirectory')
fileProcessingManager = FileProcessingManager.getInstance()
try:
# Get files
self.logger.debug('Retrieving file paths for %s' % dataDirectory)
filePathsDict = FileSystemObserver.getInstance().getFiles(dataDirectory)
# Remove hidden files
self.logger.debug('Checking %s processing candidates' % len(filePathsDict))
filePathsDict = fileProcessingManager.removeHiddenFilesFromProcessing(filePathsDict, uploadInfo)
# Check which files need to be processed
filePathsDict = fileProcessingManager.checkUploadFilesForProcessing(filePathsDict, uploadInfo)
if not len(filePathsDict):
raise InvalidRequest('There are no new files for upload in directory %s.' % dataDirectory)
except Exception, ex:
# Any failure up to this point (including "no new files") fails the whole upload.
self.logger.error('Processing error for upload %s: %s' % (uploadId, str(ex)))
uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_FAILED
uploadInfo['errorMessage'] = str(ex)
return
uploadInfo['nFiles'] = len(filePathsDict)
uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
self.logger.debug('Will prepare upload of %s files' % len(filePathsDict))
# Number of files handed to the file processing manager so far.
nProcessedFiles = 0
nFiles = len(filePathsDict)
for (filePath,filePathDict) in filePathsDict.items():
try:
# Only create new uploads if we have less than
# UPLOAD_CHUNK_SIZE_IN_FILES waiting to be completed
while True:
# The status is re-read on every pass; stopUpload() sets it to aborting.
status = uploadInfo.get('status')
if status == dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING:
nCancelledFiles = nFiles - nProcessedFiles
uploadInfo.uploadAborted(nCancelledFiles)
self.logger.debug('Upload id %s aborted, will not process %s files)' % (uploadId, nCancelledFiles))
return
# 'nCompletedFiles' is read from the upload info; it is not updated in this method.
nCompletedFiles = uploadInfo.get('nCompletedFiles', 0)
nWaitingFiles = nProcessedFiles - nCompletedFiles
if nWaitingFiles < self.UPLOAD_CHUNK_SIZE_IN_FILES:
# We need to add more files for upload
break
self.logger.debug('Upload %s has %s files waiting for upload, will not add any more for %s seconds' % (uploadId, nWaitingFiles, self.UPLOAD_CHUNK_REFRESH_IN_SECONDS))
time.sleep(self.UPLOAD_CHUNK_REFRESH_IN_SECONDS)
fileInfo = ObservedFile(filePath=filePath, dataDirectory=dataDirectory, experiment=experiment)
fileInfo.update(filePathDict)
fileInfo['daqInfo'] = daqInfo
fileInfo['uploadId'] = uploadId
# The upload info object is attached to the file as its status monitor.
fileInfo['statusMonitor'] = uploadInfo
fileInfo['skipPlugins'] = uploadInfo.get('skipPlugins', [])
fileProcessingManager.processFile(fileInfo)
nProcessedFiles += 1
except Exception, ex:
# NOTE(review): a file whose submission raises appears to be only logged here;
# it is not counted in nProcessedFiles - confirm this is intended.
self.logger.error('Processing error: %s', ex)
self.logger.debug('Done preparing upload id: %s (total of %s files)' % (uploadId, len(filePathsDict)))
def getUploadInfo(self, id):
    """
    Return the upload info with the given id after refreshing its status.

    :raises ObjectNotFound: if the upload tracker has no such upload
    """
    uploadInfo = UploadTracker.getInstance().get(id)
    if uploadInfo:
        uploadInfo.updateStatus()
        return uploadInfo
    raise ObjectNotFound('Upload id %s not found.' % id)
def listUploads(self, status):
    """ Return the upload infos that the upload tracker reports for the given status. """
    return UploadTracker.getInstance().getUploadInfos(status)
def stopUpload(self, id):
    """
    Request that the upload with the given id be aborted.

    The upload status is set to aborting and then refreshed.

    :raises ObjectNotFound: if the upload tracker has no such upload
    :returns: the upload info object
    """
    tracker = UploadTracker.getInstance()
    uploadInfo = tracker.get(id)
    if uploadInfo:
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING
        uploadInfo.updateStatus()
        return uploadInfo
    raise ObjectNotFound('Upload id %s not found.' % id)
def uploadDirectory(self, experimentName, dataDirectory, daqInfo):
    """
    Start a directory upload, processed once per plugin.

    For every processing plugin not listed in 'skipPlugins' a timer is
    started that calls processUploadDirectory() after
    UPLOAD_DELAY_IN_SECONDS; skipped plugins are recorded with skipped
    status in the upload's 'processingInfo'.

    :param experimentName: name of the experiment, looked up via the DS
        experiment API
    :param dataDirectory: directory to be uploaded
    :param daqInfo: dictionary of upload options; modified in place
        (experiment/storage keys and 'uploadId' are added, 'skipPlugins'
        is removed)
    :returns: directory upload info object, in running state
    :raises InvalidRequest: if the experiment has no storage directory
    """
    experiment = self.dsExperimentApi.getExperimentByName(experimentName)
    UploadTracker.getInstance().checkForActiveUpload(experiment, dataDirectory)
    experiment['daqInfo'] = daqInfo
    storageDirectory = experiment.get('storageDirectory')
    if storageDirectory is None:
        raise InvalidRequest('Experiment %s has not been started.' % experimentName)
    storageHost = experiment.get('storageHost')
    storageUrl = experiment.get('storageUrl')

    filePathsDict = FileSystemObserver.getInstance().getFiles(dataDirectory)
    uploadId = str(uuid.uuid4())
    self.logger.debug('Starting upload id %s' % uploadId)

    uploadInfo = DirectoryUploadInfo(daqInfo)
    uploadInfo['id'] = uploadId
    uploadInfo['experimentName'] = experimentName
    uploadInfo['storageDirectory'] = storageDirectory
    uploadInfo['storageHost'] = storageHost
    uploadInfo['storageUrl'] = storageUrl
    uploadInfo['dataDirectory'] = dataDirectory
    startTime = time.time()
    uploadInfo['startTime'] = startTime
    # The key previously carried a trailing space ('startTimestamp ').
    uploadInfo['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)

    daqInfo['experimentName'] = experimentName
    daqInfo['storageDirectory'] = storageDirectory
    daqInfo['storageHost'] = storageHost
    daqInfo['storageUrl'] = storageUrl
    daqInfo['dataDirectory'] = dataDirectory
    daqInfo['uploadId'] = uploadId

    # 'skipPlugins' arrives as a comma-separated string of plugin names.
    skipPlugins = DictUtility.getAndRemoveKey(daqInfo, 'skipPlugins', '')
    if skipPlugins:
        skipPlugins = skipPlugins.split(',')
        uploadInfo['skipPlugins'] = skipPlugins
    else:
        skipPlugins = []

    fileProcessingManager = FileProcessingManager.getInstance()
    processingInfo = {}
    uploadInfo['processingInfo'] = processingInfo
    for processorKey in fileProcessingManager.fileProcessorKeyList:
        processor = fileProcessingManager.fileProcessorDict.get(processorKey)
        processorName = processor.name
        if processorName in skipPlugins:
            processingInfo[processorName] = {'status' : dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED}
        else:
            self.logger.debug('Starting %s processing timer for directory %s upload' % (processorName, dataDirectory))
            timer = threading.Timer(self.UPLOAD_DELAY_IN_SECONDS, self.processUploadDirectory, args=[processor, uploadInfo, daqInfo, experiment, filePathsDict])
            # The pending entry is recorded before the timer is started.
            processingInfo[processorName] = {'status' : dmProcessingStatus.DM_PROCESSING_STATUS_PENDING}
            timer.start()

    UploadTracker.getInstance().startUpload(uploadId, uploadInfo)
    uploadInfo['nFiles'] = len(filePathsDict)
    uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
    return uploadInfo
def processUploadDirectory(self, processor, uploadInfo, daqInfo, experiment, filePathsDict):
    """
    Run one processor over an uploaded directory, honoring dependencies.

    Started by uploadDirectory() through a threading.Timer, once per
    processor. The method polls every
    DIRECTORY_UPLOAD_PROCESSING_WAIT_IN_SECONDS until all processors
    named in processor.dependsOn are done, then calls
    processor.processDirectory(). The outcome is recorded under the
    processor's name in the upload's 'processingInfo' dictionary.

    The processor is marked aborted if the upload status is aborting,
    skipped if any dependency was skipped, aborted, failed or has an
    unrecognized status, and failed if an exception is raised.

    :param processor: processor object providing name, dependsOn and
        processDirectory()
    :param uploadInfo: upload info holding 'id', 'dataDirectory',
        'status' and 'processingInfo'
    :param daqInfo: DAQ info passed on to the processor
    :param experiment: experiment passed on to the processor
    :param filePathsDict: file paths passed on to the processor
    """
    uploadId = uploadInfo.get('id')
    dataDirectory = uploadInfo.get('dataDirectory')
    processorName = processor.name
    processingInfo = uploadInfo.get('processingInfo')
    self.logger.debug('Starting %s processing for upload %s by %s' % (dataDirectory, uploadId, processorName))

    # Set up front so the timing bookkeeping at the end works even when
    # an exception is raised before processing actually starts.
    processingStartTime = time.time()
    try:
        dependsOn = processor.dependsOn
        while True:
            # Check status
            if uploadInfo['status'] == dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING:
                processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_ABORTED
                return

            # Check that processor can proceed: every dependency must be
            # done, so a single pending/running one makes us wait.
            canProcess = True
            for depProcessorName in dependsOn:
                depProcessorStatus = processingInfo.get(depProcessorName).get('status')
                if depProcessorStatus in ['skipped', 'aborted', 'failed']:
                    # We must skip processing
                    self.logger.debug('Skipping %s processing for upload %s due to %s status of %s' % (processorName, uploadId, depProcessorName, depProcessorStatus))
                    processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED
                    return
                elif depProcessorStatus in [dmProcessingStatus.DM_PROCESSING_STATUS_PENDING, dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING]:
                    # Dependency not finished yet, keep waiting
                    canProcess = False
                elif depProcessorStatus == 'done':
                    # This dependency does not block us
                    pass
                else:
                    # This should not happen
                    self.logger.error('Skipping %s processing for upload %s due to %s unrecognized status of %s' % (processorName, uploadId, depProcessorName, depProcessorStatus))
                    processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED
                    return

            # Process directory if we can
            if canProcess:
                directoryInfo = {'uploadInfo' : uploadInfo,
                    'daqInfo' : daqInfo,
                    'experiment' : experiment,
                    'filePathsDict' : filePathsDict
                }
                processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
                processingStartTime = time.time()
                processor.processDirectory(directoryInfo)
                # Only promote to done if the status is still running
                # after the processor returns.
                if processingInfo[processorName]['status'] == dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING:
                    processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_DONE
                    self.logger.debug('Directory %s processing complete for upload %s by %s' % (dataDirectory, uploadId, processorName))
                else:
                    self.logger.debug('Incomplete directory %s processing upload %s by %s, status: %s' % (dataDirectory, uploadId, processorName, processingInfo[processorName]['status']))
                break

            # Wait a bit longer
            time.sleep(self.DIRECTORY_UPLOAD_PROCESSING_WAIT_IN_SECONDS)
    except Exception as ex:
        self.logger.error('%s processing for upload %s failed: %s' % (processorName, uploadId, str(ex)))
        processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_FAILED
        processingInfo[processorName]['processingError'] = str(ex)

    processingEndTime = time.time()
    processingInfo[processorName]['processingEndTime'] = processingEndTime
    processingInfo[processorName]['processingStartTime'] = processingStartTime
    processingInfo[processorName]['processingRunTime'] = processingEndTime-processingStartTime
def getProcessingPlugins(self):
    """
    Return plugin infos for all registered file processors.

    Each entry carries the processor's name and its dependsOn value, in
    the order of the file processing manager's key list.
    """
    manager = FileProcessingManager.getInstance()
    processors = [manager.fileProcessorDict.get(key) for key in manager.fileProcessorKeyList]
    return [PluginInfo({'name' : p.name, 'dependsOn' : p.dependsOn}) for p in processors]
#!/usr/bin/env python
from dm.common.objects.experiment import Experiment
from dm.common.utility.objectTracker import ObjectTracker
# Object tracker specialization for Experiment objects.
class ExperimentTracker(ObjectTracker):
# Cache configuration
# presumably the class of the objects ObjectTracker keeps - confirm against ObjectTracker
objectClass = Experiment
####################################################################
# Testing
# Fetches the tracker instance twice and prints both results
# (presumably to show that getInstance() returns the same object - confirm).
if __name__ == '__main__':
et = ExperimentTracker.getInstance()
print et
et2 = ExperimentTracker.getInstance()
print et2
#!/usr/bin/env python
import threading
import time
import os
from watchdog.observers.polling import PollingObserver
from dm.common.utility.loggingManager import LoggingManager
from dm.common.utility.configurationManager import ConfigurationManager
from dm.common.objects.observedFile import ObservedFile
from dm.common.utility.valueUtility import ValueUtility
from dm.common.utility.objectUtility import ObjectUtility
from dm.common.utility.singleton import Singleton
from dm.common.utility.threadingUtility import ThreadingUtility
from dm.common.processing.fileProcessingManager import FileProcessingManager
from dmFileSystemEventHandler import DmFileSystemEventHandler
from daqTracker import DaqTracker
# Thread that keeps a map of observed files and periodically hands those
# that have not been updated for a configured delay to the file processing manager.
class FileSystemObserver(threading.Thread,Singleton):
# Configuration section and option keys read in __configure().
CONFIG_SECTION_NAME = 'FileSystemObserver'
MIN_FILE_PROCESSING_DELAY_IN_SECONDS_KEY = 'minfileprocessingdelayinseconds'
FILE_SYSTEM_EVENT_TIMEOUT_IN_SECONDS_KEY = 'filesystemeventtimeoutinseconds'
FILE_SYSTEM_OBSERVER_AGENT_KEY = 'filesystemobserveragent'
# Per-DAQ number of waiting files at which checkObservedFilesForProcessing()
# stops selecting more files for that DAQ.
DAQ_CHUNK_SIZE_IN_FILES = 500
# Singleton.
__instanceLock = threading.RLock()
__instance = None
def __init__(self):
    """
    Initialize the singleton instance.

    Runs under the class-level instance lock and does nothing if an
    instance has already been registered.
    """
    with FileSystemObserver.__instanceLock:
        if FileSystemObserver.__instance:
            return
        FileSystemObserver.__instance = self

        threading.Thread.__init__(self)
        self.setName('FileSystemObserverThread')
        loggerName = self.__class__.__name__
        self.logger = LoggingManager.getInstance().getLogger(loggerName)
        self.logger.debug('Initializing')

        # Synchronization primitives and thread state
        self.lock = threading.RLock()
        self.eventFlag = threading.Event()
        self.exitFlag = False

        # Observed files, keyed by file path
        self.observedFileMap = {}
        self.__configure()
        self.fileProcessingManager = FileProcessingManager.getInstance()
        self.logger.debug('Initialization complete')
def __configure(self):
    """
    Read observer settings from the configuration manager.

    Sets the minimum file processing delay and the file system event
    timeout (both converted to int), and creates the observer agent
    from the configured class specification.
    """
    cm = ConfigurationManager.getInstance()
    section = FileSystemObserver.CONFIG_SECTION_NAME
    configItems = cm.getConfigItems(section)
    self.logger.debug('Got config items: %s' % configItems)

    minDelay = cm.getConfigOption(section, FileSystemObserver.MIN_FILE_PROCESSING_DELAY_IN_SECONDS_KEY)
    self.minFileProcessingDelayInSeconds = int(minDelay)
    self.logger.debug('Minimum file processing delay: %s seconds' % self.minFileProcessingDelayInSeconds)

    eventTimeout = cm.getConfigOption(section, FileSystemObserver.FILE_SYSTEM_EVENT_TIMEOUT_IN_SECONDS_KEY)
    self.fileSystemEventTimeoutInSeconds = int(eventTimeout)
    self.logger.debug('File system event timeout: %s seconds' % self.fileSystemEventTimeoutInSeconds)

    # The agent option is resolved into module, class and constructor parts
    agentClass = cm.getConfigOption(section, FileSystemObserver.FILE_SYSTEM_OBSERVER_AGENT_KEY)
    (moduleName,className,constructor) = cm.getModuleClassConstructorTuple(agentClass)
    self.logger.debug('Creating file system observer agent instance of class %s' % className)
    agent = ObjectUtility.createObjectInstance(moduleName, className, constructor)
    self.fileSystemObserverAgent = agent
    self.fileSystemObserverAgent.setFileSystemObserver(self)
@ThreadingUtility.synchronize
def createDirectory(self, dataDirectory):
    """ Ask the observer agent to create the given data directory. """
    agent = self.fileSystemObserverAgent
    agent.createDirectory(dataDirectory)
@ThreadingUtility.synchronize
def getFiles(self, dataDirectory):
    """ Return whatever the observer agent reports as the files of the given data directory. """
    self.logger.debug('Agent is retrieving files for %s' % dataDirectory)
    files = self.fileSystemObserverAgent.getFiles(dataDirectory)
    return files
@ThreadingUtility.synchronize
def startObservingPath(self, dataDirectory, experiment):
    """ Ask the observer agent to start observing the data directory for the experiment. """
    self.logger.debug('Agent is starting observer for %s' % dataDirectory)
    agent = self.fileSystemObserverAgent
    agent.startObservingPath(dataDirectory, experiment)
@ThreadingUtility.synchronize
def stopObservingPath(self, dataDirectory, experiment):
    """ Ask the observer agent to stop observing the data directory for the experiment. """
    self.logger.debug('Agent is stopping observer for %s' % dataDirectory)
    agent = self.fileSystemObserverAgent
    agent.stopObservingPath(dataDirectory, experiment)
@ThreadingUtility.synchronize
def fileUpdated(self, filePath, dataDirectory, experiment):
    """
    Record that a file in an observed data directory was created or updated.

    The file is ignored if no DAQ is registered for the experiment and
    data directory, or if it is a hidden file (name starting with '.')
    and the DAQ's 'processHiddenFiles' option is not set. Otherwise the
    file is added to the observed file map (on first sight) and its last
    update time is set to now.

    :param filePath: path of the created/updated file
    :param dataDirectory: observed data directory containing the file
    :param experiment: experiment the data directory belongs to
    """
    daqInfo = DaqTracker.getInstance().getDaqInfoByExperimentAndDataDirectory(experiment, dataDirectory)
    experimentName = experiment.get('name')

    # No daq info, ignore
    if not daqInfo:
        self.logger.debug('No daq for data directory %s and experiment %s, file path %s will not be processed' % (dataDirectory, experimentName, filePath))
        return

    # Do not process hidden files unless requested
    if not ValueUtility.toBoolean(daqInfo.get('processHiddenFiles')):
        fileName = os.path.basename(filePath)
        if fileName.startswith('.'):
            self.logger.debug('File path %s is hidden file, will not process it' % filePath)
            return

    daqId = daqInfo['id']
    observedFile = self.observedFileMap.get(filePath)
    if not observedFile:
        observedFile = ObservedFile(filePath=filePath, dataDirectory=dataDirectory, experiment=experiment)
        # The file carries a dictionary form of the DAQ info, while the
        # DAQ info object itself is attached as its status monitor.
        observedFile['daqInfo'] = daqInfo.toDictWithOriginalKeys()
        observedFile['statusMonitor'] = daqInfo
        self.observedFileMap[filePath] = observedFile
        self.logger.debug('New observed file: %s (daq id: %s)' % (filePath, daqId))
        daqInfo.fileAdded(filePath)
    else:
        self.logger.debug('Observed file updated: %s (daq id: %s)' % (filePath, daqId))
    observedFile.setLastUpdateTimeToNow()
@ThreadingUtility.synchronize
def checkObservedFilesForProcessing(self):
"""
Return the paths of observed files that are ready for processing.

A file is selected when its last update is more than
minFileProcessingDelayInSeconds in the past, unless the number of
waiting files for its DAQ has reached DAQ_CHUNK_SIZE_IN_FILES.
"""
now = time.time()
filePathsForProcessing = []
# We use number of waiting files to determine whether
# more files should be added for processing, so we need to
# update all daq infos before going over observed files
DaqTracker.getInstance().updateDaqInfos()
# Running per-DAQ count of waiting files, keyed by DAQ id; seeded from
# the DAQ info on first use and increased as files are selected below.
nWaitingFilesDict = {}
for (filePath,observedFile) in self.observedFileMap.items():
# NOTE(review): 'daqInfo' is the dictionary stored by fileUpdated() via
# toDictWithOriginalKeys(), not the tracker object refreshed above -
# confirm that its 'nWaitingFiles' value reflects the refreshed count.
daqInfo = observedFile['daqInfo']
daqId = daqInfo['id']
nWaitingFiles = nWaitingFilesDict.get(daqId, daqInfo.get('nWaitingFiles', 0))
if nWaitingFiles >= self.DAQ_CHUNK_SIZE_IN_FILES:
# We do not need to add more files for processing for this DAQ
#self.logger.debug('There are %s waiting files for DAQ id %s, will not add more for processing.' % (nWaitingFiles, daqInfo['id']))
continue
timestamp = observedFile.get('lastUpdateTime')
deltaT = now - timestamp
if deltaT > self.minFileProcessingDelayInSeconds:
self.logger.debug('File %s was last modified %s seconds ago, will process it.' % (filePath, deltaT))
filePathsForProcessing.append(filePath)
nWaitingFilesDict[daqId] = nWaitingFiles+1
return filePathsForProcessing
@ThreadingUtility.synchronize
def processFile(self, filePath):
    """
    Hand an observed file over to the file processing manager.

    The file is removed from the observed file map first; nothing is
    done if the path is not in the map.
    """
    self.logger.debug('Processing file %s' % filePath)
    observedFile = self.observedFileMap.get(filePath)
    if observedFile is None:
        return
    del self.observedFileMap[filePath]
    self.fileProcessingManager.processFile(observedFile)
@ThreadingUtility.synchronize
def start(self):
    """ Start the observer thread first, then the observer agent. """
    debug = self.logger.debug
    debug('Starting file observer thread')
    threading.Thread.start(self)
    debug('Starting file observer agent')
    self.fileSystemObserverAgent.start()
def run(self):
    """
    Thread main loop.

    Until the exit flag is set, each pass selects the observed files
    that are ready, hands them over for processing, and then waits on
    the event flag for up to fileSystemEventTimeoutInSeconds. Exceptions
    raised during a pass are logged and do not end the loop.
    """
    self.logger.debug('Starting thread: %s' % self.getName())
    while not self.exitFlag:
        try:
            readyFilePaths = self.checkObservedFilesForProcessing()
            if readyFilePaths:
                self.logger.debug('Will queue %s new files for processing' % (len(readyFilePaths)))
                for readyFilePath in readyFilePaths:
                    self.processFile(readyFilePath)
        except Exception as ex:
            self.logger.exception(ex)
        # NOTE(review): the event flag is not cleared here; once set, wait()
        # returns immediately until clearEvent() is called - confirm callers do so.
        self.eventFlag.wait(timeout=self.fileSystemEventTimeoutInSeconds)
    self.logger.debug('Exit flag set, %s done' % self.getName())
@ThreadingUtility.synchronize
def stop(self):
    """
    Stop the observer agent and then the observer thread.

    The exit flag is set and the event flag raised so that the thread
    loop wakes up, after which the thread is joined.
    """
    # NOTE(review): the join happens inside a synchronized method while the
    # thread loop calls other synchronized methods - confirm that
    # ThreadingUtility.synchronize cannot make these block each other.
    debug = self.logger.debug
    debug('Stopping file observer agent')
    self.fileSystemObserverAgent.stop()
    debug('Stopping file observer thread')
    self.exitFlag = True
    self.eventFlag.set()
    debug('Event is set, joining thread')
    threading.Thread.join(self)
    debug('Module stopped')
@ThreadingUtility.synchronize
def setEvent(self):
    """ Set the event flag that the thread loop waits on. """
    flag = self.eventFlag
    flag.set()
@ThreadingUtility.synchronize
def clearEvent(self):
    """ Clear the event flag that the thread loop waits on. """
    flag = self.eventFlag
    flag.clear()
####################################################################
# Testing
# Starts the observer, lets it run for 30 seconds, then stops it.
if __name__ == '__main__':
fp = FileSystemObserver.getInstance()
fp.start()
time.sleep(30)
fp.stop()
#!/usr/bin/env python
from dm.common.utility.loggingManager import LoggingManager
class FileSystemObserverAgent:
    """
    Base file system observer agent.

    Keeps a reference to the file system observer it is attached to;
    every operation defined here does nothing and returns None.
    """

    def __init__(self):
        self.fileSystemObserver = None
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)

    def setFileSystemObserver(self, fileSystemObserver):
        """ Remember the file system observer this agent is attached to. """
        self.fileSystemObserver = fileSystemObserver

    def createDirectory(self, dataDirectory):
        """ Does nothing in this class. """
        return None

    def getFiles(self, dataDirectory):
        """ Does nothing in this class; returns None. """
        return None

    def startObservingPath(self, dataDirectory, experiment):
        """ Does nothing in this class. """
        return None

    def stopObservingPath(self, dataDirectory, experiment):
        """ Does nothing in this class. """
        return None

    def start(self):
        """ Does nothing in this class. """
        return None

    def stop(self):
        """ Does nothing in this class. """
        return None
#!/usr/bin/env python
from threading import Timer
from pollingFileSystemObserverAgent import PollingFileSystemObserverAgent
from dm.common.utility.ftpUtility import FtpUtility
class FtpFileSystemObserverAgent(PollingFileSystemObserverAgent):
    """ Polling observer agent that retrieves file listings over FTP. """

    DEFAULT_POLLING_PERIOD_IN_SECONDS = 15

    def __init__(self, host, port, username=None, password=None, pollingPeriod=DEFAULT_POLLING_PERIOD_IN_SECONDS):
        PollingFileSystemObserverAgent.__init__(self, pollingPeriod)
        # Default FTP endpoint and credentials used by getFiles()
        self.host, self.port = host, port
        self.username, self.password = username, password

    def getFiles(self, dataDirectory):
        """
        Return the files FtpUtility reports for the given data directory.

        The data directory is parsed as an FTP URL, with this agent's
        host and port passed as defaults.
        """
        (scheme, ftpHost, ftpPort, ftpDirPath) = FtpUtility.parseFtpUrl(dataDirectory, defaultHost=self.host, defaultPort=self.port)
        self.logger.debug('Retrieving files from FTP host: %s, port: %s, directory path: %s' % (ftpHost, ftpPort, ftpDirPath))
        ftpClient = FtpUtility(ftpHost, ftpPort, self.username, self.password)
        return ftpClient.getFiles(ftpDirPath, {})
####################################################################
# Testing
# Lists the files under /tmp/test on a hard-coded FTP host, then observes
# that path for 100 seconds before stopping observation.
if __name__ == '__main__':
import time
agent = FtpFileSystemObserverAgent('zagreb', 2811)
print 'ORIGINAL FILES: ', agent.getFiles('/tmp/test')
agent.startObservingPath('/tmp/test', 'e1')
time.sleep(100)
agent.stopObservingPath('/tmp/test', 'e1')