#!/usr/bin/env python

#
# Route mapper for DM DAQ web service.
#

import cherrypy

from dm.common.utility.loggingManager import LoggingManager
from dm.common.utility.configurationManager import ConfigurationManager
from dm.common.service.loginRouteDescriptor import LoginRouteDescriptor
from experimentRouteDescriptor import ExperimentRouteDescriptor


class DaqWebServiceRouteMapper:

    @classmethod
    def setupRoutes(cls):
        """ Setup RESTful routes. """
        logger = LoggingManager.getInstance().getLogger(cls.__name__)
        contextRoot = ConfigurationManager.getInstance().getContextRoot()
        logger.debug('Using context root: %s' % contextRoot)

        # Get routes.
        routes = LoginRouteDescriptor.getRoutes()
        routes += ExperimentRouteDescriptor.getRoutes()

        # Add routes to dispatcher.
        d = cherrypy.dispatch.RoutesDispatcher()
        for route in routes:
            logger.debug('Connecting route: %s' % route)
            d.connect(route['name'], route['path'], action=route['action'], controller=route['controller'], conditions=dict(method=route['method']))
        return d
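
#######################################################################
# Example (editor's sketch, not part of the source tree): how a
# RoutesDispatcher like the one returned by setupRoutes() is typically
# mounted in CherryPy. The '/dm/ping' path, port, and PingController
# are illustrative assumptions; the real service wires in
# ExperimentSessionController et al. via setupRoutes().

if __name__ == '__main__':
    import cherrypy

    class PingController(object):
        @cherrypy.expose
        def ping(self):
            return 'OK'

    d = cherrypy.dispatch.RoutesDispatcher()
    d.connect('ping', '/dm/ping', action='ping', controller=PingController(), conditions=dict(method=['GET']))
    # With a dispatcher-based app there is no root object to mount;
    # the dispatcher is attached via per-path config instead.
    cherrypy.tree.mount(root=None, config={'/': {'request.dispatch': d}})
    cherrypy.config.update({'server.socket_port': 8080})
    cherrypy.engine.start()
    cherrypy.engine.block()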
#!/usr/bin/env python

#
# Experiment route descriptor.
#

from dm.common.utility.configurationManager import ConfigurationManager
from experimentSessionController import ExperimentSessionController


class ExperimentRouteDescriptor:

    @classmethod
    def getRoutes(cls):
        contextRoot = ConfigurationManager.getInstance().getContextRoot()

        # Static instances shared between different routes
        experimentSessionController = ExperimentSessionController()

        # Define routes.
        routes = [
            # Start experiment daq
            {
                'name' : 'startDaq',
                'path' : '%s/experimentsByName/:(experimentName)/startDaq/:(dataDirectory)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'startDaq',
                'method' : ['POST']
            },
            # Stop experiment daq
            {
                'name' : 'stopDaq',
                'path' : '%s/experimentsByName/:(experimentName)/stopDaq/:(dataDirectory)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'stopDaq',
                'method' : ['POST']
            },
            # Get daq info
            {
                'name' : 'getDaqInfo',
                'path' : '%s/experimentDaqs/:(id)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'getDaqInfo',
                'method' : ['GET']
            },
            # List DAQs
            {
                'name' : 'listDaqs',
                'path' : '%s/experimentDaqsByStatus/:(status)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'listDaqs',
                'method' : ['GET']
            },
            # Upload experiment data
            {
                'name' : 'upload',
                'path' : '%s/experimentsByName/:(experimentName)/upload/:(dataDirectory)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'upload',
                'method' : ['POST']
            },
            # Get upload info
            {
                'name' : 'getUploadInfo',
                'path' : '%s/experimentUploads/:(id)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'getUploadInfo',
                'method' : ['GET']
            },
            # List uploads
            {
                'name' : 'listUploads',
                'path' : '%s/experimentUploadsByStatus/:(status)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'listUploads',
                'method' : ['GET']
            },
            # Stop upload
            {
                'name' : 'stopUpload',
                'path' : '%s/experimentUploads/stopUpload/:(id)' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'stopUpload',
                'method' : ['POST']
            },
            # Get processing plugins
            {
                'name' : 'getProcessingPlugins',
                'path' : '%s/processingPlugins' % contextRoot,
                'controller' : experimentSessionController,
                'action' : 'getProcessingPlugins',
                'method' : ['GET']
            },
        ]
        return routes
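
#######################################################################
# Example (editor's sketch, not from the source tree): the shape of an
# HTTP request matching the 'startDaq' route above. The host, port, and
# context root are illustrative assumptions; note that the controller
# decodes path parameters with dm's own Encoder, so the plain URL
# quoting shown here is for illustration only, not the wire format the
# real DM client APIs use.

if __name__ == '__main__':
    import urllib
    contextRoot = '/dm'  # assumed context root
    experimentName = urllib.quote('e1', safe='')
    dataDirectory = urllib.quote('/data/e1', safe='')
    url = 'https://daq.example.com:8181%s/experimentsByName/%s/startDaq/%s' % (contextRoot, experimentName, dataDirectory)
    print 'POST %s' % url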
#!/usr/bin/env python

import cherrypy
import json

from dm.common.constants import dmProcessingMode
from dm.common.constants import dmProcessingStatus
from dm.common.service.dmSessionController import DmSessionController
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.utility.encoder import Encoder
from dm.common.utility.dictUtility import DictUtility
from dm.daq_web_service.service.impl.experimentSessionControllerImpl import ExperimentSessionControllerImpl


class ExperimentSessionController(DmSessionController):

    def __init__(self):
        DmSessionController.__init__(self)
        self.experimentSessionControllerImpl = ExperimentSessionControllerImpl()

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def startDaq(self, experimentName, dataDirectory, **kwargs):
        if not experimentName:
            raise InvalidRequest('Missing experiment name.')
        experimentName = Encoder.decode(experimentName)
        if not dataDirectory:
            raise InvalidRequest('Missing data directory.')
        dataDirectory = Encoder.decode(dataDirectory)
        if not dataDirectory.startswith('/') and not dataDirectory.count('://'):
            raise InvalidRequest('Data directory must be an absolute path or URL: %s' % dataDirectory)
        daqInfo = {}
        encodedDaqInfo = kwargs.get('daqInfo')
        if encodedDaqInfo is not None:
            daqInfo = json.loads(Encoder.decode(encodedDaqInfo))
        response = self.experimentSessionControllerImpl.startDaq(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        self.logger.debug('Started DAQ: %s' % response)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def stopDaq(self, experimentName, dataDirectory, **kwargs):
        if not experimentName:
            raise InvalidRequest('Missing experiment name.')
        experimentName = Encoder.decode(experimentName)
        if not dataDirectory:
            raise InvalidRequest('Missing data directory.')
        dataDirectory = Encoder.decode(dataDirectory)
        response = self.experimentSessionControllerImpl.stopDaq(experimentName, dataDirectory).getFullJsonRep()
        self.logger.debug('Stopped DAQ: %s' % response)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getDaqInfo(self, id, **kwargs):
        response = self.experimentSessionControllerImpl.getDaqInfo(id).getFullJsonRep()
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def listDaqs(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_ANY, **kwargs):
        if not status:
            status = dmProcessingStatus.DM_PROCESSING_STATUS_ANY
        if status not in dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST:
            raise InvalidRequest('Invalid processing status "%s". Status must be one of %s.' % (status, dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST))
        return self.listToJson(self.experimentSessionControllerImpl.listDaqs(status))

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def upload(self, experimentName, dataDirectory, **kwargs):
        if not experimentName:
            raise InvalidRequest('Missing experiment name.')
        experimentName = Encoder.decode(experimentName)
        if not dataDirectory:
            raise InvalidRequest('Missing data directory.')
        dataDirectory = Encoder.decode(dataDirectory)
        if not dataDirectory.startswith('/') and not dataDirectory.count('://'):
            raise InvalidRequest('Data directory must be an absolute path or URL: %s' % dataDirectory)
        daqInfo = {}
        encodedDaqInfo = kwargs.get('daqInfo')
        if encodedDaqInfo:
            daqInfo = json.loads(Encoder.decode(encodedDaqInfo))
        processingMode = DictUtility.getAndRemoveKey(daqInfo, 'processingMode', dmProcessingMode.DM_PROCESSING_MODE_FILES)
        if processingMode not in dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST:
            raise InvalidRequest('Invalid processing mode "%s". Allowed processing modes: %s' % (processingMode, dmProcessingMode.DM_ALLOWED_PROCESSING_MODE_LIST))
        if processingMode == dmProcessingMode.DM_PROCESSING_MODE_FILES:
            response = self.experimentSessionControllerImpl.uploadFiles(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        else:
            response = self.experimentSessionControllerImpl.uploadDirectory(experimentName, dataDirectory, daqInfo).getFullJsonRep()
        self.logger.debug('Returning upload info for directory %s' % dataDirectory)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getUploadInfo(self, id, **kwargs):
        response = self.experimentSessionControllerImpl.getUploadInfo(id).getFullJsonRep()
        self.logger.debug('Returning info for upload id %s' % id)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def listUploads(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_ANY, **kwargs):
        if not status:
            status = dmProcessingStatus.DM_PROCESSING_STATUS_ANY
        if status not in dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST:
            raise InvalidRequest('Invalid processing status "%s". Status must be one of %s.' % (status, dmProcessingStatus.DM_ALLOWED_PROCESSING_STATUS_LIST))
        return self.listToJson(self.experimentSessionControllerImpl.listUploads(status))

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def stopUpload(self, id, **kwargs):
        response = self.experimentSessionControllerImpl.stopUpload(id).getFullJsonRep()
        self.logger.debug('Stopped upload id %s' % id)
        return response

    @cherrypy.expose
    @DmSessionController.require(DmSessionController.canManageStation())
    @DmSessionController.execute
    def getProcessingPlugins(self, **kwargs):
        return self.listToJson(self.experimentSessionControllerImpl.getProcessingPlugins())
#!/usr/bin/env python

from dm.common.utility.loggingManager import LoggingManager
from dm.common.processing.plugins.fileProcessor import FileProcessor
from uploadTracker import UploadTracker
from daqTracker import DaqTracker


class DaqProcessingCompleteNotificationPlugin(FileProcessor):

    def __init__(self):
        FileProcessor.__init__(self)
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)

    def processFile(self, fileInfo):
        filePath = fileInfo.get('filePath')
        uploadId = fileInfo.get('uploadId')
        daqId = fileInfo.get('daqInfo', {}).get('id')
        trackedInfo = None
        if uploadId is not None:
            self.logger.debug('Upload id for file %s: %s' % (filePath, uploadId))
            trackedInfo = UploadTracker.getInstance().get(uploadId)
        if daqId is not None:
            self.logger.debug('Daq id for file %s: %s' % (filePath, daqId))
            trackedInfo = DaqTracker.getInstance().get(daqId)
        if trackedInfo is not None:
            fileDict = trackedInfo.get('fileDict', {})
            trackedFileInfo = fileDict.get(filePath)
            if trackedFileInfo:
                trackedFileInfo['processed'] = True
            else:
                self.logger.error('%s object does not have file path %s' % (trackedInfo, filePath))
            trackedInfo.updateStatus()

#######################################################################
# Testing.

if __name__ == '__main__':
    pass
#!/usr/bin/env python

import os
import uuid
import time

from dm.common.constants import dmProcessingStatus
from dm.common.objects.daqInfo import DaqInfo
from dm.common.utility.objectTracker import ObjectTracker
from dm.common.utility.timeUtility import TimeUtility
from dm.common.exceptions.objectAlreadyExists import ObjectAlreadyExists
from dm.common.exceptions.objectNotFound import ObjectNotFound


class DaqTracker(ObjectTracker):

    # Cache configuration
    objectClass = DaqInfo
    cacheSize = 100

    def __init__(self, *args, **kwargs):
        # Forward the arguments themselves, not the tuple/dict objects
        ObjectTracker.__init__(self, *args, **kwargs)
        self.activeDaqDict = {}

    def startDaq(self, experiment, dataDirectory, daqInfo=None):
        # Avoid a shared mutable default argument; this dictionary is
        # modified below
        if daqInfo is None:
            daqInfo = {}
        # Prevent a second daq from being started in the same directory
        experimentName = experiment.get('name')
        dataDir = os.path.normpath(dataDirectory)
        activeDaqKey = experimentName + dataDir
        oldDaqInfo = self.activeDaqDict.get(activeDaqKey)
        if oldDaqInfo:
            raise ObjectAlreadyExists('DAQ is already active for experiment %s in data directory %s.' % (experimentName, dataDirectory))
        daqId = str(uuid.uuid4())
        daqInfo['id'] = daqId
        daqInfo['experimentName'] = experimentName
        daqInfo['storageDirectory'] = experiment.get('storageDirectory')
        daqInfo['storageHost'] = experiment.get('storageHost')
        daqInfo['storageUrl'] = experiment.get('storageUrl')
        daqInfo['dataDirectory'] = dataDirectory

        # Create DaqInfo object with keys that we want to save with file
        # metadata, and add other keys later
        daqInfo2 = DaqInfo(daqInfo)
        daqInfo2['nFiles'] = 0
        daqInfo2['nProcessedFiles'] = 0
        daqInfo2['nProcessingErrors'] = 0
        daqInfo2['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
        startTime = time.time()
        daqInfo2['startTime'] = startTime
        daqInfo2['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)
        self.activeDaqDict[activeDaqKey] = daqInfo2
        self.put(daqId, daqInfo2)
        return daqInfo2

    def stopDaq(self, experiment, dataDirectory, status=None, errorMessage=None):
        experimentName = experiment.get('name')
        dataDir = os.path.normpath(dataDirectory)
        activeDaqKey = experimentName + dataDir
        daqInfo = self.activeDaqDict.get(activeDaqKey)
        if not daqInfo:
            raise ObjectNotFound('DAQ is not active for experiment %s in data directory %s.' % (experimentName, dataDirectory))
        endTime = time.time()
        daqInfo['endTime'] = endTime
        daqInfo['endTimestamp'] = TimeUtility.formatLocalTimestamp(endTime)
        if status:
            daqInfo['status'] = status
        else:
            daqInfo.updateStatus()
        if errorMessage:
            daqInfo['errorMessage'] = errorMessage
        del self.activeDaqDict[activeDaqKey]
        return daqInfo

    def getDaqInfo(self, id):
        return self.get(id)

    def updateDaqInfos(self, status=dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING):
        daqInfoList = self.getAll()
        for daqInfo in daqInfoList:
            if daqInfo.get('status', '') == status:
                daqInfo.updateStatus()

    def getDaqInfos(self, status=None):
        daqInfoList = self.getAll()
        if status is None or status == dmProcessingStatus.DM_PROCESSING_STATUS_ANY:
            return daqInfoList
        filteredDaqInfoList = []
        for daqInfo in daqInfoList:
            if daqInfo.get('status', '') == status:
                filteredDaqInfoList.append(daqInfo)
        return filteredDaqInfoList

    def getDaqInfoByExperimentAndDataDirectory(self, experiment, dataDirectory):
        experimentName = experiment.get('name')
        dataDir = os.path.normpath(dataDirectory)
        activeDaqKey = experimentName + dataDir
        return self.activeDaqDict.get(activeDaqKey)

####################################################################
# Testing

if __name__ == '__main__':
    tracker = DaqTracker.getInstance()
    print tracker
    experiment = {'name' : 'e1', 'owner' : 'sv'}
    dataDirectory = 'ftp://wolf:2811/data/e1'
    daqInfo = tracker.startDaq(experiment, dataDirectory)
    daqId = daqInfo['id']
    print 'DAQ ID: ', daqId
    print 'DAQ INFO: ', tracker.getDaqInfo(daqId)
    print 'DAQS: ', tracker.getDaqInfos()
    print 'REMOVED DAQ: ', tracker.stopDaq(experiment, dataDirectory)
    dataDirectory = 'ftp:///wolf:2811///data/e1'
    daqInfo = tracker.startDaq(experiment, dataDirectory)
    print 'DAQ ID: ', daqInfo['id']
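
#######################################################################
# Example (editor's sketch): the active-DAQ key used above is the
# experiment name concatenated with the normalized data directory, so
# different spellings of the same path map to the same key and a second
# startDaq on any of them collides with the first. A quick standalone
# illustration with stdlib only:

if __name__ == '__main__':
    import os.path
    for d in ['/data/e1', '/data/e1/', '/data//e1', '/data/./e1']:
        # Every variant normalizes to '/data/e1'
        print 'e1' + os.path.normpath(d)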
#!/usr/bin/env python

import os
import glob
from watchdog.events import FileSystemEventHandler
from dm.common.utility.loggingManager import LoggingManager


class DmFileSystemEventHandler(FileSystemEventHandler):

    def __init__(self, fileSystemObserver, dataDirectory, experiment):
        FileSystemEventHandler.__init__(self)
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
        self.fileSystemObserver = fileSystemObserver
        self.dataDirectory = dataDirectory
        self.experiment = experiment

    def dispatch(self, event):
        FileSystemEventHandler.dispatch(self, event)

    def on_any_event(self, event):
        FileSystemEventHandler.on_any_event(self, event)
        self.logger.debug('File system any_event event: %s' % (event.__dict__))

    def on_created(self, event):
        FileSystemEventHandler.on_created(self, event)
        self.logger.debug('File system created event: %s' % (event.__dict__))
        self.processEvent(event)

    def on_moved(self, event):
        FileSystemEventHandler.on_moved(self, event)
        self.logger.debug('File system moved event: %s' % (event.__dict__))

    def on_deleted(self, event):
        FileSystemEventHandler.on_deleted(self, event)
        self.logger.debug('File system deleted event: %s' % (event.__dict__))

    def on_modified(self, event):
        FileSystemEventHandler.on_modified(self, event)
        self.logger.debug('File system modified event: %s' % (event.__dict__))
        self.processEvent(event)

    def processEvent(self, event):
        if event.is_directory:
            try:
                files = glob.glob(os.path.join(event.src_path, '*'))
                self.logger.debug('Processing directory event: %s, src path: %s' % (event.__dict__, event.src_path))
                if len(files) > 0:
                    sortedFiles = sorted(files, key=os.path.getctime, reverse=True)
                    for filePath in sortedFiles:
                        if os.path.isfile(filePath):
                            self.logger.debug('Latest file: %s' % (filePath))
                            self.fileSystemObserver.fileUpdated(filePath, self.dataDirectory, self.experiment)
                            return
                self.logger.debug('No new files found in %s' % (event.src_path))
            except Exception as ex:
                self.logger.error('Exception occurred when searching for file in directory %s: %s' % (event.__dict__, ex))
        else:
            filePath = event.src_path
            self.logger.debug('Processing file event: %s' % (event.__dict__))
            self.fileSystemObserver.fileUpdated(filePath, self.dataDirectory, self.experiment)

####################################################################
# Testing

if __name__ == '__main__':
    import sys
    import time
    import logging
    from watchdog.observers.polling import PollingObserver

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    path = sys.argv[1] if len(sys.argv) > 1 else '.'

    # Minimal stand-in for the FileSystemObserver singleton, so this
    # handler can be exercised on its own (the handler requires its
    # three constructor arguments).
    class PrintingFileSystemObserver:
        def fileUpdated(self, filePath, dataDirectory, experiment):
            logging.info('File updated: %s' % filePath)

    eventHandler = DmFileSystemEventHandler(PrintingFileSystemObserver(), path, 'e1')
    observer = PollingObserver()
    observedWatch = observer.schedule(eventHandler, path, recursive=True)
    print 'OBSERVED WATCH: ', observedWatch
    observer.start()
    try:
        while True:
            time.sleep(1)
            print time.time()
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
#!/usr/bin/env python

from dm.common.utility.loggingManager import LoggingManager
from dm.common.processing.plugins.fileProcessor import FileProcessor
from dm.ds_web_service.api.dsRestApiFactory import DsRestApiFactory


class DsProcessFileNotificationPlugin(FileProcessor):

    def __init__(self, dependsOn=[]):
        FileProcessor.__init__(self, dependsOn=dependsOn)
        self.dsFileApi = DsRestApiFactory.getFileDsApi()
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)

    def processFile(self, fileInfo):
        experimentFilePath = fileInfo.get('experimentFilePath')
        experimentName = fileInfo.get('experimentName')
        daqInfo = fileInfo.get('daqInfo', {})
        md5Sum = fileInfo.get('md5Sum')
        self.logger.debug('Processing file %s for experiment %s' % (experimentFilePath, experimentName))

        # Prepare dictionary for processing. Only send needed data.
        fileInfo2 = {}
        fileInfo2['experimentFilePath'] = experimentFilePath
        fileInfo2['experimentName'] = experimentName
        if md5Sum:
            fileInfo2['md5Sum'] = md5Sum
        fileInfo2['daqInfo'] = daqInfo
        self.logger.debug('File info sent to DS service: %s' % (str(fileInfo2)))
        self.dsFileApi.processFile(experimentFilePath, experimentName, fileInfo2)

    def processDirectory(self, directoryInfo):
        uploadInfo = directoryInfo.get('uploadInfo')
        experimentName = uploadInfo.get('experimentName')
        experimentDirectoryPath = ''
        daqInfo = directoryInfo.get('daqInfo')

        # Prepare dictionary for processing. Only send needed data.
        directoryInfo2 = {}
        directoryInfo2['experimentDirectoryPath'] = experimentDirectoryPath
        directoryInfo2['experimentName'] = experimentName
        directoryInfo2['daqInfo'] = daqInfo
        self.logger.debug('Directory info sent to DS service: %s' % (str(directoryInfo2)))
        self.dsFileApi.processDirectory(experimentDirectoryPath, experimentName, directoryInfo2)

#######################################################################
# Testing.

if __name__ == '__main__':
    pass
#!/usr/bin/env python

#
# Implementation for experiment session controller.
#

import time
import uuid
import threading

from dm.common.constants import dmProcessingStatus
from dm.common.objects.dmObjectManager import DmObjectManager
from dm.common.exceptions.invalidRequest import InvalidRequest
from dm.common.exceptions.objectNotFound import ObjectNotFound
from dm.common.objects.observedFile import ObservedFile
from dm.common.objects.uploadInfo import UploadInfo
from dm.common.objects.pluginInfo import PluginInfo
from dm.common.objects.directoryUploadInfo import DirectoryUploadInfo
from dm.common.processing.fileProcessingManager import FileProcessingManager
from dm.common.utility.timeUtility import TimeUtility
from dm.common.utility.dictUtility import DictUtility
from dm.ds_web_service.api.dsRestApiFactory import DsRestApiFactory
from uploadTracker import UploadTracker
from daqTracker import DaqTracker
from fileSystemObserver import FileSystemObserver


class ExperimentSessionControllerImpl(DmObjectManager):
    """ Experiment session controller implementation class. """

    UPLOAD_DELAY_IN_SECONDS = 1.0
    UPLOAD_CHUNK_SIZE_IN_FILES = 100
    UPLOAD_CHUNK_REFRESH_IN_SECONDS = 10.0
    DIRECTORY_UPLOAD_PROCESSING_WAIT_IN_SECONDS = 15.0
    SECONDS_PER_HOUR = 60*60

    def __init__(self):
        DmObjectManager.__init__(self)
        self.dsExperimentApi = DsRestApiFactory.getExperimentDsApi()

    def startDaq(self, experimentName, dataDirectory, daqInfo):
        FileSystemObserver.getInstance().createDirectory(dataDirectory)
        experiment = self.dsExperimentApi.getExperimentByName(experimentName)
        storageDirectory = experiment.get('storageDirectory')
        if storageDirectory is None:
            raise InvalidRequest('Experiment %s has not been started.' % experimentName)
        daqInfo = DaqTracker.getInstance().startDaq(experiment, dataDirectory, daqInfo)
        FileSystemObserver.getInstance().startObservingPath(dataDirectory, experiment)
        maxRunTimeInHours = daqInfo.get('maxRunTimeInHours')
        if maxRunTimeInHours:
            daqId = daqInfo['id']
            self.logger.debug('Starting timer to automatically stop DAQ id %s for experiment %s after max runtime of %s hours' % (daqId, experimentName, maxRunTimeInHours))
            # The value may arrive as a string in the request payload
            maxRunTimeInSeconds = float(maxRunTimeInHours)*self.SECONDS_PER_HOUR
            timer = threading.Timer(maxRunTimeInSeconds, self.stopDaqTimer, args=[experimentName, dataDirectory, daqId])
            timer.start()
        return daqInfo

    def stopDaqTimer(self, experimentName, dataDirectory, daqId):
        try:
            daqInfo = DaqTracker.getInstance().getDaqInfo(daqId)
            maxRunTimeInHours = daqInfo.get('maxRunTimeInHours')
            self.logger.debug('Attempting to automatically stop DAQ id %s for experiment %s after max runtime of %s hours was exceeded' % (daqId, experimentName, maxRunTimeInHours))
            daqStatus = daqInfo.get('status')
            if daqStatus != dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING:
                self.logger.debug('DAQ id %s has status of %s, will not stop it automatically' % (daqId, daqStatus))
                return
            self.stopDaq(experimentName, dataDirectory)
        except Exception as ex:
            self.logger.error('Could not automatically stop DAQ id %s: %s' % (daqId, str(ex)))

    def stopDaq(self, experimentName, dataDirectory):
        experiment = self.dsExperimentApi.getExperimentByName(experimentName)
        daqInfo = DaqTracker.getInstance().stopDaq(experiment, dataDirectory)
        FileSystemObserver.getInstance().stopObservingPath(dataDirectory, experiment)
        daqInfo.updateStatus()
        daqId = daqInfo.get('id')
        self.logger.debug('Stopped DAQ id %s for experiment %s' % (daqId, experimentName))

        # Prepare upload on exit
        uploadDataDirectoryOnExit = daqInfo.get('uploadDataDirectoryOnExit')
        if uploadDataDirectoryOnExit:
            self.logger.debug('Attempting automatic upload on exit for DAQ id %s, experiment %s' % (daqId, experimentName))
            daqInfo2 = {}
            daqInfo2['originalDaqId'] = daqId
            uploadDestDirectoryOnExit = daqInfo.get('uploadDestDirectoryOnExit')
            if uploadDestDirectoryOnExit:
                self.logger.debug('Automatic upload on exit for DAQ id %s (experiment %s) is using dest directory: %s' % (daqId, experimentName, uploadDestDirectoryOnExit))
                daqInfo2['destDirectory'] = uploadDestDirectoryOnExit
            try:
                uploadInfo = self.uploadFiles(experimentName, uploadDataDirectoryOnExit, daqInfo2)
                daqInfo['uploadIdOnExit'] = uploadInfo.get('id')
            except Exception as ex:
                self.logger.error('Could not automatically upload DAQ id %s: %s' % (daqId, str(ex)))
                daqInfo['uploadErrorOnExit'] = str(ex)
        return daqInfo

    def getDaqInfo(self, id):
        daqInfo = DaqTracker.getInstance().getDaqInfo(id)
        if not daqInfo:
            raise ObjectNotFound('Daq id %s not found.' % id)
        daqInfo.updateStatus()
        return daqInfo

    def listDaqs(self, status):
        daqInfoList = DaqTracker.getInstance().getDaqInfos(status)
        return daqInfoList

    def uploadFiles(self, experimentName, dataDirectory, daqInfo):
        experiment = self.dsExperimentApi.getExperimentByName(experimentName)
        UploadTracker.getInstance().checkForActiveUpload(experiment, dataDirectory)
        experiment['daqInfo'] = daqInfo
        storageDirectory = experiment.get('storageDirectory')
        if storageDirectory is None:
            raise InvalidRequest('Experiment %s has not been started.' % experimentName)
        fileProcessingManager = FileProcessingManager.getInstance()
        uploadId = str(uuid.uuid4())
        self.logger.debug('Starting upload id %s' % uploadId)
        uploadInfo = UploadInfo(daqInfo)
        uploadInfo['id'] = uploadId
        uploadInfo['experimentName'] = experimentName
        uploadInfo['storageDirectory'] = experiment.get('storageDirectory')
        uploadInfo['storageHost'] = experiment.get('storageHost')
        uploadInfo['storageUrl'] = experiment.get('storageUrl')
        uploadInfo['dataDirectory'] = dataDirectory
        uploadInfo['nProcessedFiles'] = 0
        uploadInfo['nProcessingErrors'] = 0
        startTime = time.time()
        uploadInfo['startTime'] = startTime
        uploadInfo['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)
        daqInfo['experimentName'] = experimentName
        daqInfo['storageDirectory'] = experiment.get('storageDirectory')
        daqInfo['storageHost'] = experiment.get('storageHost')
        daqInfo['storageUrl'] = experiment.get('storageUrl')
        daqInfo['dataDirectory'] = dataDirectory
        daqInfo['uploadId'] = uploadId
        skipPlugins = DictUtility.getAndRemoveKey(daqInfo, 'skipPlugins', '')
        if len(skipPlugins):
            skipPlugins = skipPlugins.split(',')
            uploadInfo['skipPlugins'] = skipPlugins
        else:
            skipPlugins = []

        # Check that there is at least one processor that can process files
        processorList = []
        for processorKey in fileProcessingManager.fileProcessorKeyList:
            processor = fileProcessingManager.fileProcessorDict.get(processorKey)
            processorName = processor.name
            if processorName not in skipPlugins:
                processorList.append(processor)
        if not len(processorList):
            raise InvalidRequest('There are no plugins that can process files for upload in directory %s.' % dataDirectory)

        UploadTracker.getInstance().startUpload(uploadId, uploadInfo)
        uploadInfo['nFiles'] = 0
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_PENDING
        self.logger.debug('Starting upload timer for %s' % dataDirectory)
        timer = threading.Timer(self.UPLOAD_DELAY_IN_SECONDS, self.prepareUploadFiles, args=[uploadInfo, daqInfo, experiment])
        timer.start()
        return uploadInfo

    def prepareUploadFiles(self, uploadInfo, daqInfo, experiment):
        uploadId = uploadInfo.get('id')
        self.logger.debug('Preparing upload id: %s' % uploadId)
        dataDirectory = uploadInfo.get('dataDirectory')
        destDirectory = uploadInfo.get('destDirectory')
        fileProcessingManager = FileProcessingManager.getInstance()
        try:
            # Get files
            self.logger.debug('Retrieving file paths for %s' % dataDirectory)
            filePathsDict = FileSystemObserver.getInstance().getFiles(dataDirectory)

            # Remove hidden files
            self.logger.debug('Checking %s processing candidates' % len(filePathsDict))
            filePathsDict = fileProcessingManager.removeHiddenFilesFromProcessing(filePathsDict, uploadInfo)

            # Check which files need to be processed
            filePathsDict = fileProcessingManager.checkUploadFilesForProcessing(filePathsDict, uploadInfo)
            if not len(filePathsDict):
                raise InvalidRequest('There are no new files for upload in directory %s.' % dataDirectory)
        except Exception as ex:
            self.logger.error('Processing error for upload %s: %s' % (uploadId, str(ex)))
            uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_FAILED
            uploadInfo['errorMessage'] = str(ex)
            return
        uploadInfo['nFiles'] = len(filePathsDict)
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
        self.logger.debug('Will prepare upload of %s files' % len(filePathsDict))
        nProcessedFiles = 0
        nFiles = len(filePathsDict)
        for (filePath, filePathDict) in filePathsDict.items():
            try:
                # Only create new uploads if we have less than
                # UPLOAD_CHUNK_SIZE_IN_FILES waiting to be completed
                while True:
                    status = uploadInfo.get('status')
                    if status == dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING:
                        nCancelledFiles = nFiles - nProcessedFiles
                        uploadInfo.uploadAborted(nCancelledFiles)
                        self.logger.debug('Upload id %s aborted, will not process %s files' % (uploadId, nCancelledFiles))
                        return
                    nCompletedFiles = uploadInfo.get('nCompletedFiles', 0)
                    nWaitingFiles = nProcessedFiles - nCompletedFiles
                    if nWaitingFiles < self.UPLOAD_CHUNK_SIZE_IN_FILES:
                        # We need to add more files for upload
                        break
                    self.logger.debug('Upload %s has %s files waiting for upload, will not add any more for %s seconds' % (uploadId, nWaitingFiles, self.UPLOAD_CHUNK_REFRESH_IN_SECONDS))
                    time.sleep(self.UPLOAD_CHUNK_REFRESH_IN_SECONDS)
                fileInfo = ObservedFile(filePath=filePath, dataDirectory=dataDirectory, experiment=experiment, destDirectory=destDirectory)
                fileInfo.update(filePathDict)
                fileInfo['daqInfo'] = daqInfo
                fileInfo['uploadId'] = uploadId
                fileInfo['statusMonitor'] = uploadInfo
                fileInfo['skipPlugins'] = uploadInfo.get('skipPlugins', [])
                fileProcessingManager.processFile(fileInfo)
                nProcessedFiles += 1
            except Exception as ex:
                self.logger.error('Processing error: %s', ex)
        self.logger.debug('Done preparing upload id: %s (total of %s files)' % (uploadId, len(filePathsDict)))

    def getUploadInfo(self, id):
        uploadInfo = UploadTracker.getInstance().get(id)
        if not uploadInfo:
            raise ObjectNotFound('Upload id %s not found.' % id)
        uploadInfo.updateStatus()
        return uploadInfo

    def listUploads(self, status):
        uploadInfoList = UploadTracker.getInstance().getUploadInfos(status)
        return uploadInfoList

    def stopUpload(self, id):
        uploadInfo = UploadTracker.getInstance().get(id)
        if not uploadInfo:
            raise ObjectNotFound('Upload id %s not found.' % id)
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING
        uploadInfo.updateStatus()
        return uploadInfo

    def uploadDirectory(self, experimentName, dataDirectory, daqInfo):
        experiment = self.dsExperimentApi.getExperimentByName(experimentName)
        UploadTracker.getInstance().checkForActiveUpload(experiment, dataDirectory)
        experiment['daqInfo'] = daqInfo
        storageDirectory = experiment.get('storageDirectory')
        if storageDirectory is None:
            raise InvalidRequest('Experiment %s has not been started.' % experimentName)
        uploadId = str(uuid.uuid4())
        self.logger.debug('Starting upload id %s' % uploadId)
        uploadInfo = DirectoryUploadInfo(daqInfo)
        uploadInfo['id'] = uploadId
        uploadInfo['experimentName'] = experimentName
        uploadInfo['storageDirectory'] = experiment.get('storageDirectory')
        uploadInfo['storageHost'] = experiment.get('storageHost')
        uploadInfo['storageUrl'] = experiment.get('storageUrl')
        uploadInfo['dataDirectory'] = dataDirectory
        startTime = time.time()
        uploadInfo['startTime'] = startTime
        uploadInfo['startTimestamp'] = TimeUtility.formatLocalTimestamp(startTime)
        daqInfo['experimentName'] = experimentName
        daqInfo['storageDirectory'] = experiment.get('storageDirectory')
        daqInfo['storageHost'] = experiment.get('storageHost')
        daqInfo['storageUrl'] = experiment.get('storageUrl')
        daqInfo['dataDirectory'] = dataDirectory
        daqInfo['uploadId'] = uploadId
        skipPlugins = DictUtility.getAndRemoveKey(daqInfo, 'skipPlugins', '')
        if len(skipPlugins):
            skipPlugins = skipPlugins.split(',')
            uploadInfo['skipPlugins'] = skipPlugins
        else:
            skipPlugins = []
        UploadTracker.getInstance().startUpload(uploadId, uploadInfo)
        uploadInfo['nFiles'] = 0
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_PENDING
        fileProcessingManager = FileProcessingManager.getInstance()
        processingInfo = {}
        uploadInfo['processingInfo'] = processingInfo
        for processorKey in fileProcessingManager.fileProcessorKeyList:
            processor = fileProcessingManager.fileProcessorDict.get(processorKey)
            processorName = processor.name
            if processorName in skipPlugins:
                processingInfo[processorName] = {'status' : dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED}
            else:
                processingInfo[processorName] = {'status' : dmProcessingStatus.DM_PROCESSING_STATUS_PENDING}
        self.logger.debug('Starting upload directory %s timer for experiment %s' % (dataDirectory, experimentName))
        timer = threading.Timer(self.UPLOAD_DELAY_IN_SECONDS, self.prepareUploadDirectory, args=[uploadInfo, daqInfo, experiment])
        timer.start()
        return uploadInfo

    def prepareUploadDirectory(self, uploadInfo, daqInfo, experiment):
        uploadId = uploadInfo['id']
        dataDirectory = uploadInfo['dataDirectory']
        experimentName = uploadInfo['experimentName']
        skipPlugins = uploadInfo.get('skipPlugins', [])
        self.logger.debug('Preparing directory %s upload for experiment %s' % (dataDirectory, experimentName))
        try:
            filePathsDict = FileSystemObserver.getInstance().getFiles(dataDirectory)
            self.logger.debug('There are %s files in directory %s (experiment %s)' % (len(filePathsDict), dataDirectory, experimentName))
        except Exception as ex:
            self.logger.error('Cannot retrieve files for directory upload %s: %s' % (uploadId, str(ex)))
            self.logger.error('Marking directory upload %s as failed' % (uploadId))
            uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_FAILED
            uploadInfo['errorMessage'] = str(ex)
            return
        fileProcessingManager = FileProcessingManager.getInstance()
        self.logger.debug('Preparing plugin timers for directory %s upload (experiment %s)' % (dataDirectory, experimentName))
        for processorKey in fileProcessingManager.fileProcessorKeyList:
            processor = fileProcessingManager.fileProcessorDict.get(processorKey)
            processorName = processor.name
            if processorName not in skipPlugins:
                self.logger.debug('Starting %s processing timer for directory %s upload' % (processorName, dataDirectory))
                timer = threading.Timer(self.UPLOAD_DELAY_IN_SECONDS, self.processUploadDirectory, args=[processor, uploadInfo, daqInfo, experiment, filePathsDict])
                timer.start()
        uploadInfo['nFiles'] = len(filePathsDict)
        uploadInfo['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING

    def processUploadDirectory(self, processor, uploadInfo, daqInfo, experiment, filePathsDict):
        uploadId = uploadInfo.get('id')
        dataDirectory = uploadInfo.get('dataDirectory')
        processorName = processor.name
        processingInfo = uploadInfo.get('processingInfo')
        self.logger.debug('Starting %s processing for upload %s by %s' % (dataDirectory, uploadId, processorName))
        # Initialized here so the timing code at the end of this method
        # works even if an error occurs before processing starts
        processingStartTime = time.time()
        try:
            dependsOn = processor.dependsOn
            while True:
                # Check status
                if uploadInfo['status'] == dmProcessingStatus.DM_PROCESSING_STATUS_ABORTING:
                    processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_ABORTED
                    return
                # Check that all processors we depend on have finished
                canProcess = True
                for depProcessorName in dependsOn:
                    depProcessorStatus = processingInfo.get(depProcessorName).get('status')
                    if depProcessorStatus in [
                            dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED,
                            dmProcessingStatus.DM_PROCESSING_STATUS_ABORTED,
                            dmProcessingStatus.DM_PROCESSING_STATUS_FAILED]:
                        # We must skip processing
                        self.logger.debug('Skipping %s processing for upload %s due to %s status of %s' % (processorName, uploadId, depProcessorName, depProcessorStatus))
                        processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED
                        return
                    elif depProcessorStatus in [dmProcessingStatus.DM_PROCESSING_STATUS_PENDING, dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING]:
                        # Dependency has not finished yet, we cannot proceed
                        canProcess = False
                    elif depProcessorStatus == dmProcessingStatus.DM_PROCESSING_STATUS_DONE:
                        # This dependency is satisfied
                        pass
                    else:
                        # This should not happen
                        self.logger.error('Skipping %s processing for upload %s due to %s unrecognized status of %s' % (processorName, uploadId, depProcessorName, depProcessorStatus))
                        processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_SKIPPED
                        return
                # Process directory if we can
                if canProcess:
                    directoryInfo = {
                        'uploadInfo' : uploadInfo,
                        'daqInfo' : daqInfo,
                        'experiment' : experiment,
                        'filePathsDict' : filePathsDict
                    }
                    processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING
                    processingStartTime = time.time()
                    processor.processDirectory(directoryInfo)
                    if processingInfo[processorName]['status'] == dmProcessingStatus.DM_PROCESSING_STATUS_RUNNING:
                        processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_DONE
                        self.logger.debug('Directory %s processing complete for upload %s by %s' % (dataDirectory, uploadId, processorName))
                    else:
                        self.logger.debug('Incomplete directory %s processing for upload %s by %s, status: %s' % (dataDirectory, uploadId, processorName, processingInfo[processorName]['status']))
                    break
                # Wait a bit longer
                time.sleep(self.DIRECTORY_UPLOAD_PROCESSING_WAIT_IN_SECONDS)
        except Exception as ex:
            self.logger.error('%s processing for upload %s failed: %s' % (processorName, uploadId, str(ex)))
            processingInfo[processorName]['status'] = dmProcessingStatus.DM_PROCESSING_STATUS_FAILED
            processingInfo[processorName]['processingError'] = str(ex)
        processingEndTime = time.time()
        processingInfo[processorName]['processingEndTime'] = processingEndTime
        processingInfo[processorName]['processingStartTime'] = processingStartTime
        processingInfo[processorName]['processingRunTime'] = processingEndTime - processingStartTime

    def getProcessingPlugins(self):
        pluginList = []
        fileProcessingManager = FileProcessingManager.getInstance()
        for processorKey in fileProcessingManager.fileProcessorKeyList:
            processor = fileProcessingManager.fileProcessorDict.get(processorKey)
            pluginInfo = {'name' : processor.name, 'dependsOn' : processor.dependsOn}
            pluginList.append(PluginInfo(pluginInfo))
        return pluginList
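
#######################################################################
# Example (editor's sketch): the chunked-upload throttle used by
# prepareUploadFiles() above, reduced to a standalone loop. A producer
# refuses to queue more work while CHUNK_SIZE or more items are still
# waiting; completions are simulated here so the sketch runs on its own.

if __name__ == '__main__':
    import time
    CHUNK_SIZE = 3
    REFRESH_SECONDS = 0.1
    status = {'nCompletedFiles': 0}
    nQueued = 0
    for filePath in ['f%d' % i for i in range(10)]:
        while nQueued - status['nCompletedFiles'] >= CHUNK_SIZE:
            # Too many files waiting; back off, then re-check
            time.sleep(REFRESH_SECONDS)
            status['nCompletedFiles'] += 1  # simulate a completion
        nQueued += 1
        print 'Queued %s (%d waiting)' % (filePath, nQueued - status['nCompletedFiles'])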
#!/usr/bin/env python

from dm.common.objects.experiment import Experiment
from dm.common.utility.objectTracker import ObjectTracker


class ExperimentTracker(ObjectTracker):

    # Cache configuration
    objectClass = Experiment

####################################################################
# Testing

if __name__ == '__main__':
    et = ExperimentTracker.getInstance()
    print et
    et2 = ExperimentTracker.getInstance()
    print et2
#!/usr/bin/env python

import threading
import time
import os

from dm.common.utility.loggingManager import LoggingManager
from dm.common.utility.configurationManager import ConfigurationManager
from dm.common.objects.observedFile import ObservedFile
from dm.common.utility.valueUtility import ValueUtility
from dm.common.utility.objectUtility import ObjectUtility
from dm.common.utility.singleton import Singleton
from dm.common.utility.threadingUtility import ThreadingUtility
from dm.common.processing.fileProcessingManager import FileProcessingManager
from daqTracker import DaqTracker


class FileSystemObserver(threading.Thread, Singleton):

    CONFIG_SECTION_NAME = 'FileSystemObserver'
    MIN_FILE_PROCESSING_DELAY_IN_SECONDS_KEY = 'minfileprocessingdelayinseconds'
    FILE_SYSTEM_EVENT_TIMEOUT_IN_SECONDS_KEY = 'filesystemeventtimeoutinseconds'
    FILE_SYSTEM_OBSERVER_AGENT_KEY = 'filesystemobserveragent'
    DAQ_CHUNK_SIZE_IN_FILES = 500

    # Singleton.
    __instanceLock = threading.RLock()
    __instance = None

    def __init__(self):
        FileSystemObserver.__instanceLock.acquire()
        try:
            if FileSystemObserver.__instance:
                return
            FileSystemObserver.__instance = self
            threading.Thread.__init__(self)
            self.setName('FileSystemObserverThread')
            self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
            self.logger.debug('Initializing')
            self.lock = threading.RLock()
            self.eventFlag = threading.Event()
            self.exitFlag = False
            self.observedFileMap = {}
            self.__configure()
            self.fileProcessingManager = FileProcessingManager.getInstance()
            self.nProcessedFilesDict = {}
            self.logger.debug('Initialization complete')
        finally:
            FileSystemObserver.__instanceLock.release()

    def __configure(self):
        cm = ConfigurationManager.getInstance()
        configItems = cm.getConfigItems(FileSystemObserver.CONFIG_SECTION_NAME)
        self.logger.debug('Got config items: %s' % configItems)
        self.minFileProcessingDelayInSeconds = int(cm.getConfigOption(FileSystemObserver.CONFIG_SECTION_NAME, FileSystemObserver.MIN_FILE_PROCESSING_DELAY_IN_SECONDS_KEY))
        self.logger.debug('Minimum file processing delay: %s seconds' % self.minFileProcessingDelayInSeconds)
        self.fileSystemEventTimeoutInSeconds = int(cm.getConfigOption(FileSystemObserver.CONFIG_SECTION_NAME, FileSystemObserver.FILE_SYSTEM_EVENT_TIMEOUT_IN_SECONDS_KEY))
        self.logger.debug('File system event timeout: %s seconds' % self.fileSystemEventTimeoutInSeconds)
        agentClass = cm.getConfigOption(FileSystemObserver.CONFIG_SECTION_NAME, FileSystemObserver.FILE_SYSTEM_OBSERVER_AGENT_KEY)
        (moduleName, className, constructor) = cm.getModuleClassConstructorTuple(agentClass)
        self.logger.debug('Creating file system observer agent instance of class %s' % className)
        self.fileSystemObserverAgent = ObjectUtility.createObjectInstance(moduleName, className, constructor)
        self.fileSystemObserverAgent.setFileSystemObserver(self)

    @ThreadingUtility.synchronize
    def createDirectory(self, dataDirectory):
        self.fileSystemObserverAgent.createDirectory(dataDirectory)

    @ThreadingUtility.synchronize
    def getFiles(self, dataDirectory):
        self.logger.debug('Agent is retrieving files for %s' % dataDirectory)
        return self.fileSystemObserverAgent.getFiles(dataDirectory)

    @ThreadingUtility.synchronize
    def startObservingPath(self, dataDirectory, experiment):
        self.logger.debug('Agent is starting observer for %s' % dataDirectory)
        self.fileSystemObserverAgent.startObservingPath(dataDirectory, experiment)

    @ThreadingUtility.synchronize
    def stopObservingPath(self, dataDirectory, experiment):
        self.logger.debug('Agent is stopping observer for %s' % dataDirectory)
        self.fileSystemObserverAgent.stopObservingPath(dataDirectory, experiment)

    @ThreadingUtility.synchronize
    def fileUpdated(self, filePath, dataDirectory, experiment):
        daqInfo = DaqTracker.getInstance().getDaqInfoByExperimentAndDataDirectory(experiment, dataDirectory)
        experimentName = experiment.get('name')
        # No daq info, ignore
        if not daqInfo:
            self.logger.debug('No daq for data directory %s and experiment %s, file path %s will not be processed' % (dataDirectory, experimentName, filePath))
            return
        # Do not process hidden files unless requested
        if not ValueUtility.toBoolean(daqInfo.get('processHiddenFiles')):
            fileName = os.path.basename(filePath)
            if fileName.startswith('.'):
                self.logger.debug('File path %s is hidden file, will not process it' % filePath)
                return
        daqId = daqInfo['id']
        observedFile = self.observedFileMap.get(filePath)
        destDirectory = daqInfo.get('destDirectory')
        if not observedFile:
            observedFile = ObservedFile(filePath=filePath, dataDirectory=dataDirectory, experiment=experiment, destDirectory=destDirectory)
            observedFile['daqInfo'] = daqInfo.toDictWithOriginalKeys()
            observedFile['statusMonitor'] = daqInfo
            self.observedFileMap[filePath] = observedFile
            self.logger.debug('New observed file: %s (daq id: %s)' % (filePath, daqId))
            daqInfo.fileAdded(filePath)
        else:
            self.logger.debug('Observed file updated: %s (daq id: %s)' % (filePath, daqId))
        observedFile.setLastUpdateTimeToNow()

    @ThreadingUtility.synchronize
    def checkObservedFilesForProcessing(self):
        now = time.time()
        filePathsForProcessing = []
        # We use number of waiting files to determine whether
        # more files should be added for processing, so we need to
        # update all daq infos before going over observed files
        DaqTracker.getInstance().updateDaqInfos()
        for (filePath, observedFile) in self.observedFileMap.items():
            daqId = observedFile['daqInfo']['id']
            daqInfo = DaqTracker.getInstance().getDaqInfo(daqId)
            nProcessedFiles = self.nProcessedFilesDict.get(daqId, 0)
            nCompletedFiles = daqInfo.get('nCompletedFiles', 0)
            nWaitingFiles = nProcessedFiles - nCompletedFiles
            if nWaitingFiles >= self.DAQ_CHUNK_SIZE_IN_FILES:
                # We do not need to add more files for processing for this DAQ
                continue
            timestamp = observedFile.get('lastUpdateTime')
            deltaT = now - timestamp
            if deltaT > self.minFileProcessingDelayInSeconds:
                self.logger.debug('File %s was last modified %s seconds ago, will process it.' % (filePath, deltaT))
                filePathsForProcessing.append(filePath)
                self.nProcessedFilesDict[daqId] = nProcessedFiles + 1
        return filePathsForProcessing

    @ThreadingUtility.synchronize
    def processFile(self, filePath):
        self.logger.debug('Processing file %s' % filePath)
        observedFile = self.observedFileMap.get(filePath)
        if observedFile is not None:
            del self.observedFileMap[filePath]
            self.fileProcessingManager.processFile(observedFile)

    @ThreadingUtility.synchronize
    def start(self):
        self.logger.debug('Starting file observer thread')
        threading.Thread.start(self)
        self.logger.debug('Starting file observer agent')
        self.fileSystemObserverAgent.start()

    def run(self):
        self.logger.debug('Starting thread: %s' % self.getName())
        while True:
            if self.exitFlag:
                self.logger.debug('Exit flag set, %s done' % self.getName())
                break
            try:
                filePathsForProcessing = self.checkObservedFilesForProcessing()
                if len(filePathsForProcessing):
                    self.logger.debug('Will queue %s new files for processing' % (len(filePathsForProcessing)))
                    for filePath in filePathsForProcessing:
                        self.processFile(filePath)
            except Exception as ex:
                self.logger.exception(ex)
            self.eventFlag.wait(timeout=self.fileSystemEventTimeoutInSeconds)

    @ThreadingUtility.synchronize
    def stop(self):
        self.logger.debug('Stopping file observer agent')
        self.fileSystemObserverAgent.stop()
        self.logger.debug('Stopping file observer thread')
        self.exitFlag = True
        self.eventFlag.set()
        self.logger.debug('Event is set, joining thread')
        threading.Thread.join(self)
        self.logger.debug('Module stopped')

    @ThreadingUtility.synchronize
    def setEvent(self):
        self.eventFlag.set()

    @ThreadingUtility.synchronize
    def clearEvent(self):
        self.eventFlag.clear()

####################################################################
# Testing

if __name__ == '__main__':
    fp = FileSystemObserver.getInstance()
    fp.start()
    time.sleep(30)
    fp.stop()
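
#######################################################################
# Example (editor's sketch): the "quiet period" check performed by
# checkObservedFilesForProcessing() above, in isolation. A file is only
# released for processing once its last update is older than the
# configured delay, so files still being written are left alone. Paths
# and delay values are illustrative.

if __name__ == '__main__':
    import time
    minFileProcessingDelayInSeconds = 2.0
    observedFileMap = {
        '/data/e1/f1' : {'lastUpdateTime' : time.time() - 5.0},
        '/data/e1/f2' : {'lastUpdateTime' : time.time()},
    }
    now = time.time()
    for (filePath, observedFile) in observedFileMap.items():
        deltaT = now - observedFile['lastUpdateTime']
        if deltaT > minFileProcessingDelayInSeconds:
            print 'Would process %s (idle %.1f s)' % (filePath, deltaT)
        else:
            print 'Still settling: %s' % filePath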
#!/usr/bin/env python

from dm.common.utility.loggingManager import LoggingManager


class FileSystemObserverAgent:

    def __init__(self):
        self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
        self.fileSystemObserver = None

    def setFileSystemObserver(self, fileSystemObserver):
        self.fileSystemObserver = fileSystemObserver

    def createDirectory(self, dataDirectory):
        pass

    def getFiles(self, dataDirectory):
        pass

    def startObservingPath(self, dataDirectory, experiment):
        pass

    def stopObservingPath(self, dataDirectory, experiment):
        pass

    def start(self):
        pass

    def stop(self):
        pass
#!/usr/bin/env python

from pollingFileSystemObserverAgent import PollingFileSystemObserverAgent
from dm.common.utility.ftpUtility import FtpUtility


class FtpFileSystemObserverAgent(PollingFileSystemObserverAgent):

    DEFAULT_POLLING_PERIOD_IN_SECONDS = 15

    def __init__(self, host, port, username=None, password=None, pollingPeriod=DEFAULT_POLLING_PERIOD_IN_SECONDS):
        PollingFileSystemObserverAgent.__init__(self, pollingPeriod)
        self.host = host
        self.port = port
        self.username = username
        self.password = password

    def getFiles(self, dataDirectory):
        (scheme, host, port, dirPath) = FtpUtility.parseFtpUrl(dataDirectory, defaultHost=self.host, defaultPort=self.port)
        self.logger.debug('Retrieving files from FTP host: %s, port: %s, directory path: %s' % (host, port, dirPath))
        ftpUtility = FtpUtility(host, port, self.username, self.password)
        return ftpUtility.getFiles(dirPath, {})

####################################################################
# Testing

if __name__ == '__main__':
    import time
    agent = FtpFileSystemObserverAgent('zagreb', 2811)
    print 'ORIGINAL FILES: ', agent.getFiles('/tmp/test')
    agent.startObservingPath('/tmp/test', 'e1')
    time.sleep(100)
    agent.stopObservingPath('/tmp/test', 'e1')
#!/usr/bin/env python

from threading import Timer

from dm.common.constants import dmProcessingStatus
from dm.common.utility.osUtility import OsUtility
from fileSystemObserverAgent import FileSystemObserverAgent
from daqTracker import DaqTracker


class PollingFileSystemObserverAgent(FileSystemObserverAgent):

    DEFAULT_START_OBSERVING_PATH_DELAY_IN_SECONDS = 3
    DEFAULT_POLLING_PERIOD_IN_SECONDS = 15
    DEFAULT_RETRY_PERIOD_IN_SECONDS = 60

    def __init__(self, pollingPeriod=DEFAULT_POLLING_PERIOD_IN_SECONDS):
        FileSystemObserverAgent.__init__(self)
        self.pollingPeriod = pollingPeriod
        self.retryDelay = 0
        self.observedDirDict = {}
        self.isDone = False

    def getFiles(self, dataDirectory):
        self.logger.debug('Retrieving files for directory: %s' % (dataDirectory))
        return OsUtility.findFilesAsDict(dataDirectory, {})

    def updateFile(self, filePath, dataDirectory, experiment):
        if self.fileSystemObserver:
            self.logger.debug('Processing file path: %s' % filePath)
            self.fileSystemObserver.fileUpdated(filePath, dataDirectory, experiment)

    def processFiles(self, fileDict, oldFileDict, dataDirectory, experiment):
        for filePath in fileDict.keys():
            if filePath not in oldFileDict:
                # New file, must be updated
                self.logger.debug('New file path detected: %s' % filePath)
                self.updateFile(filePath, dataDirectory, experiment)
            else:
                # Old file, check timestamp
                oldFileInfo = oldFileDict.get(filePath)
                oldModifyTime = oldFileInfo.get('fileModificationTime', '')
                fileInfo = fileDict.get(filePath)
                modifyTime = fileInfo.get('fileModificationTime')
                if modifyTime != oldModifyTime:
                    # File has been modified, need to process it
                    self.logger.debug('Modified file path detected: %s' % filePath)
                    self.updateFile(filePath, dataDirectory, experiment)

    def pollFileSystem(self, dataDirectory, experiment):
        observedDirInfo = self.observedDirDict.get(dataDirectory)
        if not observedDirInfo:
            self.logger.debug('Polling cancelled for directory: %s' % dataDirectory)
            return
        try:
            oldFileDict = observedDirInfo.get('files')
            fileDict = self.getFiles(dataDirectory)
            observedDirInfo['files'] = fileDict
            self.processFiles(fileDict, oldFileDict, dataDirectory, experiment)
            self.retryDelay = 0
        except Exception as ex:
            self.logger.error('Could not poll directory %s: %s' % (dataDirectory, ex))
            self.retryDelay = self.DEFAULT_RETRY_PERIOD_IN_SECONDS
            self.logger.debug('Next polling for directory %s will be delayed by: %s seconds' % (dataDirectory, self.retryDelay))
        self.startPollingTimer(observedDirInfo, dataDirectory, experiment)

    def startPollingTimer(self, observedDirInfo, dataDirectory, experiment):
        if self.isDone:
            return
        delay = self.pollingPeriod + self.retryDelay
        t = Timer(delay, self.pollFileSystem, [dataDirectory, experiment])
        observedDirInfo['pollTimer'] = t
        t.start()

    def startObservingPath(self, dataDirectory, experiment):
        observedDirInfo = self.observedDirDict.get(dataDirectory)
        if observedDirInfo:
            self.logger.debug('Observer for %s is already active' % dataDirectory)
            return
        self.logger.debug('Starting observer for %s' % dataDirectory)
        observedDirInfo = {}
        observedDirInfo['experiment'] = experiment
        self.observedDirDict[dataDirectory] = observedDirInfo
        t = Timer(self.DEFAULT_START_OBSERVING_PATH_DELAY_IN_SECONDS, self.startObservingPathTimer, [observedDirInfo, dataDirectory, experiment])
        t.start()

    def startObservingPathTimer(self, observedDirInfo, dataDirectory, experiment):
        try:
            self.logger.debug('Starting initial retrieval of files for directory %s' % (dataDirectory))
            fileDict = self.getFiles(dataDirectory)
            observedDirInfo['files'] = fileDict
            self.startPollingTimer(observedDirInfo, dataDirectory, experiment)
        except Exception as ex:
            self.logger.error('Could not retrieve files for directory %s: %s' % (dataDirectory, ex))
            DaqTracker.getInstance().stopDaq(experiment, dataDirectory, status=dmProcessingStatus.DM_PROCESSING_STATUS_FAILED, errorMessage=str(ex))
            self.logger.error('Marked DAQ as failed for experiment %s, data directory %s' % (experiment['name'], dataDirectory))

    def stopObservingPath(self, dataDirectory, experiment):
        observedDirInfo = self.observedDirDict.get(dataDirectory)
        if not observedDirInfo:
            self.logger.debug('Observer for %s is not active' % dataDirectory)
            return
        self.logger.debug('Stopping observer for %s' % dataDirectory)
        t = observedDirInfo.get('pollTimer')
        if t:
            t.cancel()
        del self.observedDirDict[dataDirectory]

    def start(self):
        self.logger.debug('Starting polling observer agent')

    def stop(self):
        self.logger.debug('Stopping polling observer agent')
        self.isDone = True
        for (dataDirectory, observedDirInfo) in self.observedDirDict.items():
            experiment = observedDirInfo.get('experiment')
            self.stopObservingPath(dataDirectory, experiment)

####################################################################
# Testing

if __name__ == '__main__':
    import time
    agent = PollingFileSystemObserverAgent()
    print 'ORIGINAL FILES: ', agent.getFiles('/tmp/test')
    agent.startObservingPath('/tmp/test', 'e1')
    time.sleep(100)
    agent.stopObservingPath('/tmp/test', 'e1')
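
#######################################################################
# Example (editor's sketch): wiring a PollingFileSystemObserverAgent to
# a minimal stand-in observer instead of the FileSystemObserver
# singleton the service uses. The directory (assumed to exist) and
# experiment value are illustrative.

if __name__ == '__main__':
    import time

    class PrintingObserver:
        def fileUpdated(self, filePath, dataDirectory, experiment):
            print 'Updated: %s (experiment %s)' % (filePath, experiment)

    agent = PollingFileSystemObserverAgent(pollingPeriod=2)
    agent.setFileSystemObserver(PrintingObserver())
    agent.startObservingPath('/tmp/test', 'e1')
    time.sleep(10)
    agent.stop()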
#!/usr/bin/env python

from pollingFileSystemObserverAgent import PollingFileSystemObserverAgent
from dm.common.utility.sftpUtility import SftpUtility


class SftpFileSystemObserverAgent(PollingFileSystemObserverAgent):

    DEFAULT_POLLING_PERIOD_IN_SECONDS = 15
    DEFAULT_PORT = 22

    def __init__(self, host, port=DEFAULT_PORT, username=None, password=None, privateKey=None, pollingPeriod=DEFAULT_POLLING_PERIOD_IN_SECONDS):
        PollingFileSystemObserverAgent.__init__(self, pollingPeriod)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.privateKey = privateKey

    def getFiles(self, dataDirectory):
        (scheme, host, port, dirPath) = SftpUtility.parseFtpUrl(dataDirectory, defaultHost=self.host, defaultPort=self.port)
        self.logger.debug('Retrieving files from SFTP host: %s, port: %s, directory path: %s' % (host, port, dirPath))
        sftpUtility = SftpUtility(host, port, self.username, self.password, self.privateKey)
        return sftpUtility.getFiles(dirPath, {})

####################################################################
# Testing

if __name__ == '__main__':
    import time
    dirPath = '/export/beams12/S1IDUSER/mnt/orthros/park_apr16_rec_reduced'
    agent = SftpFileSystemObserverAgent('s1dserv', privateKey='/home/beams/DMADMIN/.ssh/id_dsa')
    print 'TIME1: ', time.time()
    print 'ORIGINAL FILES: ', len(agent.getFiles(dirPath))
    print 'TIME2: ', time.time()
    #agent.startObservingPath('/export/dm/test', 'e1')
    #time.sleep(100)
    #agent.stopObservingPath('/export/dm/test', 'e1')
#!/usr/bin/env python

import os

from dm.common.constants import dmProcessingStatus
from dm.common.objects.uploadInfo import UploadInfo
from dm.common.utility.objectTracker import ObjectTracker
from dm.common.exceptions.objectAlreadyExists import ObjectAlreadyExists


class UploadTracker(ObjectTracker):

    # Cache configuration
    objectClass = UploadInfo
    cacheSize = 100

    def __init__(self, *args, **kwargs):
        # Forward the arguments themselves, not the tuple/dict objects
        ObjectTracker.__init__(self, *args, **kwargs)
        self.activeUploadDict = {}

    def checkForActiveUpload(self, experiment, dataDirectory):
        experimentName = experiment.get('name')
        dataDir = os.path.normpath(dataDirectory)
        activeUploadKey = experimentName + dataDir
        uploadId = self.activeUploadDict.get(activeUploadKey)
        if uploadId:
            uploadInfo = self.get(uploadId)
            if uploadInfo is not None:
                uploadInfo.updateStatus()
                if uploadInfo.get('status') in dmProcessingStatus.DM_ACTIVE_PROCESSING_STATUS_LIST:
                    raise ObjectAlreadyExists('Upload id %s is already active for experiment %s in data directory %s.' % (uploadId, experimentName, dataDir))
            # Upload is no longer active, clear the stale entry
            del self.activeUploadDict[activeUploadKey]

    def startUpload(self, uploadId, uploadInfo):
        experimentName = uploadInfo.get('experimentName')
        dataDirectory = uploadInfo.get('dataDirectory')
        dataDir = os.path.normpath(dataDirectory)
        activeUploadKey = experimentName + dataDir
        self.activeUploadDict[activeUploadKey] = uploadId
        self.put(uploadId, uploadInfo)

    def getUploadInfo(self, id):
        return self.get(id)

    def getUploadInfos(self, status=None):
        uploadInfoList = self.getAll()
        filteredUploadInfoList = []
        for uploadInfo in uploadInfoList:
            uploadInfo.updateStatus()
            if status is None or status == dmProcessingStatus.DM_PROCESSING_STATUS_ANY or uploadInfo.get('status', '') == status:
                filteredUploadInfoList.append(uploadInfo)
        return filteredUploadInfoList

####################################################################
# Testing

if __name__ == '__main__':
    pass
#!/usr/bin/env python

#
# Implementation for user info controller.
#

#######################################################################

from dm.common.objects.dmObjectManager import DmObjectManager
from dm.common.db.api.userDbApi import UserDbApi

#######################################################################


class UserInfoControllerImpl(DmObjectManager):
    """ User info controller implementation class. """

    def __init__(self):
        DmObjectManager.__init__(self)
        self.userDbApi = UserDbApi()

    def getUsers(self):
        return self.userDbApi.getUsers()

    def getUserById(self, id):
        return self.userDbApi.getUserById(id)

    def getUserByUsername(self, username):
        return self.userDbApi.getUserByUsername(username)
#!/usr/bin/env python

from watchdog.observers.polling import PollingObserver

from dm.common.utility.osUtility import OsUtility
from fileSystemObserverAgent import FileSystemObserverAgent
from dmFileSystemEventHandler import DmFileSystemEventHandler


class WatchdogFileSystemObserverAgent(FileSystemObserverAgent):

    DAQ_PERMISSIONS_MODE = 0777

    def __init__(self):
        FileSystemObserverAgent.__init__(self)
        self.observer = PollingObserver()
        self.observedWatchDict = {}

    def createDirectory(self, dataDirectory):
        try:
            OsUtility.createDir(dataDirectory, mode=self.DAQ_PERMISSIONS_MODE)
        except Exception as ex:
            self.logger.warn('Unable to create directory %s: %s' % (dataDirectory, ex))

    def getFiles(self, dataDirectory):
        return OsUtility.findFilesAsDict(dataDirectory)

    def startObservingPath(self, dataDirectory, experiment):
        self.logger.debug('Starting observer for %s' % dataDirectory)
        eventHandler = DmFileSystemEventHandler(self.fileSystemObserver, dataDirectory, experiment)
        observedWatch = self.observer.schedule(eventHandler, dataDirectory, recursive=True)
        self.observedWatchDict[dataDirectory] = observedWatch

    def stopObservingPath(self, dataDirectory, experiment):
        observedWatch = self.observedWatchDict.get(dataDirectory)
        if observedWatch:
            self.logger.debug('Stopping observer for %s' % dataDirectory)
            self.observer.unschedule(observedWatch)
            del self.observedWatchDict[dataDirectory]
        else:
            self.logger.debug('Observer for %s is not active' % dataDirectory)

    def start(self):
        self.logger.debug('Starting watchdog observer agent')
        self.observer.start()

    def stop(self):
        self.logger.debug('Stopping watchdog observer agent')
        self.observer.stop()
        self.observer.join()
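
#######################################################################
# Example (editor's sketch): the same stand-in observer driving a
# WatchdogFileSystemObserverAgent; watchdog's PollingObserver delivers
# events to DmFileSystemEventHandler, which forwards file paths to
# whatever observer object has been set. The directory and experiment
# value are illustrative.

if __name__ == '__main__':
    import time

    class PrintingObserver:
        def fileUpdated(self, filePath, dataDirectory, experiment):
            print 'Updated: %s' % filePath

    agent = WatchdogFileSystemObserverAgent()
    agent.setFileSystemObserver(PrintingObserver())
    agent.createDirectory('/tmp/test')
    agent.startObservingPath('/tmp/test', 'e1')
    agent.start()
    time.sleep(10)
    agent.stop()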