Commit a08e668a authored by sveseli

added directory processing mode for uploads

parent 5a154835
Showing with 287 additions and 24 deletions
#!/usr/bin/env python
#######################################################################
DM_PROCESSING_MODE_FILES = 'files'
DM_PROCESSING_MODE_DIRECTORY = 'directory'
DM_ALLOWED_PROCESSING_MODE_LIST = [DM_PROCESSING_MODE_FILES, DM_PROCESSING_MODE_DIRECTORY]
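These constants define the two allowed upload processing modes introduced by this commit. A minimal sketch of how a caller might validate a requested mode against the allowed list (the helper function and the default fallback are illustrative, not part of the commit):

DM_PROCESSING_MODE_FILES = 'files'
DM_PROCESSING_MODE_DIRECTORY = 'directory'
DM_ALLOWED_PROCESSING_MODE_LIST = [DM_PROCESSING_MODE_FILES, DM_PROCESSING_MODE_DIRECTORY]

def resolveProcessingMode(requestedMode=None):
    # Assumed default: fall back to per-file processing when no mode is requested.
    if not requestedMode:
        return DM_PROCESSING_MODE_FILES
    if requestedMode not in DM_ALLOWED_PROCESSING_MODE_LIST:
        raise ValueError('Invalid processing mode: %s (allowed: %s)' % (requestedMode, DM_ALLOWED_PROCESSING_MODE_LIST))
    return requestedMode

print(resolveProcessingMode('directory'))   # directory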
......@@ -52,7 +52,7 @@ class DaqInfo(DmObject):
def updateStatus(self):
now = time.time()
daqStatus = self.get('status', 'running')
if daqStatus == 'done':
if daqStatus in ['done', 'failed']:
return
nFiles = self.get('nFiles', 0)
nProcessedFiles = self.get('nProcessedFiles', 0)
......@@ -77,6 +77,8 @@ class DaqInfo(DmObject):
if self.get('endTime') and nCompletedFiles == nFiles:
daqStatus = 'done'
if nProcessingErrors:
daqStatus = 'failed'
lastFileProcessingErrorTime = self.get('lastFileProcessingErrorTime')
lastFileProcessedTime = self.get('lastFileProcessedTime')
endTime = lastFileProcessedTime
......
#!/usr/bin/env python
from dmObject import DmObject
class DirectoryMetadata(DmObject):
DEFAULT_KEY_LIST = [ 'id', 'directoryPath', 'experimentDirectoryPath' ]
def __init__(self, dict):
DmObject.__init__(self, dict)
#!/usr/bin/env python
import time
import threading
from dmObject import DmObject
from dm.common.utility.dictUtility import DictUtility
from dm.common.utility.timeUtility import TimeUtility
class DirectoryUploadInfo(DmObject):
DEFAULT_KEY_LIST = [ 'id', 'experimentName', 'dataDirectory', 'status', 'nFiles', 'startTime', 'endTime', 'runTime', 'startTimestamp', 'endTimestamp' ]
def __init__(self, dict={}):
DmObject.__init__(self, dict)
self.lock = threading.RLock()
def updateStatus(self):
now = time.time()
uploadStatus = self.get('status', 'running')
if uploadStatus in ['done', 'aborted', 'failed']:
return
startTime = self.get('startTime', now)
runTime = now - startTime
processingInfo = self.get('processingInfo')
endTime = 0
uploadStatus = 'done'
for processorName in processingInfo.keys():
processingEndTime = processingInfo[processorName].get('processingEndTime')
status = processingInfo[processorName].get('status')
if status in ['aborted', 'failed']:
uploadStatus = status
if not processingEndTime:
endTime = None
break
if processingEndTime > endTime:
endTime = processingEndTime
if endTime:
runTime = endTime - startTime
self['endTime'] = endTime
self['endTimestamp'] = TimeUtility.formatLocalTimestamp(endTime)
self['status'] = uploadStatus
self['runTime'] = runTime
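A minimal usage sketch for the status aggregation above (processor names and timestamps are invented; the import path is assumed from the naming of the sibling object modules):

import time
from dm.common.objects.directoryUploadInfo import DirectoryUploadInfo  # assumed module path

now = time.time()
uploadInfo = DirectoryUploadInfo({'id': 'upload-1', 'status': 'running', 'startTime': now - 120})
uploadInfo['processingInfo'] = {
    'GridftpFileTransferPlugin-1': {'status': 'done', 'processingEndTime': now - 10},
    'MongoDbFileCatalogPlugin-2': {'status': 'done', 'processingEndTime': now - 5},
}
uploadInfo.updateStatus()
# Once every processor reports an end time the upload is marked 'done';
# a single 'failed' or 'aborted' processor status propagates to the upload.
print('%s %s' % (uploadInfo.get('status'), uploadInfo.get('runTime')))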
#!/usr/bin/env python
from dmObject import DmObject
class PluginInfo(DmObject):
DEFAULT_KEY_LIST = [ 'id', 'name', 'dependsOn' ]
def __init__(self, dict):
DmObject.__init__(self, dict)
......@@ -59,7 +59,7 @@ class UploadInfo(DmObject):
def updateStatus(self):
now = time.time()
uploadStatus = self.get('status', 'running')
if uploadStatus == 'done' or uploadStatus == 'aborted':
if uploadStatus in ['done', 'aborted', 'failed']:
return
nFiles = self.get('nFiles', 0)
nProcessedFiles = self.get('nProcessedFiles', 0)
......@@ -73,6 +73,8 @@ class UploadInfo(DmObject):
endTime = None
if nCompletedFiles == nFiles:
uploadStatus = 'done'
if nProcessingErrors:
uploadStatus = 'failed'
lastFileProcessingErrorTime = self.get('lastFileProcessingErrorTime')
lastFileProcessedTime = self.get('lastFileProcessedTime')
endTime = lastFileProcessedTime
......
......@@ -67,6 +67,26 @@ class FileProcessingManager(threading.Thread,Singleton):
self.fileProcessorKeyList.sort()
self.logger.debug('File processor keys: %s' % self.fileProcessorKeyList)
# Assign processor names
processorNumber = 0
for processorKey in self.fileProcessorKeyList:
processorNumber += 1
processor = self.fileProcessorDict.get(processorKey)
processorName = '%s-%s' % (processor.__class__.__name__,processorNumber)
processor.name = processorName
# Correct processor dependencies
for processorKey in self.fileProcessorKeyList:
self.logger.debug('Determining dependencies for processor %s' % (processorKey))
processor = self.fileProcessorDict.get(processorKey)
dependsOn = []
for depProcessorKey in processor.dependsOn:
depProcessor = self.fileProcessorDict.get(depProcessorKey.lower())
if depProcessor:
dependsOn.append(depProcessor.name)
processor.dependsOn = dependsOn
self.logger.debug('Processor %s depends on: %s' % (processor.name, processor.dependsOn))
# Remove hidden files from dictionary of files to be processed
def removeHiddenFilesFromProcessing(self, filePathsDict, uploadInfo):
if ValueUtility.toBoolean(uploadInfo.get('processHiddenFiles')):
......
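A self-contained sketch of what the two passes above do: processors receive names of the form <ClassName>-<position>, and configured dependency keys are rewritten (case-insensitively) to those names. The stub classes and keys here are illustrative only:

class TransferStub(object):
    def __init__(self, dependsOn=[]):
        self.dependsOn = dependsOn

class CatalogStub(object):
    def __init__(self, dependsOn=[]):
        self.dependsOn = dependsOn

fileProcessorDict = {
    'transferstub': TransferStub(),
    'catalogstub': CatalogStub(dependsOn=['TransferStub']),
}
fileProcessorKeyList = sorted(fileProcessorDict.keys())

# First pass: assign <ClassName>-<position> names.
processorNumber = 0
for processorKey in fileProcessorKeyList:
    processorNumber += 1
    processor = fileProcessorDict[processorKey]
    processor.name = '%s-%s' % (processor.__class__.__name__, processorNumber)

# Second pass: replace dependency keys with the resolved processor names.
for processorKey in fileProcessorKeyList:
    processor = fileProcessorDict[processorKey]
    processor.dependsOn = [fileProcessorDict[k.lower()].name
                           for k in processor.dependsOn if k.lower() in fileProcessorDict]

print(fileProcessorDict['catalogstub'].dependsOn)   # ['TransferStub-2']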
......@@ -45,7 +45,7 @@ class FileProcessingThread(threading.Thread):
for processorKey in self.fileProcessorKeyList:
processorNumber += 1
processor = self.fileProcessorDict.get(processorKey)
processorName = '%s-%s' % (processor.__class__.__name__,processorNumber)
processorName = processor.name
if processedByDict.has_key(processorName):
self.logger.debug('%s has already been processed by %s ' % (fileInfo, processorName))
......
......@@ -6,8 +6,14 @@ class FileProcessor:
DEFAULT_NUMBER_OF_RETRIES = 0
DEFAULT_RETRY_WAIT_PERIOD_IN_SECONDS = 60
def __init__(self):
def __init__(self, dependsOn=[]):
self.configDict = {}
self.processorName = self.__class__.__name__
self.dependsOn = dependsOn
@abc.abstractmethod
def processDirectory(self, directoryInfo):
return NotImplemented
@abc.abstractmethod
def processFile(self, fileInfo):
......
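With the extended constructor and the new processDirectory() hook, a plugin can declare dependencies and support both processing modes. A minimal sketch of such a plugin (the class and its print statements are illustrative, not part of the commit):

from dm.common.processing.plugins.fileProcessor import FileProcessor

class ExampleCatalogPlugin(FileProcessor):

    def __init__(self, dependsOn=[]):
        FileProcessor.__init__(self, dependsOn=dependsOn)

    def processFile(self, fileInfo):
        # Per-file mode: invoked once for every observed file.
        print('Cataloging file: %s' % fileInfo.get('filePath'))

    def processDirectory(self, directoryInfo):
        # Directory mode: invoked once with the whole upload's file dictionary.
        filePathsDict = directoryInfo.get('filePathsDict', {})
        print('Cataloging %s files in a single pass' % len(filePathsDict))

# Configured with a dependency key that the FileProcessingManager later
# resolves to the assigned processor name (e.g. 'GridftpFileTransferPlugin-1').
plugin = ExampleCatalogPlugin(dependsOn=['GridftpFileTransferPlugin'])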
......@@ -10,8 +10,8 @@ from fileProcessor import FileProcessor
class FileTransferPlugin(FileProcessor):
def __init__(self, command, src=None, dest=None):
FileProcessor.__init__(self)
def __init__(self, command, src=None, dest=None, dependsOn=[]):
FileProcessor.__init__(self, dependsOn)
self.src = src
self.dest = dest
self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
......@@ -31,7 +31,7 @@ class FileTransferPlugin(FileProcessor):
storageDirectory = fileInfo.get('storageDirectory')
destUrl = self.getDestUrl(filePath, dataDirectory, storageHost, storageDirectory)
srcUrl = self.getSrcUrl(filePath, dataDirectory)
self.start(srcUrl, destUrl, fileInfo)
self.start(src=srcUrl, dest=destUrl, templateInfo=fileInfo)
def getSrcUrl(self, filePath, dataDirectory):
# Use relative path with respect to data directory as a source
......@@ -45,8 +45,21 @@ class FileTransferPlugin(FileProcessor):
destUrl = '%s:%s' % (storageHost, storageDirectory)
return destUrl
def getFullCommand(self, src, dest):
return '%s "%s" "%s"' % (self.command, src, dest)
def getSrcDirUrl(self, dataDirectory):
return '%s/' % dataDirectory
def getDestDirUrl(self, dataDirectory, storageHost, storageDirectory):
if self.dest:
destUrl = '%s/' % (self.dest)
else:
destUrl = '%s:%s/' % (storageHost, storageDirectory)
return destUrl
def getFullCommand(self, src, dest, command=None):
if command:
return '%s "%s" "%s"' % (command, src, dest)
else:
return '%s "%s" "%s"' % (self.command, src, dest)
def setSrc(self, src):
self.src = src
......@@ -54,7 +67,7 @@ class FileTransferPlugin(FileProcessor):
def setDest(self, dest):
self.dest = dest
def start(self, src=None, dest=None, fileInfo={}, cwd=None):
def start(self, src=None, dest=None, command=None, templateInfo={}, cwd=None):
# Use preconfigured source if provided source is None
fileSrc = src
if src is None:
......@@ -65,17 +78,17 @@ class FileTransferPlugin(FileProcessor):
# If destination is local, attempt to create it
if self.dest is not None and self.dest.find(':') < 0:
destDir = self.replaceTemplates(self.dest, fileInfo)
destDir = self.replaceTemplates(self.dest, templateInfo)
try:
OsUtility.createDir(destDir)
except Exception, ex:
self.logger.warn('Transfer may fail due to failure to create destination directory %s: %s' % (destDir, str(ex)))
fileSrc = self.replaceTemplates(fileSrc, fileInfo)
fileDest = self.replaceTemplates(fileDest, fileInfo)
fileSrc = self.replaceTemplates(fileSrc, templateInfo)
fileDest = self.replaceTemplates(fileDest, templateInfo)
if not fileSrc or not fileDest:
raise InvalidRequest('Both source and destination must be non-empty strings.')
self.subprocess = DmSubprocess.getSubprocess(self.getFullCommand(fileSrc, fileDest), cwd=cwd)
self.subprocess = DmSubprocess.getSubprocess(self.getFullCommand(fileSrc, fileDest, command), cwd=cwd)
return self.subprocess.run()
def wait(self):
......
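The new getSrcDirUrl()/getDestDirUrl() helpers establish a trailing-slash convention for directory transfers. A self-contained mirror of the base-class logic, for illustration (host and path values invented):

def getSrcDirUrl(dataDirectory):
    # Mirrors FileTransferPlugin.getSrcDirUrl(): the trailing slash makes tools
    # such as rsync transfer the directory contents rather than the directory itself.
    return '%s/' % dataDirectory

def getDestDirUrl(dataDirectory, storageHost, storageDirectory, dest=None):
    # Mirrors FileTransferPlugin.getDestDirUrl(): a preconfigured destination wins,
    # otherwise fall back to the host:directory form.
    if dest:
        return '%s/' % dest
    return '%s:%s/' % (storageHost, storageDirectory)

print(getSrcDirUrl('/data/exp1'))                                   # /data/exp1/
print(getDestDirUrl('/data/exp1', 'storage01', '/storage/exp1'))    # storage01:/storage/exp1/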
......@@ -16,8 +16,8 @@ class GridftpFileTransferPlugin(FileTransferPlugin):
DEFAULT_PORT = 2811
def __init__(self, src=None, dest=None, command=DEFAULT_COMMAND, localMd5Sum=True, remoteMd5Sum=False, deleteOriginal=False, pluginMustProcessFiles=True):
FileTransferPlugin.__init__(self, command, src, dest)
def __init__(self, src=None, dest=None, command=DEFAULT_COMMAND, localMd5Sum=True, remoteMd5Sum=False, deleteOriginal=False, pluginMustProcessFiles=True, dependsOn=[]):
FileTransferPlugin.__init__(self, command, src, dest, dependsOn=dependsOn)
self.dsFileApi = DsRestApiFactory.getFileRestApi()
self.localMd5Sum = localMd5Sum
self.remoteMd5Sum = remoteMd5Sum
......@@ -107,7 +107,7 @@ class GridftpFileTransferPlugin(FileTransferPlugin):
# Transfer file
self.logger.debug('Starting transfer: %s -> %s' % (srcUrl, destUrl))
self.start(src=srcUrl, dest=destUrl, fileInfo=fileInfo, cwd=dataDirectory)
self.start(src=srcUrl, dest=destUrl, templateInfo=fileInfo, cwd=dataDirectory)
# Get remote checksum
if self.remoteMd5Sum:
......@@ -130,6 +130,39 @@ class GridftpFileTransferPlugin(FileTransferPlugin):
# self.logger.debug('Deleting file %s' % filePath)
# OsUtility.removeFile(srcUrl)
def getSrcDirUrl(self, dataDirectory):
(scheme, host, port, dirPath) = FtpUtility.parseFtpUrl(dataDirectory, defaultPort=self.DEFAULT_PORT)
if scheme:
srcUrl = '%s/' % (dataDirectory)
elif self.src is None:
srcUrl = 'file://%s/' % (dataDirectory)
else:
srcUrl = '%s/%s/' % (self.src,dataDirectory)
return srcUrl
def getDestDirUrl(self, dataDirectory, storageHost, storageDirectory):
(scheme, host, port, dirPath) = FtpUtility.parseFtpUrl(dataDirectory, defaultPort=self.DEFAULT_PORT)
dirName = os.path.dirname(os.path.relpath(dataDirectory, dirPath)).strip()
if self.dest:
destUrl = '%s/%s/' % (self.dest, dirPath)
else:
destUrl = 'sshftp://%s/%s/%s/' % (storageHost, storageDirectory, dirName)
return destUrl
def processDirectory(self, directoryInfo):
uploadInfo = directoryInfo.get('uploadInfo')
dataDirectory = uploadInfo.get('dataDirectory')
experimentName = uploadInfo.get('experimentName')
storageHost = uploadInfo.get('storageHost')
storageDirectory = uploadInfo.get('storageDirectory')
destUrl = self.getDestDirUrl(dataDirectory, storageHost, storageDirectory)
srcUrl = self.getSrcDirUrl(dataDirectory)
# Transfer directory
self.logger.debug('Starting transfer: %s -> %s' % (srcUrl, destUrl))
self.start(src=srcUrl, dest=destUrl, templateInfo=uploadInfo, cwd=dataDirectory)
#######################################################################
# Testing.
if __name__ == '__main__':
......
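The directory-mode entry point above only needs the upload-level fields. A hedged sketch of the directoryInfo structure a caller would hand to processDirectory() (all values invented):

uploadInfo = {
    'id': 'upload-1',
    'experimentName': 'exp-2016-1',
    'dataDirectory': '/data/exp-2016-1',
    'storageHost': 'storage01',
    'storageDirectory': '/storage/exp-2016-1',
}
directoryInfo = {'uploadInfo': uploadInfo}
# plugin.processDirectory(directoryInfo) would then derive the source and
# destination directory URLs and run a single transfer for the whole tree.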
......@@ -3,14 +3,15 @@
import os
import time
from dm.common.utility.loggingManager import LoggingManager
from dm.common.objects.observedFile import ObservedFile
from dm.common.utility.timeUtility import TimeUtility
from dm.common.processing.plugins.fileProcessor import FileProcessor
from dm.common.mongodb.api.fileMongoDbApi import FileMongoDbApi
class MongoDbFileCatalogPlugin(FileProcessor):
def __init__(self):
FileProcessor.__init__(self)
def __init__(self, dependsOn=[]):
FileProcessor.__init__(self, dependsOn=dependsOn)
self.fileMongoDbApi = FileMongoDbApi()
self.logger = LoggingManager.getInstance().getLogger(self.__class__.__name__)
......@@ -57,6 +58,32 @@ class MongoDbFileCatalogPlugin(FileProcessor):
self.logger.debug('File "%s" catalog entry: %s' % (experimentFilePath, str(fileInfo2)))
self.fileMongoDbApi.updateOrAddExperimentFile(fileInfo2)
def processDirectory(self, directoryInfo):
uploadInfo = directoryInfo.get('uploadInfo')
daqInfo = directoryInfo.get('daqInfo')
experiment = directoryInfo.get('experiment')
filePathsDict = directoryInfo.get('filePathsDict')
uploadId = uploadInfo.get('id')
dataDirectory = uploadInfo.get('dataDirectory')
nProcessedFiles = 0
nFiles = len(filePathsDict)
for (filePath,filePathDict) in filePathsDict.items():
fileInfo = ObservedFile(filePath=filePath, dataDirectory=dataDirectory, experiment=experiment)
fileInfo.update(filePathDict)
fileInfo['daqInfo'] = daqInfo
fileInfo['uploadId'] = uploadId
if uploadInfo.get('status') != 'aborting':
self.processFile(fileInfo)
nProcessedFiles += 1
else:
nCancelledFiles = nFiles - nProcessedFiles
self.logger.debug('Upload id %s aborted, will not process %s files' % (uploadId, nCancelledFiles))
processingInfo = uploadInfo.get('processingInfo')
processingInfo[self.name]['status'] = 'aborted'
break
#######################################################################
# Testing.
......
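For the catalog plugin, directoryInfo additionally carries the per-file dictionary and the experiment/DAQ context. A hedged sketch of that structure (keys follow the loop above; all values invented):

directoryInfo = {
    'experiment': None,   # normally the Experiment object for the upload
    'daqInfo': {},        # DAQ metadata attached to every file entry
    'uploadInfo': {'id': 'upload-1', 'status': 'running', 'dataDirectory': '/data/exp-2016-1', 'processingInfo': {}},
    'filePathsDict': {
        'scan_0001.h5': {'fileSize': 1048576},
        'scan_0002.h5': {'fileSize': 2097152},
    },
}
# processDirectory() wraps each entry in an ObservedFile, merges the per-file
# dictionary into it, and catalogs it unless the upload status becomes 'aborting'.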
......@@ -12,11 +12,11 @@ from dm.ds_web_service.api.dsRestApiFactory import DsRestApiFactory
class RsyncFileTransferPlugin(FileTransferPlugin):
DEFAULT_COMMAND = 'rsync -arvlPR'
DIRECTORY_TRANSFER_COMMAND = 'rsync -arvlP'
DRY_RUN_COMMAND = 'rsync -arvlP'
def __init__(self, src=None, dest=None, command=DEFAULT_COMMAND, localMd5Sum=True,
remoteMd5Sum=False, deleteOriginal=False, pluginMustProcessFiles=True):
FileTransferPlugin.__init__(self, command, src, dest)
def __init__(self, src=None, dest=None, command=DEFAULT_COMMAND, localMd5Sum=True, remoteMd5Sum=False, deleteOriginal=False, pluginMustProcessFiles=True, dependsOn=[]):
FileTransferPlugin.__init__(self, command, src, dest, dependsOn=dependsOn)
self.dsFileApi = DsRestApiFactory.getFileRestApi()
self.localMd5Sum = localMd5Sum
self.remoteMd5Sum = remoteMd5Sum
......@@ -65,7 +65,7 @@ class RsyncFileTransferPlugin(FileTransferPlugin):
# Transfer file
self.logger.debug('Starting transfer: %s' % fileInfo)
self.start(src=srcUrl, dest=destUrl, fileInfo=fileInfo, cwd=dataDirectory)
self.start(src=srcUrl, dest=destUrl, templateInfo=fileInfo, cwd=dataDirectory)
# Get remote checksum
if self.remoteMd5Sum:
......@@ -88,6 +88,20 @@ class RsyncFileTransferPlugin(FileTransferPlugin):
self.logger.debug('Deleting file %s' % filePath)
OsUtility.removeFile(srcUrl)
def processDirectory(self, directoryInfo):
uploadInfo = directoryInfo.get('uploadInfo')
dataDirectory = uploadInfo.get('dataDirectory')
experimentName = uploadInfo.get('experimentName')
storageHost = uploadInfo.get('storageHost')
storageDirectory = uploadInfo.get('storageDirectory')
destUrl = self.getDestDirUrl(dataDirectory, storageHost, storageDirectory)
srcUrl = self.getSrcDirUrl(dataDirectory)
# Transfer directory
self.logger.debug('Starting transfer: %s -> %s' % (srcUrl, destUrl))
self.start(src=srcUrl, dest=destUrl, command=self.DIRECTORY_TRANSFER_COMMAND, templateInfo=uploadInfo, cwd=dataDirectory)
#######################################################################
# Testing.
if __name__ == '__main__':
......
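A self-contained sketch of the command selection that directory mode relies on: the per-call command passed by processDirectory() overrides the plugin's configured one, dropping rsync's -R (relative) flag because the whole tree is sent from a trailing-slash source. The command strings are taken from the class constants; the helper mirrors FileTransferPlugin.getFullCommand():

DEFAULT_COMMAND = 'rsync -arvlPR'
DIRECTORY_TRANSFER_COMMAND = 'rsync -arvlP'

def getFullCommand(src, dest, command=None, configuredCommand=DEFAULT_COMMAND):
    # An explicit command (directory mode) overrides the configured one (file mode).
    return '%s "%s" "%s"' % (command or configuredCommand, src, dest)

# Per-file transfer, issued relative to the data directory:
print(getFullCommand('./subdir/file1.dat', 'storage01:/storage/exp1'))
# rsync -arvlPR "./subdir/file1.dat" "storage01:/storage/exp1"

# Whole-directory transfer, as issued by processDirectory():
print(getFullCommand('/data/exp1/', 'storage01:/storage/exp1/', command=DIRECTORY_TRANSFER_COMMAND))
# rsync -arvlP "/data/exp1/" "storage01:/storage/exp1/"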
......@@ -34,7 +34,7 @@ class RsyncWithChecksumAndDeleteFileTransferPlugin(FileTransferPlugin):
self.logger.debug('File info before transfer: %s' % fileInfo)
# Transfer file
self.start(src=srcUrl, dest=destUrl, fileInfo=fileInfo, cwd=dataDirectory)
self.start(src=srcUrl, dest=destUrl, templateInfo=fileInfo, cwd=dataDirectory)
# Get remote checksum
fileInfo2 = {}
fileInfo2['experimentFilePath'] = experimentFilePath
......
......@@ -4,6 +4,13 @@ import copy
class DictUtility:
@classmethod
def getAndRemoveKey(cls, dict, key, default=None):
value = dict.get(key, default)
if dict.has_key(key):
del dict[key]
return value
@classmethod
def deepCopy(cls, dict, includeKeys=[], excludeKeys=[]):
dict2 = {}
......
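A quick usage sketch of the new helper (values invented):

from dm.common.utility.dictUtility import DictUtility

requestDict = {'experimentName': 'exp-2016-1', 'processingMode': 'directory'}
processingMode = DictUtility.getAndRemoveKey(requestDict, 'processingMode', 'files')
print(processingMode)   # directory
print(requestDict)      # {'experimentName': 'exp-2016-1'}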
......@@ -303,6 +303,13 @@ class LdapLinuxPlatformUtility:
cmd = '%s \:%s "%s"' % (cls.CHOWN_CMD, groupName, path)
cls.executeSudoCommand(cmd)
@classmethod
def recursivelyChangePathGroupOwner(cls, path, groupName):
logger = cls.getLogger()
logger.debug('Recursively changing group owner to %s for path %s' % (groupName, path))
cmd = '%s -R \:%s "%s"' % (cls.CHOWN_CMD, groupName, path)
cls.executeSudoCommand(cmd)
@classmethod
def refreshNscdGroupCache(cls):
logger = cls.getLogger()
......
......@@ -85,6 +85,13 @@ class LinuxUtility:
cmd = '%s \:%s "%s"' % (cls.CHOWN_CMD, groupName, path)
cls.executeSudoCommand(cmd)
@classmethod
def recursivelyChangePathGroupOwner(cls, path, groupName):
logger = cls.getLogger()
logger.debug('Recursively changing group owner to %s for path %s' % (groupName, path))
cmd = '%s -R \:%s "%s"' % (cls.CHOWN_CMD, groupName, path)
cls.executeSudoCommand(cmd)
#######################################################################
# Testing.
......
......@@ -8,6 +8,7 @@ from dm.common.utility.encoder import Encoder
from dm.common.exceptions.dmException import DmException
from dm.common.objects.experiment import Experiment
from dm.common.objects.uploadInfo import UploadInfo
from dm.common.objects.pluginInfo import PluginInfo
from dm.common.objects.daqInfo import DaqInfo
from daqRestApi import DaqRestApi
......@@ -72,6 +73,12 @@ class ExperimentRestApi(DaqRestApi):
responseDict = self.sendSessionRequest(url=url, method='POST')
return UploadInfo(responseDict)
@DaqRestApi.execute
def getProcessingPlugins(self):
url = '%s/processingPlugins' % (self.getContextRoot())
responseData = self.sendSessionRequest(url=url, method='GET')
return self.toDmObjectList(responseData, PluginInfo)
#######################################################################
# Testing.
......
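A hedged sketch of calling the new API method directly (credentials, host, and port are placeholders; the command-line wrapper added below does the same thing):

from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi

# Placeholder connection settings; normally taken from the DM session/configuration.
api = ExperimentRestApi('dmadmin', 'password', 'daq-host.example.com', '33336', 'https')
for plugin in api.getProcessingPlugins():
    # PluginInfo exposes 'id', 'name', and 'dependsOn'.
    print('%s depends on: %s' % (plugin.get('name'), plugin.get('dependsOn', [])))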
#!/usr/bin/env python
from dm.daq_web_service.api.experimentRestApi import ExperimentRestApi
from daqWebServiceSessionCli import DaqWebServiceSessionCli
class GetProcessingPluginsCli(DaqWebServiceSessionCli):
def __init__(self):
DaqWebServiceSessionCli.__init__(self)
def runCommand(self):
self.parseArgs(usage="""
dm-get-processing-plugins
Description:
Retrieves list of known processing plugins.
""")
api = ExperimentRestApi(self.getLoginUsername(), self.getLoginPassword(), self.getServiceHost(), self.getServicePort(), self.getServiceProtocol())
plugins = api.getProcessingPlugins()
for plugin in plugins:
print plugin.getDisplayString(self.getDisplayKeys(), self.getDisplayFormat())
#######################################################################
# Run command.
if __name__ == '__main__':
cli = GetProcessingPluginsCli()
cli.run()
......@@ -72,6 +72,16 @@ class ExperimentRouteDescriptor:
'action' : 'stopUpload',
'method' : ['POST']
},
# Get processing plugins
{
'name' : 'getProcessingPlugins',
'path' : '%s/processingPlugins' % contextRoot,
'controller' : experimentSessionController,
'action' : 'getProcessingPlugins',
'method' : ['GET']
},
]
return routes
......