From b8cdfa88437f0be85a0d747b625739b56b03548f Mon Sep 17 00:00:00 2001 From: Sinisa Veseli <sveseli@aps.anl.gov> Date: Sun, 31 Jan 2016 20:33:48 +0000 Subject: [PATCH] optimized detection of existing files --- .../plugins/gridftpFileTransferPlugin.py | 45 ++++++++++--------- src/python/dm/common/utility/ftpUtility.py | 3 ++ 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/python/dm/common/processing/plugins/gridftpFileTransferPlugin.py b/src/python/dm/common/processing/plugins/gridftpFileTransferPlugin.py index 53288d86..ca0cf64e 100755 --- a/src/python/dm/common/processing/plugins/gridftpFileTransferPlugin.py +++ b/src/python/dm/common/processing/plugins/gridftpFileTransferPlugin.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os +import copy from fileTransferPlugin import FileTransferPlugin from dm.common.utility.fileUtility import FileUtility from dm.common.utility.ftpUtility import FtpUtility @@ -48,27 +49,31 @@ class GridftpFileTransferPlugin(FileTransferPlugin): (scheme, host, port, replacementDirPath) = FtpUtility.parseFtpUrl(dataDirectory) ftpUtility = SftpUtility(storageHost) storageFilePathsDict = ftpUtility.getFiles(storageDirectory, {}, replacementDirPath) - pluginFilePathsDict = {} - filePaths = filePathsDict.keys() - for filePath in filePaths: + pluginFilePathsDict = copy.copy(filePathsDict) + # Remove file from plugin dict if we do not need to transfer it + for (filePath,storageFilePathDict) in storageFilePathsDict.items(): filePathDict = filePathsDict.get(filePath) - storageFilePathDict = storageFilePathsDict.get(filePath) - - if not storageFilePathDict: - # remote directory does not have the file - pluginFilePathsDict[filePath] = filePathDict - else: - fSize = filePathDict.get('fileSize') - sfSize = storageFilePathDict.get('fileSize') - # check size - if not fSize or not sfSize or fSize != sfSize: - pluginFilePathsDict[filePath] = filePathDict - else: - # sizes are the same, check modify time - mTime = filePathDict.get('fileModificationTime') - smTime = storageFilePathDict.get('fileModificationTime') - if not mTime or not smTime or mTime > smTime: - pluginFilePathsDict[filePath] = filePathDict + if filePathDict is None: + # We are not attempting to transfer this file + # No need to change plugin file dict + continue + + # Check size + fSize = filePathDict.get('fileSize') + sfSize = storageFilePathDict.get('fileSize') + if not fSize or not sfSize or fSize != sfSize: + # Sizes differ, need to transfer file + continue + + # Sizes are the same, check modify time + mTime = filePathDict.get('fileModificationTime') + smTime = storageFilePathDict.get('fileModificationTime') + if not mTime or not smTime or mTime > smTime: + # Source time is later than storage time, need to transfer file + continue + + # No need to transfer file + del pluginFilePathsDict[filePath] self.logger.debug('Number of original files: %s, number of plugin files: %s', len(filePathsDict), len(pluginFilePathsDict)) return pluginFilePathsDict diff --git a/src/python/dm/common/utility/ftpUtility.py b/src/python/dm/common/utility/ftpUtility.py index 42efebd8..312faf28 100755 --- a/src/python/dm/common/utility/ftpUtility.py +++ b/src/python/dm/common/utility/ftpUtility.py @@ -143,3 +143,6 @@ if __name__ == '__main__': print files print ftpUtility.getMd5Sum('/export/8-id-i/test/testfile01') print ftpUtility.statFile('/export/8-id-i/test/testfile01') + #ftpUtility = FtpUtility('xstor-devel', 22) + #files = ftpUtility.getFiles('/data/testing') + #print files -- GitLab