#!/usr/bin/env python

"""Helper class for listing and inspecting files on an FTP server."""

import copy
import os
import time
from ftplib import FTP
from dm.common.utility.timeUtility import TimeUtility
from dm.common.utility.loggingManager import LoggingManager

try:
    import urlparse
except ImportError:
    # Python 3 moved this module; bind it to the old name so the call
    # sites below stay unchanged.
    from urllib import parse as urlparse


class FtpUtility:
    """Thin wrapper around ftplib.FTP that walks directories using MLSD.

    The connection is opened lazily on first use and then kept in
    self.ftpClient for subsequent calls.
    """

    def __init__(self, host, port, username=None, password=None, serverUsesUtcTime=True):
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        # Created on demand by __getClient().
        self.ftpClient = None
        # When true, parsed modification times are passed through
        # TimeUtility.utcToLocalTime().
        self.serverUsesUtcTime = serverUsesUtcTime
        # Filled by the statFile() callback, keyed by entry name.
        self.mlsdFileStatDict = {}

    @classmethod
    def parseFtpUrl(cls, url, defaultHost=None, defaultPort=None):
        """Split an 'ftp://host[:port]/path' URL into its components.

        Anything that does not start with 'ftp://' is treated as a plain
        path: scheme is None and the defaults are returned for host/port.

        Returns a (scheme, host, port, dirPath) tuple.
        """
        host = defaultHost
        port = defaultPort
        scheme = None
        dirPath = url
        if url.startswith('ftp://'):
            parseResult = urlparse.urlparse(url)
            scheme = parseResult.scheme
            # Drop an optional "user:password@" prefix so credentials are
            # not mistaken for the host/port pair.
            hostPort = parseResult.netloc.rpartition('@')[2]
            netlocTokens = hostPort.split(':')
            host = netlocTokens[0]
            # "ftp://host:/path" has an empty port token; keep the default.
            if len(netlocTokens) > 1 and netlocTokens[1]:
                port = int(netlocTokens[1])
            dirPath = parseResult.path
        return (scheme, host, port, dirPath)

    @classmethod
    def getFtpClient(cls, host, port, username=None, password=None):
        """Return a new ftplib.FTP client, connected and logged in."""
        ftp = FTP()
        ftp.connect(host, port)
        ftp.login(username, password)
        return ftp

    @classmethod
    def getLogger(cls):
        """Return the LoggingManager logger named after this class."""
        logger = LoggingManager.getInstance().getLogger(cls.__name__)
        return logger

    def __getClient(self):
        """Return the cached FTP client, connecting first if necessary."""
        if not self.ftpClient:
            self.ftpClient = self.getFtpClient(self.host, self.port, self.username, self.password)
        return self.ftpClient

    @classmethod
    def __parseKeyValue(cls, origKeyValue, outputDict=None, newKey=None):
        """Store one 'key=value' fact in outputDict and return the dict.

        The value is stripped of surrounding whitespace. If newKey is
        given it replaces the key found in the text. A token without '='
        raises ValueError.
        """
        if outputDict is None:
            outputDict = {}
        # Split on the first '=' only, so values may themselves contain '='.
        key, value = origKeyValue.split('=', 1)
        if newKey is not None:
            key = newKey
        outputDict[key] = value.strip()
        return outputDict

    def __parseMlsdLine(self, line):
        """Parse one MLSD response line into (name, parseDict).

        Example line (shown split on ';'):
        ['Type=dir', 'Modify=20151018024430', 'Size=4096', 'Perm=el',
         'UNIX.mode=0775', 'UNIX.owner=sveseli', 'UNIX.uid=500',
         'UNIX.group=sveseli', 'UNIX.gid=500', 'Unique=fd00-c2e3e', ' dir2\r']

        Only the first three facts are kept, and the third is stored under
        'fileSize'. NOTE(review): this relies on the server sending
        Type, Modify, Size in that order, as in the example above.
        """
        # The facts end with ';' and a single space precedes the name;
        # splitting there keeps names that contain ';' intact.
        facts, separator, name = line.partition('; ')
        if separator:
            name = name.rstrip('\r\n')
        else:
            # Unexpected shape: fall back to the text after the last ';'.
            facts, _, name = line.rpartition(';')
            name = name.strip()
        parseDict = {}
        # zip() stops early, so entries with fewer than three facts
        # do not raise here.
        for fact, newKey in zip(facts.split(';'), (None, None, 'fileSize')):
            self.__parseKeyValue(fact, parseDict, newKey)
        return (name, parseDict)

    def __parseMlsdOutput(self, line):
        """retrlines() callback used by getFiles()/getFiles2().

        Directories are appended to self.mlsdDirList, files are stored in
        self.mlsdFileDict; entries of any other type are ignored.
        """
        name, parseDict = self.__parseMlsdLine(line)
        parseDict['Name'] = name
        entryType = parseDict.get('Type', '')
        if entryType == 'dir':
            self.mlsdDirList.append(name)
        elif entryType == 'file':
            self.mlsdFileDict[name] = parseDict

    def __parseMlsdFileStat(self, line):
        """retrlines() callback used by statFile(); stores facts by name."""
        name, parseDict = self.__parseMlsdLine(line)
        self.mlsdFileStatDict[name] = parseDict

    def __processFileStatDict(self, fileStatDict):
        """Convert raw MLSD facts in place.

        Adds 'fileModificationTime' (seconds as returned by time.mktime,
        passed through TimeUtility.utcToLocalTime when serverUsesUtcTime
        is set), turns 'fileSize' into an int, and removes the raw
        'Modify' and 'Type' entries.
        """
        modifyTime = fileStatDict.get('Modify')
        # RFC 3659 time values may carry a fractional part after the 14
        # digits ("YYYYMMDDHHMMSS.sss"); only the whole seconds are parsed.
        modifyTime = time.mktime(time.strptime(modifyTime[:14], '%Y%m%d%H%M%S'))
        if self.serverUsesUtcTime:
            modifyTime = TimeUtility.utcToLocalTime(modifyTime)
        fileStatDict['fileModificationTime'] = modifyTime
        fileStatDict['fileSize'] = int(fileStatDict.get('fileSize'))
        fileStatDict.pop('Modify', None)
        fileStatDict.pop('Type', None)

    def getFiles2(self, dirPath, fileDict=None, replacementDirPath=None, initialCall=True):
        """Recursively list all files under dirPath.

        Args:
            dirPath: remote directory to walk.
            fileDict: dictionary to fill; a new one is created when omitted.
            replacementDirPath: prefix used in the result keys instead of
                dirPath (defaults to dirPath).
            initialCall: internal flag, false for recursive invocations.

        Returns:
            fileDict, mapping '<replacementDirPath>/<name>' to the
            processed fact dictionary of each file.
        """
        # A fresh dictionary per call; a shared default would carry
        # results over from earlier calls.
        if fileDict is None:
            fileDict = {}
        ftpClient = self.__getClient()
        # Need these to be class members for the callback function
        self.mlsdFileDict = {}
        self.mlsdDirList = []
        ftpClient.retrlines('MLSD %s' % dirPath, self.__parseMlsdOutput)
        if not replacementDirPath:
            replacementDirPath = dirPath
        for (fileName, fileInfo) in self.mlsdFileDict.items():
            self.__processFileStatDict(fileInfo)
            del fileInfo['Name']
            filePath = '%s/%s' % (replacementDirPath, fileName)
            fileDict[filePath] = fileInfo
        # Copy first: the recursive calls below reset self.mlsdDirList.
        mlsdDirList = copy.copy(self.mlsdDirList)
        for d in mlsdDirList:
            dirPath2 = '%s/%s' % (dirPath, d)
            replacementDirPath2 = '%s/%s' % (replacementDirPath, d)
            self.getFiles2(dirPath2, fileDict, replacementDirPath2, initialCall=False)
        if initialCall:
            del self.mlsdFileDict
            del self.mlsdDirList
        return fileDict

    def getFiles(self, dirPath, fileDict=None, replacementDirPath=None):
        """Recursively list all files under dirPath.

        Like getFiles2(), but every value in the result holds only
        'fileSize' and 'fileModificationTime'.

        Args:
            dirPath: remote directory to walk.
            fileDict: dictionary to fill; a new one is created when omitted.
            replacementDirPath: prefix used in the result keys instead of
                dirPath (defaults to dirPath).

        Returns:
            fileDict, mapping '<replacementDirPath>/<name>' to
            {'fileSize': ..., 'fileModificationTime': ...}.
        """
        if fileDict is None:
            fileDict = {}
        ftpClient = self.__getClient()
        # Need these to be class members for the callback function
        self.mlsdFileDict = {}
        self.mlsdDirList = []
        ftpClient.retrlines('MLSD %s' % dirPath, self.__parseMlsdOutput)
        if not replacementDirPath:
            replacementDirPath = dirPath
        for (fileName, fileInfo) in self.mlsdFileDict.items():
            filePath = '%s/%s' % (replacementDirPath, fileName)
            # Same conversion as getFiles2()/statFile(); only two fields
            # are kept in the result.
            self.__processFileStatDict(fileInfo)
            fileDict[filePath] = {
                'fileSize': fileInfo['fileSize'],
                'fileModificationTime': fileInfo['fileModificationTime'],
            }
        self.mlsdFileDict.clear()
        # Copy first: the recursive calls below reset self.mlsdDirList.
        mlsdDirList = copy.copy(self.mlsdDirList)
        for d in mlsdDirList:
            dirPath2 = '%s/%s' % (dirPath, d)
            replacementDirPath2 = '%s/%s' % (replacementDirPath, d)
            self.getFiles(dirPath2, fileDict, replacementDirPath2)
        self.mlsdDirList = []
        return fileDict

    def getMd5Sum(self, filePath, fileInfo=None):
        """Return the checksum the server reports for filePath.

        Sends 'CKSM MD5 0 -1 <filePath>' and takes the last
        whitespace-separated token of the reply. The value is also stored
        in fileInfo['md5Sum'] when a fileInfo dictionary is supplied.
        NOTE(review): CKSM looks like a server-specific extension;
        confirm the target server supports it.
        """
        if fileInfo is None:
            fileInfo = {}
        md5Sum = self.__getClient().sendcmd('CKSM MD5 0 -1 %s' % filePath).split()[-1]
        fileInfo['md5Sum'] = md5Sum
        return md5Sum

    def statFile(self, filePath, fileInfo=None):
        """Fetch size and modification time for a single remote file.

        Args:
            filePath: remote path of the file.
            fileInfo: dictionary to update; a new one is created when
                omitted.

        Returns:
            fileInfo, updated with the processed facts of the entry whose
            name equals the base name of filePath. It is left unchanged
            when the server reported no such entry.
        """
        if fileInfo is None:
            fileInfo = {}
        fileName = os.path.basename(filePath)
        ftpClient = self.__getClient()
        # The callback stores its results in self.mlsdFileStatDict.
        ftpClient.retrlines('MLSD %s' % filePath, self.__parseMlsdFileStat)
        # pop() both fetches and removes the entry, and does not raise
        # when the server returned nothing for this name.
        fileStatDict = self.mlsdFileStatDict.pop(fileName, None)
        if fileStatDict:
            self.__processFileStatDict(fileStatDict)
            fileInfo.update(fileStatDict)
        return fileInfo

#######################################################################
# Testing.
if __name__ == '__main__':
    # Manual smoke test. It talks to a live server, so the host, port and
    # paths below are specific to the original deployment.
    print("Round 1: ")
    utility = FtpUtility('s33dserv', 2811)

    # Full-detail recursive listing.
    #listing = utility.getFiles2('/export/7IDSprayimage/Cummins/Data')
    listing = utility.getFiles2('/export/dm/test')
    print(listing)

    # Size/time-only listing; a plain path goes through parseFtpUrl().
    listing = utility.getFiles('/export/dm/test')
    print(utility.parseFtpUrl('/export/dm/test'))
    print(listing)

    # Larger listings, kept for manual memory-usage checks:
    #listing = utility.getFiles('/export/7IDSprayimage/Cummins/Data')
    #listing = utility.getFiles2('/export/8-id-i/test', replacementDirPath='/data/testing/8-id-i')
    #print("Number of files: %s" % len(listing))
    #time.sleep(60)
    #print("Removing files")
    #del listing
    #print("Files removed")
    #time.sleep(60)
    #del utility
    #print("Utility removed")
    #time.sleep(60)

    #print("Round 2: ")
    #utility = FtpUtility('s7dserv', 2811)
    #listing = utility.getFiles2('/export/7IDSprayimage/Cummins/Data')

    print(utility.getMd5Sum('/export/dm/test/testfile01'))

    # Other targets:
    #print(utility.statFile('/export/8-id-i/test/testfile01'))
    #utility = FtpUtility('xstor-devel', 22)
    #listing = utility.getFiles2('/data/testing')
    #print(listing)