diff --git a/src/python/dm/proc_web_service/api/workflowProcApi.py b/src/python/dm/proc_web_service/api/workflowProcApi.py index 86693717d50888fbb3960c9d7b35a4d772311852..34ca6ff42088e07a5c4b4eef5663ea699db44048 100755 --- a/src/python/dm/proc_web_service/api/workflowProcApi.py +++ b/src/python/dm/proc_web_service/api/workflowProcApi.py @@ -24,7 +24,7 @@ class WorkflowProcApi(ProcRestApi): :param username: DM username (it may be omitted if environment variable DM_LOGIN_FILE points to a file containing the "<username>|<password>" pair) :type username: str - + :param password: DM password (it may be omitted if environment variable DM_LOGIN_FILE points to a file containing the "<username>|<password>" pair) :type password: str @@ -41,7 +41,7 @@ class WorkflowProcApi(ProcRestApi): Add workflow to the DM workflow database. Workflows are defined using dictionaries, and they serve as templates for running processing jobs. - + Workflow keys: - name (required) - owner (required) @@ -52,7 +52,7 @@ class WorkflowProcApi(ProcRestApi): Stage dictionary keys can be anything; they will get sorted, and stages will get executed in the sorted order - + Stage keys: - command (required; may use $variable strings that would get their values at runtime, via <key>:<value> arguments) @@ -64,6 +64,7 @@ class WorkflowProcApi(ProcRestApi): subsequent workflow stages) - repeatPeriod, repeatUntil, maxRepeats (optional; must be used together): + - repeatPeriod: - defines period in seconds after which the stage command will be repeated @@ -78,12 +79,11 @@ class WorkflowProcApi(ProcRestApi): - maxRepeats: - defines maximum number of repeats; if this number is reached, stage will fail - + Reserved keys that cannot be used in a stage definition: - workflow: processing job workflow specification - - Reserved keys that may be used in a stage definition as command input - variables: + + Reserved keys that may be used in a stage definition as command input variables: - id: processing job id - stage: processing job workflow stage - status: processing job status @@ -101,59 +101,53 @@ class WorkflowProcApi(ProcRestApi): - nFailedFiles: number of failed files - nSkippedFiles: number of skipped files - nAbortedFiles: number of aborted files - - nCompletedFiles: number of completed input files - - sum of processed, failed, skipped and aborted files + - nCompletedFiles: number of completed input files (sum of processed, failed, skipped and aborted files) - processedFiles: list of processed files - failedFiles: list of failed files - skippedFiles: list of skipped files - abortedFiles: list of aborted files - Reserved keys designated for specifying processing job input - files that may be used in a stage definition as command input - variables: - - filePath: input file path - - if filePath is specified as one of the stage command - input variables, the workflow stage will iterate over - all job input files + Reserved keys designated for specifying processing job input files that may be used in a stage definition as command input variables: + - filePath: input file path (if filePath is specified as one of the stage command input variables, the workflow stage will iterate over all job input files) - filePathList: list of all input file paths - filePathPattern: glob pattern for input file paths - - fileQueryDict: metadata catalog query dictionary - - not yet implemented (reserved for future use) + - fileQueryDict: metadata catalog query dictionary (not yet implemented) - dataDir: directory containing data files Any keys that are not reserved may be used in a stage definition as command input or output variables. Stage output variables can be used as input for any of the subsequent stages. - + Example workflow definition: - - { - 'name' : 'example-01' - 'owner' : 'anOwner', - 'stages' : { - '01-START' : { - 'command' : '/bin/date +%Y%m%d%H%M%S', - 'outputVariableRegexList' : ['(?P<timeStamp>.*)'] - }, - '02-MKDIR' : { - 'command' : '/bin/mkdir -p /tmp/workflow.$timeStamp' - }, - '03-ECHO' : { - 'command' : '/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out' - }, - '04-MD5SUM' : { - 'command' : '/bin/md5sum $filePath | cut -f1 -d" "', - 'outputVariableRegexList' : ['(?P<md5Sum>.*)'] - }, - '05-ECHO' : { - 'command' : 'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out' - }, - '06-DONE' : { - 'command' : '/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out' - }, - }, - 'description' : 'Workflow Example 01' - } + + { + 'name' : 'example-01', + 'owner' : 'anOwner', + 'stages' : { + '01-START' : { + 'command' : '/bin/date +%Y%m%d%H%M%S', + 'outputVariableRegexList' : ['(?P<timeStamp>.*)'] + }, + '02-MKDIR' : { + 'command' : '/bin/mkdir -p /tmp/workflow.$timeStamp' + }, + '03-ECHO' : { + 'command' : '/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out' + }, + '04-MD5SUM' : { + 'command' : '/bin/md5sum $filePath | cut -f1 -d" "', + 'outputVariableRegexList' : ['(?P<md5Sum>.*)'] + }, + '05-ECHO' : { + 'command' : 'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out' + }, + '06-DONE' : { + 'command' : '/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out' + }, + }, + 'description' : 'Workflow Example 01' + } + :param workflow: Workflow description :type workflow: Workflow or dict @@ -430,15 +424,13 @@ class WorkflowProcApi(ProcRestApi): - nFailedFiles: number of failed files - nSkippedFiles: number of skipped files - nAbortedFiles: number of aborted files - - nCompletedFiles: number of completed input files - - sum of processed, failed, skipped and aborted files + - nCompletedFiles: number of completed input files (sum of processed, failed, skipped and aborted files) - processedFiles: list of processed files - failedFiles: list of failed files - skippedFiles: list of skipped files - abortedFiles: list of aborted files - Reserved keys designated for specifying processing job input - files that may be passed as job input at runtime: + Reserved keys designated for specifying processing job input files that may be passed as job input at runtime: - filePath: input file path - if filePath:<aPath> is specified as part of job input, the job input file list will consist of a single file