From 0b9c78592263746d93ecba023282da3ba3c1eb2a Mon Sep 17 00:00:00 2001
From: hammonds <JPHammonds@anl.gov>
Date: Thu, 5 Dec 2019 12:38:30 -0600
Subject: [PATCH] More corrections from use and also add a section
 demonstrating workflows and processing jobs.

---
 Installation/APSDeveloperInstallation.md      | 182 +++++++++++++++++-
 .../DataManagementSplitSystemSetup.md         | 181 ++++++++++++++++-
 2 files changed, 360 insertions(+), 3 deletions(-)

diff --git a/Installation/APSDeveloperInstallation.md b/Installation/APSDeveloperInstallation.md
index 537c83b2..2f1bc793 100644
--- a/Installation/APSDeveloperInstallation.md
+++ b/Installation/APSDeveloperInstallation.md
@@ -169,7 +169,7 @@ This command will
  * Create an experiment named `e1`with
    - The three experimenters `jprofessor`, `gpostdoc` & `jgradstudent`
    - The data that is being collected will be found at `/home/dmadmin/testData`
-   - Any data/files found in `/home/dmadmin/testData` will be found in a directory `TEST/e1/MyFirstExperiment` of the storage location defined for the Data Storage service.
+   - Any data/files found in `/home/dmadmin/testData` will be found in a directory `TEST/e1/MyFirstExperiment` of the storage location defined for the Data Storage service.  NOTE: if the directory `/home/dmadmin/testData` does not exist, then the upload process will fail.
  	
 
 Output like the following
@@ -210,4 +210,182 @@ and
   u'storageDirectory': u'/home/dmadmin/storage/TEST/e1',
   u'storageHost': u'localhost',
   u'storageUrl': u'extrepid://localhost/home/dmadmin/storage/TEST/e1'}
-```
\ No newline at end of file
+```
+
+The next step adds a workflow and then executes it.  This workflow is an example taken from the comments in the file `workflowProcApi.py` (the owner name has been changed to match user `dmtest`). It creates a minimal workflow that computes the md5sum of a given file.  The workflow is defined by the following:
+
+```
+            {
+                'name'        : 'example-02',
+                'owner'       : 'dmtest',
+                'stages'      : {
+                    '01-START'  : {
+                        'command' : '/bin/date +%Y%m%d%H%M%S',
+                        'outputVariableRegexList' : ['(?P<timeStamp>.*)']
+                    },
+                    '02-MKDIR'  : {
+                        'command' : '/bin/mkdir -p /tmp/workflow.$timeStamp'
+                    },
+                    '03-ECHO'   : {
+                        'command' : '/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out'
+                    },
+                    '04-MD5SUM' : {
+                        'command' : '/bin/md5sum $filePath | cut -f1 -d" "',
+                        'outputVariableRegexList' : ['(?P<md5Sum>.*)']
+                    },
+                    '05-ECHO'   : {
+                        'command' : 'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out'
+                    },
+                    '06-DONE'   : {
+                        'command' : '/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out'
+                    },
+                },
+                'description' : 'Workflow Example 01'
+            }
+```
+
+This workflow can be added to the system with the command:
+
+ > dm-upsert-workflow --py-spec=sampleWorkflow
+
+and will yield a result like:
+
+```
+id=5de938931d9a2030403a7dd0 name=example-02 owner=dmtest 
+```
+
+This workflow can be executed with the command:
+
+>   dm-start-processing-job --workflow-name=example-02 --workflow-owner=dmtest filePath:/home/dmadmin/testData/myData
+
+This will have a result like:
+
+```
+id=2f004219-0694-4955-af05-b29b48ce4c0a owner=dmtest status=pending startTime=1575566109.86 startTimestamp=2019/12/05 12:15:09 EST
+```
+
+More information can be found with `dm-get-processing-job` like:
+
+ > dm-get-processing-job --id=2f004219-0694-4955-af05-b29b48ce4c0a --display-keys=ALL --display-format=pprint
+ 
+which returns
+
+```python
+{ u'endTime': 1575566111.014859,
+  u'endTimestamp': u'2019/12/05 12:15:11 EST',
+  u'filePath': u'/home/dmadmin/testData/myData',
+  u'id': u'2f004219-0694-4955-af05-b29b48ce4c0a',
+  u'md5Sum': u'bac0be486ddc69992ab4e01eeade0b92',
+  u'nFiles': 1,
+  u'owner': u'dmtest',
+  u'runTime': 1.1574599742889404,
+  u'stage': u'06-DONE',
+  u'startTime': 1575566109.857399,
+  u'startTimestamp': u'2019/12/05 12:15:09 EST',
+  u'status': u'done',
+  u'timeStamp': u'20191205121510',
+  u'workflow': { u'description': u'Workflow Example 01',
+                 u'id': u'5de938931d9a2030403a7dd0',
+                 u'name': u'example-02',
+                 u'owner': u'dmtest',
+                 u'stages': { u'01-START': { u'childProcesses': { u'0': { u'childProcessNumber': 0,
+                                                                          u'command': u'/bin/date +%Y%m%d%H%M%S',
+                                                                          u'endTime': 1575566110.898553,
+                                                                          u'exitStatus': 0,
+                                                                          u'runTime': 0.007671833038330078,
+                                                                          u'stageId': u'01-START',
+                                                                          u'startTime': 1575566110.890881,
+                                                                          u'status': u'done',
+                                                                          u'stdErr': u'',
+                                                                          u'stdOut': u'20191205121510\n',
+                                                                          u'submitTime': 1575566110.859169,
+                                                                          u'workingDir': None}},
+                                             u'command': u'/bin/date +%Y%m%d%H%M%S',
+                                             u'nCompletedChildProcesses': 1,
+                                             u'nQueuedChildProcesses': 0,
+                                             u'nRunningChildProcesses': 0,
+                                             u'outputVariableRegexList': [ u'(?P<timeStamp>.*)']},
+                              u'02-MKDIR': { u'childProcesses': { u'1': { u'childProcessNumber': 1,
+                                                                          u'command': u'/bin/mkdir -p /tmp/workflow.20191205121510',
+                                                                          u'endTime': 1575566110.942735,
+                                                                          u'exitStatus': 0,
+                                                                          u'runTime': 0.0035638809204101562,
+                                                                          u'stageId': u'02-MKDIR',
+                                                                          u'startTime': 1575566110.939171,
+                                                                          u'status': u'done',
+                                                                          u'stdErr': u'',
+                                                                          u'stdOut': u'',
+                                                                          u'submitTime': 1575566110.925104,
+                                                                          u'workingDir': None}},
+                                             u'command': u'/bin/mkdir -p /tmp/workflow.$timeStamp',
+                                             u'nCompletedChildProcesses': 1,
+                                             u'nQueuedChildProcesses': 0,
+                                             u'nRunningChildProcesses': 0},
+                              u'03-ECHO': { u'childProcesses': { u'2': { u'childProcessNumber': 2,
+                                                                         u'command': u'/bin/echo "START JOB ID: 2f004219-0694-4955-af05-b29b48ce4c0a" > /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566110.972364,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.003882884979248047,
+                                                                         u'stageId': u'03-ECHO',
+                                                                         u'startTime': 1575566110.968481,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566110.960305,
+                                                                         u'workingDir': None}},
+                                            u'command': u'/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0},
+                              u'04-MD5SUM': { u'childProcesses': { u'3': { u'childProcessNumber': 3,
+                                                                           u'command': u'/bin/md5sum /home/dmadmin/testData/myData | cut -f1 -d" "',
+                                                                           u'endTime': 1575566110.985139,
+                                                                           u'exitStatus': 0,
+                                                                           u'runTime': 0.0030689239501953125,
+                                                                           u'stageId': u'04-MD5SUM',
+                                                                           u'startTime': 1575566110.98207,
+                                                                           u'status': u'done',
+                                                                           u'stdErr': u'',
+                                                                           u'stdOut': u'bac0be486ddc69992ab4e01eeade0b92\n',
+                                                                           u'submitTime': 1575566110.973093,
+                                                                           u'workingDir': None}},
+                                              u'command': u'/bin/md5sum $filePath | cut -f1 -d" "',
+                                              u'nCompletedChildProcesses': 1,
+                                              u'nQueuedChildProcesses': 0,
+                                              u'nRunningChildProcesses': 0,
+                                              u'outputVariableRegexList': [ u'(?P<md5Sum>.*)']},
+                              u'05-ECHO': { u'childProcesses': { u'4': { u'childProcessNumber': 4,
+                                                                         u'command': u'echo "FILE /home/dmadmin/testData/myData MD5 SUM: bac0be486ddc69992ab4e01eeade0b92" >> /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566110.997652,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.0005791187286376953,
+                                                                         u'stageId': u'05-ECHO',
+                                                                         u'startTime': 1575566110.997073,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566110.987421,
+                                                                         u'workingDir': None}},
+                                            u'command': u'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0},
+                              u'06-DONE': { u'childProcesses': { u'5': { u'childProcessNumber': 5,
+                                                                         u'command': u'/bin/echo "STOP JOB ID: 2f004219-0694-4955-af05-b29b48ce4c0a" >> /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566111.011913,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.001583099365234375,
+                                                                         u'stageId': u'06-DONE',
+                                                                         u'startTime': 1575566111.01033,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566111.002148,
+                                                                         u'workingDir': None}},
+                                            u'command': u'/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0}}}}
+```
+
+Note that the md5 sum of the file `/home/dmadmin/testData/myData` is listed in the `stdOut` of stage `04-MD5SUM` and is used in the command in stage `05-ECHO`, which in turn appends it to a temp file under `/tmp`.
\ No newline at end of file
diff --git a/Installation/DataManagementSplitSystemSetup.md b/Installation/DataManagementSplitSystemSetup.md
index 64dc43b7..1fa0a9c6 100644
--- a/Installation/DataManagementSplitSystemSetup.md
+++ b/Installation/DataManagementSplitSystemSetup.md
@@ -300,4 +300,183 @@ and
   u'storageDirectory': u'/home/dmadmin/storage/TEST/e1',
   u'storageHost': u'localhost',
   u'storageUrl': u'extrepid://localhost/home/dmadmin/storage/TEST/e1'}
-```  
\ No newline at end of file
+```
+
+
+The next step adds a workflow and then executes it.  This workflow is an example taken from the comments in the file `workflowProcApi.py` (the owner name has been changed to match user `dmtest`). It creates a minimal workflow that computes the md5sum of a given file.  The workflow is defined by the following:
+
+```
+            {
+                'name'        : 'example-02',
+                'owner'       : 'dmtest',
+                'stages'      : {
+                    '01-START'  : {
+                        'command' : '/bin/date +%Y%m%d%H%M%S',
+                        'outputVariableRegexList' : ['(?P<timeStamp>.*)']
+                    },
+                    '02-MKDIR'  : {
+                        'command' : '/bin/mkdir -p /tmp/workflow.$timeStamp'
+                    },
+                    '03-ECHO'   : {
+                        'command' : '/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out'
+                    },
+                    '04-MD5SUM' : {
+                        'command' : '/bin/md5sum $filePath | cut -f1 -d" "',
+                        'outputVariableRegexList' : ['(?P<md5Sum>.*)']
+                    },
+                    '05-ECHO'   : {
+                        'command' : 'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out'
+                    },
+                    '06-DONE'   : {
+                        'command' : '/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out'
+                    },
+                },
+                'description' : 'Workflow Example 01'
+            }
+```
+
+This workflow can be added to the system with the command:
+
+ > dm-upsert-workflow --py-spec=sampleWorkflow
+
+and will yield a result like:
+
+```
+id=5de938931d9a2030403a7dd0 name=example-02 owner=dmtest 
+```
+
+This workflow can be executed with the command:
+
+>   dm-start-processing-job --workflow-name=example-02 --workflow-owner=dmtest filePath:/home/dmadmin/testData/myData
+
+This will have a result like:
+
+```
+id=2f004219-0694-4955-af05-b29b48ce4c0a owner=dmtest status=pending startTime=1575566109.86 startTimestamp=2019/12/05 12:15:09 EST
+```
+
+More information can be found with `dm-get-processing-job` like:
+
+ > dm-get-processing-job --id=2f004219-0694-4955-af05-b29b48ce4c0a --display-keys=ALL --display-format=pprint
+ 
+which returns
+
+```python
+{ u'endTime': 1575566111.014859,
+  u'endTimestamp': u'2019/12/05 12:15:11 EST',
+  u'filePath': u'/home/dmadmin/testData/myData',
+  u'id': u'2f004219-0694-4955-af05-b29b48ce4c0a',
+  u'md5Sum': u'bac0be486ddc69992ab4e01eeade0b92',
+  u'nFiles': 1,
+  u'owner': u'dmtest',
+  u'runTime': 1.1574599742889404,
+  u'stage': u'06-DONE',
+  u'startTime': 1575566109.857399,
+  u'startTimestamp': u'2019/12/05 12:15:09 EST',
+  u'status': u'done',
+  u'timeStamp': u'20191205121510',
+  u'workflow': { u'description': u'Workflow Example 01',
+                 u'id': u'5de938931d9a2030403a7dd0',
+                 u'name': u'example-02',
+                 u'owner': u'dmtest',
+                 u'stages': { u'01-START': { u'childProcesses': { u'0': { u'childProcessNumber': 0,
+                                                                          u'command': u'/bin/date +%Y%m%d%H%M%S',
+                                                                          u'endTime': 1575566110.898553,
+                                                                          u'exitStatus': 0,
+                                                                          u'runTime': 0.007671833038330078,
+                                                                          u'stageId': u'01-START',
+                                                                          u'startTime': 1575566110.890881,
+                                                                          u'status': u'done',
+                                                                          u'stdErr': u'',
+                                                                          u'stdOut': u'20191205121510\n',
+                                                                          u'submitTime': 1575566110.859169,
+                                                                          u'workingDir': None}},
+                                             u'command': u'/bin/date +%Y%m%d%H%M%S',
+                                             u'nCompletedChildProcesses': 1,
+                                             u'nQueuedChildProcesses': 0,
+                                             u'nRunningChildProcesses': 0,
+                                             u'outputVariableRegexList': [ u'(?P<timeStamp>.*)']},
+                              u'02-MKDIR': { u'childProcesses': { u'1': { u'childProcessNumber': 1,
+                                                                          u'command': u'/bin/mkdir -p /tmp/workflow.20191205121510',
+                                                                          u'endTime': 1575566110.942735,
+                                                                          u'exitStatus': 0,
+                                                                          u'runTime': 0.0035638809204101562,
+                                                                          u'stageId': u'02-MKDIR',
+                                                                          u'startTime': 1575566110.939171,
+                                                                          u'status': u'done',
+                                                                          u'stdErr': u'',
+                                                                          u'stdOut': u'',
+                                                                          u'submitTime': 1575566110.925104,
+                                                                          u'workingDir': None}},
+                                             u'command': u'/bin/mkdir -p /tmp/workflow.$timeStamp',
+                                             u'nCompletedChildProcesses': 1,
+                                             u'nQueuedChildProcesses': 0,
+                                             u'nRunningChildProcesses': 0},
+                              u'03-ECHO': { u'childProcesses': { u'2': { u'childProcessNumber': 2,
+                                                                         u'command': u'/bin/echo "START JOB ID: 2f004219-0694-4955-af05-b29b48ce4c0a" > /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566110.972364,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.003882884979248047,
+                                                                         u'stageId': u'03-ECHO',
+                                                                         u'startTime': 1575566110.968481,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566110.960305,
+                                                                         u'workingDir': None}},
+                                            u'command': u'/bin/echo "START JOB ID: $id" > /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0},
+                              u'04-MD5SUM': { u'childProcesses': { u'3': { u'childProcessNumber': 3,
+                                                                           u'command': u'/bin/md5sum /home/dmadmin/testData/myData | cut -f1 -d" "',
+                                                                           u'endTime': 1575566110.985139,
+                                                                           u'exitStatus': 0,
+                                                                           u'runTime': 0.0030689239501953125,
+                                                                           u'stageId': u'04-MD5SUM',
+                                                                           u'startTime': 1575566110.98207,
+                                                                           u'status': u'done',
+                                                                           u'stdErr': u'',
+                                                                           u'stdOut': u'bac0be486ddc69992ab4e01eeade0b92\n',
+                                                                           u'submitTime': 1575566110.973093,
+                                                                           u'workingDir': None}},
+                                              u'command': u'/bin/md5sum $filePath | cut -f1 -d" "',
+                                              u'nCompletedChildProcesses': 1,
+                                              u'nQueuedChildProcesses': 0,
+                                              u'nRunningChildProcesses': 0,
+                                              u'outputVariableRegexList': [ u'(?P<md5Sum>.*)']},
+                              u'05-ECHO': { u'childProcesses': { u'4': { u'childProcessNumber': 4,
+                                                                         u'command': u'echo "FILE /home/dmadmin/testData/myData MD5 SUM: bac0be486ddc69992ab4e01eeade0b92" >> /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566110.997652,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.0005791187286376953,
+                                                                         u'stageId': u'05-ECHO',
+                                                                         u'startTime': 1575566110.997073,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566110.987421,
+                                                                         u'workingDir': None}},
+                                            u'command': u'echo "FILE $filePath MD5 SUM: $md5Sum" >> /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0},
+                              u'06-DONE': { u'childProcesses': { u'5': { u'childProcessNumber': 5,
+                                                                         u'command': u'/bin/echo "STOP JOB ID: 2f004219-0694-4955-af05-b29b48ce4c0a" >> /tmp/workflow.20191205121510/2f004219-0694-4955-af05-b29b48ce4c0a.out',
+                                                                         u'endTime': 1575566111.011913,
+                                                                         u'exitStatus': 0,
+                                                                         u'runTime': 0.001583099365234375,
+                                                                         u'stageId': u'06-DONE',
+                                                                         u'startTime': 1575566111.01033,
+                                                                         u'status': u'done',
+                                                                         u'stdErr': u'',
+                                                                         u'stdOut': u'',
+                                                                         u'submitTime': 1575566111.002148,
+                                                                         u'workingDir': None}},
+                                            u'command': u'/bin/echo "STOP JOB ID: $id" >> /tmp/workflow.$timeStamp/$id.out',
+                                            u'nCompletedChildProcesses': 1,
+                                            u'nQueuedChildProcesses': 0,
+                                            u'nRunningChildProcesses': 0}}}}
+```
+
+Note that the md5 sum of the file `/home/dmadmin/testData/myData` is listed in the `stdOut` of stage `04-MD5SUM` and is used in the command in stage `05-ECHO`, which in turn appends it to a temp file under `/tmp`.
\ No newline at end of file
-- 
GitLab