-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathhr4e_gen_patient_files.py
More file actions
158 lines (131 loc) · 5.69 KB
/
hr4e_gen_patient_files.py
File metadata and controls
158 lines (131 loc) · 5.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
'''
Created on May 17, 2011
Simple Python script to generate the files for an HR4E patient.
The input to this script is an instance of an HR4E patient data XML document that
was generated from hr4e_patient_template.xml.
The patient's given and family names and the document ID are extracted from the file and
concatenated to produce a file name with a .xml suffix; the document is stored under that
name in a per-patient directory below data/patient_data/.
The stored file is then transformed with Saxon into a green CCD file and a CDA file.
@author: torkroth
@modified: superrawr
'''
import libxml2
import libxslt
import os
import sys
import StringIO, sys
import subprocess
import argparse
import cPickle as pickle
#
# Parse the command line
#
# -i <input file>:  HR4E patient data file to read and write out
#                   (default: hr4e_patient.xml in the current directory)
# -g / -f / -d:     XPath expressions selecting the given name, family name and document ID
# -n:               namespace URI bound to the "gcda" prefix used by those XPath expressions
# -green / -cda:    prefixes for the green CCD and CDA output file names
# -hxslt / -gxslt:  stylesheets for HR4E -> green CCD and green CCD -> CDA
#
parser = argparse.ArgumentParser()
parser.add_argument('-i', metavar='in-file', type=str, default="hr4e_patient.xml",
                    help="HR4E patient data file to read (default: %(default)s)")
parser.add_argument('-g', metavar='xpath_given_name', type=str, default="//gcda:greenCCD/gcda:header/gcda:personalInformation/gcda:patientInformation/gcda:personInformation/gcda:personName/gcda:given",
                    help="XPath expression selecting the patient's given name")
parser.add_argument('-f', metavar='xpath_family_name', type=str, default="//gcda:greenCCD/gcda:header/gcda:personalInformation/gcda:patientInformation/gcda:personInformation/gcda:personName/gcda:family",
                    help="XPath expression selecting the patient's family name")
parser.add_argument('-d', metavar='xpath_docID', type=str, default="//gcda:greenCCD/gcda:header/gcda:documentID/@root",
                    help="XPath expression selecting the document ID")
parser.add_argument('-n', metavar='namespace', type=str, default="AlschulerAssociates::GreenCDA",
                    help="namespace URI bound to the gcda prefix (default: %(default)s)")
parser.add_argument('-green', metavar='green-prefix', type=str, default="green_",
                    help="file name prefix for the green CCD output (default: %(default)s)")
parser.add_argument('-cda', metavar='cda-prefix', type=str, default="cda_",
                    help="file name prefix for the CDA output (default: %(default)s)")
parser.add_argument('-hxslt', metavar='hr4e-xslt-file', type=str, default="templates/hr4e_patient_to_ccd.xslt",
                    help="XSLT file transforming HR4E format to green CCD (default: %(default)s)")
parser.add_argument('-gxslt', metavar='green-xslt-file', type=str, default="templates/green_ccd.xslt",
                    help="XSLT file transforming green CCD to CDA (default: %(default)s)")

# Only the parse itself is guarded; parser.error() prints the message and exits.
try:
    results = parser.parse_args()
except IOError as msg:
    parser.error(str(msg))

# NOTE: `input` shadows the builtin, but the rest of the script reads this name.
input = results.i
given_xpath = results.g
family_xpath = results.f
docID_xpath = results.d
namespace_name = results.n
green_filename_prefix = results.green
cda_filename_prefix = results.cda
hr4e_xslt = results.hxslt
green_xslt = results.gxslt
# NOTE: loading the patient pickle is currently disabled.
#patient = pickle.load(file('tmp/patient.pkl','r+b'))
#Do some csv magic...
#print patient
#
# Parse the file
#
patient_doc = libxml2.parseFile(input)
#
# Use XPath to get given name, family name and document ID
#
# The XPath expressions use the "gcda" prefix, so that prefix must be bound to the
# document's namespace on the XPath context before they are evaluated.
#
def _last_match_content(context, expression, label):
    '''
    Return the content of the last node that `expression` selects in `context`.
    Exits the script with an error message naming `label` when nothing matches,
    instead of leaving the value undefined for the code that builds file names.
    '''
    matches = context.xpathEval(expression)
    if not matches:
        sys.exit("No " + label + " found in " + input + " using XPath " + expression)
    # When several nodes match, the last one wins.
    return matches[-1].content

ctxt = patient_doc.xpathNewContext()
try:
    ctxt.xpathRegisterNs("gcda", namespace_name)
    given_name = _last_match_content(ctxt, given_xpath, "given name")
    family_name = _last_match_content(ctxt, family_xpath, "family name")
    docID = _last_match_content(ctxt, docID_xpath, "document ID")
finally:
    # The libxml2 bindings do not release XPath contexts automatically.
    ctxt.xpathFreeContext()
# Construct new file names out of the pieces
#
# Every generated name shares the "<family>_<given>_<docID>" stem: the patient
# file adds ".xml", the directory wraps it in data/patient_data/.../, and the
# green CCD / CDA files put their prefix in front of the patient file name.
patient_stem = "_".join([family_name, given_name, docID])
new_filename = patient_stem + ".xml"
make_directory = "data/patient_data/" + patient_stem + "/"
green_filename = "".join((green_filename_prefix, new_filename))
cda_filename = "".join((cda_filename_prefix, new_filename))
#
# Create a directory for the patient
#
# Re-running the script for the same patient finds the directory already present;
# that case is reported and tolerated. Any other failure is re-raised.
try:
    os.makedirs(make_directory)
except OSError:
    if not os.path.isdir(make_directory):
        raise
    print("Reusing existing " + make_directory + " directory for patient data storage")
else:
    print("Just created " + make_directory + " directory for patient data storage")
print("######################################################################################")
print("")
#
# Write out the XML document and the new patient pkl
#
print("######################################################################################")
print("Generating patient file " + new_filename + " from input file " + input)
# The with-block closes the file even if serializing or writing fails.
with open(make_directory + new_filename, 'w') as new_file:
    new_file.write(patient_doc.serialize())
# NOTE: the pickle dump below is disabled, so no .pkl file is actually written
# even though the messages mention one.
print("Generating patient file " + new_filename[:-3] + "pkl from the tmp pickle")
#pickle.dump(patient, file(new_filename[0:len(new_filename)-3] + 'pkl','wb'))
print("Done. HR4E patient " + new_filename + " generated along with patient's pickle")
print("#######################################################################################")
# Build the command line (from the options to this script) that removes the hr4e elements
#
print("Transforming HR4E patient data in file " + new_filename + "...")
# An argument list with no shell keeps patient names that contain spaces or shell
# metacharacters from being split or interpreted.
command = ["java", "-jar", "saxon/saxon9he.jar",
           "-s:" + make_directory + new_filename,
           "-xsl:" + hr4e_xslt]
#print command
#
# Fire off a process to invoke java and run the HR4E stylesheet;
# its standard output is captured in the green CCD file
#
with open(make_directory + green_filename, 'wb') as green_file:
    try:
        p = subprocess.Popen(command, stdout=green_file)
    except OSError as err:
        sys.exit("Could not start java: " + str(err))
    retval = p.wait()
# Do not report success (or feed a broken file to the next step) when java failed.
if retval != 0:
    sys.exit("java exited with status " + str(retval) + " while transforming " + new_filename)
print("Done. green CCD patient data file " + green_filename + " generated with hr4e namespace elements removed.")
#
# Now build the command line to transform to CDA document
#
print("Transforming green patient data in file " + green_filename + "...")
# An argument list with no shell keeps patient names that contain spaces or shell
# metacharacters from being split or interpreted.
command = ["java", "-jar", "saxon/saxon9he.jar",
           "-s:" + make_directory + green_filename,
           "-xsl:" + green_xslt]
#print command
#
# Fire off a process to invoke java and run the green CCD stylesheet;
# its standard output is captured in the CDA file
#
with open(make_directory + cda_filename, 'wb') as cda_file:
    try:
        p = subprocess.Popen(command, stdout=cda_file)
    except OSError as err:
        sys.exit("Could not start java: " + str(err))
    retval = p.wait()
# Do not report success when java failed.
if retval != 0:
    sys.exit("java exited with status " + str(retval) + " while transforming " + green_filename)
print("Done. CDA patient data file " + cda_filename + " generated.")
print("")
print("")