1
2
3
4
5 from __future__ import print_function
6
7 import time
8 from xml.etree.ElementTree import ElementTree, Element, SubElement
9
10
43
44
def _AddTextNode(parent, tag, text):
    """Append a child element named *tag* to *parent*, set its text, and return it."""
    node = SubElement(parent, tag)
    node.text = text
    return node


def PackageToXml(pkg, summary="N/A", trainingDataId='N/A', dataPerformance=None,
                 recommendedThreshold=None, classDescriptions=None, modelType=None,
                 modelOrganism=None):
    """ generates XML for a package that follows the RD_Model.dtd

    Arguments:
      - pkg: the model package; must provide GetNotes() and GetCalculator().
        The calculator must provide GetDescriptorNames(),
        GetDescriptorSummaries() and GetDescriptorFuncs().
      - summary: text of the ModelSummary element.
      - trainingDataId: text of the TrainingDataId element.
      - dataPerformance: optional sequence of 2-tuples:
          ( note, performance )
        where performance is of the form:
          ( accuracy, avgCorrectConf, avgIncorrectConf, confusionMatrix, thresh,
            avgSkipConf, nSkipped )
        the last four elements are optional. One ValidationData element is
        written per entry. None (the default) means "no validation data".
      - recommendedThreshold: if not None, written (via str()) as the
        RecommendedThreshold element. A threshold of 0 is written too.
      - classDescriptions: optional sequence of ( value, text ) 2-tuples.
      - modelType: optional text of the ModelType element.
      - modelOrganism: optional text of the ModelOrganism element.

    Returns:
      an ElementTree whose root is the RDModelInfo element.

    """
    head = Element("RDModelInfo")
    _AddTextNode(head, "ModelName", pkg.GetNotes() or "Unnamed model")
    _AddTextNode(head, "ModelSummary", summary)

    calc = pkg.GetCalculator()
    descrs = SubElement(head, "ModelDescriptors")
    # distinct loop names so the `summary` argument is not rebound
    descrInfo = zip(calc.GetDescriptorNames(), calc.GetDescriptorSummaries(),
                    calc.GetDescriptorFuncs())
    for descrName, descrSummary, func in descrInfo:
        descr = SubElement(descrs, "Descriptor")
        _AddTextNode(descr, "DescriptorName", descrName)
        _AddTextNode(descr, "DescriptorDetail", descrSummary)
        if hasattr(func, 'version'):
            # the version string is expected to be of the form "major.minor.patch"
            major, minor, patch = func.version.split('.')
            vers = SubElement(descr, "DescriptorVersion")
            _AddTextNode(vers, "VersionMajor", major)
            _AddTextNode(vers, "VersionMinor", minor)
            _AddTextNode(vers, "VersionPatch", patch)

    _AddTextNode(head, "TrainingDataId", trainingDataId)

    # `or ()` keeps the default immutable and tolerates an explicit None
    for description, perfData in dataPerformance or ():
        dataNode = SubElement(head, "ValidationData")
        _AddTextNode(dataNode, 'ScreenNote', description)
        perf = SubElement(dataNode, "PerformanceData")
        _ConvertModelPerformance(perf, perfData)

    # compare against None explicitly: 0 / 0.0 is a legitimate threshold
    if recommendedThreshold is not None:
        _AddTextNode(head, "RecommendedThreshold", str(recommendedThreshold))

    if classDescriptions:
        classNode = SubElement(head, "ClassDescriptions")
        for val, text in classDescriptions:
            descr = SubElement(classNode, 'ClassDescription')
            _AddTextNode(descr, 'ClassVal', str(val))
            _AddTextNode(descr, 'ClassText', str(text))

    if modelType:
        _AddTextNode(head, "ModelType", modelType)
    if modelOrganism:
        _AddTextNode(head, "ModelOrganism", modelOrganism)

    # a single "Created" revision stamped with the current local date
    hist = SubElement(head, "ModelHistory")
    revision = SubElement(hist, "Revision")
    tm = time.localtime()
    date = SubElement(revision, "RevisionDate")
    _AddTextNode(date, "Year", str(tm.tm_year))
    _AddTextNode(date, "Month", str(tm.tm_mon))
    _AddTextNode(date, "Day", str(tm.tm_mday))
    _AddTextNode(revision, "RevisionNote", "Created")
    return ElementTree(head)
127
128
if __name__ == '__main__':
    # Command-line driver: load a pickled package from the file named by the
    # first argument and print its XML description to stdout.
    import sys
    from xml.etree.ElementTree import tostring

    from rdkit.six.moves import cPickle

    if len(sys.argv) < 2:
        sys.exit('Usage: %s <pickled package file>' % sys.argv[0])

    # NOTE(security): unpickling can execute arbitrary code; only run this on
    # package files that come from a trusted source.
    with open(sys.argv[1], 'rb') as inF:
        pkg = cPickle.load(inF)
    perf = (.80, .95, .70, [[4, 1], [1, 4]])
    tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)])
    # tostring() yields bytes on Python 3 and str on Python 2; writing the
    # tree into a text StringIO with the default encoding fails on Python 3.
    txt = tostring(tree.getroot())
    if not isinstance(txt, str):
        txt = txt.decode('ascii')
    header = """<?xml version="1.0"?>
<!DOCTYPE RDModelInfo PUBLIC "-//RD//DTD RDModelInfo //EN" "RD_Model.dtd">
"""
    print(header)
    # crude pretty-printing: put each element on its own line
    print(txt.replace('><', '>\n<'))
144