Package rdkit :: Package ML :: Package ModelPackage :: Module PackageUtils
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.ModelPackage.PackageUtils

  1  # 
  2  # Copyright (C) 2003 Rational Discovery LLC 
  3  # All rights are reserved. 
  4  # 
  5  from __future__ import print_function 
  6  # from elementtree.ElementTree import ElementTree, Element, SubElement 
  7  import time 
  8  from xml.etree.ElementTree import ElementTree, Element, SubElement 
  9   
 10   
11 -def _ConvertModelPerformance(perf, modelPerf):
12 if len(modelPerf) > 3: 13 confMat = modelPerf[3] 14 accum = 0 15 for row in confMat: 16 for entry in row: 17 accum += entry 18 accum = str(accum) 19 else: 20 confMat = None 21 accum = 'N/A' 22 23 if len(modelPerf) > 4: 24 elem = SubElement(perf, "ScreenThreshold") 25 elem.text = str(modelPerf[4]) 26 elem = SubElement(perf, "NumScreened") 27 elem.text = accum 28 if len(modelPerf) > 4: 29 elem = SubElement(perf, "NumSkipped") 30 elem.text = str(modelPerf[6]) 31 elem = SubElement(perf, "Accuracy") 32 elem.text = str(modelPerf[0]) 33 elem = SubElement(perf, "AvgCorrectConf") 34 elem.text = str(modelPerf[1]) 35 elem = SubElement(perf, "AvgIncorrectConf") 36 elem.text = str(modelPerf[2]) 37 if len(modelPerf) > 4: 38 elem = SubElement(perf, "AvgSkipConf") 39 elem.text = str(modelPerf[5]) 40 if confMat: 41 elem = SubElement(perf, "ConfusionMatrix") 42 elem.text = str(confMat)
43 44
def _AddTextChild(parent, tag, text):
    """Create a *tag* child element under *parent*, set its text, return it."""
    child = SubElement(parent, tag)
    child.text = text
    return child


def PackageToXml(pkg, summary="N/A", trainingDataId='N/A', dataPerformance=None,
                 recommendedThreshold=None, classDescriptions=None, modelType=None,
                 modelOrganism=None):
    """ generates XML for a package that follows the RD_Model.dtd

    Arguments:
      - pkg: the model package; its GetNotes() and GetCalculator() methods
        are used. The calculator must provide GetDescriptorNames(),
        GetDescriptorSummaries() and GetDescriptorFuncs().
      - summary: text of the ModelSummary element.
      - trainingDataId: text of the TrainingDataId element.
      - dataPerformance: if provided, a sequence of 2-tuples:
          ( note, performance )
        where performance is of the form:
          ( accuracy, avgCorrectConf, avgIncorrectConf, confusionMatrix,
            thresh, avgSkipConf, nSkipped )
        the last four elements are optional
      - recommendedThreshold: written (via str()) whenever it is not None,
        so a threshold of 0 is preserved.
      - classDescriptions: optional sequence of (value, text) pairs.
      - modelType, modelOrganism: optional text, written only when truthy.

    Returns an ElementTree rooted at an "RDModelInfo" element.

    Raises ValueError if a descriptor function has a `version` attribute that
    does not consist of exactly three dot-separated parts.
    """
    if dataPerformance is None:
        dataPerformance = ()

    head = Element("RDModelInfo")
    _AddTextChild(head, "ModelName", pkg.GetNotes() or "Unnamed model")
    _AddTextChild(head, "ModelSummary", summary)

    calc = pkg.GetCalculator()
    descrs = SubElement(head, "ModelDescriptors")
    # Dedicated loop names: the loop must not rebind the `summary` parameter.
    descrInfo = zip(calc.GetDescriptorNames(), calc.GetDescriptorSummaries(),
                    calc.GetDescriptorFuncs())
    for descrName, descrSummary, func in descrInfo:
        descr = SubElement(descrs, "Descriptor")
        _AddTextChild(descr, "DescriptorName", descrName)
        _AddTextChild(descr, "DescriptorDetail", descrSummary)
        if hasattr(func, 'version'):
            vers = SubElement(descr, "DescriptorVersion")
            major, minor, patch = func.version.split('.')
            _AddTextChild(vers, "VersionMajor", major)
            _AddTextChild(vers, "VersionMinor", minor)
            _AddTextChild(vers, "VersionPatch", patch)

    _AddTextChild(head, "TrainingDataId", trainingDataId)

    for description, perfData in dataPerformance:
        dataNode = SubElement(head, "ValidationData")
        _AddTextChild(dataNode, 'ScreenNote', description)
        perf = SubElement(dataNode, "PerformanceData")
        _ConvertModelPerformance(perf, perfData)

    # `is not None` rather than truthiness: 0 / 0.0 is a valid threshold.
    if recommendedThreshold is not None:
        _AddTextChild(head, "RecommendedThreshold", str(recommendedThreshold))

    if classDescriptions:
        classNode = SubElement(head, "ClassDescriptions")
        for val, text in classDescriptions:
            descr = SubElement(classNode, 'ClassDescription')
            _AddTextChild(descr, 'ClassVal', str(val))
            _AddTextChild(descr, 'ClassText', str(text))

    if modelType:
        _AddTextChild(head, "ModelType", modelType)
    if modelOrganism:
        _AddTextChild(head, "ModelOrganism", modelOrganism)

    # Every generated document carries one "Created" revision stamped with
    # the current local date.
    hist = SubElement(head, "ModelHistory")
    revision = SubElement(hist, "Revision")
    now = time.localtime()
    date = SubElement(revision, "RevisionDate")
    _AddTextChild(date, "Year", str(now.tm_year))
    _AddTextChild(date, "Month", str(now.tm_mon))
    _AddTextChild(date, "Day", str(now.tm_mday))
    _AddTextChild(revision, "RevisionNote", "Created")
    return ElementTree(head)
if __name__ == '__main__':  # pragma: nocover
    # Demo driver: load a pickled model package named on the command line and
    # print its XML description together with a fixed example performance tuple.
    import sys
    from rdkit.six.moves import cPickle
    from rdkit.six import StringIO

    if len(sys.argv) < 2:
        sys.exit('Usage: PackageUtils.py <pickled model package>')

    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    # The context manager ensures the file handle is closed after loading.
    with open(sys.argv[1], 'rb') as pklFile:
        pkg = cPickle.load(pklFile)

    perf = (.80, .95, .70, [[4, 1], [1, 4]])
    tree = PackageToXml(pkg, dataPerformance=[('training data performance', perf)])

    # NOTE(review): on Python 3, ElementTree.write() into a text buffer may
    # require encoding='unicode' -- confirm against the supported versions.
    buf = StringIO()
    tree.write(buf)
    txt = buf.getvalue()
    header = """<?xml version="1.0"?>
<!DOCTYPE RDModelInfo PUBLIC "-//RD//DTD RDModelInfo //EN" "RD_Model.dtd">
"""
    print(header)
    # Put each element on its own line for readability.
    print(txt.replace('><', '>\n<'))