File indexing completed on 2024-05-12 16:28:27
#!/usr/bin/env python
"""Validate OpenDocument (ODF) files against RelaxNG schemas.

Two back ends are available:

* ``jingodfvalidator`` runs the external Jing validator (``jing.jar``)
  through ``java``.  Foreign elements and attributes are stripped with
  ``removeForeign.xsl`` before validation.
* ``odfvalidator`` validates in-process with ``lxml.etree.RelaxNG`` and is
  used when ``jing.jar`` cannot be obtained.

Usage: pass one or more document paths on the command line.  For every
document that fails, an error message is printed.
"""

import hashlib
import os
import subprocess
import sys
import tempfile
import urllib
import zipfile

import lxml.etree

try:
    from urllib import urlretrieve  # Python 2
except ImportError:
    from urllib.request import urlretrieve  # Python 3

# Clark-notation name of the office:version attribute on the document root.
_VERSION_ATTR = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}version"

# SHA-1 digest the extracted jing.jar must have to be accepted.
_JING_SHA1 = "daa0cf7b1679264f8e68171f7f253255794773f7"


def getODFVersion(zip):
    """Return the ``office:version`` attribute of ``content.xml``.

    ``zip`` is an open ``zipfile.ZipFile``.  Returns ``None`` when the root
    element carries no version attribute.  Raises ``KeyError`` when the
    archive has no ``content.xml`` and ``lxml.etree.XMLSyntaxError`` when it
    is not well-formed.
    """
    member = zip.open("content.xml", "r")
    try:
        content = lxml.etree.parse(member)
    finally:
        member.close()
    return content.getroot().get(_VERSION_ATTR)


def getJing():
    """Return the path to ``jing.jar``, downloading it if necessary.

    The jar is looked up below ``sys.path[0]``.  If it is absent, the Jing
    distribution zip is downloaded into the current directory, the jar is
    extracted and its SHA-1 digest is checked.

    Returns ``None`` when the jar cannot be downloaded or extracted, or when
    the digest does not match, so that the caller can fall back to another
    validator.
    """
    jingjar = "jing-20091111/bin/jing.jar"
    path = os.path.join(sys.path[0], jingjar)
    if os.path.isfile(path):
        return path
    print("Downloading jing.jar")
    z = "jing-20091111.zip"
    try:
        urlretrieve("http://jing-trang.googlecode.com/files/" + z, z)
        archive = zipfile.ZipFile(z, "r")
        try:
            archive.extract(jingjar, sys.path[0])
        finally:
            # Close before unlinking; an open handle blocks removal on
            # some platforms.
            archive.close()
    except (IOError, zipfile.BadZipfile, KeyError) as e:
        # Network failure, a non-zip response, or a zip without the jar.
        print("Could not download jing.jar: " + str(e))
        return None
    finally:
        if os.path.exists(z):
            os.unlink(z)
    with open(path, "rb") as f:
        digest = hashlib.sha1(f.read()).hexdigest()
    if digest != _JING_SHA1:
        print("Wrong hash code: wrong file.")
        os.unlink(path)
        return None
    return path


# ODF version -> [document schema, manifest schema], relative to sys.path[0].
schemas = {
    "1.0": ["OpenDocument-schema-v1.0-os.rng",
            "OpenDocument-manifest-schema-v1.0-os.rng"],
    "1.1": ["OpenDocument-schema-v1.1.rng",
            "OpenDocument-manifest-schema-v1.1.rng"],
    "1.2": ["OpenDocument-v1.2-cs01-schema-calligra.rng",
            "OpenDocument-v1.2-cs01-manifest-schema.rng"]
}


def getScriptPath():
    """Return the directory containing the running script (symlinks resolved)."""
    return os.path.dirname(os.path.realpath(sys.argv[0]))


class jingodfvalidator:
    """Validate ODF documents by running Jing in a ``java`` subprocess."""

    # (archive member, index into schemas[version]) in validation order.
    _members = (
        ('content.xml', 0),
        ('styles.xml', 0),
        ('META-INF/manifest.xml', 1),
        ('meta.xml', 0),
        ('settings.xml', 0),
    )

    def __init__(self, jingjar):
        """Remember the jar path and load ``removeForeign.xsl``.

        The stylesheet is read from the directory of the running script.
        """
        self.jingjar = jingjar
        self.xmlparser = lxml.etree.XMLParser()
        xsltpath = os.path.join(getScriptPath(), "removeForeign.xsl")
        self.removeForeignXSLT = self.loadXSLT(xsltpath)

    def validate(self, odfpath):
        """Validate the document at ``odfpath``.

        A file that is not a zip archive is treated as flat XML and checked
        against the ODF 1.2 schema.  Returns an error string for the first
        problem found, ``None`` if the document is valid.
        """
        try:
            archive = zipfile.ZipFile(odfpath, 'r')
        except zipfile.BadZipfile:
            # Propagate the flat-XML result instead of dropping it.
            return self.validateFlatXML(odfpath)
        try:
            try:
                odfversion = getODFVersion(archive)
            except (KeyError, lxml.etree.XMLSyntaxError) as e:
                return 'content.xml:' + str(e)
            if odfversion not in schemas:
                return "Document has no version number"
            for member, index in self._members:
                err = self.validateFile(archive, member,
                                        schemas[odfversion][index])
                if err:
                    return err
            return None
        finally:
            archive.close()

    def validateFlatXML(self, filepath):
        """Validate a flat XML document against the ODF 1.2 schema.

        Validation runs on a temporary copy because the foreign-element
        removal step rewrites the file it is given; the caller's document
        must not be modified.  Returns an error string or ``None``.
        """
        schema = os.path.join(sys.path[0], schemas["1.2"][0])
        with open(filepath, "rb") as f:
            data = f.read()
        suffix = "_" + os.path.basename(filepath)
        return self._validateCopy(schema, data, filepath, suffix)

    def validateFile(self, zip, filepath, schema):
        """Validate archive member ``filepath`` of ``zip`` against ``schema``.

        ``schema`` is a file name relative to ``sys.path[0]``.  Returns an
        error string (also when the member is missing), ``None`` if valid.
        """
        schema = os.path.join(sys.path[0], schema)
        try:
            data = zip.read(filepath)
        except KeyError as e:
            return str(e)
        suffix = "_" + filepath.replace("/", "_")
        return self._validateCopy(schema, data, filepath, suffix)

    def _validateCopy(self, schema, data, name, suffix):
        """Write ``data`` to a temporary file and validate that file.

        ``name`` is used in the returned error message, ``suffix`` for the
        temporary file name.  Returns an error string or ``None``.
        """
        tmp = tempfile.NamedTemporaryFile(suffix=suffix)
        try:
            tmp.write(data)
            tmp.flush()
            try:
                failed = self.validateXML(schema, tmp.name)
            except lxml.etree.XMLSyntaxError as e:
                return name + ':' + str(e)
        finally:
            tmp.close()
        if failed:
            return name + " is not valid."
        return None

    def loadXML(self, filepath):
        """Parse ``filepath`` with the shared parser and return the tree."""
        # Binary mode lets the parser honour the XML encoding declaration.
        with open(filepath, 'rb') as f:
            return lxml.etree.parse(f, self.xmlparser)

    def loadXSLT(self, filepath):
        """Compile the stylesheet at ``filepath``.

        The transform is denied network reads and file writes.
        """
        xsl = self.loadXML(filepath)
        ac = lxml.etree.XSLTAccessControl(read_network=False, write_file=False)
        return lxml.etree.XSLT(xsl, access_control=ac)

    def removeForeign(self, filepath):
        """Apply the removeForeign stylesheet to ``filepath`` IN PLACE."""
        xml = self.loadXML(filepath)
        xml = self.removeForeignXSLT(xml)
        xml.write(filepath)

    # Validate the XML and optionally remove the foreign elements and
    # attributes first. Calligra currently writes ODF 1.2 Extended, which is
    # allowed to contain foreign elements and attributes. If Calligra adds a
    # mode to save plain ODF 1.2, the validator should not remove them when
    # validating.
    def validateXML(self, schema, xmlpath, removeForeign = True):
        """Run Jing on ``xmlpath``; return the subprocess exit status.

        A non-zero status is treated as "not valid" by the callers.  With
        ``removeForeign`` true, ``xmlpath`` is rewritten in place first, so
        callers should pass a scratch copy.
        """
        if removeForeign:
            self.removeForeign(xmlpath)

        # Per the Jing documentation, -i disables ID/IDREF checking.
        args = ["java", "-jar", self.jingjar, "-i", schema, xmlpath]
        return subprocess.call(args)


def createValidator(name):
    """Return an ``lxml.etree.RelaxNG`` validator for schema file ``name``.

    ``name`` is resolved relative to ``sys.path[0]``.
    """
    with open(os.path.join(sys.path[0], name), "rb") as f:
        xml = lxml.etree.parse(f)
    return lxml.etree.RelaxNG(xml)


class odfvalidator:
    """Validate ODF documents in-process with lxml's RelaxNG support."""

    def __init__(self):
        """Compile the document and manifest schema of every ODF version."""
        self.validators = {}
        for version, names in schemas.items():
            self.validators[version] = [
                createValidator(names[0]),
                createValidator(names[1])
            ]

    # returns error string on error, None otherwise
    def validate(self, odfpath):
        """Validate content.xml, styles.xml and the manifest of ``odfpath``."""
        try:
            archive = zipfile.ZipFile(odfpath, 'r')
        except zipfile.BadZipfile as e:
            return odfpath + ':' + str(e)
        try:
            try:
                odfversion = getODFVersion(archive)
            except (KeyError, lxml.etree.XMLSyntaxError) as e:
                return 'content.xml:' + str(e)
            if odfversion not in schemas:
                return "Document has no version number"
            checks = (
                ('content.xml', 0),
                ('styles.xml', 0),
                ('META-INF/manifest.xml', 1),
            )
            for member, index in checks:
                err = self.validateFile(archive, member,
                                        self.validators[odfversion][index])
                if err:
                    return err
            return None
        finally:
            archive.close()

    def validateFile(self, zip, file, validator):
        """Validate archive member ``file`` with ``validator``.

        Returns an error string when the member is missing, not well-formed
        or not valid; ``None`` otherwise.
        """
        try:
            xml = lxml.etree.XML(zip.read(file))
        except lxml.etree.XMLSyntaxError as e:
            return file + ':' + str(e)
        except KeyError as e:
            return str(e)
        if not validator.validate(xml):
            return file + ':' + str(validator.error_log.last_error)
        return None


if __name__ == '__main__':
    jingjar = getJing()
    if jingjar:
        validator = jingodfvalidator(jingjar)
    else:
        # No usable jing.jar: fall back to lxml's RelaxNG implementation.
        validator = odfvalidator()
    for f in sys.argv[1:]:
        if os.path.isfile(f):
            e = validator.validate(f)
            if e:
                print(str(e))