File indexing completed on 2025-02-02 04:22:25
"""
SPDX-FileCopyrightText: 2018 Wolthera van Hövell tot Westerflier <griffinvalley@gmail.com>

This file is part of the Comics Project Management Tools(CPMT).

SPDX-License-Identifier: GPL-3.0-or-later
"""

"""
A class for getting translatable strings out.

This class does several things:
1) It can parse through kra files' maindoc.xml, and then through the svgs that file is pointing at.
2) It can parse a preexisting POT file to ensure it isn't making duplicates.
3) It can write a POT file.
4) Writing to a csv file was considered until the realisation hit that comic dialog itself contains commas.
"""

import sys
import os
import csv
import zipfile
import types
from datetime import datetime, timezone
from xml.dom import minidom

try:
    from PyQt5.QtCore import QDateTime, Qt
except ImportError:
    # Outside of Krita (no PyQt5 available) the creation date falls back to
    # the standard library; see translation_scraper.write_pot.
    QDateTime = None
    Qt = None


def _iter_elements(node):
    """Yield every descendant element of *node* in document order.

    Only element nodes are yielded, so comments, processing instructions and
    text nodes (none of which have a ``tagName``) are skipped.
    """
    for child in node.childNodes:
        if child.nodeType == minidom.Node.ELEMENT_NODE:
            yield child
            yield from _iter_elements(child)


def _text_content(element):
    """Return the text and CDATA found directly inside *element*, joined."""
    textual = (minidom.Node.TEXT_NODE, minidom.Node.CDATA_SECTION_NODE)
    return "".join(n.data for n in element.childNodes if n.nodeType in textual)


class translation_scraper():
    """Collect translatable strings from project pages and write a POT file.

    Instance state:
      projectURL         -- root folder of the project.
      translation_folder -- folder (relative to projectURL) holding the POT.
      textLayerNameList  -- substrings; a vector layer whose name contains one
                            of them is scanned for text.
      translationDict    -- maps a key (msgctxt, or the text itself) to an
                            entry dict with "text" and optionally "trans",
                            "translator" and "extract".
      translationKeys    -- keys found in the pages, in order of appearance.
      pageTitleKeys      -- titles of pages tagged with the "acbf_title" keyword.
      projectName        -- base name of the POT file.
    """

    # Class-level defaults. The mutable ones are always rebound per instance
    # in __init__ so that no state is shared between scrapers.
    projectURL = str()
    translation_folder = str()
    textLayerNameList = []
    translationDict = {}
    translationKeys = []  # separate so that the keys will be somewhat according to the order of appearance.
    pageTitleKeys = []
    projectName = str()
    languageKey = "AA_language"

    def __init__(self, projectURL=str(), translation_folder=str(), textLayerNameList=None, projectName=str()):
        """Set up the scraper and load a preexisting POT file, if any.

        The translation folder is searched for the first file whose name ends
        with ``projectName + '.pot'``. A missing folder is not an error: there
        is simply nothing to load yet.
        """
        self.projectURL = projectURL
        self.projectName = projectName
        self.translation_folder = translation_folder
        self.textLayerNameList = [] if textLayerNameList is None else textLayerNameList
        self.translationDict = {}
        self.translationKeys = []
        self.pageTitleKeys = []

        # Check for a preexisting translation file and parse that.
        folder = os.path.join(self.projectURL, self.translation_folder)
        if not os.path.isdir(folder):
            return
        with os.scandir(folder) as entries:
            for entry in entries:
                if entry.name.endswith(projectName + '.pot') and entry.is_file():
                    self.parse_pot(entry.path)
                    break

    def start(self, pagesList, language, metaData=None):
        """Scrape every page in *pagesList* and write the POT file.

        pagesList -- page file paths relative to the project root.
        language  -- stored under languageKey unless one is already recorded.
        metaData  -- optional dict with "title", "summary", "keywords" and
                     "transnotes" strings for the metadata entries.
        """
        if metaData is None:
            metaData = {}
        if self.languageKey not in self.translationDict:
            self.translationDict[self.languageKey] = language
        for p in pagesList:
            self.get_svg_layers(os.path.join(self.projectURL, p))
        self.write_pot(metaData)

    @staticmethod
    def _escape(string):
        """Escape quotes and hashes the way this class writes them to a POT."""
        return string.replace("\"", "\\\"").replace("\'", "\\\'").replace("#", "\\#")

    @staticmethod
    def _unescape(string):
        """Reverse _escape for a value read from a POT file."""
        return string.replace("\\\"", "\"").replace("\\\'", "\'").replace("\\#", "#")

    @classmethod
    def _read_quoted(cls, line, keyword=""):
        """Return the unescaped value of a ``keyword "value"`` line.

        The keyword is removed by length rather than with str.strip(), which
        treats its argument as a set of characters and would also eat leading
        letters of the message itself. Only the outer pair of quotes is
        removed, and any line ending (LF, CRLF or none) is accepted.
        """
        value = line[len(keyword):].strip()
        if value.startswith("\""):
            value = value[1:]
        if value.endswith("\""):
            value = value[:-1]
        return cls._unescape(value)

    def _store_entry(self, key, entry):
        """Store *entry* under *key*, defaulting the key to the entry's text."""
        if not entry:
            return
        if key is None:
            key = entry.get("text")
        # Entries without a usable key (such as the header, whose msgid is
        # empty) are not kept.
        if key:
            self.translationDict[key] = entry

    def parse_pot(self, location):
        """Read the POT/PO file at *location* into translationDict.

        Entries are separated by blank lines. Each entry is keyed by its
        msgctxt when it has one and by its msgid otherwise. Comments are
        stored without their "# " / "#. " prefix so that write_pot can put
        the prefix back without duplicating it.
        """
        if not os.path.isfile(location):
            return

        multiLine = ""  # which field ("text"/"trans") a continuation line extends
        key = None
        entry = {}

        with open(location, "r", newline="", encoding="utf8") as file:
            for line in file:
                if line.isspace():
                    self._store_entry(key, entry)
                    entry = {}
                    key = None
                    multiLine = ""
                elif line.startswith("msgid "):
                    entry["text"] = self._read_quoted(line, "msgid ")
                    multiLine = "text"
                elif line.startswith("msgstr "):
                    entry["trans"] = self._read_quoted(line, "msgstr ")
                    multiLine = "trans"
                elif line.startswith("# "):
                    # Translator comment
                    entry["translator"] = line[len("# "):].rstrip("\r\n")
                elif line.startswith("#. "):
                    # Extracted comment
                    entry["extract"] = line[len("#. "):].rstrip("\r\n")
                elif line.startswith("msgctxt "):
                    key = self._read_quoted(line, "msgctxt ")
                elif line.startswith("\"") and multiLine:
                    entry[multiLine] += self._read_quoted(line)
        # The last entry is usually not followed by a blank line.
        self._store_entry(key, entry)

    def get_svg_layers(self, location):
        """Scrape the text layers and the page title of the page at *location*.

        The page is read as a zip archive: maindoc.xml lists the layers, the
        svg of every matching vector layer is handed to get_txt, and
        documentinfo.xml supplies the title and keywords. The archive is only
        read, so it is opened read-only and closed even when parsing fails.
        """
        with zipfile.ZipFile(location, "r") as page:
            doc = minidom.parseString(page.read("maindoc.xml")).documentElement
            candidates = [name for name in page.namelist() if name.endswith('svg')]

            for layer in _iter_elements(doc):
                if layer.tagName != "layer" or layer.getAttribute("nodetype") != "shapelayer":
                    continue
                layerName = layer.getAttribute("name")
                if not any(t in layerName for t in self.textLayerNameList):
                    continue
                svgPath = str(layer.getAttribute("filename") + ".shapelayer/content.svg")
                for c in candidates:
                    if svgPath in c:
                        self.get_txt(page.read(c))

            # Get page title if the keywords contain acbf_title
            info = minidom.parseString(page.read("documentinfo.xml")).documentElement
            title = ""
            keywords = []
            for element in _iter_elements(info):
                if element.tagName == "title":
                    title = _text_content(element)
                elif element.tagName == "keyword":
                    keywords = [k.strip() for k in _text_content(element).split(",")]

            if "acbf_title" in keywords:
                self.pageTitleKeys.append(title)

    def get_txt(self, string):
        """Register the contents of every <text> element in the svg *string*.

        The key is the serialised markup inside the element. Elements nested
        inside a <text> are part of that key and are not visited separately.
        Empty text elements are skipped, because an empty msgid would collide
        with the POT header entry.
        """
        svg = minidom.parseString(string)
        # parse through string as if svg.

        def parseThroughChildNodes(node):
            for childNode in node.childNodes:
                if childNode.nodeType != minidom.Node.ELEMENT_NODE:
                    continue
                if childNode.tagName == "text":
                    text = "".join(c.toxml() for c in childNode.childNodes)
                    if not text:
                        continue
                    if text not in self.translationDict:
                        self.translationDict[text] = {"text": text}
                    if text not in self.translationKeys:
                        self.translationKeys.append(text)
                elif childNode.childNodes:
                    parseThroughChildNodes(childNode)

        parseThroughChildNodes(svg.documentElement)

    def write_pot(self, metaData):
        """Write the collected strings to ``<projectName>.pot``.

        The file holds the header, four metadata entries taken from
        *metaData*, one "@page-title" entry per collected page title and one
        entry per key in translationKeys. msgstr is always left empty. The
        translation folder is created when it does not exist yet.
        """
        quote = "\""
        newLine = "\n"
        metaData = metaData or {}
        folder = os.path.join(self.projectURL, self.translation_folder)
        if folder:
            os.makedirs(folder, exist_ok=True)
        location = os.path.join(folder, self.projectName + ".pot")

        if QDateTime is not None:
            date = QDateTime.currentDateTimeUtc().toString(Qt.ISODate)
        else:
            date = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        def quoted(keyword, value):
            # NOTE(review): line breaks inside a value are written as they
            # are, as before; confirm against the PO reader before escaping.
            return keyword + " " + quote + self._escape(value) + quote + newLine

        lines = [
            quoted("msgid", ""),
            quoted("msgstr", ""),
            quote + "POT-Creation-Date:" + date + "\\n" + quote + newLine,
            quote + "Content-Type: text/plain; charset=UTF-8\\n" + quote + newLine,
            quote + "Content-Transfer-Encoding: 8bit\\n" + quote + newLine,
            quote + "X-Generator: Krita Comics Project Manager Tools Plugin\\n" + quote + newLine,
        ]

        # (extracted comment, context, key in metaData)
        metaEntries = (
            ("Title of the work", "@meta-title", "title"),
            ("The summary", "@meta-summary", "summary"),
            ("The keywords, these need to be comma separated.", "@meta-keywords", "keywords"),
            ("The header that will prepend translator's notes", "@meta-translator", "transnotes"),
        )
        for comment, context, field in metaEntries:
            lines.append(newLine)
            lines.append("#. " + comment + newLine)
            lines.append(quoted("msgctxt", context))
            lines.append(quoted("msgid", metaData.get(field, "")))
            lines.append(quoted("msgstr", ""))

        for title in self.pageTitleKeys:
            lines.append(newLine)
            lines.append(quoted("msgctxt", "@page-title"))
            lines.append(quoted("msgid", title))
            lines.append(quoted("msgstr", ""))

        for key in self.translationKeys:
            if key == self.languageKey:
                continue
            entry = self.translationDict[key]
            lines.append(newLine)
            if "translator" in entry:
                lines.append("# " + entry["translator"] + newLine)
            if "extract" in entry:
                lines.append("#. " + entry["extract"] + newLine)
            string = entry["text"]
            # A key that differs from the text is a context of its own.
            if string != key:
                lines.append(quoted("msgctxt", key))
            lines.append(quoted("msgid", string))
            lines.append(quoted("msgstr", ""))

        with open(location, "w", newline="", encoding="utf8") as file:
            file.writelines(lines)
        print("CPMT: Translations have been written to:", location)