File indexing completed on 2025-02-02 04:11:12
#!/usr/bin/env python3

# SPDX-FileCopyrightText: 2021 Mattia Basaglia <dev@dragon.best>
# SPDX-License-Identifier: GPL-3.0-or-later

"""Parse a Unicode ``emoji-test.txt`` file and export its contents.

The parsed data is a tree of :class:`EmojiGroup` objects (groups containing
subgroups containing :class:`Emoji`).  It can be written to standard output
as JSON, as a C++ source file, or as an HTML chart.
"""

import re
import sys
import json
import enum
import html
import datetime
import argparse
from urllib.request import urlopen


# Encoding used for local files and for responses that do not declare one.
_DEFAULT_ENCODING = "utf-8"

# Code point sequences whose presence inside an emoji sequence marks the
# emoji as a variant of the given kind.
_QUALIFIERS = {
    "skin_tone": ("1F3FB", "1F3FC", "1F3FD", "1F3FE", "1F3FF"),
    "gender": ("200D 2640", "200D 2642"),
    "hair_style": ("1F9B0", "1F9B1", "1F9B3", "1F9B2"),
}

# The qualifiers are matched surrounded by spaces so that only whole code
# points match, and so that a sequence *starting* with a qualifier (i.e. the
# stand-alone component itself) is not flagged as a variant.
_QUALIFIER_PATTERNS = {
    kind: tuple(" %s " % code for code in codes)
    for kind, codes in _QUALIFIERS.items()
}

# Matches either a "# group:" / "# subgroup:" header or an emoji data line.
_LINE_PATTERN = re.compile(
    r'(?P<group>^\s*# (?P<grp_sub>sub)?group:\s*(?P<grp_name>.*))|' +
    r'(?P<emoji>^(?P<seq>[0-9A-Fa-f ]+); (?P<status>[-a-z ]+)# (?P<unicode>[^ ]+) (?:E(?P<version>[0-9]+\.[0-9]+) )?(?P<name>.+)$)'
)

_HTML_HEAD = """<!DOCTYPE html>
<html>
<head>
    <title>Emoji Chart</title>
    <style>
        .emoji-group {
            display: flex;
            flex-flow: row wrap;
        }
        .emoji-text {
            font-size: 64px;
            padding: 0.5ex;
            border: 1px solid transparent;
            border-top-left-radius: 3px;
            border-top-right-radius: 3px;
        }
        .emoji-details {
            margin: 0;
            padding: 1ex;
            border: 1px solid black;
            border-radius: 3px;
            border-top-left-radius: 0;
            list-style: none;
            position: absolute;
            background: white;
            display: none;
        }
        .emoji:hover .emoji-details {
            display: block;
        }
        .emoji {
            margin: 0ex;
        }
        .emoji:hover .emoji-text {
            background: rgb(200, 200, 200);
            border-color: black;
        }
    </style>
</head>
"""


class EmojiObject:
    """Base class for objects convertible to plain, JSON-friendly dicts."""

    def to_dict(self):
        """Return the instance attributes as a dict, converted recursively."""
        return {
            key: EmojiObject._to_dict(value)
            for key, value in vars(self).items()
        }

    @staticmethod
    def _to_dict(v):
        """Convert a single attribute value for :meth:`to_dict`.

        Nested ``EmojiObject`` instances become dicts, enum members become
        their name, lists are converted element-wise and anything else is
        returned unchanged.
        """
        if isinstance(v, EmojiObject):
            return v.to_dict()
        if isinstance(v, enum.Enum):
            # ``name`` is an attribute of enum members, not a method.
            return v.name
        if isinstance(v, list):
            return [EmojiObject._to_dict(item) for item in v]
        return v


class EmojiGroup(EmojiObject):
    """A named node of the emoji tree holding subgroups or emoji."""

    def __init__(self, name):
        self.name = name
        self.children = []

    def prune(self):
        """Recursively drop empty child groups.

        Returns ``True`` when this group ends up with no children (so the
        caller should discard it), ``False`` otherwise.
        """
        if not self.children:
            return True

        # A group whose children are emoji is a non-empty leaf group.
        if isinstance(self.children[0], Emoji):
            return False

        self.children = [child for child in self.children if not child.prune()]
        return not self.children


class Emoji(EmojiObject):
    """A single emoji entry."""

    class Status(enum.Enum):
        """Qualification status of an emoji entry."""
        component = enum.auto()
        fully_qualified = enum.auto()
        minimally_qualified = enum.auto()
        unqualified = enum.auto()

    def __init__(self, emoji, name, status, since_version, flag_gender=False, flag_skin_tone=False, flag_hair_style=False):
        self.emoji = emoji
        self.name = name
        self.status = status
        self.flag_gender = flag_gender
        self.flag_skin_tone = flag_skin_tone
        self.flag_hair_style = flag_hair_style
        self.since_version = since_version

    @staticmethod
    def hexord(char):
        """Return the code point of ``char`` as lowercase hexadecimal."""
        return "%x" % ord(char)

    @property
    def hexes(self):
        """Iterator over the hexadecimal code points of the emoji string."""
        return map(self.hexord, self.emoji)

    @property
    def slug(self):
        """Hexadecimal code points joined by dashes."""
        return "-".join(self.hexes)

    @property
    def hex_title(self):
        """Hexadecimal code points joined by spaces."""
        return " ".join(self.hexes)

    def __str__(self):
        return "%s - %s" % (self.emoji, self.hex_title)

    @staticmethod
    def slug2emoji(slug):
        """Convert a dash-separated hex slug back into the emoji string.

        Returns an empty string when the slug contains something that is not
        a valid hexadecimal code point.
        """
        try:
            return "".join(chr(int(part, 16)) for part in slug.split("-"))
        except (ValueError, OverflowError):
            return ""

    def is_variant(self):
        """Tell whether any of the gender / hair / skin tone flags is set."""
        return self.flag_gender or self.flag_hair_style or self.flag_skin_tone


def _open_url(uri):
    """Open ``uri`` with ``urlopen`` and return ``(response, encoding)``.

    Raises ``Exception`` when the response code is not 200.  The encoding
    falls back to UTF-8 when the response does not declare a charset.
    """
    response = urlopen(uri)
    if response.code != 200:
        code = response.code
        response.close()
        raise Exception("Could not fetch file: %s" % code)
    return response, response.headers.get_content_charset() or _DEFAULT_ENCODING


def pull_emoji(uri, max_version, ignore_variants, fully_qualified):
    """Parse an emoji test file into a tree of groups.

    :param uri: URL (anything starting with ``http``) or local file path.
    :param max_version: version tuple; emoji introduced in a later version
        are skipped.  A falsy value disables the check.
    :param ignore_variants: when true, emoji flagged as gender, skin tone or
        hair style variants are skipped.
    :param fully_qualified: when true, only fully qualified emoji are kept
        and headers named "Component" are skipped.
    :return: the root :class:`EmojiGroup`, with empty groups pruned.
    :raises ValueError: if a subgroup or emoji line appears before the group
        or subgroup that should contain it.
    """
    if uri.startswith("http"):
        stream, encoding = _open_url(uri)
    else:
        stream, encoding = open(uri, "rb"), _DEFAULT_ENCODING

    current_group = None
    current_subgroup = None
    table = EmojiGroup("emoji")

    with stream:
        for line in stream:
            decoded = line.decode(encoding).strip()
            match = _LINE_PATTERN.match(decoded)
            if not match:
                continue

            if match.group("group"):
                model = EmojiGroup(match.group("grp_name"))
                if fully_qualified and model.name == "Component":
                    continue

                if match.group("grp_sub"):
                    if current_group is None:
                        raise ValueError("Subgroup %r found outside of a group" % model.name)
                    current_subgroup = model
                    current_group.children.append(model)
                else:
                    current_group = model
                    table.children.append(model)
            elif match.group("emoji"):
                version = version_tuple(match.group("version"))
                if max_version and version > max_version:
                    continue

                status = Emoji.Status[match.group("status").strip().replace("-", "_")]
                if fully_qualified and status != Emoji.Status.fully_qualified:
                    continue

                seq = match.group("seq")
                flags = {
                    "flag_" + kind: any(pattern in seq for pattern in patterns)
                    for kind, patterns in _QUALIFIER_PATTERNS.items()
                }
                emoji = Emoji(match.group("unicode"), match.group("name"), status, version, **flags)

                if not ignore_variants or not emoji.is_variant():
                    if current_subgroup is None:
                        raise ValueError("Emoji %r found outside of a subgroup" % emoji.name)
                    current_subgroup.children.append(emoji)

    table.prune()
    return table


def version_tuple(version_string):
    """Convert a dotted version string into a tuple of ints.

    An empty string or ``None`` gives an empty tuple.
    """
    if not version_string:
        return tuple()
    return tuple(map(int, version_string.split(".")))


def write_line(x, indent, comma=True):
    """Write ``x`` to stdout indented by ``indent + 4`` spaces.

    A trailing comma is added before the newline unless ``comma`` is false.
    """
    sys.stdout.write((indent + 4) * ' ')
    sys.stdout.write(x)
    if comma:
        sys.stdout.write(",")
    sys.stdout.write("\n")


def _emoji_to_cxx_model(grp, indent):
    """Write the C++ initializer for a single emoji (name, UTF-8 bytes, slug)."""
    sys.stdout.write(indent * ' ')
    sys.stdout.write('{\n')
    write_line(json.dumps(grp.name), indent)
    # The emoji is emitted as escaped UTF-8 bytes to keep the output ASCII.
    write_line('"%s"' % "".join("\\x%02x" % c for c in grp.emoji.encode("utf8")), indent)
    write_line(json.dumps(grp.slug), indent)
    sys.stdout.write(indent * ' ')
    sys.stdout.write('},\n')


def _subgroup_to_cxx_model(grp, groups, prefix):
    """Write the C++ definition of a subgroup.

    The variable name is ``<prefix>_<index>`` where the index is the current
    length of ``groups``; the name is appended to ``groups``.
    """
    name = "%s_%s" % (prefix, len(groups))
    sys.stdout.write('static const glaxnimate::emoji::EmojiSubGroup %s {\n' % name)
    groups.append(name)
    indent = 0
    write_line(json.dumps(grp.name), indent)
    write_line('{', indent, False)
    for child in grp.children:
        _emoji_to_cxx_model(child, indent + 8)
    write_line('}', indent, False)
    sys.stdout.write('};\n')


def _group_to_cxx_model(grp, indent, groups):
    """Write the C++ definitions of a group and all of its subgroups.

    The group variable is named ``group_<index>`` where the index is the
    current length of ``groups``; the name is appended to ``groups``.
    """
    name = "group_%s" % len(groups)
    subgroups = []
    for child in grp.children:
        _subgroup_to_cxx_model(child, subgroups, name)

    sys.stdout.write('static const glaxnimate::emoji::EmojiGroup %s {\n' % name)
    groups.append(name)

    write_line(json.dumps(grp.name), indent)
    write_line('{', indent, False)
    # ``subgroups`` holds the variable names that were just written out.
    for subgroup_name in subgroups:
        write_line("&%s" % subgroup_name, indent + 4, True)
    write_line('}', indent, False)
    sys.stdout.write('};\n')


def to_cxx_model(table):
    """Print the whole emoji table as a C++ source file."""
    print("// SPDX-FileCopyrightText: 2019-%s Mattia Basaglia <dev@dragon.best>" % datetime.date.today().year)
    print("// SPDX-License-Identifier: GPL-3.0-or-later")
    print('#include <QtGlobal>')
    print('#include "emoji_data.hpp"\n\n')
    print("// File generated by update_emoji.py %s\n\n" % " ".join(sys.argv[1:]))
    print("#ifndef Q_OS_WIN32")
    groups = []
    for child in table.children:
        _group_to_cxx_model(child, 0, groups)
    print("const std::vector<const glaxnimate::emoji::EmojiGroup*> glaxnimate::emoji::EmojiGroup::table = {")
    for name in groups:
        print("    &%s," % name)
    print("};")
    print("#else")
    print("const std::vector<const glaxnimate::emoji::EmojiGroup*> glaxnimate::emoji::EmojiGroup::table = {};")
    print("#endif")


def to_slug(string):
    """Replace every character that is not ASCII alphanumeric with a dash."""
    return "".join(
        c if c.isascii() and c.isalnum() else "-"
        for c in string
    )


def _emoji_to_html(emoji: Emoji):
    """Print the HTML element describing a single emoji."""
    # Names are escaped (quotes included) because they end up inside a
    # single-quoted attribute and in element text.
    name = html.escape(emoji.name, quote=True)
    print("<div class='emoji' title='{name}'>".format(name=name))
    print("<span class='emoji-text'>{emoji}</span>".format(emoji=html.escape(emoji.emoji)))
    print("<ul class='emoji-details'>")
    print("<li>{}</li>".format(name))
    print("<li>Emoji {}</li>".format(".".join(map(str, emoji.since_version))))
    print("<li>{}</li>".format(emoji.status.name))
    print("<li><a href='https://emojipedia.org/{}/'>Emojipedia</a></li>".format(to_slug(emoji.name)))
    print("</ul>")
    print("</div>")


def _group_to_html(group: EmojiGroup, level: int):
    """Print a heading for ``group`` followed by its emoji or subgroups."""
    print("<h{level} id='{id}'><a href='#{id}'>{name}</a></h{level}>".format(
        level=level, name=html.escape(group.name), id=to_slug(group.name)
    ))
    if not group.children:
        # Nothing to list (e.g. every entry was filtered out).
        return

    if isinstance(group.children[0], Emoji):
        print("<div class='emoji-group'>")
        for emoji in group.children:
            _emoji_to_html(emoji)
        print("</div>")
    else:
        for sub in group.children:
            _group_to_html(sub, level+1)


def to_html(table):
    """Print the whole emoji table as an HTML page."""
    print(_HTML_HEAD)
    print("<body>")
    _group_to_html(table, 1)
    print("</body></html>")


def main():
    """Command line entry point."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--emoji-version",
        "-e",
        default="latest",
        help="Load a specific version of the Emoji standard"
    )
    parser.add_argument(
        "--file",
        "-f",
        default="https://unicode.org/Public/emoji/%s/emoji-test.txt",
        help="Input file"
    )
    parser.add_argument(
        "--max-version",
        default=None,
        type=version_tuple,
        help="Max emoji version"
    )
    parser.add_argument(
        "--ignore-variants",
        action="store_true",
        help="Ignore skin tone / gender / hair variants"
    )
    parser.add_argument(
        "--fully-qualified",
        action="store_true",
        help="Ignore components / unqualified emoji"
    )
    parser.add_argument(
        "action",
        choices=["download", "json", "model", "html"]
    )

    ns = parser.parse_args()

    filename = ns.file
    if "%s" in filename:
        filename = filename % ns.emoji_version

    if ns.action == "download":
        response, encoding = _open_url(filename)
        with response:
            for line in response:
                print(line.decode(encoding).strip())
    else:
        table = pull_emoji(filename, ns.max_version, ns.ignore_variants, ns.fully_qualified)
        if ns.action == "json":
            json.dump(table.to_dict(), sys.stdout, indent=4)
        elif ns.action == "model":
            to_cxx_model(table)
        elif ns.action == "html":
            to_html(table)


if __name__ == "__main__":
    main()