Warning, /multimedia/kid3/src/qml/script/FixId3v2StandardViolations.qml is written in an unsupported language. File is not indexed.

0001 /**
0002  * \file FixId3v2StandardViolations.qml
0003  * Fix standard violations in ID3v2 frames to conform to the specification.
0004  *
0005  * \b Project: Kid3
0006  * \author Urs Fleisch
0007  * \date 01 Oct 2022
0008  *
0009  * Copyright (C) 2022  Urs Fleisch
0010  *
0011  * This program is free software; you can redistribute it and/or modify
0012  * it under the terms of the GNU Lesser General Public License as published by
0013  * the Free Software Foundation; version 3.
0014  *
0015  * This program is distributed in the hope that it will be useful,
0016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0018  * GNU Lesser General Public License for more details.
0019  *
0020  * You should have received a copy of the GNU Lesser General Public License
0021  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0022  */
0023 
0024 import Kid3 1.1
0025 
0026 Kid3Script {
0027   onRun: {
0028     function frameModelRowToString(model, row) {
0029       var values = []
0030       var roleNames = ["name", "value", "internalName", "fieldIds", "fieldValues", "notice"]
0031       for (var i = 0; i < roleNames.length; ++i) {
0032         var roleName = roleNames[i]
0033         values.push(roleName + "=" + script.getRoleData(model, row, roleName))
0034       }
0035       return "" + row + ": " + values.join(", ")
0036     }
0037 
0038     function doNothing(model, row) {
0039       return {
0040         fixed: false,
0041         message: ""
0042       }
0043     }
0044 
    // ISO-639-2 language codes as found on
    // https://www.loc.gov/standards/iso639-2/php/code_list.php
    // convertToValidLanguageCode() accepts a code only if it is contained in
    // this list (or lies in the local-use range handled there).
    // Some codes are listed more than once (for example "alb" and "sqi" appear
    // near the start and again after "spa"). The duplicates do not affect the
    // lookup, which only tests for membership.
    var languageCodes = ["aar", "abk", "ace", "ach", "ada", "ady", "afa",
    "afh", "afr", "ain", "aka", "akk", "alb", "sqi", "ale", "alg", "alt",
    "amh", "ang", "anp", "apa", "ara", "arc", "arg", "arm", "hye", "arn",
    "arp", "art", "arw", "asm", "ast", "ath", "aus", "ava", "ave", "awa",
    "aym", "aze", "bad", "bai", "bak", "bal", "bam", "ban", "baq", "eus",
    "bas", "bat", "bej", "bel", "bem", "ben", "ber", "bho", "bih", "bik",
    "bin", "bis", "bla", "bnt", "tib", "bod", "bos", "bra", "bre", "btk",
    "bua", "bug", "bul", "bur", "mya", "byn", "cad", "cai", "car", "cat",
    "cau", "ceb", "cel", "cze", "ces", "cha", "chb", "che", "chg", "chi",
    "zho", "chk", "chm", "chn", "cho", "chp", "chr", "chu", "chv", "chy",
    "cmc", "cnr", "cop", "cor", "cos", "cpe", "cpf", "cpp", "cre", "crh",
    "crp", "csb", "cus", "wel", "cym", "cze", "ces", "dak", "dan", "dar",
    "day", "del", "den", "ger", "deu", "dgr", "din", "div", "doi", "dra",
    "dsb", "dua", "dum", "dut", "nld", "dyu", "dzo", "efi", "egy", "eka",
    "gre", "ell", "elx", "eng", "enm", "epo", "est", "baq", "eus", "ewe",
    "ewo", "fan", "fao", "per", "fas", "fat", "fij", "fil", "fin", "fiu",
    "fon", "fre", "fra", "fre", "fra", "frm", "fro", "frr", "frs", "fry",
    "ful", "fur", "gaa", "gay", "gba", "gem", "geo", "kat", "ger", "deu",
    "gez", "gil", "gla", "gle", "glg", "glv", "gmh", "goh", "gon", "gor",
    "got", "grb", "grc", "gre", "ell", "grn", "gsw", "guj", "gwi", "hai",
    "hat", "hau", "haw", "heb", "her", "hil", "him", "hin", "hit", "hmn",
    "hmo", "hrv", "hsb", "hun", "hup", "arm", "hye", "iba", "ibo", "ice",
    "isl", "ido", "iii", "ijo", "iku", "ile", "ilo", "ina", "inc", "ind",
    "ine", "inh", "ipk", "ira", "iro", "ice", "isl", "ita", "jav", "jbo",
    "jpn", "jpr", "jrb", "kaa", "kab", "kac", "kal", "kam", "kan", "kar",
    "kas", "geo", "kat", "kau", "kaw", "kaz", "kbd", "kha", "khi", "khm",
    "kho", "kik", "kin", "kir", "kmb", "kok", "kom", "kon", "kor", "kos",
    "kpe", "krc", "krl", "kro", "kru", "kua", "kum", "kur", "kut", "lad",
    "lah", "lam", "lao", "lat", "lav", "lez", "lim", "lin", "lit", "lol",
    "loz", "ltz", "lua", "lub", "lug", "lui", "lun", "luo", "lus", "mac",
    "mkd", "mad", "mag", "mah", "mai", "mak", "mal", "man", "mao", "mri",
    "map", "mar", "mas", "may", "msa", "mdf", "mdr", "men", "mga", "mic",
    "min", "mis", "mac", "mkd", "mkh", "mlg", "mlt", "mnc", "mni", "mno",
    "moh", "mon", "mos", "mao", "mri", "may", "msa", "mul", "mun", "mus",
    "mwl", "mwr", "bur", "mya", "myn", "myv", "nah", "nai", "nap", "nau",
    "nav", "nbl", "nde", "ndo", "nds", "nep", "new", "nia", "nic", "niu",
    "dut", "nld", "nno", "nob", "nog", "non", "nor", "nqo", "nso", "nub",
    "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji", "ori", "orm",
    "osa", "oss", "ota", "oto", "paa", "pag", "pal", "pam", "pan", "pap",
    "pau", "peo", "per", "fas", "phi", "phn", "pli", "pol", "pon", "por",
    "pra", "pro", "pus", "que", "raj", "rap", "rar", "roa", "roh", "rom",
    "rum", "ron", "rum", "ron", "run", "rup", "rus", "sad", "sag", "sah",
    "sai", "sal", "sam", "san", "sas", "sat", "scn", "sco", "sel", "sem",
    "sga", "sgn", "shn", "sid", "sin", "sio", "sit", "sla", "slo", "slk",
    "slo", "slk", "slv", "sma", "sme", "smi", "smj", "smn", "smo", "sms",
    "sna", "snd", "snk", "sog", "som", "son", "sot", "spa", "alb", "sqi",
    "srd", "srn", "srp", "srr", "ssa", "ssw", "suk", "sun", "sus", "sux",
    "swa", "swe", "syc", "syr", "tah", "tai", "tam", "tat", "tel", "tem",
    "ter", "tet", "tgk", "tgl", "tha", "tib", "bod", "tig", "tir", "tiv",
    "tkl", "tlh", "tli", "tmh", "tog", "ton", "tpi", "tsi", "tsn", "tso",
    "tuk", "tum", "tup", "tur", "tut", "tvl", "twi", "tyv", "udm", "uga",
    "uig", "ukr", "umb", "und", "urd", "uzb", "vai", "ven", "vie", "vol",
    "vot", "wak", "wal", "war", "was", "wel", "cym", "wen", "wln", "wol",
    "xal", "xho", "yao", "yap", "yid", "yor", "ypk", "zap", "zbl", "zen",
    "zgh", "zha", "chi", "zho", "znd", "zul", "zun", "zxx", "zza"]
    // ID3v2 frames containing a language field
    // NOTE(review): this list does not seem to be referenced anywhere else in
    // the visible part of the script - confirm before relying on it.
    var consideredFrames = ["COMM", "USLT", "SYLT", "USER"]
0104 
0105     /**
0106      * Convert to valid ISO-639-2 language code.
0107      * The conversion just transforms to lower case. A more sophisticated
0108      * conversion could convert ISO-639-1 to ISO-639-2.
0109      * @param lang existing language code
0110      * @return language code, "XXX" if invalid or unknown, "" if not existing.
0111      */
0112     function convertToValidLanguageCode(lang) {
0113       // If lang is empty the frame probably does not exist. An empty language
0114       // field would be "   " and not "".
0115       if (!lang) {
0116         return ""
0117       }
0118       if (lang.length === 3 && lang !== "XXX") {
0119         lang = lang.toLowerCase()
0120         // qaa-qtz is a range reserved for local use
0121         if (languageCodes.indexOf(lang) !== -1 ||
0122             (lang >= "qaa" && lang <= "qtz")) {
0123           return lang
0124         }
0125       }
0126       return "XXX"
0127     }
0128 
0129     /**
0130      * Replace invalid language codes by "XXX".
0131      * The three byte language field, present in several frames, is used to
0132      * describe the language of the frame's content, according to ISO-639-2
0133      * [ISO-639-2]. The language should be represented in lower case. If the
0134      * language is not known the string "XXX" should be used.
0135      */
0136     function fixLanguageCode(model, row) {
0137       var oldText, newText
0138       var message = ""
0139       var fixed = false
0140       var name = script.getRoleData(model, row, "internalName")
0141       if (name.startsWith("TLAN")) {
0142         oldText = script.getRoleData(model, row, "value")
0143         newText = convertToValidLanguageCode(oldText)
0144         if (newText !== oldText) {
0145           app.setFrame(tagv2, name, newText)
0146           message = "Changed '" + oldText + "' to '" + newText + "'"
0147           fixed = true
0148         }
0149       } else {
0150         var fieldIds = script.getRoleData(model, row, "fieldIds")
0151         var fieldIdx = fieldIds.indexOf(Frame.ID_Language)
0152         if (fieldIdx !== -1) {
0153           var fieldValues = script.getRoleData(model, row, "fieldValues")
0154           oldText = fieldValues[fieldIdx]
0155           newText = convertToValidLanguageCode(oldText)
0156           if (newText !== oldText) {
0157             app.setFrame(tagv2, name + ".language", newText)
0158             message = "Changed '" + oldText + "' to '" + newText + "'"
0159             fixed = true
0160           }
0161         }
0162       }
0163       return { fixed, message }
0164     }
0165 
0166     /**
0167      * Replace new lines ("\n" or "\r\n") by spaces.
0168      */
0169     function fixNlForbidden(model, row) {
0170       var message = ""
0171       var fixed = false
0172       var name = script.getRoleData(model, row, "internalName")
0173       var oldText = script.getRoleData(model, row, "value")
0174       var newText = oldText.replace(/[\r\n]+/g, " ")
0175       if (newText !== oldText) {
0176         app.setFrame(tagv2, name, newText)
0177         message = "Changed '" +
0178             oldText.replace(/\r/g, "\\r").replace(/\n/g, "\\n") +
0179             "' to '" + newText + "'"
0180         fixed = true
0181       }
0182       return { fixed, message }
0183     }
0184 
0185     /**
0186      * Replace "\r\n" by "\n".
0187      */
0188     function fixCrForbidden(model, row) {
0189       var message = ""
0190       var fixed = false
0191       var name = script.getRoleData(model, row, "internalName")
0192       var oldText = script.getRoleData(model, row, "value")
0193       var newText = oldText.replace(/\r\n/g, "\n")
0194       if (newText !== oldText) {
0195         app.setFrame(tagv2, name, newText)
0196         message = "Changed '" +
0197             oldText.replace(/\r/g, "\\r").replace(/\n/g, "\\n") +
0198             "' to '" + newText.replace(/\n/g, "\\n") + "'"
0199         fixed = true
0200       }
0201       return { fixed, message }
0202     }
0203 
0204     /**
0205      * Replace empty owner field by "http://www.id3.org/dummy/ufid.html".
0206      */
0207     function fixOwnerEmpty(model, row) {
0208       var message = ""
0209       var fixed = false
0210       var fieldIds = script.getRoleData(model, row, "fieldIds")
0211       var fieldIdx = fieldIds.indexOf(Frame.ID_Owner)
0212       if (fieldIdx !== -1) {
0213         var name = script.getRoleData(model, row, "internalName")
0214         var fieldValues = script.getRoleData(model, row, "fieldValues")
0215         var oldText = fieldValues[fieldIdx]
0216         var newText = "http://www.id3.org/dummy/ufid.html"
0217         if (oldText === "") {
0218           app.setFrame(tagv2, name + ".owner", newText)
0219           message = "Changed '" + oldText + "' to '" + newText + "'"
0220           fixed = true
0221         }
0222       }
0223       return { fixed, message }
0224     }
0225 
0226     /**
0227      * Replace with first number found in string.
0228      */
0229     function fixNumeric(model, row) {
0230       var message = ""
0231       var fixed = false
0232       var name = script.getRoleData(model, row, "internalName")
0233       var oldText = script.getRoleData(model, row, "value")
0234       var newText = oldText.replace(/^[^\d]*(\d+).*$/, "$1")
0235       if (newText !== oldText) {
0236         app.setFrame(tagv2, name, newText)
0237         message = "Changed '" + oldText + "' to '" + newText + "'"
0238         fixed = true
0239       }
0240       return { fixed, message }
0241     }
0242 
0243     /**
0244      * Replace with "n1/n2" or just "n1" if numbers found in string.
0245      */
0246     function fixNrTotal(model, row) {
0247       var message = ""
0248       var fixed = false
0249       var name = script.getRoleData(model, row, "internalName")
0250       var oldText = script.getRoleData(model, row, "value")
0251       var twoNumbersRe = /^[^\d]*(\d+)[^\d]+(\d+).*$/
0252       var match = twoNumbersRe.exec(oldText)
0253       var newText = match && match.length === 3 && +match[1] <= +match[2]
0254           ? match[1] + "/" + match[2]
0255           : oldText.replace(/^[^\d]*(\d+).*$/, "$1")
0256       if (newText !== oldText) {
0257         app.setFrame(tagv2, name, newText)
0258         message = "Changed '" + oldText + "' to '" + newText + "'"
0259         fixed = true
0260       }
0261       return { fixed, message }
0262     }
0263 
0264     /**
0265      * Replace with "ddMM" if valid day and month numbers found in string.
0266      */
0267     function fixDayMonth(model, row) {
0268       var message = ""
0269       var fixed = false
0270       var name = script.getRoleData(model, row, "internalName")
0271       var oldText = script.getRoleData(model, row, "value")
0272       var twoNumbersRe = /^[^\d]*(\d+)[^\d]+(\d+).*$/
0273       var match = twoNumbersRe.exec(oldText)
0274       var newText = oldText
0275       if (match && match.length === 3) {
0276         var n1 = +match[1]
0277         var n2 = +match[2]
0278         if (n1 >= 1 && n1 <= 31 && n2 >= 1 && n2 <= 12) {
0279           newText = ("0" + n1).slice(-2) + ("0" + n2).slice(-2)
0280         } else if (n2 >= 1 && n2 <= 31 && n1 >= 1 && n1 <= 12) {
0281           newText = ("0" + n2).slice(-2) + ("0" + n1).slice(-2)
0282         }
0283       }
0284       if (newText !== oldText) {
0285         app.setFrame(tagv2, name, newText)
0286         message = "Changed '" + oldText + "' to '" + newText + "'"
0287         fixed = true
0288       }
0289       return { fixed, message }
0290     }
0291 
0292     /**
0293      * Replace with "hhmm" if valid hour and minute numbers found in string.
0294      */
0295     function fixHourMinute(model, row) {
0296       var message = ""
0297       var fixed = false
0298       var name = script.getRoleData(model, row, "internalName")
0299       var oldText = script.getRoleData(model, row, "value")
0300       var twoNumbersRe = /^[^\d]*(\d+)[^\d]+(\d+).*$/
0301       var match = twoNumbersRe.exec(oldText)
0302       var newText = oldText
0303       if (match && match.length === 3) {
0304         var n1 = +match[1]
0305         var n2 = +match[2]
0306         if (n1 >= 0 && n1 <= 23 && n2 >= 0 && n2 <= 59) {
0307           newText = ("0" + n1).slice(-2) + ("0" + n2).slice(-2)
0308         } else if (n2 >= 0 && n2 <= 23 && n1 >= 0 && n1 <= 59) {
0309           newText = ("0" + n2).slice(-2) + ("0" + n1).slice(-2)
0310         }
0311       }
0312       if (newText !== oldText) {
0313         app.setFrame(tagv2, name, newText)
0314         message = "Changed '" + oldText + "' to '" + newText + "'"
0315         fixed = true
0316       }
0317       return { fixed, message }
0318     }
0319 
0320     /**
0321      * Replace with "yyyy" if valid year number found in string.
0322      */
0323     function fixYear(model, row) {
0324       var message = ""
0325       var fixed = false
0326       var name = script.getRoleData(model, row, "internalName")
0327       var oldText = script.getRoleData(model, row, "value")
0328       var oneNumberRe = /^[^\d]*(\d+).*$/
0329       var match = oneNumberRe.exec(oldText)
0330       var newText = oldText
0331       if (match && match.length === 2) {
0332         var n = +match[1]
0333         if (n >= 1000 && n < 2100) {
0334           newText = "" + n
0335         }
0336       }
0337       if (newText !== oldText) {
0338         app.setFrame(tagv2, name, newText)
0339         message = "Changed '" + oldText + "' to '" + newText + "'"
0340         fixed = true
0341       }
0342       return { fixed, message }
0343     }
0344 
0345     /**
0346      * Replace strings like "(C) yyyy " with "yyyy ".
0347      */
0348     function fixYearSpace(model, row) {
0349       var message = ""
0350       var fixed = false
0351       var name = script.getRoleData(model, row, "internalName")
0352       var oldText = script.getRoleData(model, row, "value")
0353       var newText =
0354           oldText.replace(/^.*(?:\(C\)|\(c\)|\xa9)\s*(\d{4} .*)$/, "$1")
0355       if (newText !== oldText) {
0356         app.setFrame(tagv2, name, newText)
0357         message = "Changed '" + oldText + "' to '" + newText + "'"
0358         fixed = true
0359       }
0360       return { fixed, message }
0361     }
0362 
0363     /**
0364      * Replace strings like "yyyyMMdd", "yyyy/MM/dd" or "dd.MM.yyyy" with
0365      * "yyyy-MM-dd", try to fix swapped month/day number or keep at least year.
0366      */
0367     function fixIsoDate(model, row) {
0368       var message = ""
0369       var fixed = false
0370       var name = script.getRoleData(model, row, "internalName")
0371       var oldText = script.getRoleData(model, row, "value")
0372       var startsWithYearRe = /^[^\d]*(\d{4})[-/.]?(\d{1,2})[-/.]?(\d{1,2}).*$/
0373       var endsWithYearRe = /^[^\d]*(\d{1,2})[-/.]?(\d{1,2})[-/.]?(\d{4}).*$/
0374       var newText = oldText
0375       var year = 0
0376       var month = 0
0377       var day = 0
0378       var match
0379       if ((match = startsWithYearRe.exec(oldText)) && match.length === 4) {
0380         year = +match[1]
0381         month = +match[2]
0382         day = +match[3]
0383       } else if ((match = endsWithYearRe.exec(oldText)) && match.length === 4) {
0384         year = +match[3]
0385         month = +match[2]
0386         day = +match[1]
0387       }
0388       if (year >= 1000 && year < 2100) {
0389         if (day >= 1 && month >= 1) {
0390           if ((day > 31 || month > 12) && day <= 12 && month <= 31) {
0391             var tmp = day
0392             day = month
0393             month = tmp
0394           }
0395           newText = year + "-" +
0396               ("0" + month).slice(-2) + "-" + ("0" + day).slice(-2)
0397         } else {
0398           newText = "" + year
0399         }
0400       }
0401       if (newText !== oldText) {
0402         app.setFrame(tagv2, name, newText)
0403         message = "Changed '" + oldText + "' to '" + newText + "'"
0404         fixed = true
0405       }
0406       return { fixed, message }
0407     }
0408 
0409     /**
0410      * Try to generate a list with "involvement 1|involvee 1|..." from
0411      * "involvement 1:involvee 1;..." or
0412      * "involvee 1 (involvement 1);...".
0413      */
0414     function fixStringList(model, row) {
0415       var message = ""
0416       var fixed = false
0417       var name = script.getRoleData(model, row, "internalName")
0418       var oldText = script.getRoleData(model, row, "value")
0419       var invRe = oldText.indexOf(":") !== -1 ? /([^:]+):\s*([^;]+);?/
0420                                               : /([^(]+)\s*\([^)]+\)\s*[;,/]?/
0421       var newText = oldText
0422       var strs = []
0423       var involvRe, match
0424       if (oldText.indexOf(":") !== -1) {
0425         involvRe = /([^:]+):\s*([^;]+);?/g
0426         while ((match = involvRe.exec(oldText))) {
0427           strs.push(match[1])
0428           strs.push(match[2])
0429         }
0430       } else {
0431         involvRe = /([^(]+)\s*\([^)]+\)\s*[;,/]?/g
0432         while ((match = involvRe.exec(oldText))) {
0433           strs.push(match[2])
0434           strs.push(match[1])
0435         }
0436       }
0437       if (strs.length) {
0438         newText = strs.join("|")
0439       }
0440       if (newText !== oldText) {
0441         app.setFrame(tagv2, name, newText)
0442         message = "Changed '" + oldText + "' to '" + newText + "'"
0443         fixed = true
0444       }
0445       return { fixed, message }
0446     }
0447 
0448     /**
0449      * Remove space at begin or end of string.
0450      */
0451     function fixExcessSpace(model, row) {
0452       var message = ""
0453       var fixed = false
0454       var name = script.getRoleData(model, row, "internalName")
0455       var oldText = script.getRoleData(model, row, "value")
0456       var newText = oldText.replace(/^\s+/, "").replace(/\s+$/, "")
0457       if (newText !== oldText) {
0458         app.setFrame(tagv2, name, newText)
0459         message = "Changed '" + oldText + "' to '" + newText + "'"
0460         fixed = true
0461       }
0462       return { fixed, message }
0463     }
0464 
0465     var functionForWarning = {
0466       [FrameNotice.LanguageCode]: fixLanguageCode,
0467       [FrameNotice.NlForbidden]: fixNlForbidden,
0468       [FrameNotice.CrForbidden]: fixCrForbidden,
0469       [FrameNotice.OwnerEmpty]: fixOwnerEmpty,
0470       [FrameNotice.Numeric]: fixNumeric,
0471       [FrameNotice.NrTotal]: fixNrTotal,
0472       [FrameNotice.DayMonth]: fixDayMonth,
0473       [FrameNotice.HourMinute]: fixHourMinute,
0474       [FrameNotice.Year]: fixYear,
0475       [FrameNotice.YearSpace]: fixYearSpace,
0476       [FrameNotice.IsoDate]: fixIsoDate,
0477       [FrameNotice.StringList]: fixStringList,
0478       [FrameNotice.ExcessSpace]: fixExcessSpace
0479     }
0480 
0481     function doWork() {
0482       ///console.debug("Checking file " + app.selectionInfo.filePath)
0483       var tagFormat = app.selectionInfo.tag(Frame.Tag_2).tagFormat
0484       if (tagFormat && tagFormat.startsWith("ID3v2")) {
0485         var frameModel = app.tag(Frame.Tag_2).frameModel
0486         for (var i = 0; i < frameModel.rowCount(); ++i) {
0487           ///console.debug(frameModelRowToString(frameModel, i))
0488           var warning = script.getRoleData(frameModel, i, "noticeWarning")
0489           if (warning) {
0490             var notice = script.getRoleData(frameModel, i, "notice")
0491             var name = script.getRoleData(frameModel, i, "name")
0492             var func = functionForWarning[warning] || doNothing
0493             var result = func(frameModel, i)
0494             if (result) {
0495               console.log((result.fixed ? "MODIFIED " : "UNCHANGED") +
0496                           (result.message ? " " + result.message + ": " : " ") +
0497                           notice.replace(/\n/g, " ") + " in '" + name +
0498                           "' of '" + app.selectionInfo.filePath + "'")
0499             }
0500           }
0501         }
0502       }
0503       if (!nextFile()) {
0504         if (isStandalone()) {
0505           // Save the changes if the script is started stand-alone, not from Kid3.
0506           app.saveDirectory()
0507         }
0508         Qt.quit()
0509       } else {
0510         setTimeout(doWork, 1)
0511       }
0512     }
0513 
    // Entry point of the handler: presumably firstFile() selects the first
    // file to process (it is defined outside of this script - confirm), then
    // doWork() handles it and advances through the remaining files itself.
    firstFile()
    doWork()
0516   }
0517 }