File indexing completed on 2024-04-28 15:39:51

0001 #!/usr/bin/perl
0002 # SPDX-FileCopyrightText: 2019 Robert Krawitz <rlk@alum.mit.edu>
0003 # SPDX-License-Identifier: GPL-2.0-or-later
0004 
0005 # Maintain KPhotoAlbum index.xml files
0006 
0007 use strict;
0008 use warnings;
0009 
0010 use XML::LibXML;
0011 use DBI;
0012 use DBD::SQLite::Constants qw/:file_open/;
0013 use Getopt::Long;
0014 use Carp("cluck");
0015 use POSIX;
0016 
0017 my (%categories);
0018 # Map between category name and ID for compressed files
0019 my (%category_map);
0020 my (@category_names);
0021 # This is stored as {category}{member}{groupname}, as members can only be
0022 # members of one group.
0023 my (%member_groups);
0024 # $group_closure{$category}{value}{values...}
0025 my (%group_closure);
0026 
0027 # Global image (for filtering)
0028 my (%current_image);
0029 
0030 my (%exif_data);
0031 
0032 my ($opt_index_file);
0033 my ($opt_filter_file);
0034 my ($opt_list_type);
0035 my ($opt_use_exif);
0036 my ($opt_print_count);
0037 my ($opt_dryrun);
0038 my (@opt_exif_vars);
0039 our ($kpa_filter);
0040 
0041 # Needs to be kept up to date with XMLDB/Database.cpp
0042 my (@standard_vars) = (
0043     "file",
0044     "label",
0045     "description",
0046     "startDate",
0047     "endDate",
0048     "angle",
0049     "md5sum",
0050     "width",
0051     "height",
0052     "rating",
0053     "stackId",
0054     "stackOrder",
0055     "videoLength");
0056 
0057 my ($rootdir);
0058 
0059 my (@exif_vars) = (
0060     "Exif_Photo_FocalLength",
0061     "Exif_Photo_ExposureTime",
0062     "Exif_Photo_ApertureValue",
0063     "Exif_Photo_FNumber",
0064     "Exif_Photo_Flash",
0065     "Exif_Photo_Contrast",
0066     "Exif_Photo_Sharpness",
0067     "Exif_Photo_Saturation",
0068     "Exif_Image_Orientation",
0069     "Exif_Photo_MeteringMode",
0070     "Exif_Photo_ISOSpeedRatings",
0071     "Exif_Photo_ExposureProgram",
0072     "Exif_Image_Make",
0073     "Exif_Image_Model",
0074     "Exif_GPSInfo_GPSVersionID",
0075     "Exif_GPSInfo_GPSAltitude",
0076     "Exif_GPSInfo_GPSAltitudeRef",
0077     "Exif_GPSInfo_GPSMeasureMode",
0078     "Exif_GPSInfo_GPSDOP",
0079     "Exif_GPSInfo_GPSImgDirection",
0080     "Exif_GPSInfo_GPSLatitude",
0081     "Exif_GPSInfo_GPSLatitudeRef",
0082     "Exif_GPSInfo_GPSLongitude",
0083     "Exif_GPSInfo_GPSLongitudeRef",
0084     "Exif_GPSInfo_GPSTimeStamp",
0085     "Exif_Photo_LensModel"
0086     );
0087 
0088 my (@all_vars) = (
0089     @standard_vars,
0090     "mediaType");
0091 
0092 sub max_width(\@) {
0093     my ($strings) = @_;
0094     my ($answer) = 0;
0095     map { $answer = length if (length > $answer); } @$strings;
0096     return $answer;
0097 }
0098 
0099 sub cols(\@) {
0100     my ($strings) = @_;
0101     my ($width) = max_width(@$strings) + 2;
0102     my ($answer) = floor((80 - 12)/$width);
0103     $answer = 1 if ($answer == 0);
0104     return $answer;
0105 }
0106 
0107 sub rows(\@) {
0108     my ($strings) = @_;
0109     my ($cols) = cols(@$strings);
0110     return ceil(scalar @$strings / $cols);
0111 }
0112 
0113 sub generate_vars(\@) {
0114     my ($strings) = @_;
0115     my (@strings) = sort sort map {my $a = $_; $a =~ s/ //g; "\$$a"} @$strings;
0116     my ($count) = scalar @strings;
0117     my ($width) = max_width(@strings);
0118     my ($cols) = cols(@strings);
0119     my ($pcols) = $cols;
0120     my ($rows) = rows(@strings);
0121     my ($full_rows) = $count % $rows;
0122     $full_rows = $rows if ($full_rows == 0);
0123     my ($answer) = "";
0124     foreach my $i (0..$rows - 1) {
0125     $pcols-- if ($i == $full_rows);
0126     my ($fmt) = "            " . join(" ", map {sprintf("%%-%ds", $width)} (1..$pcols));
0127     my (@pvars) = map { $strings[$_]} grep { $_ % $rows == $i; } (0..$count - 1);
0128     $answer .= sprintf("$fmt\n", @pvars);
0129     }
0130     return $answer;
0131 }
0132 
0133 sub usage() {
0134     my ($known_vars) = generate_vars(@all_vars);
0135     my ($exif_vars) = generate_vars(@exif_vars);
0136     my ($usage) = << "FINIS";
0137 Usage: $0 [options...] [filter]
0138 
0139     Filter a KPhotoAlbum database file for either images files or
0140     category values.
0141 
0142     <filter> is a filter, written as a Perl expressions.  Variable
0143     names are generated from the attributes.  The supported
0144     variables are:
0145 
0146 $known_vars
0147     The following EXIF attributes may be specified in the same way:
0148 
0149 $exif_vars
0150     In addition, the following functions are provided to match on
0151     categoried information:
0152 
0153         hasKeyword([\$value])
0154         hasPerson([\$value])
0155         hasPlace([\$value])
0156         hasToken([\$value])
0157         hasAttribute(\$category, [\$value])
0158         matchesKeyword(\$pattern)
0159         matchesPerson(\$pattern)
0160         matchesPlace(\$pattern)
0161         matchesToken(\$pattern)
0162         matchesAttribute(\$category, \$pattern)
0163 
0164     The "matches" functions accept Perl regular expressions (see
0165     perlre(1)).  The "has" functions take the value as an optional
0166     argument; if not provided, they search for images that have any
0167     value of the specified category.
0168 
0169     The following options are available:
0170 
0171     -f|--filter-file file   Location of file defining the filter.
0172                 The filter-file is a Perl fragment; at
0173                     a minimum, it must define `\$kpa_filter'.
0174     -d|--db filename    Location of the index file.  Defaults to
0175                 your normal KPhotoAlbum database file.
0176                 If you use EXIF filtering, the EXIF data
0177                 is extracted from exif-info.db.
0178     -l|--list attribute     Rather than listing matching files,
0179                     this lists all values of the specified
0180                     attribute.  If you want to use
0181                     a category such as keywords for this
0182                     specify ``cat:Keywords'' as appropriate.
0183     -c|--count      In combination with --list, prints the
0184                     number of matching images for each
0185                     value (histogram).
0186     --exif          Extract EXIF data in addition to index
0187                 file information.  Enabled if the filter
0188                 expression appears to require it.  Normally
0189                     not needed.  You can specify --no-exif if
0190                 you know you don't need EXIF data; this
0191                 improves performance.
0192     --exif-vars=var,... List of EXIF variables to extract.
0193                 See above for available variables.
0194                 Restricting the extracted variables may
0195                     improve performance.
0196     --dry-run       Print a dry run of the filtering loop,
0197                 for debugging.
0198     -h|--help       Print this message.
0199 FINIS
0200     print STDERR $usage;
0201     exit(1);
0202 }
0203 
0204 ################################################################
0205 ################################################################
0206 # Load files ###################################################
0207 ################################################################
0208 ################################################################
0209 
0210 
0211 ################################################################
0212 # Utilities
0213 ################################################################
0214 
0215 sub isNode($$) {
0216     my ($node, $name) = @_;
0217     return ($node->nodeType() == 1 && lc $node->nodeName() eq $name);
0218 }
0219 
0220 sub computeClosure($\@) {
0221     my ($category, $members) = @_;
0222     my (%answer);
0223     foreach my $value (@$members) {
0224     map { $answer{$_} = 1;} keys %{$group_closure{$category}{$value}};
0225     }
0226     return sort keys %answer;
0227 }
0228 
0229 ################################################################
0230 # Categories
0231 ################################################################
0232 
0233 sub loadCategories($$) {
0234     my ($node, $compressed) = @_;
0235     foreach my $child($node->childNodes()) {
0236     next if !isNode($child, "category");
0237     my ($category) = $child->getAttribute("name");
0238     $categories{$category} = {};
0239     $category_map{$category} = [];
0240     $member_groups{$category} = {};
0241     $group_closure{$category} = {};
0242     my (@members);
0243     foreach my $grandchild ($child->childNodes()) {
0244         next if !isNode($grandchild, "value");
0245         my ($value) = $grandchild->getAttribute("value");
0246         # This works for both compact and original file format
0247         $categories{$category}{$value} = 1;
0248         $category_map{$category}[$grandchild->getAttribute("id")] = $value;
0249         $group_closure{$category}{$value} = {};
0250         $group_closure{$category}{$value}{$value} = 1;
0251     }
0252     }
0253     @category_names = sort keys %category_map;
0254 }
0255 
0256 ################################################################
0257 # Images
0258 ################################################################
0259 
0260 # Image options and values for uncompressed files.
0261 
0262 sub loadUncompressedOptions(\%) {
0263     my ($image) = @_;
0264     my (%options);
0265     my ($node) = $$image{"__node"};
0266     foreach my $child ($node->childNodes()) {
0267     next if !isNode($child, "options");
0268     my (@members);
0269     foreach my $grandchild ($child->childNodes()) {
0270         next if !isNode($grandchild, "option");
0271         my ($category) = $grandchild->getAttribute("name");
0272         foreach my $greatgrandchild ($grandchild->childNodes()) {
0273         next if !isNode($greatgrandchild, "value");
0274         my ($val) = $greatgrandchild->getAttribute("value");
0275         push @members, $val;
0276         }
0277         map { $options{$category}{$_} = 1; } computeClosure($category, @members);
0278     }
0279     }
0280     return \%options;
0281 }
0282 
0283 # Compressed XML files are simpler to parse; there's simply an attribute
0284 # for each category
0285 
0286 sub loadCompressedOptions(\%) {
0287     my ($image) = @_;
0288     my (%options);
0289     foreach my $category (@category_names) {
0290     my ($members) = $$image{$category};
0291     my (@members);
0292     if (defined $members && $members ne '') {
0293         my ($map) = $category_map{$category};
0294         @members = map {$$map[$_]} split(/,/, $members);
0295         $options{$category} = {};
0296         map { $options{$category}{$_} = 1; } computeClosure($category, @members);
0297     }
0298     }
0299     return \%options;
0300 }
0301 
0302 sub loadOptions() {
0303     if (! defined $current_image{"options"}) {
0304     if ($current_image{"__compressed"}) {
0305         $current_image{"options"} = loadCompressedOptions(%current_image);
0306     } else {
0307         $current_image{"options"} = return loadUncompressedOptions(%current_image);
0308     }
0309     }
0310 }
0311 
0312 sub hasAttribute($;$) {
0313     my ($category, $value) = @_;
0314     loadOptions();
0315     if (! defined $category_map{$category}) {
0316     die "hasAttribute: no such category $category\n";
0317     }
0318     if (defined $value) {
0319     return defined $current_image{"options"}{$category}{$value};
0320     } else {
0321     return defined $current_image{"options"}{$category};
0322     }
0323 }
0324 
0325 sub hasKeyword(;$) {
0326     my ($value) = @_;
0327     return hasAttribute("Keywords", $value);
0328 }
0329 
0330 sub hasPerson(;$) {
0331     my ($value) = @_;
0332     return hasAttribute("People", $value);
0333 }
0334 
0335 sub hasPlace(;$) {
0336     my ($value) = @_;
0337     return hasAttribute("Places", $value);
0338 }
0339 
0340 sub hasToken(;$) {
0341     my ($value) = @_;
0342     return hasAttribute("Token", $value);
0343 }
0344 
0345 sub matchesAttribute($$) {
0346     my ($category, $value) = @_;
0347     loadOptions();
0348     return grep(/$value/, keys %{$current_image{"options"}{$category}}) > 0;
0349 }
0350 
0351 sub matchesKeyword($) {
0352     my ($value) = @_;
0353     return matchesAttribute("Keywords", $value);
0354 }
0355 
0356 sub matchesPerson($) {
0357     my ($value) = @_;
0358     return matchesAttribute("People", $value);
0359 }
0360 
0361 sub matchesPlace($) {
0362     my ($value) = @_;
0363     return matchesAttribute("Places", $value);
0364 }
0365 
0366 sub matchesToken($) {
0367     my ($value) = @_;
0368     return matchesAttribute("Token", $value);
0369 }
0370 
0371 sub makeVarcode($) {
0372     my ($identifier) = @_;
0373     my ($varname) = $identifier;
0374     $varname =~ s/ //g;
0375     return "        my (\$$varname) = \$current_image{\"$identifier\"};";
0376 }
0377 
0378 sub loadImages($$) {
0379     my ($node, $compressed) = @_;
0380     my ($varcode) = join("\n", map {makeVarcode($_)} @standard_vars);
0381     if (! defined $kpa_filter) {
0382     $kpa_filter = '1';
0383     }
0384     my ($code) = << 'EOF';
0385 {
0386     no warnings "uninitialized";
0387     no warnings "numeric";
0388     my %items_found;
0389     my $matched_count = 0;
0390     foreach my $child ($node->childNodes()) {
0391     next if !isNode($child, "image");
0392     %current_image=();
0393     $current_image{"__node"} = $child;
0394     $current_image{"__compressed"} = $compressed;
0395     map { $current_image{$_->nodeName} = $_->value } $child->attributes();
0396 EOF
0397     $code .= "$varcode\n";
0398     $code .= << 'EOF';
0399     # Restore any attributes defaulted in version 8
0400     $current_image{"angle"} = 0 if (! defined $current_image{"angle"});
0401     $current_image{"endDate"} = $current_image{"startDate"} if (! defined $current_image{"endDate"});
0402     if (! defined $current_image{"label"}) {
0403         my ($label) = $file;
0404         $label =~ s,^.*/(.*)\.[^.]*$,$1,;
0405         $current_image{"label"} = $label;
0406     }
0407 EOF
0408     $code .= << 'EOF';
0409     my ($mediaType) = defined $videoLength ? "Video" : "Image";
0410 EOF
0411     if ($opt_use_exif) {
0412     my ($exifdecl) = join("\n        ", map {"my (\$$_);"} @exif_vars);
0413     my ($exifcode) = join("\n       ", map {"\$$exif_vars[$_] = \$\$row[$_];"} (0..$#exif_vars));
0414     $code .= << "EOF";
0415     $exifdecl;
0416     if (my \$row = \$exif_data{"\$rootdir\$file"}) {
0417         $exifcode
0418     }
0419 EOF
0420     }
0421     $code .= << "EOF";
0422         if ($kpa_filter) {
0423         \$matched_count++;
0424 EOF
0425     if ($opt_list_type) {
0426     if ($opt_list_type =~ /[Cc]at(egory)?:(.*)/) {
0427         $code .= << "EOF"
0428         loadOptions();
0429         map { \$items_found{\$_}++; } keys \%{\$current_image{"options"}{"$2"}};
0430 EOF
0431     } else {
0432         $code .= "            \$items_found{\$$opt_list_type}++;\n";
0433     }
0434     $code .= << 'EOF';
0435     }
0436     }
0437 EOF
0438     if ($opt_print_count) {
0439         $code .= << 'EOF'
0440     print join("\n", map {sprintf("%7d %s", $items_found{$_}, $_);} sort keys %items_found), "\n";
0441     printf("%7d Total\n", $matched_count);
0442 EOF
0443     } else {
0444         $code .= << 'EOF'
0445     print join("\n", sort keys %items_found), "\n";
0446 EOF
0447     }
0448     } else {
0449     $code .= << 'EOF';
0450             print "$file\n";
0451         }
0452     }
0453 EOF
0454     }
0455     $code .= "}\n";
0456     if ($opt_dryrun) {
0457     print STDERR $code;
0458     exit;
0459     }
0460     eval $code;
0461     if ($@) {
0462     my $known_vars = join("\n", sort map {my $a = $_; $a =~ s/ //g; "    $a"} @all_vars);
0463     die "Filter $kpa_filter failed:\n\n$@\n";
0464 
0465     }
0466 }
0467 
0468 ################################################################
0469 # Member groups
0470 ################################################################
0471 
0472 sub loadMemberGroups($$) {
0473     my ($node, $compressed) = @_;
0474     foreach my $child ($node->childNodes()) {
0475     next if !isNode($child, "member");
0476     my ($category) = $child->getAttribute("category");
0477     my ($groupname) = $child->getAttribute("group-name");
0478     if ($compressed) {
0479         my ($members) = $child->getAttribute("members");
0480         if ($members) {
0481         my ($map) = $category_map{$category};
0482         my (@members) = grep { ! $_ == 0 } split(/,/, $members);
0483         map {
0484             if (defined $$map[$_]) {
0485             $member_groups{$category}{$$map[$_]} = $groupname;
0486             } else {
0487             warn "Unknown keyword ID $_ in group $groupname\n";
0488             }
0489         } @members;
0490         }
0491     } else {
0492         my ($member) = $child->getAttribute("member");
0493         $member_groups{$category}{$member} = $groupname;
0494     }
0495     }
0496     foreach my $category (sort keys %member_groups) {
0497     foreach my $member (keys %{$member_groups{$category}}) {
0498         my ($parent) = $member_groups{$category}{$member};
0499         # Break up any circular member groups
0500         my (%seen_parents);
0501         my ($parentid) = 1;
0502         do {
0503         if (defined $seen_parents{$parent}) {
0504             my (%reverse_parents) = reverse %seen_parents;
0505             warn "Circular member group found, members: " . join(" <= ", map { $reverse_parents{$_}} sort keys %reverse_parents) . "\n";
0506             last;
0507         }
0508         $group_closure{$category}{$member}{$parent} = 1;
0509         $seen_parents{$parent} = $parentid++;
0510         } while (defined ($parent = $member_groups{$category}{$parent}));
0511     }
0512     }
0513 }
0514 
0515 ################################################################
0516 # Top level file loader
0517 ################################################################
0518 
0519 sub load_file($) {
0520     my ($file) = @_;
0521     my ($images);
0522     if ($opt_use_exif && @opt_exif_vars) {
0523     my (%exif_keys);
0524     my (%known_exif_keys);
0525     map {$known_exif_keys{$_} = 1;} @exif_vars;
0526     foreach my $exif (@opt_exif_vars) {
0527         map { $exif_keys{$_} = 1; } grep {defined $known_exif_keys{$_}}split(/[ ,]+/, $exif);
0528     }
0529     delete $exif_keys{"filename"};
0530     @exif_vars = ("filename", keys %exif_keys);
0531     }
0532 
0533     if ($opt_dryrun) {
0534     loadImages($images, 1);
0535     exit;
0536     }
0537 
0538     my $doc = XML::LibXML->load_xml(location => $file);
0539     if (! $doc) {
0540     die "Can't open $file as a KPhotoAlbum database.\n";
0541     }
0542 
0543     my $kpa = ${$doc->findnodes('KPhotoAlbum')}[0];
0544     if (! $kpa) {
0545     die "$file is not a KPhotoAlbum database.\n";
0546     } elsif ($kpa->getAttribute("version") != 7 &&
0547     $kpa->getAttribute("version") != 8) {
0548     die "kpa-list-images only works with version 7 and 8 files.\n";
0549     }
0550 
0551     $rootdir = $file;
0552     $rootdir =~ s,[^/]*$,,;
0553     if ($opt_use_exif) {
0554     my ($querystr) = 'SELECT ' . join(', ', "filename", @exif_vars) . " FROM exif";
0555 
0556     my $exif_db .= "${rootdir}exif-info.db";
0557 
0558     if (! -f $exif_db) {
0559         die "Expected EXIF database at $exif_db, but can't find it.\n"
0560     }
0561     my $EXIF_DB = DBI->connect("dbi:SQLite:$exif_db", undef, undef, {
0562         sqlite_open_flags => SQLITE_OPEN_READONLY,
0563                    });
0564     if (! defined $EXIF_DB) {
0565         die "Can't open EXIF database $exif_db.\n"
0566     }
0567     my $exif_query = $EXIF_DB->prepare($querystr);
0568     $exif_query->execute();
0569     # This is measured to be considerably (about 15% total,
0570     # considerably more for EXIF database alone) than individual
0571     # lookups on a prepared query.
0572     while (my @row = $exif_query->fetchrow_array) {
0573         my ($filename) = shift @row;
0574         $exif_data{$filename} = \@row;
0575     }
0576     }
0577     my ($compressed) = int $kpa->getAttribute("compressed");
0578 
0579     foreach my $topcn ($kpa->childNodes()) {
0580     if (isNode($topcn, "categories")) {
0581         loadCategories($topcn, $compressed);
0582         if (! $opt_print_count && $opt_list_type && $opt_list_type =~ /^cat(egory)?:(.*)/ && !defined $kpa_filter) {
0583         $opt_list_type = $2;
0584         if (! defined $category_map{$opt_list_type}) {
0585             die "Can't list category $opt_list_type: no such category\n";
0586         }
0587         my $stuff = join("\n", sort grep(defined $_ && $_ ne '', @{$category_map{$opt_list_type}}));
0588         if ($stuff) {
0589             print "$stuff\n";
0590         }
0591         exit;
0592         }
0593     } elsif (isNode($topcn, "images")) {
0594         $images = $topcn;
0595     } elsif (isNode($topcn, "member-groups")) {
0596         loadMemberGroups($topcn, $compressed);
0597     } elsif (isNode($topcn, "blocklist")) {
0598     } elsif ($topcn->nodeType() == 1) {
0599         warn "Found unknown node " . $topcn->nodeName() . "\n";
0600     }
0601     }
0602     # Load images last so that we can stream them through.
0603     loadImages($images, $compressed);
0604 }
0605 
0606 sub get_standard_kpa_index_file() {
0607     my ($kpa_config) = $ENV{"HOME"} . "/.config/kphotoalbumrc";
0608     open KPACONFIG, "<", "$kpa_config" or return "";
0609     my ($imageDBFile) = "";
0610     while (<KPACONFIG>) {
0611     if (/^imageDBFile=(.*)$/) {
0612         $imageDBFile = $1;
0613         last;
0614     }
0615     }
0616     close KPACONFIG;
0617     return $imageDBFile;
0618 }
0619 
0620 my ($do_help);
0621 
0622 my (%options) = ("f=s"           => \$opt_filter_file,
0623          "filter-file=s" => \$opt_filter_file,
0624          "d=s"           => \$opt_index_file,
0625          "db=s"          => \$opt_index_file,
0626          "l=s"           => \$opt_list_type,
0627          "list=s"        => \$opt_list_type,
0628          "list-values=s" => \$opt_list_type,
0629          "exif!"         => \$opt_use_exif,
0630          "exif-vars=s"   => \@opt_exif_vars,
0631          "c"             => \$opt_print_count,
0632          "count!"        => \$opt_print_count,
0633          "dryrun!"       => \$opt_dryrun,
0634          "dry-run!"      => \$opt_dryrun,
0635          "h"             => \$do_help,
0636          "help"          => \$do_help,
0637     );
0638 
0639 Getopt::Long::Configure("bundling", "require_order");
0640 if (!Getopt::Long::GetOptions(%options) || $do_help) {
0641     usage();
0642 }
0643 
0644 if ($opt_filter_file) {
0645     if (!($opt_filter_file =~ m,/,)) {
0646     $opt_filter_file = "./$opt_filter_file";
0647     }
0648     my $retval = do $opt_filter_file;
0649     if ($@) {
0650     die "Cannot process filter file $opt_filter_file: $@\n";
0651     } elsif (! defined $retval) {
0652     die "Cannot read filter file $opt_filter_file: $!\n";
0653     } elsif (! defined $kpa_filter) {
0654     die "Filter file $opt_filter_file does not define \$kpa_filter.\n"
0655     }
0656 } elsif ($#ARGV >= 0) {
0657     $kpa_filter = $ARGV[0];
0658 }
0659 if (! defined $opt_use_exif) {
0660     if ((defined $kpa_filter && $kpa_filter =~ /Exif_/) ||
0661     (defined $opt_list_type && $opt_list_type =~ /Exif_/)) {
0662     $opt_use_exif = 1;
0663     } else {
0664     $opt_use_exif = 0;
0665     }
0666 }
0667 my ($index_file);
0668 if ($opt_index_file) {
0669     $index_file = $opt_index_file;
0670 } else {
0671     $index_file = get_standard_kpa_index_file();
0672 }
0673 load_file($index_file);