File indexing completed on 2024-11-17 04:01:00
0001 #!/usr/bin/perl 0002 # SPDX-FileCopyrightText: 2019 Robert Krawitz <rlk@alum.mit.edu> 0003 # SPDX-License-Identifier: GPL-2.0-or-later 0004 0005 # Maintain KPhotoAlbum index.xml files 0006 0007 use strict; 0008 use warnings; 0009 0010 use XML::LibXML; 0011 use DBI; 0012 use DBD::SQLite::Constants qw/:file_open/; 0013 use Getopt::Long; 0014 use Carp("cluck"); 0015 use POSIX; 0016 0017 my (%categories); 0018 # Map between category name and ID for compressed files 0019 my (%category_map); 0020 my (@category_names); 0021 # This is stored as {category}{member}{groupname}, as members can only be 0022 # members of one group. 0023 my (%member_groups); 0024 # $group_closure{$category}{value}{values...} 0025 my (%group_closure); 0026 0027 # Global image (for filtering) 0028 my (%current_image); 0029 0030 my (%exif_data); 0031 0032 my ($opt_index_file); 0033 my ($opt_filter_file); 0034 my ($opt_list_type); 0035 my ($opt_use_exif); 0036 my ($opt_print_count); 0037 my ($opt_dryrun); 0038 my (@opt_exif_vars); 0039 our ($kpa_filter); 0040 0041 # Needs to be kept up to date with XMLDB/Database.cpp 0042 my (@standard_vars) = ( 0043 "file", 0044 "label", 0045 "description", 0046 "startDate", 0047 "endDate", 0048 "angle", 0049 "md5sum", 0050 "width", 0051 "height", 0052 "rating", 0053 "stackId", 0054 "stackOrder", 0055 "videoLength"); 0056 0057 my ($rootdir); 0058 0059 my (@exif_vars) = ( 0060 "Exif_Photo_FocalLength", 0061 "Exif_Photo_ExposureTime", 0062 "Exif_Photo_ApertureValue", 0063 "Exif_Photo_FNumber", 0064 "Exif_Photo_Flash", 0065 "Exif_Photo_Contrast", 0066 "Exif_Photo_Sharpness", 0067 "Exif_Photo_Saturation", 0068 "Exif_Image_Orientation", 0069 "Exif_Photo_MeteringMode", 0070 "Exif_Photo_ISOSpeedRatings", 0071 "Exif_Photo_ExposureProgram", 0072 "Exif_Image_Make", 0073 "Exif_Image_Model", 0074 "Exif_GPSInfo_GPSVersionID", 0075 "Exif_GPSInfo_GPSAltitude", 0076 "Exif_GPSInfo_GPSAltitudeRef", 0077 "Exif_GPSInfo_GPSMeasureMode", 0078 "Exif_GPSInfo_GPSDOP", 0079 "Exif_GPSInfo_GPSImgDirection", 0080 "Exif_GPSInfo_GPSLatitude", 0081 "Exif_GPSInfo_GPSLatitudeRef", 0082 "Exif_GPSInfo_GPSLongitude", 0083 "Exif_GPSInfo_GPSLongitudeRef", 0084 "Exif_GPSInfo_GPSTimeStamp", 0085 "Exif_Photo_LensModel" 0086 ); 0087 0088 my (@all_vars) = ( 0089 @standard_vars, 0090 "mediaType"); 0091 0092 sub max_width(\@) { 0093 my ($strings) = @_; 0094 my ($answer) = 0; 0095 map { $answer = length if (length > $answer); } @$strings; 0096 return $answer; 0097 } 0098 0099 sub cols(\@) { 0100 my ($strings) = @_; 0101 my ($width) = max_width(@$strings) + 2; 0102 my ($answer) = floor((80 - 12)/$width); 0103 $answer = 1 if ($answer == 0); 0104 return $answer; 0105 } 0106 0107 sub rows(\@) { 0108 my ($strings) = @_; 0109 my ($cols) = cols(@$strings); 0110 return ceil(scalar @$strings / $cols); 0111 } 0112 0113 sub generate_vars(\@) { 0114 my ($strings) = @_; 0115 my (@strings) = sort sort map {my $a = $_; $a =~ s/ //g; "\$$a"} @$strings; 0116 my ($count) = scalar @strings; 0117 my ($width) = max_width(@strings); 0118 my ($cols) = cols(@strings); 0119 my ($pcols) = $cols; 0120 my ($rows) = rows(@strings); 0121 my ($full_rows) = $count % $rows; 0122 $full_rows = $rows if ($full_rows == 0); 0123 my ($answer) = ""; 0124 foreach my $i (0..$rows - 1) { 0125 $pcols-- if ($i == $full_rows); 0126 my ($fmt) = " " . join(" ", map {sprintf("%%-%ds", $width)} (1..$pcols)); 0127 my (@pvars) = map { $strings[$_]} grep { $_ % $rows == $i; } (0..$count - 1); 0128 $answer .= sprintf("$fmt\n", @pvars); 0129 } 0130 return $answer; 0131 } 0132 0133 sub usage() { 0134 my ($known_vars) = generate_vars(@all_vars); 0135 my ($exif_vars) = generate_vars(@exif_vars); 0136 my ($usage) = << "FINIS"; 0137 Usage: $0 [options...] [filter] 0138 0139 Filter a KPhotoAlbum database file for either images files or 0140 category values. 0141 0142 <filter> is a filter, written as a Perl expressions. Variable 0143 names are generated from the attributes. The supported 0144 variables are: 0145 0146 $known_vars 0147 The following EXIF attributes may be specified in the same way: 0148 0149 $exif_vars 0150 In addition, the following functions are provided to match on 0151 categoried information: 0152 0153 hasKeyword([\$value]) 0154 hasPerson([\$value]) 0155 hasPlace([\$value]) 0156 hasToken([\$value]) 0157 hasAttribute(\$category, [\$value]) 0158 matchesKeyword(\$pattern) 0159 matchesPerson(\$pattern) 0160 matchesPlace(\$pattern) 0161 matchesToken(\$pattern) 0162 matchesAttribute(\$category, \$pattern) 0163 0164 The "matches" functions accept Perl regular expressions (see 0165 perlre(1)). The "has" functions take the value as an optional 0166 argument; if not provided, they search for images that have any 0167 value of the specified category. 0168 0169 The following options are available: 0170 0171 -f|--filter-file file Location of file defining the filter. 0172 The filter-file is a Perl fragment; at 0173 a minimum, it must define `\$kpa_filter'. 0174 -d|--db filename Location of the index file. Defaults to 0175 your normal KPhotoAlbum database file. 0176 If you use EXIF filtering, the EXIF data 0177 is extracted from exif-info.db. 0178 -l|--list attribute Rather than listing matching files, 0179 this lists all values of the specified 0180 attribute. If you want to use 0181 a category such as keywords for this 0182 specify ``cat:Keywords'' as appropriate. 0183 -c|--count In combination with --list, prints the 0184 number of matching images for each 0185 value (histogram). 0186 --exif Extract EXIF data in addition to index 0187 file information. Enabled if the filter 0188 expression appears to require it. Normally 0189 not needed. You can specify --no-exif if 0190 you know you don't need EXIF data; this 0191 improves performance. 0192 --exif-vars=var,... List of EXIF variables to extract. 0193 See above for available variables. 0194 Restricting the extracted variables may 0195 improve performance. 0196 --dry-run Print a dry run of the filtering loop, 0197 for debugging. 0198 -h|--help Print this message. 0199 FINIS 0200 print STDERR $usage; 0201 exit(1); 0202 } 0203 0204 ################################################################ 0205 ################################################################ 0206 # Load files ################################################### 0207 ################################################################ 0208 ################################################################ 0209 0210 0211 ################################################################ 0212 # Utilities 0213 ################################################################ 0214 0215 sub isNode($$) { 0216 my ($node, $name) = @_; 0217 return ($node->nodeType() == 1 && lc $node->nodeName() eq $name); 0218 } 0219 0220 sub computeClosure($\@) { 0221 my ($category, $members) = @_; 0222 my (%answer); 0223 foreach my $value (@$members) { 0224 map { $answer{$_} = 1;} keys %{$group_closure{$category}{$value}}; 0225 } 0226 return sort keys %answer; 0227 } 0228 0229 ################################################################ 0230 # Categories 0231 ################################################################ 0232 0233 sub loadCategories($$) { 0234 my ($node, $compressed) = @_; 0235 foreach my $child($node->childNodes()) { 0236 next if !isNode($child, "category"); 0237 my ($category) = $child->getAttribute("name"); 0238 $categories{$category} = {}; 0239 $category_map{$category} = []; 0240 $member_groups{$category} = {}; 0241 $group_closure{$category} = {}; 0242 my (@members); 0243 foreach my $grandchild ($child->childNodes()) { 0244 next if !isNode($grandchild, "value"); 0245 my ($value) = $grandchild->getAttribute("value"); 0246 # This works for both compact and original file format 0247 $categories{$category}{$value} = 1; 0248 $category_map{$category}[$grandchild->getAttribute("id")] = $value; 0249 $group_closure{$category}{$value} = {}; 0250 $group_closure{$category}{$value}{$value} = 1; 0251 } 0252 } 0253 @category_names = sort keys %category_map; 0254 } 0255 0256 ################################################################ 0257 # Images 0258 ################################################################ 0259 0260 # Image options and values for uncompressed files. 0261 0262 sub loadUncompressedOptions(\%) { 0263 my ($image) = @_; 0264 my (%options); 0265 my ($node) = $$image{"__node"}; 0266 foreach my $child ($node->childNodes()) { 0267 next if !isNode($child, "options"); 0268 my (@members); 0269 foreach my $grandchild ($child->childNodes()) { 0270 next if !isNode($grandchild, "option"); 0271 my ($category) = $grandchild->getAttribute("name"); 0272 foreach my $greatgrandchild ($grandchild->childNodes()) { 0273 next if !isNode($greatgrandchild, "value"); 0274 my ($val) = $greatgrandchild->getAttribute("value"); 0275 push @members, $val; 0276 } 0277 map { $options{$category}{$_} = 1; } computeClosure($category, @members); 0278 } 0279 } 0280 return \%options; 0281 } 0282 0283 # Compressed XML files are simpler to parse; there's simply an attribute 0284 # for each category 0285 0286 sub loadCompressedOptions(\%) { 0287 my ($image) = @_; 0288 my (%options); 0289 foreach my $category (@category_names) { 0290 my ($members) = $$image{$category}; 0291 my (@members); 0292 if (defined $members && $members ne '') { 0293 my ($map) = $category_map{$category}; 0294 @members = map {$$map[$_]} split(/,/, $members); 0295 $options{$category} = {}; 0296 map { $options{$category}{$_} = 1; } computeClosure($category, @members); 0297 } 0298 } 0299 return \%options; 0300 } 0301 0302 sub loadOptions() { 0303 if (! defined $current_image{"options"}) { 0304 if ($current_image{"__compressed"}) { 0305 $current_image{"options"} = loadCompressedOptions(%current_image); 0306 } else { 0307 $current_image{"options"} = return loadUncompressedOptions(%current_image); 0308 } 0309 } 0310 } 0311 0312 sub hasAttribute($;$) { 0313 my ($category, $value) = @_; 0314 loadOptions(); 0315 if (! defined $category_map{$category}) { 0316 die "hasAttribute: no such category $category\n"; 0317 } 0318 if (defined $value) { 0319 return defined $current_image{"options"}{$category}{$value}; 0320 } else { 0321 return defined $current_image{"options"}{$category}; 0322 } 0323 } 0324 0325 sub hasKeyword(;$) { 0326 my ($value) = @_; 0327 return hasAttribute("Keywords", $value); 0328 } 0329 0330 sub hasPerson(;$) { 0331 my ($value) = @_; 0332 return hasAttribute("People", $value); 0333 } 0334 0335 sub hasPlace(;$) { 0336 my ($value) = @_; 0337 return hasAttribute("Places", $value); 0338 } 0339 0340 sub hasToken(;$) { 0341 my ($value) = @_; 0342 return hasAttribute("Token", $value); 0343 } 0344 0345 sub matchesAttribute($$) { 0346 my ($category, $value) = @_; 0347 loadOptions(); 0348 return grep(/$value/, keys %{$current_image{"options"}{$category}}) > 0; 0349 } 0350 0351 sub matchesKeyword($) { 0352 my ($value) = @_; 0353 return matchesAttribute("Keywords", $value); 0354 } 0355 0356 sub matchesPerson($) { 0357 my ($value) = @_; 0358 return matchesAttribute("People", $value); 0359 } 0360 0361 sub matchesPlace($) { 0362 my ($value) = @_; 0363 return matchesAttribute("Places", $value); 0364 } 0365 0366 sub matchesToken($) { 0367 my ($value) = @_; 0368 return matchesAttribute("Token", $value); 0369 } 0370 0371 sub makeVarcode($) { 0372 my ($identifier) = @_; 0373 my ($varname) = $identifier; 0374 $varname =~ s/ //g; 0375 return " my (\$$varname) = \$current_image{\"$identifier\"};"; 0376 } 0377 0378 sub loadImages($$) { 0379 my ($node, $compressed) = @_; 0380 my ($varcode) = join("\n", map {makeVarcode($_)} @standard_vars); 0381 if (! defined $kpa_filter) { 0382 $kpa_filter = '1'; 0383 } 0384 my ($code) = << 'EOF'; 0385 { 0386 no warnings "uninitialized"; 0387 no warnings "numeric"; 0388 my %items_found; 0389 my $matched_count = 0; 0390 foreach my $child ($node->childNodes()) { 0391 next if !isNode($child, "image"); 0392 %current_image=(); 0393 $current_image{"__node"} = $child; 0394 $current_image{"__compressed"} = $compressed; 0395 map { $current_image{$_->nodeName} = $_->value } $child->attributes(); 0396 EOF 0397 $code .= "$varcode\n"; 0398 $code .= << 'EOF'; 0399 # Restore any attributes defaulted in version 8 0400 $current_image{"angle"} = 0 if (! defined $current_image{"angle"}); 0401 $current_image{"endDate"} = $current_image{"startDate"} if (! defined $current_image{"endDate"}); 0402 if (! defined $current_image{"label"}) { 0403 my ($label) = $file; 0404 $label =~ s,^.*/(.*)\.[^.]*$,$1,; 0405 $current_image{"label"} = $label; 0406 } 0407 EOF 0408 $code .= << 'EOF'; 0409 my ($mediaType) = defined $videoLength ? "Video" : "Image"; 0410 EOF 0411 if ($opt_use_exif) { 0412 my ($exifdecl) = join("\n ", map {"my (\$$_);"} @exif_vars); 0413 my ($exifcode) = join("\n ", map {"\$$exif_vars[$_] = \$\$row[$_];"} (0..$#exif_vars)); 0414 $code .= << "EOF"; 0415 $exifdecl; 0416 if (my \$row = \$exif_data{"\$rootdir\$file"}) { 0417 $exifcode 0418 } 0419 EOF 0420 } 0421 $code .= << "EOF"; 0422 if ($kpa_filter) { 0423 \$matched_count++; 0424 EOF 0425 if ($opt_list_type) { 0426 if ($opt_list_type =~ /[Cc]at(egory)?:(.*)/) { 0427 $code .= << "EOF" 0428 loadOptions(); 0429 map { \$items_found{\$_}++; } keys \%{\$current_image{"options"}{"$2"}}; 0430 EOF 0431 } else { 0432 $code .= " \$items_found{\$$opt_list_type}++;\n"; 0433 } 0434 $code .= << 'EOF'; 0435 } 0436 } 0437 EOF 0438 if ($opt_print_count) { 0439 $code .= << 'EOF' 0440 print join("\n", map {sprintf("%7d %s", $items_found{$_}, $_);} sort keys %items_found), "\n"; 0441 printf("%7d Total\n", $matched_count); 0442 EOF 0443 } else { 0444 $code .= << 'EOF' 0445 print join("\n", sort keys %items_found), "\n"; 0446 EOF 0447 } 0448 } else { 0449 $code .= << 'EOF'; 0450 print "$file\n"; 0451 } 0452 } 0453 EOF 0454 } 0455 $code .= "}\n"; 0456 if ($opt_dryrun) { 0457 print STDERR $code; 0458 exit; 0459 } 0460 eval $code; 0461 if ($@) { 0462 my $known_vars = join("\n", sort map {my $a = $_; $a =~ s/ //g; " $a"} @all_vars); 0463 die "Filter $kpa_filter failed:\n\n$@\n"; 0464 0465 } 0466 } 0467 0468 ################################################################ 0469 # Member groups 0470 ################################################################ 0471 0472 sub loadMemberGroups($$) { 0473 my ($node, $compressed) = @_; 0474 foreach my $child ($node->childNodes()) { 0475 next if !isNode($child, "member"); 0476 my ($category) = $child->getAttribute("category"); 0477 my ($groupname) = $child->getAttribute("group-name"); 0478 if ($compressed) { 0479 my ($members) = $child->getAttribute("members"); 0480 if ($members) { 0481 my ($map) = $category_map{$category}; 0482 my (@members) = grep { ! $_ == 0 } split(/,/, $members); 0483 map { 0484 if (defined $$map[$_]) { 0485 $member_groups{$category}{$$map[$_]} = $groupname; 0486 } else { 0487 warn "Unknown keyword ID $_ in group $groupname\n"; 0488 } 0489 } @members; 0490 } 0491 } else { 0492 my ($member) = $child->getAttribute("member"); 0493 $member_groups{$category}{$member} = $groupname; 0494 } 0495 } 0496 foreach my $category (sort keys %member_groups) { 0497 foreach my $member (keys %{$member_groups{$category}}) { 0498 my ($parent) = $member_groups{$category}{$member}; 0499 # Break up any circular member groups 0500 my (%seen_parents); 0501 my ($parentid) = 1; 0502 do { 0503 if (defined $seen_parents{$parent}) { 0504 my (%reverse_parents) = reverse %seen_parents; 0505 warn "Circular member group found, members: " . join(" <= ", map { $reverse_parents{$_}} sort keys %reverse_parents) . "\n"; 0506 last; 0507 } 0508 $group_closure{$category}{$member}{$parent} = 1; 0509 $seen_parents{$parent} = $parentid++; 0510 } while (defined ($parent = $member_groups{$category}{$parent})); 0511 } 0512 } 0513 } 0514 0515 ################################################################ 0516 # Top level file loader 0517 ################################################################ 0518 0519 sub load_file($) { 0520 my ($file) = @_; 0521 my ($images); 0522 if ($opt_use_exif && @opt_exif_vars) { 0523 my (%exif_keys); 0524 my (%known_exif_keys); 0525 map {$known_exif_keys{$_} = 1;} @exif_vars; 0526 foreach my $exif (@opt_exif_vars) { 0527 map { $exif_keys{$_} = 1; } grep {defined $known_exif_keys{$_}}split(/[ ,]+/, $exif); 0528 } 0529 delete $exif_keys{"filename"}; 0530 @exif_vars = ("filename", keys %exif_keys); 0531 } 0532 0533 if ($opt_dryrun) { 0534 loadImages($images, 1); 0535 exit; 0536 } 0537 0538 my $doc = XML::LibXML->load_xml(location => $file); 0539 if (! $doc) { 0540 die "Can't open $file as a KPhotoAlbum database.\n"; 0541 } 0542 0543 my $kpa = ${$doc->findnodes('KPhotoAlbum')}[0]; 0544 if (! $kpa) { 0545 die "$file is not a KPhotoAlbum database.\n"; 0546 } elsif ($kpa->getAttribute("version") != 7 && 0547 $kpa->getAttribute("version") != 8) { 0548 die "kpa-list-images only works with version 7 and 8 files.\n"; 0549 } 0550 0551 $rootdir = $file; 0552 $rootdir =~ s,[^/]*$,,; 0553 if ($opt_use_exif) { 0554 my ($querystr) = 'SELECT ' . join(', ', "filename", @exif_vars) . " FROM exif"; 0555 0556 my $exif_db .= "${rootdir}exif-info.db"; 0557 0558 if (! -f $exif_db) { 0559 die "Expected EXIF database at $exif_db, but can't find it.\n" 0560 } 0561 my $EXIF_DB = DBI->connect("dbi:SQLite:$exif_db", undef, undef, { 0562 sqlite_open_flags => SQLITE_OPEN_READONLY, 0563 }); 0564 if (! defined $EXIF_DB) { 0565 die "Can't open EXIF database $exif_db.\n" 0566 } 0567 my $exif_query = $EXIF_DB->prepare($querystr); 0568 $exif_query->execute(); 0569 # This is measured to be considerably (about 15% total, 0570 # considerably more for EXIF database alone) than individual 0571 # lookups on a prepared query. 0572 while (my @row = $exif_query->fetchrow_array) { 0573 my ($filename) = shift @row; 0574 $exif_data{$filename} = \@row; 0575 } 0576 } 0577 my ($compressed) = int $kpa->getAttribute("compressed"); 0578 0579 foreach my $topcn ($kpa->childNodes()) { 0580 if (isNode($topcn, "categories")) { 0581 loadCategories($topcn, $compressed); 0582 if (! $opt_print_count && $opt_list_type && $opt_list_type =~ /^cat(egory)?:(.*)/ && !defined $kpa_filter) { 0583 $opt_list_type = $2; 0584 if (! defined $category_map{$opt_list_type}) { 0585 die "Can't list category $opt_list_type: no such category\n"; 0586 } 0587 my $stuff = join("\n", sort grep(defined $_ && $_ ne '', @{$category_map{$opt_list_type}})); 0588 if ($stuff) { 0589 print "$stuff\n"; 0590 } 0591 exit; 0592 } 0593 } elsif (isNode($topcn, "images")) { 0594 $images = $topcn; 0595 } elsif (isNode($topcn, "member-groups")) { 0596 loadMemberGroups($topcn, $compressed); 0597 } elsif (isNode($topcn, "blocklist")) { 0598 } elsif ($topcn->nodeType() == 1) { 0599 warn "Found unknown node " . $topcn->nodeName() . "\n"; 0600 } 0601 } 0602 # Load images last so that we can stream them through. 0603 loadImages($images, $compressed); 0604 } 0605 0606 sub get_standard_kpa_index_file() { 0607 my ($kpa_config) = $ENV{"HOME"} . "/.config/kphotoalbumrc"; 0608 open KPACONFIG, "<", "$kpa_config" or return ""; 0609 my ($imageDBFile) = ""; 0610 while (<KPACONFIG>) { 0611 if (/^imageDBFile=(.*)$/) { 0612 $imageDBFile = $1; 0613 last; 0614 } 0615 } 0616 close KPACONFIG; 0617 return $imageDBFile; 0618 } 0619 0620 my ($do_help); 0621 0622 my (%options) = ("f=s" => \$opt_filter_file, 0623 "filter-file=s" => \$opt_filter_file, 0624 "d=s" => \$opt_index_file, 0625 "db=s" => \$opt_index_file, 0626 "l=s" => \$opt_list_type, 0627 "list=s" => \$opt_list_type, 0628 "list-values=s" => \$opt_list_type, 0629 "exif!" => \$opt_use_exif, 0630 "exif-vars=s" => \@opt_exif_vars, 0631 "c" => \$opt_print_count, 0632 "count!" => \$opt_print_count, 0633 "dryrun!" => \$opt_dryrun, 0634 "dry-run!" => \$opt_dryrun, 0635 "h" => \$do_help, 0636 "help" => \$do_help, 0637 ); 0638 0639 Getopt::Long::Configure("bundling", "require_order"); 0640 if (!Getopt::Long::GetOptions(%options) || $do_help) { 0641 usage(); 0642 } 0643 0644 if ($opt_filter_file) { 0645 if (!($opt_filter_file =~ m,/,)) { 0646 $opt_filter_file = "./$opt_filter_file"; 0647 } 0648 my $retval = do $opt_filter_file; 0649 if ($@) { 0650 die "Cannot process filter file $opt_filter_file: $@\n"; 0651 } elsif (! defined $retval) { 0652 die "Cannot read filter file $opt_filter_file: $!\n"; 0653 } elsif (! defined $kpa_filter) { 0654 die "Filter file $opt_filter_file does not define \$kpa_filter.\n" 0655 } 0656 } elsif ($#ARGV >= 0) { 0657 $kpa_filter = $ARGV[0]; 0658 } 0659 if (! defined $opt_use_exif) { 0660 if ((defined $kpa_filter && $kpa_filter =~ /Exif_/) || 0661 (defined $opt_list_type && $opt_list_type =~ /Exif_/)) { 0662 $opt_use_exif = 1; 0663 } else { 0664 $opt_use_exif = 0; 0665 } 0666 } 0667 my ($index_file); 0668 if ($opt_index_file) { 0669 $index_file = $opt_index_file; 0670 } else { 0671 $index_file = get_standard_kpa_index_file(); 0672 } 0673 load_file($index_file);