File indexing completed on 2024-04-28 15:39:51

0001 #!/usr/bin/perl
0002 # SPDX-FileCopyrightText: 2020 Robert Krawitz <rlk@alum.mit.edu>
0003 # SPDX-License-Identifier: GPL-2.0-or-later
0004 
0005 # CLI to edit/manage KPhotoAlbum index files
0006 
0007 use strict;
0008 use warnings;
0009 
0010 use XML::LibXML;
0011 use Getopt::Long;
0012 use Carp("cluck");
0013 
0014 my ($kpa_attributes);
0015 
0016 my (%categories);
0017 # Map between category name and ID for compressed files
0018 my (%category_map);
0019 my (%blocklist);
0020 # This is stored as {category}{member}{groupname}, as members can only be
0021 # members of one group.  Weird, huh?  But this way, when we overlay the new
0022 # $member_groups{category}{member}{groupname} = is_referenced
0023 my (%member_groups);
0024 # $group_members{category}{groupname}{member} = is_referenced
0025 my (%group_members);
0026 # $category_images{category}{member} = is_referenced
0027 my (%categories_used);
0028 # $orphans{category}{member} = 1
0029 my (%orphans);
0030 # $category_remappings{$category}{$item}{"old"|"new"}
0031 # If there's only one category 0, we can safely remap it if it shows
0032 # up in a member group.  If there are more than one (indicated by a value
0033 # of -1 here), there's nothing we can do.
0034 my (%category_remappings);
0035 # $category_remappings{$id} = new_id
0036 my (%category_id_remap);
0037 
0038 my ($compress_output);
0039 
0040 # For add-item command
0041 my (@items_to_add) = ();
0042 my ($category_of_items_to_add) = '';
0043 my ($parent_of_items_to_add) = '';
0044 
0045 # For tag command
0046 my (@files_to_tag) = ();
0047 my ($tag_instead_of_untag) = 1;
0048 my ($category_of_items_to_tag) = '';
0049 my (@items_to_tag) = ();
0050 my (%map_items_to_tag) = ();
0051 
0052 # Order matters here; sort by date and then filename
0053 my (%images_seq);
0054 # But we also need fast access!
0055 my (%images);
0056 my (@image_list);
0057 my (@stacks_byimage);
0058 my (@stacks_byid);
0059 my (%stacks_remap);
0060 my (%stacks_to_remove);
0061 # Ordering within stacks does matter (particularly for the first image
0062 # on the stack).
0063 my (%stack_order);
0064 my ($max_stack_pass1) = 0;
0065 my ($opt_reject_new_images) = 0;
0066 my ($opt_keep_blocked_images) = 0;
0067 my ($opt_no_output) = 0;
0068 my ($opt_clean_unused_labels) = 0;
0069 my ($opt_replace_categories) = 0;
0070 my ($opt_force_compressed_output) = 0;
0071 my ($opt_force_uncompressed_output) = 0;
0072 my ($last_pass) = 1;
0073 my (%warned_idx0) = ();
0074 my ($opt_output_version) = 0;
0075 my ($output_version) = 0;
0076 
0077 sub usage() {
0078     my ($usage) = << 'FINIS';
0079 Usage: kpa-util [options] command index_file [command_args]
0080 
0081     Commands:
0082     clean
0083         Clean file using options below
0084     merge file1 file2
0085         Merge file2 into file1
0086     add-item category [--parent parent_item] item...
0087         Add the specified items to the specified category.
0088         Category must already exist.
0089         If --parent is specified, make them children
0090         of the specified item (must exist).
0091     tag category iten
0092         Apply the specified tag to the list of files
0093         from stdin
0094 
0095     If two files are provided, merge the two files and write the
0096     result to stdout.  file1 is the up-to-date file containing
0097     categories you wish to merge into file2; the result is printed
0098     to stdout.
0099 
0100     Keywords and other categories are combined, such that the
0101     result contains all information from both files.  Stacks are
0102     also combined and merged where appropriate.
0103 
0104     Image entries present in file1 are *not* copied to file2; a
0105     warning is printed.
0106 
0107     If only one file is provided, it is processed in the same way.
0108     This form can be used to clean up an index.xml file.
0109 
0110     kpa-merge currently handles version 7 and 8 files and by default
0111     writes the version of the first input file.
0112 
0113     kpa-merge can write either compressed or uncompressed output;
0114     by default it uses the compression of the first input file.
0115 
0116     Options:
0117 
0118     -R|--reject-new-images      Don't load new images from
0119                     the first file
0120     -B|--keep-blocked-images    Unblock blocked images in
0121                     the second file
0122     -n|--no-output          Don't actually write the
0123                     result (for testing purposes)
0124     -r|--replace-categories     Replace all categories from
0125                     images in the second file
0126                     with corresponding data from
0127                     the first (rather than merging)
0128     -c|--clean-unused-labels    Purge unused labels (useful
0129                     for one file usage)
0130     -C|--compressed-output      Force compressed output
0131     -N|--no-compressed-output   Force uncompressed output
0132     -V|--version            Force specified output
0133                     version (7 or 8).
0134 FINIS
0135     print STDERR $usage;
0136     exit(1);
0137 }
0138 
0139 ################################################################
0140 ################################################################
0141 # Load files ###################################################
0142 ################################################################
0143 ################################################################
0144 
0145 
0146 ################################################################
0147 # Utilities
0148 ################################################################
0149 
0150 sub getAttributes($) {
0151     my ($node) = @_;
0152     return $node->findnodes("./@*");
0153 }
0154 
0155 sub getAttribute(\@$) {
0156     my ($attributes, $name) = @_;
0157     foreach my $attr (@$attributes) {
0158     if ($name eq $attr->nodeName) {
0159         return $attr;
0160     }
0161     }
0162     return undef;
0163 }
0164 
0165 sub setAttribute(\@$$) {
0166     my ($attributes, $name, $value) = @_;
0167     my ($attr) = getAttribute(@$attributes, $name);
0168     if ($attr) {
0169     $attr->setValue($value);
0170     } else {
0171     $attr = XML::LibXML::Attr->new($name, $value);
0172     push @$attributes, $attr;
0173     }
0174 }
0175 
0176 sub setDefaultAttribute(\@$$) {
0177     my ($attributes, $name, $value) = @_;
0178     if (! getAttribute(@$attributes, $name)) {
0179     my $attr = XML::LibXML::Attr->new($name, $value);
0180     push @$attributes, $attr;
0181     }
0182 }
0183 
0184 sub isNode($$) {
0185     my ($node, $name) = @_;
0186     return ($node->nodeType() == 1 && lc $node->nodeName() eq $name);
0187 }
0188 
0189 ################################################################
0190 # Categories
0191 ################################################################
0192 
0193 sub loadCategory($$$) {
0194     my ($node, $pass, $compressed) = @_;
0195     my ($name) = $node->getAttribute("name");
0196     if (! defined $categories{"members"}{$name}) {
0197     $categories{"members"}{$name} = {};
0198     push @{$categories{"members_list"}}, $name;
0199     }
0200     $orphans{$name} = {};
0201     $category_remappings{$name} = {};
0202     $category_map{$name} = [];
0203     $category_id_remap{$name} = {};
0204     my ($category) = $categories{"members"}{$name};
0205     if ($pass == $last_pass) {
0206     $$category{"attributes"} = getAttributes($node);
0207     }
0208     $$category{"members"} = {} if (! defined $$category{"members"});
0209     my (@members);
0210     my $children = $node->childNodes();
0211     my ($category_max_id) = 0;
0212     my %items_to_remap;
0213     foreach my $i (1..$children->size()) {
0214     my ($child) = $children->get_node($i);
0215     next if $node->nodeType() != 1 || !isNode($child, "value");
0216     my ($value) = $child->getAttribute("value");
0217     my ($id) = $child->getAttribute("id");
0218     if ($id == 0) {
0219         print STDERR "Warning: $name:$value has id 0, will remap\n";
0220         $items_to_remap{$value} = $id;
0221     } elsif (defined $category_map{$name}[$id]) {
0222         print STDERR "Warning: duplicate ID for $name:$id!\n";
0223         $items_to_remap{$value} = $id;
0224     }
0225     $$category{"members"}{$value} = 1;
0226     $category_map{$name}[$id] = $value;
0227     if ($id > $category_max_id) {
0228         $category_max_id = $id;
0229     }
0230     }
0231     foreach my $remap (keys %items_to_remap) {
0232     $category_max_id++;
0233     print STDERR "Remapping $remap from $items_to_remap{$remap} to $category_max_id\n";
0234     $category_map{$remap}{$category_max_id} = $category_max_id;
0235     # Uncompressed databases don't have any problem with remapping,
0236     # since items are stored uncompressed.
0237     if ($compressed) {
0238         my ($id) = $items_to_remap{$remap};
0239         $category_remappings{$name}{$remap}{"old"} = $id;
0240         $category_remappings{$name}{$remap}{"new"} = $category_max_id;
0241         if (defined $category_id_remap{$id}) {
0242         print STDERR "*** Non-unique category remap for $id.\n";
0243         print STDERR "*** Will remove member-group mappings for this id!";
0244         $category_id_remap{$id} = -1;
0245         } else {
0246         $category_id_remap{$id} = $category_max_id;
0247         }
0248     }
0249     }
0250     foreach my $item_to_tag (@items_to_tag) {
0251     if ($category_of_items_to_tag eq $name && $tag_instead_of_untag && 
0252         ! defined $$category{'members'}{$item_to_tag}) {
0253         die "Error: attempting to tag files $category_of_items_to_tag/$item_to_tag, but item $category_of_items_to_tag/$item_to_tag does not exist.\n";
0254     }
0255     }
0256     if ($category_of_items_to_add eq $name) {
0257     if ($parent_of_items_to_add ne '' && ! defined $$category{'members'}{$parent_of_items_to_add}) {
0258         die "Requested parent $parent_of_items_to_add does not exist.\n";
0259     }
0260     foreach my $item (@items_to_add) {
0261         if (defined $$category{'members'}{$item}) {
0262         warn "$name/$item already exists, skipping.\n";
0263         } else {
0264         $$category{'members'}{$item} = 1;
0265         $category_map{$name}[++$category_max_id] = $item;
0266         }
0267     }
0268     }
0269 }
0270 
0271 sub loadCategories($$$) {
0272     my ($node, $pass, $compressed) = @_;
0273     my $children = $node->childNodes();
0274     if ($pass == $last_pass) {
0275     $categories{"attributes"} = getAttributes($node);
0276     }
0277     $categories{"members"} = {} if (! defined $categories{"members"});
0278     $categories{"members_list"} = () if (! defined $categories{"members_list"});
0279     # Category maps (mapping between name and ID in a compressed file)
0280     # will differ for each file.
0281     %category_map = ();
0282     foreach my $i (1..$children->size()) {
0283     my ($child) = $children->get_node($i);
0284     next if $node->nodeType() != 1 || !isNode($child, "category");
0285     loadCategory($child, $pass, $compressed);
0286     }
0287     if ($category_of_items_to_add ne '' &&
0288     ! defined $categories{'members'}{$category_of_items_to_add}) {
0289     die "Error: atempting to add items to category $category_of_items_to_add that is not present\n";
0290     }
0291     if ($category_of_items_to_tag ne '' && $tag_instead_of_untag &&
0292     ! defined $categories{'members'}{$category_of_items_to_tag}) {
0293     die "Error: attempting to tag files $category_of_items_to_tag, but category $category_of_items_to_tag does not exist.\n";
0294     }
0295     foreach my $item_to_tag (@items_to_tag) {
0296     if ($category_of_items_to_tag ne '' && $tag_instead_of_untag &&
0297         ! defined $categories{'members'}{$category_of_items_to_tag}{'members'}{$item_to_tag}) {
0298         die "Error: attempting to tag files $category_of_items_to_tag/$item_to_tag, but item $category_of_items_to_tag/$item_to_tag does not exist.\n";
0299     }
0300     }
0301 }
0302 
0303 ################################################################
0304 # Images
0305 ################################################################
0306 
0307 # Image options and values for uncompressed files.
0308 
0309 sub loadOptionValues($$$) {
0310     my ($node, $pass, $file) = @_;
0311     my ($name) = $node->getAttribute("name");
0312     $images{$file}{"options"}{$name} = {} if ($opt_replace_categories || ! defined $images{$file}{"options"}{$name});
0313     my $children = $node->childNodes();
0314     foreach my $i (1..$children->size()) {
0315     my ($child) = $children->get_node($i);
0316     next if $node->nodeType() != 1 || !isNode($child, "value");
0317     my ($val) = $child->getAttribute("value");
0318     if ($tag_instead_of_untag ||
0319         $name ne $category_of_items_to_tag ||
0320         ! defined $map_items_to_tag{$name}) {
0321         $images{$file}{"options"}{$name}{$val} = 1;
0322         $categories_used{$pass}{$name}{$val} = 1;
0323     }
0324     }
0325 }
0326 
0327 sub loadOptionTypes($$$) {
0328     my ($node, $pass, $file) = @_;
0329     my $children = $node->childNodes();
0330     foreach my $i (1..$children->size()) {
0331     my ($child) = $children->get_node($i);
0332     next if $node->nodeType() != 1 || !isNode($child, "option");
0333     $images{$file}{"options"} = {} if ($opt_replace_categories || ! defined $images{$file}{"options"});
0334     loadOptionValues($child, $pass, $file);
0335     }
0336 }
0337 
0338 sub loadUncompressedOptions($$$) {
0339     my ($node, $pass, $file) = @_;
0340     my $children = $node->childNodes();
0341     foreach my $i (1..$children->size()) {
0342     my ($child) = $children->get_node($i);
0343     next if $node->nodeType() != 1 || !isNode($child, "options");
0344     loadOptionTypes($child, $pass, $file);
0345     }
0346 }
0347 
0348 # Compressed XML files are simpler to parse; there's only one node for each
0349 # category.
0350 
0351 sub loadCompressedOptions($$$) {
0352     my ($node, $pass, $file) = @_;
0353     foreach my $category (sort keys %category_map) {
0354     my ($members) = $node->getAttribute($category);
0355     if (defined $members && $members ne '') {
0356         my ($map) = $category_map{$category};
0357         my (@members);
0358         my (@old_members) = split(/,/, $members);
0359         foreach my $id (@old_members) {
0360         if (defined $category_id_remap{$id}) {
0361             if ($category_id_remap{$id} > 0) {
0362             push @members, $category_id_remap{$id};
0363             } else {
0364             print STDERR "*** Cannot remap non-unique id $id on file $file\n";
0365             }
0366         } elsif ($id <= 0) {
0367             print STDERR "*** Invalid option ID 0 found category '$category' in $file; omitting\n";
0368         } else {
0369             push @members, $id;
0370         }
0371         }
0372         $images{$file}{"options"} = {} if ($opt_replace_categories || ! defined $images{$file}{"options"});
0373         $images{$file}{"options"}{$category} = {} if (! defined $images{$file}{"options"}{$category});
0374         map {
0375         if ($tag_instead_of_untag ||
0376             $category ne $category_of_items_to_tag ||
0377             ! defined $map_items_to_tag{$$map[$_]}) {
0378             $images{$file}{"options"}{$category}{$$map[$_]} = 1;
0379             $categories_used{$pass}{$category}{$$map[$_]} = 1;
0380         }
0381         } @members;
0382     }
0383     }
0384 }
0385 
0386 sub loadImage($$$) {
0387     my ($node, $pass, $compressed) = @_;
0388     my ($file) = $node->getAttribute("file");
0389     my ($stack) = $node->getAttribute("stackId");
0390     my ($stack_order) = $node->getAttribute("stackOrder");
0391     my ($image_already_defined) = defined $images{$file};
0392     $node->removeAttribute("stackId");
0393     $node->removeAttribute("stackOrder");
0394     if ($output_version == 7) {
0395     # Restore any attributes defaulted in version 8
0396     if (! $node->hasAttribute("angle")) {
0397         $node->setAttribute("angle", 0);
0398     }
0399     if (! $node->hasAttribute("endDate")) {
0400         $node->setAttribute("endDate", $node->getAttribute("startDate"));
0401     }
0402     if (! $node->hasAttribute("label")) {
0403         my ($label) = $file;
0404         $label =~ s,^.*/,,;
0405         $label =~ s/\.[^.]*$//;
0406         $node->setAttribute("label", $label);
0407     }
0408     } else {
0409     if ($node->hasAttribute("angle") &&
0410         $node->getAttribute("angle") eq "0") {
0411         $node->removeAttribute("angle");
0412     }
0413     if ($node->hasAttribute("endDate") &&
0414         $node->getAttribute("endDate") eq $node->getAttribute("startDate")) {
0415         $node->removeAttribute("endDate");
0416     }
0417     if ($node->hasAttribute("label")) {
0418         my ($label) = $file;
0419         $label =~ s,^.*/,,;
0420         $label =~ s/\.[^.]*$//;
0421         if ($node->getAttribute("label") eq $label) {
0422         $node->removeAttribute("label");
0423         }
0424     }
0425     }
0426 
0427     if (!defined $images{$file}) {
0428     # Always load images from the first file.  We might or might not
0429     # keep images only found in the second file depending upon what
0430     # the user requested.
0431     if ($pass > 0) {
0432         if ($blocklist{$file}) {
0433         if ($opt_keep_blocked_images) {
0434             delete $blocklist{$file};
0435         } else {
0436             warn "Skipping $file in destination blocklist\n";
0437             return;
0438         }
0439         } elsif ($opt_reject_new_images) {
0440         warn "Skipping image $file after initial load\n";
0441         return;
0442         }
0443     }
0444     $images{$file} = {};
0445     $images{$file}{"attributes"} = getAttributes($node);
0446     } else {
0447     # We want to use the pass1 attributes where available.
0448     # But special case width and height; we want to use a value that's
0449     # not -1.
0450     my (@attributes) = $node->getAttributes();
0451     my ($nattrs) = $images{$file}{"attributes"};
0452     foreach my $attribute (@attributes) {
0453         my ($name) = $attribute->nodeName;
0454         my ($value) = $attribute->value;
0455 
0456         if (($name eq "width" || $name eq "height")) {
0457         my ($attr1) = getAttribute(@$nattrs, $name);
0458         if ($value ne "-1" && (! $attr1 || $attr1->value eq "-1")) {
0459             warn "Fixing $name on $file (" . $attr1->value . " => $value)\n";
0460             $attr1->setValue($value);
0461         }
0462         } else {
0463         setDefaultAttribute(@$nattrs, $name, $value);
0464         }
0465     }
0466     }
0467     if ($stack) {
0468     $stacks_byimage[$pass]{$file} = $stack;
0469     $stacks_byid[$pass]{$stack} = [] if (! defined $stacks_byid[$pass]{$stack});
0470     if (defined $stacks_byid[$pass]{$stack}[$stack_order - 1]) {
0471         warn "Duplicate stack/order ($stack, $stack_order) found for $file and $stacks_byid[$pass]{$stack}[$stack_order - 1], appending.\n";
0472         push @{$stacks_byid[$pass]{$stack}}, $file;
0473     } else {
0474         $stacks_byid[$pass]{$stack}[$stack_order - 1] = $file;
0475     }
0476     if ($pass == $last_pass && $stack > $max_stack_pass1) {
0477         $max_stack_pass1 = $stack;
0478     }
0479     }
0480     my ($start_date) = $node->getAttribute("startDate");
0481     my ($sort_key) = "$start_date$file";
0482     $images_seq{$file} = $sort_key;
0483     if ($opt_replace_categories) {
0484     $images{$file}{"options"} = {};
0485     }
0486     if ($compressed) {
0487     loadCompressedOptions($node, $pass, $file);
0488     } else {
0489     loadUncompressedOptions($node, $pass, $file);
0490     }
0491 }
0492 
0493 
0494 sub loadImages($$$) {
0495     my ($node, $pass, $compressed) = @_;
0496     my $children = $node->childNodes();
0497     $stacks_byimage[$pass] = {};
0498     $stacks_byid[$pass] = {};
0499     foreach my $i (1..$children->size()) {
0500     my ($child) = $children->get_node($i);
0501     next if $node->nodeType() != 1 || !isNode($child, "image");
0502     loadImage($child, $pass, $compressed);
0503     }
0504     if ($category_of_items_to_tag ne '' && $tag_instead_of_untag) {
0505     my ($did_tag_images) = 0;
0506     foreach my $file (@files_to_tag) {
0507         if (! defined $images{$file}) {
0508         warn "Image $file does not exist, not tagging.\n";
0509         next;
0510         }
0511         $did_tag_images = 1;
0512         $images{$file}{"options"} = {} if (! defined $images{$file}{"options"});
0513         $images{$file}{"options"}{$category_of_items_to_tag} = {} if (! defined $images{$file}{"options"}{$category_of_items_to_tag});
0514         map { $images{$file}{"options"}{$category_of_items_to_tag}{$_} = 1; } @items_to_tag;
0515     }
0516     if ($did_tag_images) {
0517         map { $categories_used{$pass}{$category_of_items_to_tag}{$_} = 1; } @items_to_tag;
0518     }
0519     }
0520 }
0521 
0522 ################################################################
0523 # Block list
0524 ################################################################
0525 
0526 sub loadBlocklist($$$) {
0527     my ($node, $pass, $compressed) = @_;
0528     my $children = $node->childNodes();
0529     foreach my $i (1..$children->size()) {
0530     my ($child) = $children->get_node($i);
0531     next if $node->nodeType() != 1 || !isNode($child, "block");
0532     $blocklist{$child->getAttribute("file")} = 1;
0533     }
0534 }
0535 
0536 ################################################################
0537 # Member groups
0538 ################################################################
0539 
0540 sub loadMemberGroup($$) {
0541     my ($node, $compressed) = @_;
0542     my ($category) = $node->getAttribute("category");
0543     my ($groupname) = $node->getAttribute("group-name");
0544     if (! defined $categories{"members"}{$category}{"members"}{$groupname}) {
0545     if (! defined $orphans{$category}) {
0546         $orphans{$category}{$groupname} = 1;
0547     }
0548     if ($compressed && $node->hasAttribute("members") && $node->getAttribute("members") ne "") {
0549         my $suffix = (! $node->getAttribute("members") =~ /,/) ? 'ren' : '';
0550         printf STDERR "WARNING: Orphan group $category:$groupname has child$suffix %s!\n", $node->getAttribute("members");
0551     } else {
0552         print STDERR "Removing orphaned member-group $category:$groupname\n";
0553         return;
0554     }
0555     }
0556     $member_groups{$category} = {} if (! defined $member_groups{$category});
0557     $group_members{$category} = {} if (! defined $group_members{$category});
0558     $group_members{$category}{$groupname} = {} if (! defined $group_members{$category}{$groupname});
0559     if ($compressed) {
0560     my ($members) = $node->getAttribute("members");
0561     if ($members) {
0562         my ($map) = $category_map{$category};
0563         my (@old_members) = grep { ! $_ == 0 } split(/,/, $members);
0564         my (@members);
0565         foreach my $id (@old_members) {
0566         if (defined $category_id_remap{$id}) {
0567             if ($category_id_remap{$id} > 0) {
0568             push @members, $category_id_remap{$id};
0569             } else {
0570             print STDERR "*** Cannot remap non-unique id $id for member-group $category:$groupname\n";
0571             }
0572         } else {
0573             push @members, $id;
0574         }
0575         }
0576         map {
0577         $member_groups{$category}{$$map[$_]} = {} if (! defined $member_groups{$category}{$$map[$_]});
0578         $member_groups{$category}{$$map[$_]}{$groupname} = 1;
0579         $group_members{$category}{$groupname}{$$map[$_]} = 1;
0580         } @members;
0581     }
0582     } else {
0583     my ($member) = $node->getAttribute("member");
0584     $member_groups{$category}{$member} = {} if (! defined $member_groups{$category}{$member});
0585     $member_groups{$category}{$member}{$groupname} = 1;
0586     $group_members{$category}{$groupname}{$member} = 1;
0587     }
0588 }
0589 
0590 sub loadMemberGroups($$$) {
0591     my ($node, $pass, $compressed) = @_;
0592     my $children = $node->childNodes();
0593     foreach my $i (1..$children->size()) {
0594     my ($child) = $children->get_node($i);
0595     next if $node->nodeType() != 1 || !isNode($child, "member");
0596     loadMemberGroup($child, $compressed);
0597     }
0598     if ($category_of_items_to_add ne '' && $parent_of_items_to_add ne '') {
0599     foreach my $item (@items_to_add) {
0600         $member_groups{$category_of_items_to_add}{$item}{$parent_of_items_to_add} = 1;
0601         $group_members{$category_of_items_to_add}{$parent_of_items_to_add}{$item} = 1;
0602     }
0603     }
0604 }
0605 
0606 ################################################################
0607 # Top level file loader
0608 ################################################################
0609 
0610 sub load_file($$) {
0611     my ($file, $pass) = @_;
0612     print STDERR "Loading $file...";
0613     my $doc = XML::LibXML->load_xml(location => $file);
0614     if (! $doc) {
0615     usage();
0616     }
0617 
0618     my $kpa = ${$doc->findnodes('KPhotoAlbum')}[0];
0619 
0620     if ($pass == 0) {
0621     $kpa_attributes = $kpa->findnodes("./@*");
0622     }
0623 
0624     if ($pass == 0) {
0625     if ($opt_output_version != 0 &&
0626         $opt_output_version != 7 &&
0627         $opt_output_version != 8) {
0628         print STDERR "Output version must be 7 or 8";
0629         usage();
0630     }
0631     }
0632     if ($kpa->getAttribute("version") != 7 &&
0633     $kpa->getAttribute("version") != 8) {
0634     die "kpa-merge only works with version 7 and 8 files\n";
0635     }
0636     if ($pass == 0) {
0637     if ($opt_output_version) {
0638         $output_version = $opt_output_version;
0639     } else {
0640         $output_version = $kpa->getAttribute("version");
0641     }
0642     }
0643     # Always write a version 8 file right now.
0644     $kpa->setAttribute("version", $output_version);
0645 
0646     my ($compressed) = int $kpa->getAttribute("compressed");
0647     if ($pass == 0) {
0648     if ($opt_force_compressed_output) {
0649         $compress_output = 1;
0650     } elsif ($opt_force_uncompressed_output) {
0651         $compress_output = 0;
0652     } else {
0653         $compress_output = $compressed;
0654     }
0655     }
0656 
0657     my $children = $kpa->childNodes();
0658 
0659     foreach my $i (1..$children->size()) {
0660     my ($topcn) = $children->get_node($i);
0661     if (isNode($topcn, "categories")) {
0662         print STDERR "categories...";
0663         loadCategories($topcn, $pass, $compressed);
0664     } elsif (isNode($topcn, "images")) {
0665         print STDERR "images...";
0666         loadImages($topcn, $pass, $compressed);
0667     } elsif (isNode($topcn, "blocklist")) {
0668         print STDERR "blocklist...";
0669         loadBlocklist($topcn, $pass, $compressed);
0670     } elsif (isNode($topcn, "member-groups")) {
0671         print STDERR "member-groups...";
0672         loadMemberGroups($topcn, $pass, $compressed);
0673     } elsif ($topcn->nodeType() == 1) {
0674         warn "Found unknown node " . $topcn->nodeName() . "\n";
0675     }
0676     }
0677     if (keys %warned_idx0) {
0678     print STDERR "\n";
0679     foreach my $k (sort keys %warned_idx0) {
0680         warn "Found $warned_idx0{$k} files with index 0 ($k $category_map{$k}[0])\n";
0681     }
0682     }
0683     print STDERR "done.\n";
0684 }
0685 
0686 ################################################################
0687 ################################################################
0688 # Reconcile images #############################################
0689 ################################################################
0690 ################################################################
0691 
0692 # Reconcile stack IDs between the source and the merge files.
0693 # The merge file is considered to be authoritative.
0694 sub reconcile_stacks() {
0695     # We only need to look at stacks in the first file.  If a stack exists
0696     # in the second file but not the first, it won't be disturbed by this,
0697     # as intended.
0698     print STDERR "image stacks...";
0699     foreach my $file (sort keys %{$stacks_byimage[0]}) {
0700     if (! defined $stacks_byimage[1]{$file}) {
0701         my ($old_stack) = $stacks_byimage[0]{$file};
0702         my ($by_id_0) = $stacks_byid[0]{$old_stack};
0703         my ($found) = -1;
0704         foreach my $ofile (@$by_id_0) {
0705         # Gaps in stack indices
0706         next if (! defined $ofile);
0707         if (defined $stacks_byimage[1]{$ofile}) {
0708             if ($found == -1) {
0709             $found = $stacks_byimage[1]{$ofile};
0710             } elsif ($found != $stacks_byimage[1]{$ofile}) {
0711             # If an image is in a different stack in one file
0712             # vs the other, there's not much we can do.
0713             warn "INCONSISTENT STACKS for $file ($found, $stacks_byimage[1]{$ofile})!\n";
0714             }
0715         }
0716         }
0717         if ($found == -1) {
0718         my ($new_stack) = ++$max_stack_pass1;
0719         # Fix up all of the files in the renumbered stack
0720         map { $stacks_byimage[1]{$_} = $new_stack; } (grep { defined $_ } @$by_id_0);
0721         $stacks_byid[1]{$new_stack} = $stacks_byid[0]{$old_stack};
0722         } else {
0723         $stacks_byimage[1]{$file} = $found;
0724         push @{$stacks_byid[1]{$found}}, $file;
0725         }
0726     }
0727     }
0728     # Now, set the stack order for each image
0729     my ($new_stackid) = 1;
0730     foreach my $stackid (sort keys %{$stacks_byid[1]}) {
0731     my ($stack) = $stacks_byid[1]{$stackid};
0732     my ($order) = 1;
0733     foreach my $file (@$stack) {
0734         if (defined $file) {
0735         $stack_order{$file} = $order++;
0736         }
0737     }
0738     if ($order <= 2) {
0739         $stacks_to_remove{$stackid} = 1;
0740     } else {
0741         $stacks_remap{$stackid} = $new_stackid++;
0742     }
0743     }
0744 }
0745 
0746 sub reconcile_images() {
0747     # Now, stitch the two image sequences together.
0748     print STDERR "image sequences...";
0749     my (%invert_images) = reverse %images_seq;
0750     @image_list = map { $invert_images{$_}} sort keys %invert_images;
0751     print STDERR "done.\n";
0752 }
0753 
0754 # Find labels that are unreferenced by anything and purge them.  This
0755 # may be an iterative process, since labels may be related to other
0756 # labels by way of member groups; removing a label may result in
0757 # another label losing all of its references.  So we keep going until
0758 # we've found no further unreferenced labels.
0759 
0760 sub clean_unused_labels_pass(\%) {
0761     my ($categories_in_use) = @_;
0762     my ($removed_something) = 0;
0763 
0764     foreach my $category (sort keys %{$categories{"members"}}) {
0765     next if $category eq "Tokens";
0766     print STDERR "  Category $category...\n";
0767     my ($members) = $categories{"members"}{$category}{"members"};
0768     # "Member" here is the group name
0769     foreach my $member (sort keys %$members) {
0770         next if defined $$categories_in_use{$category}{$member};
0771         if (! defined $group_members{$category}{$member} ||
0772         ! scalar %{$group_members{$category}{$member}}) {
0773         # This is not used by any images and is not the name of a group.
0774         # Remove from categories
0775         print STDERR "   Purging $member\n";
0776         delete $$members{$member};
0777         # Remove this group membership
0778         my (@groups) = keys %{$member_groups{$category}{$member}};
0779         foreach my $group (@groups) {
0780             delete $member_groups{$category}{$member}{$group};
0781             if (scalar %{$member_groups{$category}{$member}} == 0) {
0782             print STDERR "      Deleting empty member-group $member\n";
0783             delete $member_groups{$category}{$member};
0784             }
0785             if (defined $group_members{$category}{$member}) {
0786             if (scalar %{$group_members{$category}{$member}} > 0) {
0787                 print STDERR "      WARNING: $member still has sub-members! Not deleting.\n";
0788             } else {
0789                 print STDERR "      Deleting empty member-group $member\n";
0790                 delete $group_members{$category}{$member};
0791             }
0792             }
0793             # And remove it from any group it's a member of.
0794             if (defined $group) {
0795             print STDERR "    Removing $member from\n";
0796             print STDERR "             $group\n";
0797             delete $group_members{$category}{$group}{$member};
0798             # Prune any groups in which this was the last member,
0799             # which may allow us to do more work in the next pass.
0800             if (scalar %{$group_members{$category}{$group}} == 0) {
0801                 print STDERR "    Removed last member from $group\n";
0802                 delete $group_members{$category}{$group};
0803             }
0804             }
0805         }
0806         $removed_something = 1;
0807         }
0808     }
0809     }
0810     return $removed_something;
0811 }
0812 
0813 sub clean_unused_labels() {
0814     print STDERR "\nCleaning unused labels...\n";
0815     my %categories_in_use;
0816 
0817     foreach my $category (keys %{$categories{"members"}}) {
0818     next if $category eq "Tokens";
0819     $categories_in_use{$category} = ();
0820     map { $categories_in_use{$category}{$_} = 1; } keys %{$categories_used{$last_pass}{$category}};
0821     if (! $opt_replace_categories && $last_pass > 0) {
0822         map { $categories_in_use{$category}{$_} = 1; } keys %{$categories_used{0}{$category}};
0823     }
0824     }
0825     my ($pass) = 0;
0826     do {
0827     print STDERR " Pass $pass...\n";
0828     $pass++;
0829     } while (clean_unused_labels_pass(%categories_in_use));
0830     print STDERR "done.\n";
0831 }
0832 
0833 ################################################################
0834 ################################################################
0835 # Write new file ###############################################
0836 ################################################################
0837 ################################################################
0838 
0839 # This code is a lot simpler; we don't have the same kinds of parse
0840 # issues or corner cases to worry about.
0841 
0842 sub copy_attributes($$;$) {
0843     my ($node, $attributes, $omit_pre_existing_categories) = @_;
0844     foreach my $attribute (@$attributes) {
0845     if (! $omit_pre_existing_categories ||
0846         ! defined $categories{"members"}{$attribute->nodeName}) {
0847         $node->setAttribute($attribute->nodeName, $attribute->value);
0848     }
0849     }
0850 }
0851 
0852 sub addElement($$$) {
0853     my ($dom, $node, $element) = @_;
0854     my ($nnode) = $dom->createElement($element);
0855     $node->appendChild($nnode);
0856     return $nnode;
0857 }
0858 
0859 sub build_categories($$) {
0860     my ($dom, $new_kpa) = @_;
0861     my ($new_categories) = addElement($dom, $new_kpa, 'Categories');
0862     copy_attributes($new_categories, $categories{"attributes"});
0863     my ($members) = $categories{"members"};
0864     %category_map = ();
0865 
0866     foreach my $cat (@{$categories{"members_list"}}) {
0867     my %cmap;
0868     my ($cnode) = addElement($dom, $new_categories, "Category");
0869     my ($cat_data) = $$members{$cat};
0870     copy_attributes($cnode, $$cat_data{"attributes"});
0871     my ($count) = 1;
0872     foreach my $value (sort keys %{$$cat_data{"members"}}) {
0873         my ($vnode) = addElement($dom, $cnode, "value");
0874         $cmap{$value} = $count;
0875         $vnode->setAttribute("value", $value);
0876         $vnode->setAttribute("id", $count++);
0877     }
0878     $category_map{$cat} = \%cmap;
0879     }
0880 }
0881 
0882 sub build_image_options($$$$) {
0883     my ($dom, $onode, $options, $iname) = @_;
0884     foreach my $option (sort keys %$options) {
0885     my ($oonode) = addElement($dom, $onode, "option");
0886     $oonode->setAttribute("name", $option);
0887     foreach my $value (sort keys %{$$options{$option}}) {
0888         my ($vnode) = addElement($dom, $oonode, "value");
0889         $vnode->setAttribute("value", $value);
0890     }
0891     }
0892 }
0893 
0894 sub build_images($$) {
0895     my ($dom, $new_kpa) = @_;
0896     my ($new_images) = addElement($dom, $new_kpa, 'images');
0897     my ($compressed) = int $new_kpa->getAttribute("compressed");
0898     foreach my $iname (@image_list) {
0899     my ($inode) = addElement($dom, $new_images, 'image');
0900     my ($image) = $images{$iname};
0901     copy_attributes($inode, $$image{"attributes"}, 1);
0902     if (defined $stacks_byimage[1]{$iname} &&
0903         ! defined $stacks_to_remove{$stacks_byimage[1]{$iname}}) {
0904         $inode->setAttribute("stackId",
0905                  $stacks_remap{$stacks_byimage[1]{$iname}});
0906         $inode->setAttribute("stackOrder", $stack_order{$iname});
0907     }
0908     if (defined $$image{"options"}) {
0909         if ($compressed) {
0910         foreach my $option (sort keys %{$$image{"options"}}) {
0911             my ($val) = join(",", sort {$a <=> $b} map { $category_map{$option}{$_} } keys %{$$image{"options"}{$option}});
0912             if ($val ne '') {
0913             $inode->setAttribute($option, $val);
0914             }
0915         }
0916         } else {
0917         my ($onode) = addElement($dom, $inode, 'options');
0918         build_image_options($dom, $onode, $$image{"options"}, $iname);
0919         }
0920     }
0921     }
0922 }
0923 
0924 sub build_blocklist($$) {
0925     my ($dom, $new_kpa) = @_;
0926     my ($new_blocklist) = addElement($dom, $new_kpa, 'blocklist');
0927     foreach my $file (sort keys %blocklist) {
0928     my ($bnode) = addElement($dom, $new_blocklist, "block");
0929     $bnode->setAttribute("file", $file);
0930     }
0931 }
0932 
0933 sub build_member_groups($$) {
0934     my ($dom, $new_kpa) = @_;
0935     my ($new_member_groups) = addElement($dom, $new_kpa, 'member-groups');
0936     my ($compressed) = int $new_kpa->getAttribute("compressed");
0937 
0938     if ($compressed) {
0939     foreach my $cat (sort keys %group_members) {
0940         my ($groups) = $group_members{$cat};
0941 #       print STDERR "************ $cat\n";
0942         foreach my $group (sort keys %$groups) {
0943 #       print STDERR "\n  ++ $group\n";
0944 #       print STDERR "        ", join("\n        ", map {"$_|$category_map{$cat}{$_}"} keys %{$$groups{$group}}), "\n";
0945 #       print STDERR "\n";
0946         my ($val) = join(",", sort {$a <=> $b} map {$category_map{$cat}{$_}} keys %{$$groups{$group}});
0947 #       print STDERR "\n   ----\n";
0948         my ($mnode) = addElement($dom, $new_member_groups, "member");
0949         $mnode->setAttribute("category", $cat);
0950         $mnode->setAttribute("group-name", $group);
0951         $mnode->setAttribute("members", $val);
0952         }
0953     }
0954     } else {
0955     foreach my $cat (sort keys %member_groups) {
0956         my ($clist) = $member_groups{$cat};
0957         foreach my $member (sort keys %$clist) {
0958         my ($groupname) = $$clist{$member};
0959         my ($mnode) = addElement($dom, $new_member_groups, "member");
0960         $mnode->setAttribute("category", $cat);
0961         $mnode->setAttribute("group-name", $groupname);
0962         $mnode->setAttribute("member", $member);
0963         }
0964     }
0965     }
0966 }
0967 
0968 sub build_new_doc() {
0969     print STDERR "Building new document...";
0970     my ($dom) = XML::LibXML::Document->new("1.0", "UTF-8");
0971     my ($new_kpa) = $dom->createElement('KPhotoAlbum');
0972     $dom->setDocumentElement($new_kpa);
0973     copy_attributes($new_kpa, $kpa_attributes);
0974     $new_kpa->setAttribute("compressed", $compress_output);
0975     print STDERR "categories...";
0976     build_categories($dom, $new_kpa);
0977     print STDERR "images...";
0978     build_images($dom, $new_kpa);
0979     print STDERR "blocklist...";
0980     build_blocklist($dom, $new_kpa);
0981     print STDERR "member groups...";
0982     build_member_groups($dom, $new_kpa);
0983     print STDERR "done.\n";
0984     return $dom;
0985 }
0986 
0987 ################################################################
0988 ################################################################
0989 # ...And the top level! ########################################
0990 ################################################################
0991 ################################################################
0992 
0993 my (%options) = ("R"                   => \$opt_reject_new_images,
0994          "reject-new-images"   => \$opt_reject_new_images,
0995          "B"                   => \$opt_keep_blocked_images,
0996          "keep-blocked-images" => \$opt_keep_blocked_images,
0997          "n"                   => \$opt_no_output,
0998          "no-output"           => \$opt_no_output,
0999          "N"                   => \$opt_force_uncompressed_output,
1000          "no-compressed-output"=> \$opt_force_uncompressed_output,
1001          "c"                   => \$opt_clean_unused_labels,
1002          "clean-unused-labels" => \$opt_clean_unused_labels,
1003          "C"                   => \$opt_force_compressed_output,
1004          "compressed-output"   => \$opt_force_compressed_output,
1005          "r"               => \$opt_replace_categories,
1006          "replace-categories"  => \$opt_replace_categories,
1007          "V:i"             => \$opt_output_version,
1008          "version:i"           => \$opt_output_version,
1009     );
1010 
1011 Getopt::Long::Configure("bundling", "require_order");
1012 if (!Getopt::Long::GetOptions(%options)) {
1013     usage();
1014 }
1015 
1016 sub check_argv(;$$) {
1017     my ($min_argv, $exact) = @_;
1018     if ($#ARGV < $min_argv || ($exact && $#ARGV != $min_argv)) {
1019     usage();
1020     }
1021 }
1022 
1023 check_argv(1);
1024 
1025 my ($command) = shift @ARGV;
1026 my ($src) = $ARGV[0];
1027 my ($merge);
1028 $last_pass = 0;
1029 $command = lc $command;
1030 $command =~ s/-/_/g;
1031 if ($command eq "clean") {
1032     check_argv(0, 1);
1033 } elsif ($command eq "merge") {
1034     check_argv(1, 1);
1035     $src = $ARGV[1];
1036     $merge = $ARGV[0];
1037     $last_pass = 1;
1038 } elsif ($command eq "add_category_item") {
1039     check_argv(2);
1040     shift @ARGV;
1041     $category_of_items_to_add = shift @ARGV;
1042     if ($ARGV[0] eq '--parent') {
1043     shift @ARGV;
1044     check_argv(1);
1045     $parent_of_items_to_add = shift @ARGV;
1046     }
1047     @items_to_add = @ARGV;
1048 } elsif ($command eq "tag_files" || $command eq "untag_files") {
1049     if ($command eq "untag" || $command eq "untag_files") {
1050     $tag_instead_of_untag = 0;
1051     }
1052     check_argv(2);
1053     shift @ARGV;
1054     $category_of_items_to_tag = shift @ARGV;
1055     @items_to_tag = @ARGV;
1056     map { $map_items_to_tag{$_} = 1; } @items_to_tag;
1057     while (<STDIN>) {
1058     chomp;
1059     push @files_to_tag, $_ if ($_ ne '')
1060     }
1061 } else {
1062     usage();
1063 }
1064 
1065 load_file($src, 0);
1066 load_file($merge, 1) if ($merge);
1067 
1068 print STDERR "Reconciling ";
1069 clean_unused_labels() if ($opt_clean_unused_labels);
1070 
1071 reconcile_stacks();
1072 
1073 reconcile_images();
1074 
1075 if (! $opt_no_output) {
1076     my ($doc) = build_new_doc();
1077 
1078     print STDERR "Writing...";
1079     $doc->toFH(\*STDOUT, 1);
1080     print STDERR "done.\n";
1081 }