File indexing completed on 2024-12-01 05:11:55

0001 // clang-format off
0002 /*
0003  Shared definitions for GNU DIFF
0004     Modified for KDiff3 by Joachim Eibl <joachim.eibl at gmx.de> 2003, 2004, 2005.
0005     The original file was part of GNU DIFF.
0006 
0007     Part of KDiff3 - Text Diff And Merge Tool
0008 
0009     SPDX-FileCopyrightText: 1988-2002 Free Software Foundation, Inc.
0010     SPDX-FileCopyrightText: 2002-2011 Joachim Eibl, joachim.eibl at gmx.de
0011     SPDX-FileCopyrightText: 2018-2020 Michael Reeves reeves.87@gmail.com
0012     SPDX-License-Identifier: GPL-2.0-or-later
0013 */
0014 // clang-format on
0015 
0016 #ifndef GNUDIFF_DIFF_H
0017 #define GNUDIFF_DIFF_H
0018 
0019 #include "LineRef.h"
0020 #include "Utils.h"
0021 
0022 #include <QtGlobal>
0023 
0024 #include <stdint.h>
0025 #include <sys/stat.h>
0026 #include <sys/types.h>
0027 
0028 #include <algorithm>
0029 #include <ctype.h>
0030 #include <limits.h>
0031 #include <stdlib.h>
0032 #include <string.h>
0033 #include <type_traits>
0034 
0035 #include <stdio.h>
0036 
0037 #include <QString>
0038 
0039 /* The integer type of a line number: an alias for Qt's qint64, used for the line numbers and line counts declared in this header. */
0040 typedef qint64 GNULineRef;
0041 #define GNULINEREF_MAX std::numeric_limits<GNULineRef>::max() /* Largest GNULineRef value.  NOTE(review): std::numeric_limits is declared in <limits>, which this header does not include directly (only <limits.h>) -- presumably it arrives transitively; confirm. */
0042 static_assert(std::is_signed<GNULineRef>::value, "GNULineRef must be signed.");
0043 static_assert(sizeof(GNULineRef) >= sizeof(size_t), "GNULineRef must be able to receive size_t values."); /* NOTE(review): this checks width only; with equal widths, size_t values above GNULINEREF_MAX still do not fit because GNULineRef is signed. */
0044 
0045 class GnuDiff /* Option variables, per-file data structures and routine declarations of the GNU diff code as modified for KDiff3.  */
0046 {
0047   public:
0048     /* Variables for command line options */
0049 
0050     /* True if output cannot be generated for identical files.  */
0051     bool no_diff_means_no_output;
0052 
0053     /* Number of lines of context to show in each set of diffs.
0054        This is zero when context is not to be shown.  */
0055     GNULineRef context;
0056 
0057     /* The significance of white space during comparisons.  */
0058     enum
0059     {
0060         /* All white space is significant (the default).  */
0061         IGNORE_NO_WHITE_SPACE,
0062 
0063         /* Ignore changes due to tab expansion (-E).  */
0064         IGNORE_TAB_EXPANSION,
0065 
0066         /* Ignore changes in horizontal white space (-b).  */
0067         IGNORE_SPACE_CHANGE,
0068 
0069         /* Ignore all horizontal white space (-w).  */
0070         IGNORE_ALL_SPACE
0071     } ignore_white_space;
0072 
0073     /* Ignore changes that affect only numbers. (J. Eibl)  */
0074     bool bIgnoreNumbers;
0075     bool bIgnoreWhiteSpace; /* NOTE(review): not documented in this header; presumably a KDiff3 addition like bIgnoreNumbers -- confirm how it relates to ignore_white_space.  */
0076 
0077     /* Files can be compared byte-by-byte, as if they were binary.
0078        This depends on various options.  */
0079     bool files_can_be_treated_as_binary;
0080 
0081     /* Ignore differences in case of letters (-i).  */
0082     bool ignore_case;
0083 
0084     /* Use heuristics for better speed with large files with a small
0085        density of changes.  */
0086     bool speed_large_files;
0087 
0088     /* Don't discard lines.  This makes things slower (sometimes much
0089        slower) but will find a guaranteed minimal set of changes.  */
0090     bool minimal;
0091 
0092     /* The result of comparison is an "edit script": a chain of struct change.
0093        Each struct change represents one place where some lines are deleted
0094        and some are inserted.
0095 
0096        LINE0 and LINE1 are the first affected lines in the two files (origin 0).
0097        DELETED is the number of lines deleted here from file 0.
0098        INSERTED is the number of lines inserted here in file 1.
0099 
0100        If DELETED is 0 then LINE0 is the number of the line before
0101        which the insertion was done; vice versa for INSERTED and LINE1.  */
0102 
0103     struct change {
0104         change *link; /* Previous or next edit command.  */
0105         GNULineRef inserted; /* Number of lines inserted here in file 1.  */
0106         GNULineRef deleted;  /* Number of lines deleted here from file 0.  */
0107         GNULineRef line0;    /* Line number of 1st deleted line.  */
0108         GNULineRef line1;    /* Line number of 1st inserted line.  */
0109         bool ignore;         /* Flag used in context.c (presumably a file of the original GNU diff sources -- confirm it is still used).  */
0110     };
0111 
0112     /* Structures that describe the input files.  */
0113 
0114     /* Data on one input file being compared.  */
0115 
0116     struct file_data {
0117         /* Buffer in which text of file is read.  */
0118         const QChar *buffer;
0119 
0120         /* Allocated size of buffer, in QChars.  Always a multiple of
0121            sizeof(*buffer).  */
0122         size_t bufsize;
0123 
0124         /* Number of valid bytes now in the buffer.  NOTE(review): buffer holds QChar, so this may count QChars rather than bytes -- confirm.  */
0125         size_t buffered;
0126 
0127         /* Array of pointers to lines in the file.  */
0128         const QChar **linbuf;
0129 
0130         /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
0131            linbuf[linbuf_base ... buffered_lines - 1] are possibly differing.
0132            linbuf[linbuf_base ... valid_lines - 1] contain valid data.
0133            linbuf[linbuf_base ... alloc_lines - 1] are allocated.  */
0134         GNULineRef linbuf_base, buffered_lines, valid_lines, alloc_lines;
0135 
0136         /* Pointer to end of prefix of this file to ignore when hashing.  */
0137         const QChar *prefix_end;
0138 
0139         /* Count of lines in the prefix.
0140            There are this many lines in the file before linbuf[0].  */
0141         GNULineRef prefix_lines;
0142 
0143         /* Pointer to start of suffix of this file to ignore when hashing.  */
0144         const QChar *suffix_begin;
0145 
0146         /* Vector, indexed by line number, containing an equivalence code for
0147            each line.  It is this vector that is actually compared with that
0148            of another file to generate differences.  */
0149         GNULineRef *equivs;
0150 
0151         /* Vector, like the previous one (equivs) except that
0152            the elements for discarded lines have been squeezed out.  */
0153         GNULineRef *undiscarded;
0154 
0155         /* Vector mapping virtual line numbers (not counting discarded lines)
0156            to real ones (counting those lines).  Both are origin-0.  */
0157         GNULineRef *realindexes;
0158 
0159         /* Total number of nondiscarded lines.  */
0160         GNULineRef nondiscarded_lines;
0161 
0162         /* Vector, indexed by real origin-0 line number,
0163            containing true for a line that is an insertion or a deletion.
0164            The results of comparison are stored here.  */
0165         bool *changed;
0166 
0167         /* True if at end of file.  */
0168         bool eof;
0169 
0170         /* 1 more than the maximum equivalence value used for this or its
0171            sibling file.  */
0172         GNULineRef equiv_max;
0173     };
0174 
0175     /* Data on two input files being compared.  */
0176 
0177     struct comparison {
0178         file_data file[2];
0179         comparison const *parent; /* Parent, if a recursive comparison.  */
0180     };
0181 
0182     /* Describe the two files currently being compared.  */
0183 
0184     file_data files[2];
0185 
0186     /* Declare various functions.  The .c names below are presumably the original GNU diff source files (the private section names gnudiff_*.cpp instead) -- confirm.  */
0187 
0188     /* analyze.c */
0189     change *diff_2_files(comparison *);
0190     /* io.c */
0191     bool read_files(file_data[], bool);
0192 
0193     /* util.c */
0194     bool lines_differ(const QChar *, size_t, const QChar *, size_t);
0195     void *zalloc(size_t);
0196 
0197   private:
0198     // gnudiff_analyze.cpp (struct partition, used by diag below, is not defined in this header)
0199     GNULineRef diag(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal, struct partition *part) const;
0200     void compareseq(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal);
0201     void discard_confusing_lines(file_data filevec[]);
0202     void shift_boundaries(file_data filevec[]);
0203     change *add_change(GNULineRef line0, GNULineRef line1, GNULineRef deleted, GNULineRef inserted, change *old);
0204     change *build_reverse_script(file_data const filevec[]);
0205     change *build_script(file_data const filevec[]);
0206 
0207     // gnudiff_io.cpp
0208     GNULineRef guess_lines(GNULineRef n, size_t s, size_t t);
0209     void find_and_hash_each_line(file_data *current);
0210     void find_identical_ends(file_data filevec[]);
0211 
0212     // gnudiff_xmalloc.cpp
0213     void *xmalloc(size_t n);
0214     void *xrealloc(void *p, size_t n);
0215     void xalloc_die();
0216 }; // class GnuDiff
0217 
0218 #endif