// File indexing completed on 2024-12-01 05:11:55
0001 // clang-format off 0002 /* 0003 Shared definitions for GNU DIFF 0004 Modified for KDiff3 by Joachim Eibl <joachim.eibl at gmx.de> 2003, 2004, 2005. 0005 The original file was part of GNU DIFF. 0006 0007 Part of KDiff3 - Text Diff And Merge Tool 0008 0009 SPDX-FileCopyrightText: 1988-2002 Free Software Foundation, Inc. 0010 SPDX-FileCopyrightText: 2002-2011 Joachim Eibl, joachim.eibl at gmx.de 0011 SPDX-FileCopyrightText: 2018-2020 Michael Reeves reeves.87@gmail.com 0012 SPDX-License-Identifier: GPL-2.0-or-later 0013 */ 0014 // clang-format on 0015 0016 #ifndef GNUDIFF_DIFF_H 0017 #define GNUDIFF_DIFF_H 0018 0019 #include "LineRef.h" 0020 #include "Utils.h" 0021 0022 #include <QtGlobal> 0023 0024 #include <stdint.h> 0025 #include <sys/stat.h> 0026 #include <sys/types.h> 0027 0028 #include <algorithm> 0029 #include <ctype.h> 0030 #include <limits.h> 0031 #include <stdlib.h> 0032 #include <string.h> 0033 #include <type_traits> 0034 0035 #include <stdio.h> 0036 0037 #include <QString> 0038 0039 /* The integer type of a line number. */ 0040 typedef qint64 GNULineRef; 0041 #define GNULINEREF_MAX std::numeric_limits<GNULineRef>::max() 0042 static_assert(std::is_signed<GNULineRef>::value, "GNULineRef must be signed."); 0043 static_assert(sizeof(GNULineRef) >= sizeof(size_t), "GNULineRef must be able to receive size_t values."); 0044 0045 class GnuDiff 0046 { 0047 public: 0048 /* Variables for command line options */ 0049 0050 /* Nonzero if output cannot be generated for identical files. */ 0051 bool no_diff_means_no_output; 0052 0053 /* Number of lines of context to show in each set of diffs. 0054 This is zero when context is not to be shown. */ 0055 GNULineRef context; 0056 0057 /* The significance of white space during comparisons. */ 0058 enum 0059 { 0060 /* All white space is significant (the default). */ 0061 IGNORE_NO_WHITE_SPACE, 0062 0063 /* Ignore changes due to tab expansion (-E). 
*/ 0064 IGNORE_TAB_EXPANSION, 0065 0066 /* Ignore changes in horizontal white space (-b). */ 0067 IGNORE_SPACE_CHANGE, 0068 0069 /* Ignore all horizontal white space (-w). */ 0070 IGNORE_ALL_SPACE 0071 } ignore_white_space; 0072 0073 /* Ignore changes that affect only numbers. (J. Eibl) */ 0074 bool bIgnoreNumbers; 0075 bool bIgnoreWhiteSpace; 0076 0077 /* Files can be compared byte-by-byte, as if they were binary. 0078 This depends on various options. */ 0079 bool files_can_be_treated_as_binary; 0080 0081 /* Ignore differences in case of letters (-i). */ 0082 bool ignore_case; 0083 0084 /* Use heuristics for better speed with large files with a small 0085 density of changes. */ 0086 bool speed_large_files; 0087 0088 /* Don't discard lines. This makes things slower (sometimes much 0089 slower) but will find a guaranteed minimal set of changes. */ 0090 bool minimal; 0091 0092 /* The result of comparison is an "edit script": a chain of `struct change'. 0093 Each `struct change' represents one place where some lines are deleted 0094 and some are inserted. 0095 0096 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 0097 DELETED is the number of lines deleted here from file 0. 0098 INSERTED is the number of lines inserted here in file 1. 0099 0100 If DELETED is 0 then LINE0 is the number of the line before 0101 which the insertion was done; vice versa for INSERTED and LINE1. */ 0102 0103 struct change { 0104 change *link; /* Previous or next edit command */ 0105 GNULineRef inserted; /* # lines of file 1 changed here. */ 0106 GNULineRef deleted; /* # lines of file 0 changed here. */ 0107 GNULineRef line0; /* Line number of 1st deleted line. */ 0108 GNULineRef line1; /* Line number of 1st inserted line. */ 0109 bool ignore; /* Flag used in context.c. */ 0110 }; 0111 0112 /* Structures that describe the input files. */ 0113 0114 /* Data on one input file being compared. */ 0115 0116 struct file_data { 0117 /* Buffer in which text of file is read. 
*/ 0118 const QChar *buffer; 0119 0120 /* Allocated size of buffer, in QChars. Always a multiple of 0121 sizeof(*buffer). */ 0122 size_t bufsize; 0123 0124 /* Number of valid bytes now in the buffer. */ 0125 size_t buffered; 0126 0127 /* Array of pointers to lines in the file. */ 0128 const QChar **linbuf; 0129 0130 /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines. 0131 linebuf[linbuf_base ... buffered_lines - 1] are possibly differing. 0132 linebuf[linbuf_base ... valid_lines - 1] contain valid data. 0133 linebuf[linbuf_base ... alloc_lines - 1] are allocated. */ 0134 GNULineRef linbuf_base, buffered_lines, valid_lines, alloc_lines; 0135 0136 /* Pointer to end of prefix of this file to ignore when hashing. */ 0137 const QChar *prefix_end; 0138 0139 /* Count of lines in the prefix. 0140 There are this many lines in the file before linbuf[0]. */ 0141 GNULineRef prefix_lines; 0142 0143 /* Pointer to start of suffix of this file to ignore when hashing. */ 0144 const QChar *suffix_begin; 0145 0146 /* Vector, indexed by line number, containing an equivalence code for 0147 each line. It is this vector that is actually compared with that 0148 of another file to generate differences. */ 0149 GNULineRef *equivs; 0150 0151 /* Vector, like the previous one except that 0152 the elements for discarded lines have been squeezed out. */ 0153 GNULineRef *undiscarded; 0154 0155 /* Vector mapping virtual line numbers (not counting discarded lines) 0156 to real ones (counting those lines). Both are origin-0. */ 0157 GNULineRef *realindexes; 0158 0159 /* Total number of nondiscarded lines. */ 0160 GNULineRef nondiscarded_lines; 0161 0162 /* Vector, indexed by real origin-0 line number, 0163 containing TRUE for a line that is an insertion or a deletion. 0164 The results of comparison are stored here. */ 0165 bool *changed; 0166 0167 /* 1 if at end of file. */ 0168 bool eof; 0169 0170 /* 1 more than the maximum equivalence value used for this or its 0171 sibling file. 
*/ 0172 GNULineRef equiv_max; 0173 }; 0174 0175 /* Data on two input files being compared. */ 0176 0177 struct comparison { 0178 file_data file[2]; 0179 comparison const *parent; /* parent, if a recursive comparison */ 0180 }; 0181 0182 /* Describe the two files currently being compared. */ 0183 0184 file_data files[2]; 0185 0186 /* Declare various functions. */ 0187 0188 /* analyze.c */ 0189 change *diff_2_files(comparison *); 0190 /* io.c */ 0191 bool read_files(file_data[], bool); 0192 0193 /* util.c */ 0194 bool lines_differ(const QChar *, size_t, const QChar *, size_t); 0195 void *zalloc(size_t); 0196 0197 private: 0198 // gnudiff_analyze.cpp 0199 GNULineRef diag(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal, struct partition *part) const; 0200 void compareseq(GNULineRef xoff, GNULineRef xlim, GNULineRef yoff, GNULineRef ylim, bool find_minimal); 0201 void discard_confusing_lines(file_data filevec[]); 0202 void shift_boundaries(file_data filevec[]); 0203 change *add_change(GNULineRef line0, GNULineRef line1, GNULineRef deleted, GNULineRef inserted, change *old); 0204 change *build_reverse_script(file_data const filevec[]); 0205 change *build_script(file_data const filevec[]); 0206 0207 // gnudiff_io.cpp 0208 GNULineRef guess_lines(GNULineRef n, size_t s, size_t t); 0209 void find_and_hash_each_line(file_data *current); 0210 void find_identical_ends(file_data filevec[]); 0211 0212 // gnudiff_xmalloc.cpp 0213 void *xmalloc(size_t n); 0214 void *xrealloc(void *p, size_t n); 0215 void xalloc_die(); 0216 }; // class GnuDiff 0217 0218 #endif