File indexing completed on 2024-05-19 05:42:29

0001 // ct_lvtshr_fuzzyutil.cpp                                           -*-C++-*-
0002 
0003 /*
0004 // Copyright 2023 Codethink Ltd <codethink@codethink.co.uk>
0005 // SPDX-License-Identifier: Apache-2.0
0006 //
0007 // Licensed under the Apache License, Version 2.0 (the "License");
0008 // you may not use this file except in compliance with the License.
0009 // You may obtain a copy of the License at
0010 //
0011 //     http://www.apache.org/licenses/LICENSE-2.0
0012 //
0013 // Unless required by applicable law or agreed to in writing, software
0014 // distributed under the License is distributed on an "AS IS" BASIS,
0015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 // See the License for the specific language governing permissions and
0017 // limitations under the License.
0018 */
0019 
0020 #include <ct_lvtshr_fuzzyutil.h>
0021 
0022 #include <algorithm>
0023 #include <vector>
0024 
0025 namespace Codethink::lvtshr {
0026 
0027 // the number means the number of different letters on each string.
0028 // adapted from different algorithms
0029 // 0 means that the strings are equal
0030 // 1 means one different letter
0031 // 2 means two different letters, and so on, untill string.size().
0032 size_t FuzzyUtil::levensteinDistance(const std::string& source, const std::string& target)
0033 {
0034     if (source.size() > target.size()) {
0035         return levensteinDistance(target, source);
0036     }
0037 
0038     const size_t min_size = source.size();
0039     const size_t max_size = target.size();
0040     std::vector<size_t> distances(min_size + 1);
0041 
0042     for (size_t i = 0; i <= min_size; ++i) {
0043         distances[i] = i;
0044     }
0045 
0046     for (size_t j = 1; j <= max_size; ++j) {
0047         size_t previous_diagonal = distances[0];
0048 
0049         distances[0] += 1;
0050         for (size_t i = 1; i <= min_size; ++i) {
0051             size_t previous_diagonal_save = distances[i];
0052 
0053             decltype(source.size()) idx_1 = i - 1;
0054             decltype(source.size()) idx_2 = j - 1;
0055 
0056             if (source[idx_1] == target[idx_2]) {
0057                 distances[i] = previous_diagonal;
0058             } else {
0059                 distances[i] = std::min({distances[i - 1], distances[i], previous_diagonal}) + 1;
0060             }
0061 
0062             previous_diagonal = previous_diagonal_save;
0063         }
0064     }
0065 
0066     return distances[min_size];
0067 }
0068 
0069 } // end namespace Codethink::lvtshr