spellcheck.h 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. /* Find near-matches for strings and identifiers.
  2. Copyright (C) 2015-2019 Free Software Foundation, Inc.
  3. This file is part of GCC.
  4. GCC is free software; you can redistribute it and/or modify it under
  5. the terms of the GNU General Public License as published by the Free
  6. Software Foundation; either version 3, or (at your option) any later
  7. version.
  8. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  11. for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GCC; see the file COPYING3. If not see
  14. <http://www.gnu.org/licenses/>. */
  15. #ifndef GCC_SPELLCHECK_H
  16. #define GCC_SPELLCHECK_H
  17. typedef unsigned int edit_distance_t;
  18. const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
  19. /* spellcheck.c */
  20. extern edit_distance_t
  21. get_edit_distance (const char *s, int len_s,
  22. const char *t, int len_t);
  23. extern edit_distance_t
  24. get_edit_distance (const char *s, const char *t);
  25. extern const char *
  26. find_closest_string (const char *target,
  27. const auto_vec<const char *> *candidates);
  28. /* A traits class for describing a string-like type usable by
  29. class best_match.
  30. Specializations should provide the implementations of the following:
  31. static size_t get_length (TYPE);
  32. static const char *get_string (TYPE);
  33. get_string should return a non-NULL ptr, which does not need to be
  34. 0-terminated. */
  35. template <typename TYPE>
  36. struct edit_distance_traits {};
  37. /* Specialization of edit_distance_traits for C-style strings. */
  38. template <>
  39. struct edit_distance_traits<const char *>
  40. {
  41. static size_t get_length (const char *str)
  42. {
  43. gcc_assert (str);
  44. return strlen (str);
  45. }
  46. static const char *get_string (const char *str)
  47. {
  48. gcc_assert (str);
  49. return str;
  50. }
  51. };
  52. extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
  53. size_t candidate_len);
  54. /* A type for use when determining the best match against a string,
  55. expressed as a template so that we can match against various
  56. string-like types (const char *, frontend identifiers, and preprocessor
  57. macros).
  58. This type accumulates the best possible match against GOAL_TYPE for
  59. a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
  60. number of calls to get_edit_distance and to
  61. edit_distance_traits<T>::get_length. */
  62. template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
  63. class best_match
  64. {
  65. public:
  66. typedef GOAL_TYPE goal_t;
  67. typedef CANDIDATE_TYPE candidate_t;
  68. typedef edit_distance_traits<goal_t> goal_traits;
  69. typedef edit_distance_traits<candidate_t> candidate_traits;
  70. /* Constructor. */
  71. best_match (GOAL_TYPE goal,
  72. edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
  73. : m_goal (goal_traits::get_string (goal)),
  74. m_goal_len (goal_traits::get_length (goal)),
  75. m_best_candidate (NULL),
  76. m_best_distance (best_distance_so_far)
  77. {}
  78. /* Compare the edit distance between CANDIDATE and m_goal,
  79. and if it's the best so far, record it. */
  80. void consider (candidate_t candidate)
  81. {
  82. size_t candidate_len = candidate_traits::get_length (candidate);
  83. /* Calculate a lower bound on the candidate's distance to the goal,
  84. based on the difference in lengths; it will require at least
  85. this many insertions/deletions. */
  86. edit_distance_t min_candidate_distance
  87. = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
  88. /* If the candidate's length is sufficiently different to that
  89. of the goal string, then the number of insertions/deletions
  90. may be >= the best distance so far. If so, we can reject
  91. the candidate immediately without needing to compute
  92. the exact distance, since it won't be an improvement. */
  93. if (min_candidate_distance >= m_best_distance)
  94. return;
  95. /* If the candidate will be unable to beat the criterion in
  96. get_best_meaningful_candidate, reject it without computing
  97. the exact distance. */
  98. edit_distance_t cutoff = get_cutoff (candidate_len);
  99. if (min_candidate_distance > cutoff)
  100. return;
  101. /* Otherwise, compute the distance and see if the candidate
  102. has beaten the previous best value. */
  103. edit_distance_t dist
  104. = get_edit_distance (m_goal, m_goal_len,
  105. candidate_traits::get_string (candidate),
  106. candidate_len);
  107. if (dist < m_best_distance)
  108. {
  109. m_best_distance = dist;
  110. m_best_candidate = candidate;
  111. m_best_candidate_len = candidate_len;
  112. }
  113. }
  114. /* Assuming that BEST_CANDIDATE is known to be better than
  115. m_best_candidate, update (without recomputing the edit distance to
  116. the goal). */
  117. void set_best_so_far (CANDIDATE_TYPE best_candidate,
  118. edit_distance_t best_distance,
  119. size_t best_candidate_len)
  120. {
  121. gcc_assert (best_distance < m_best_distance);
  122. m_best_candidate = best_candidate;
  123. m_best_distance = best_distance;
  124. m_best_candidate_len = best_candidate_len;
  125. }
  126. /* Generate the maximum edit distance for which we consider a suggestion
  127. to be meaningful, given a candidate of length CANDIDATE_LEN. */
  128. edit_distance_t get_cutoff (size_t candidate_len) const
  129. {
  130. return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
  131. }
  132. /* Get the best candidate so far, but applying a filter to ensure
  133. that we return NULL if none of the candidates are close to the goal,
  134. to avoid offering nonsensical suggestions to the user. */
  135. candidate_t get_best_meaningful_candidate () const
  136. {
  137. /* If the edit distance is too high, the suggestion is likely to be
  138. meaningless. */
  139. if (m_best_candidate)
  140. {
  141. edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
  142. if (m_best_distance > cutoff)
  143. return NULL;
  144. }
  145. /* If the goal string somehow makes it into the candidate list, offering
  146. it as a suggestion will be nonsensical e.g.
  147. 'constexpr' does not name a type; did you mean 'constexpr'?
  148. Ultimately such suggestions are due to bugs in constructing the
  149. candidate list, but as a band-aid, do not offer suggestions for
  150. distance == 0 (where candidate == goal). */
  151. if (m_best_distance == 0)
  152. return NULL;
  153. return m_best_candidate;
  154. }
  155. /* Get the closest candidate so far, without applying any filtering. */
  156. candidate_t blithely_get_best_candidate () const
  157. {
  158. return m_best_candidate;
  159. }
  160. edit_distance_t get_best_distance () const { return m_best_distance; }
  161. size_t get_best_candidate_length () const { return m_best_candidate_len; }
  162. private:
  163. const char *m_goal;
  164. size_t m_goal_len;
  165. candidate_t m_best_candidate;
  166. edit_distance_t m_best_distance;
  167. size_t m_best_candidate_len;
  168. };
  169. #endif /* GCC_SPELLCHECK_H */