123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- /* Find near-matches for strings and identifiers.
- Copyright (C) 2015-2019 Free Software Foundation, Inc.
- This file is part of GCC.
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
- #ifndef GCC_SPELLCHECK_H
- #define GCC_SPELLCHECK_H
- typedef unsigned int edit_distance_t;
- const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
- /* spellcheck.c */
- extern edit_distance_t
- get_edit_distance (const char *s, int len_s,
- const char *t, int len_t);
- extern edit_distance_t
- get_edit_distance (const char *s, const char *t);
- extern const char *
- find_closest_string (const char *target,
- const auto_vec<const char *> *candidates);
- /* A traits class for describing a string-like type usable by
- class best_match.
- Specializations should provide the implementations of the following:
- static size_t get_length (TYPE);
- static const char *get_string (TYPE);
- get_string should return a non-NULL ptr, which does not need to be
- 0-terminated. */
- template <typename TYPE>
- struct edit_distance_traits {};
- /* Specialization of edit_distance_traits for C-style strings. */
- template <>
- struct edit_distance_traits<const char *>
- {
- static size_t get_length (const char *str)
- {
- gcc_assert (str);
- return strlen (str);
- }
- static const char *get_string (const char *str)
- {
- gcc_assert (str);
- return str;
- }
- };
- extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
- size_t candidate_len);
- /* A type for use when determining the best match against a string,
- expressed as a template so that we can match against various
- string-like types (const char *, frontend identifiers, and preprocessor
- macros).
- This type accumulates the best possible match against GOAL_TYPE for
- a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
- number of calls to get_edit_distance and to
- edit_distance_traits<T>::get_length. */
- template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
- class best_match
- {
- public:
- typedef GOAL_TYPE goal_t;
- typedef CANDIDATE_TYPE candidate_t;
- typedef edit_distance_traits<goal_t> goal_traits;
- typedef edit_distance_traits<candidate_t> candidate_traits;
- /* Constructor. */
- best_match (GOAL_TYPE goal,
- edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
- : m_goal (goal_traits::get_string (goal)),
- m_goal_len (goal_traits::get_length (goal)),
- m_best_candidate (NULL),
- m_best_distance (best_distance_so_far)
- {}
- /* Compare the edit distance between CANDIDATE and m_goal,
- and if it's the best so far, record it. */
- void consider (candidate_t candidate)
- {
- size_t candidate_len = candidate_traits::get_length (candidate);
- /* Calculate a lower bound on the candidate's distance to the goal,
- based on the difference in lengths; it will require at least
- this many insertions/deletions. */
- edit_distance_t min_candidate_distance
- = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
- /* If the candidate's length is sufficiently different to that
- of the goal string, then the number of insertions/deletions
- may be >= the best distance so far. If so, we can reject
- the candidate immediately without needing to compute
- the exact distance, since it won't be an improvement. */
- if (min_candidate_distance >= m_best_distance)
- return;
- /* If the candidate will be unable to beat the criterion in
- get_best_meaningful_candidate, reject it without computing
- the exact distance. */
- edit_distance_t cutoff = get_cutoff (candidate_len);
- if (min_candidate_distance > cutoff)
- return;
- /* Otherwise, compute the distance and see if the candidate
- has beaten the previous best value. */
- edit_distance_t dist
- = get_edit_distance (m_goal, m_goal_len,
- candidate_traits::get_string (candidate),
- candidate_len);
- if (dist < m_best_distance)
- {
- m_best_distance = dist;
- m_best_candidate = candidate;
- m_best_candidate_len = candidate_len;
- }
- }
- /* Assuming that BEST_CANDIDATE is known to be better than
- m_best_candidate, update (without recomputing the edit distance to
- the goal). */
- void set_best_so_far (CANDIDATE_TYPE best_candidate,
- edit_distance_t best_distance,
- size_t best_candidate_len)
- {
- gcc_assert (best_distance < m_best_distance);
- m_best_candidate = best_candidate;
- m_best_distance = best_distance;
- m_best_candidate_len = best_candidate_len;
- }
- /* Generate the maximum edit distance for which we consider a suggestion
- to be meaningful, given a candidate of length CANDIDATE_LEN. */
- edit_distance_t get_cutoff (size_t candidate_len) const
- {
- return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
- }
- /* Get the best candidate so far, but applying a filter to ensure
- that we return NULL if none of the candidates are close to the goal,
- to avoid offering nonsensical suggestions to the user. */
- candidate_t get_best_meaningful_candidate () const
- {
- /* If the edit distance is too high, the suggestion is likely to be
- meaningless. */
- if (m_best_candidate)
- {
- edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
- if (m_best_distance > cutoff)
- return NULL;
- }
- /* If the goal string somehow makes it into the candidate list, offering
- it as a suggestion will be nonsensical e.g.
- 'constexpr' does not name a type; did you mean 'constexpr'?
- Ultimately such suggestions are due to bugs in constructing the
- candidate list, but as a band-aid, do not offer suggestions for
- distance == 0 (where candidate == goal). */
- if (m_best_distance == 0)
- return NULL;
- return m_best_candidate;
- }
- /* Get the closest candidate so far, without applying any filtering. */
- candidate_t blithely_get_best_candidate () const
- {
- return m_best_candidate;
- }
- edit_distance_t get_best_distance () const { return m_best_distance; }
- size_t get_best_candidate_length () const { return m_best_candidate_len; }
- private:
- const char *m_goal;
- size_t m_goal_len;
- candidate_t m_best_candidate;
- edit_distance_t m_best_distance;
- size_t m_best_candidate_len;
- };
- #endif /* GCC_SPELLCHECK_H */
|