tre.h 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. /*
  2. tre.h - TRE public API definitions
  3. This software is released under a BSD-style license.
  4. See the file LICENSE for details and copyright.
  5. */
  6. #ifndef TRE_H
  7. #define TRE_H 1
  8. #include "tre-config.h"
  9. #ifdef HAVE_SYS_TYPES_H
  10. #include <sys/types.h>
  11. #endif /* HAVE_SYS_TYPES_H */
  12. #ifdef HAVE_LIBUTF8_H
  13. #include <libutf8.h>
  14. #endif /* HAVE_LIBUTF8_H */
  15. #ifdef TRE_USE_SYSTEM_REGEX_H
  16. /* Include the system regex.h to make TRE ABI compatible with the
  17. system regex. */
  18. #include TRE_SYSTEM_REGEX_H_PATH
  19. #define tre_regcomp regcomp
  20. #define tre_regexec regexec
  21. #define tre_regerror regerror
  22. #define tre_regfree regfree
  23. #endif /* TRE_USE_SYSTEM_REGEX_H */
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif
  27. #ifdef TRE_USE_SYSTEM_REGEX_H
  28. #ifndef REG_OK
  29. #define REG_OK 0
  30. #endif /* !REG_OK */
  31. #ifndef HAVE_REG_ERRCODE_T
  32. typedef int reg_errcode_t;
  33. #endif /* !HAVE_REG_ERRCODE_T */
  34. #if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
  35. #define REG_LITERAL 0x1000
  36. #endif
  37. /* Extra tre_regcomp() flags. */
  38. #ifndef REG_BASIC
  39. #define REG_BASIC 0
  40. #endif /* !REG_BASIC */
  41. #define REG_RIGHT_ASSOC (REG_LITERAL << 1)
  42. #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1)
  43. /* Extra tre_regexec() flags. */
  44. #define REG_APPROX_MATCHER 0x1000
  45. #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
  46. #else /* !TRE_USE_SYSTEM_REGEX_H */
  47. /* If the we're not using system regex.h, we need to define the
  48. structs and enums ourselves. */
  49. typedef int regoff_t;
  50. typedef struct {
  51. size_t re_nsub; /* Number of parenthesized subexpressions. */
  52. void *value; /* For internal use only. */
  53. } regex_t;
  54. typedef struct {
  55. regoff_t rm_so;
  56. regoff_t rm_eo;
  57. } regmatch_t;
  58. typedef enum {
  59. REG_OK = 0, /* No error. */
  60. /* POSIX tre_regcomp() return error codes. (In the order listed in the
  61. standard.) */
  62. REG_NOMATCH, /* No match. */
  63. REG_BADPAT, /* Invalid regexp. */
  64. REG_ECOLLATE, /* Unknown collating element. */
  65. REG_ECTYPE, /* Unknown character class name. */
  66. REG_EESCAPE, /* Trailing backslash. */
  67. REG_ESUBREG, /* Invalid back reference. */
  68. REG_EBRACK, /* "[]" imbalance */
  69. REG_EPAREN, /* "\(\)" or "()" imbalance */
  70. REG_EBRACE, /* "\{\}" or "{}" imbalance */
  71. REG_BADBR, /* Invalid content of {} */
  72. REG_ERANGE, /* Invalid use of range operator */
  73. REG_ESPACE, /* Out of memory. */
  74. REG_BADRPT /* Invalid use of repetition operators. */
  75. } reg_errcode_t;
  76. /* POSIX tre_regcomp() flags. */
  77. #define REG_EXTENDED 1
  78. #define REG_ICASE (REG_EXTENDED << 1)
  79. #define REG_NEWLINE (REG_ICASE << 1)
  80. #define REG_NOSUB (REG_NEWLINE << 1)
  81. /* Extra tre_regcomp() flags. */
  82. #define REG_BASIC 0
  83. #define REG_LITERAL (REG_NOSUB << 1)
  84. #define REG_RIGHT_ASSOC (REG_LITERAL << 1)
  85. #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1)
  86. #define REG_USEBYTES (REG_UNGREEDY << 1)
  87. /* POSIX tre_regexec() flags. */
  88. #define REG_NOTBOL 1
  89. #define REG_NOTEOL (REG_NOTBOL << 1)
  90. /* Extra tre_regexec() flags. */
  91. #define REG_APPROX_MATCHER (REG_NOTEOL << 1)
  92. #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
  93. #endif /* !TRE_USE_SYSTEM_REGEX_H */
  94. /* REG_NOSPEC and REG_LITERAL mean the same thing. */
  95. #if defined(REG_LITERAL) && !defined(REG_NOSPEC)
  96. #define REG_NOSPEC REG_LITERAL
  97. #elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
  98. #define REG_LITERAL REG_NOSPEC
  99. #endif /* defined(REG_NOSPEC) */
  100. /* The maximum number of iterations in a bound expression. */
  101. #undef RE_DUP_MAX
  102. #define RE_DUP_MAX 255
  103. /* The POSIX.2 regexp functions */
  104. extern int
  105. tre_regcomp(regex_t *preg, const char *regex, int cflags);
  106. extern int
  107. tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
  108. regmatch_t pmatch[], int eflags);
  109. extern int
  110. tre_regcompb(regex_t *preg, const char *regex, int cflags);
  111. extern int
  112. tre_regexecb(const regex_t *preg, const char *string, size_t nmatch,
  113. regmatch_t pmatch[], int eflags);
  114. extern size_t
  115. tre_regerror(int errcode, const regex_t *preg, char *errbuf,
  116. size_t errbuf_size);
  117. extern void
  118. tre_regfree(regex_t *preg);
  119. #ifdef TRE_WCHAR
  120. #ifdef HAVE_WCHAR_H
  121. #include <wchar.h>
  122. #endif /* HAVE_WCHAR_H */
  123. /* Wide character versions (not in POSIX.2). */
  124. extern int
  125. tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
  126. extern int
  127. tre_regwexec(const regex_t *preg, const wchar_t *string,
  128. size_t nmatch, regmatch_t pmatch[], int eflags);
  129. #endif /* TRE_WCHAR */
  130. /* Versions with a maximum length argument and therefore the capability to
  131. handle null characters in the middle of the strings (not in POSIX.2). */
  132. extern int
  133. tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
  134. extern int
  135. tre_regnexec(const regex_t *preg, const char *string, size_t len,
  136. size_t nmatch, regmatch_t pmatch[], int eflags);
  137. /* regn*b versions take byte literally as 8-bit values */
  138. extern int
  139. tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags);
  140. extern int
  141. tre_regnexecb(const regex_t *preg, const char *str, size_t len,
  142. size_t nmatch, regmatch_t pmatch[], int eflags);
  143. #ifdef TRE_WCHAR
  144. extern int
  145. tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
  146. extern int
  147. tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
  148. size_t nmatch, regmatch_t pmatch[], int eflags);
  149. #endif /* TRE_WCHAR */
  150. #ifdef TRE_APPROX
  151. /* Approximate matching parameter struct. */
  152. typedef struct {
  153. int cost_ins; /* Default cost of an inserted character. */
  154. int cost_del; /* Default cost of a deleted character. */
  155. int cost_subst; /* Default cost of a substituted character. */
  156. int max_cost; /* Maximum allowed cost of a match. */
  157. int max_ins; /* Maximum allowed number of inserts. */
  158. int max_del; /* Maximum allowed number of deletes. */
  159. int max_subst; /* Maximum allowed number of substitutes. */
  160. int max_err; /* Maximum allowed number of errors total. */
  161. } regaparams_t;
  162. /* Approximate matching result struct. */
  163. typedef struct {
  164. size_t nmatch; /* Length of pmatch[] array. */
  165. regmatch_t *pmatch; /* Submatch data. */
  166. int cost; /* Cost of the match. */
  167. int num_ins; /* Number of inserts in the match. */
  168. int num_del; /* Number of deletes in the match. */
  169. int num_subst; /* Number of substitutes in the match. */
  170. } regamatch_t;
  171. /* Approximate matching functions. */
  172. extern int
  173. tre_regaexec(const regex_t *preg, const char *string,
  174. regamatch_t *match, regaparams_t params, int eflags);
  175. extern int
  176. tre_reganexec(const regex_t *preg, const char *string, size_t len,
  177. regamatch_t *match, regaparams_t params, int eflags);
  178. extern int
  179. tre_regaexecb(const regex_t *preg, const char *string,
  180. regamatch_t *match, regaparams_t params, int eflags);
  181. #ifdef TRE_WCHAR
  182. /* Wide character approximate matching. */
  183. extern int
  184. tre_regawexec(const regex_t *preg, const wchar_t *string,
  185. regamatch_t *match, regaparams_t params, int eflags);
  186. extern int
  187. tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
  188. regamatch_t *match, regaparams_t params, int eflags);
  189. #endif /* TRE_WCHAR */
  190. /* Sets the parameters to default values. */
  191. extern void
  192. tre_regaparams_default(regaparams_t *params);
  193. #endif /* TRE_APPROX */
  194. #ifdef TRE_WCHAR
  195. typedef wchar_t tre_char_t;
  196. #else /* !TRE_WCHAR */
  197. typedef unsigned char tre_char_t;
  198. #endif /* !TRE_WCHAR */
  199. typedef struct {
  200. int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  201. void (*rewind)(size_t pos, void *context);
  202. int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  203. void *context;
  204. } tre_str_source;
  205. extern int
  206. tre_reguexec(const regex_t *preg, const tre_str_source *string,
  207. size_t nmatch, regmatch_t pmatch[], int eflags);
  208. /* Returns the version string. The returned string is static. */
  209. extern char *
  210. tre_version(void);
  211. /* Returns the value for a config parameter. The type to which `result'
  212. must point to depends of the value of `query', see documentation for
  213. more details. */
  214. extern int
  215. tre_config(int query, void *result);
  216. enum {
  217. TRE_CONFIG_APPROX,
  218. TRE_CONFIG_WCHAR,
  219. TRE_CONFIG_MULTIBYTE,
  220. TRE_CONFIG_SYSTEM_ABI,
  221. TRE_CONFIG_VERSION
  222. };
  223. /* Returns 1 if the compiled pattern has back references, 0 if not. */
  224. extern int
  225. tre_have_backrefs(const regex_t *preg);
  226. /* Returns 1 if the compiled pattern uses approximate matching features,
  227. 0 if not. */
  228. extern int
  229. tre_have_approx(const regex_t *preg);
  230. #ifdef __cplusplus
  231. }
  232. #endif
  233. #endif /* TRE_H */
  234. /* EOF */