zdict.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /*
  2. * Copyright (c) Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #ifndef DICTBUILDER_H_001
  11. #define DICTBUILDER_H_001
  12. #if defined (__cplusplus)
  13. extern "C" {
  14. #endif
  15. /*====== Dependencies ======*/
  16. #include <stddef.h> /* size_t */
  17. /* ===== ZDICTLIB_API : controls the visibility of the library's symbols ===== */
  18. #ifndef ZDICTLIB_VISIBILITY
  19. # if defined(__GNUC__) && (__GNUC__ >= 4)
  20. # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
  21. # else
  22. # define ZDICTLIB_VISIBILITY
  23. # endif
  24. #endif
  25. #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
  26. # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
  27. #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
  28. # define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* Not required, but it allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump. */
  29. #else
  30. # define ZDICTLIB_API ZDICTLIB_VISIBILITY
  31. #endif
  32. /*******************************************************************************
  33. * Zstd dictionary builder
  34. *
  35. * FAQ
  36. * ===
  37. * Why should I use a dictionary?
  38. * ------------------------------
  39. *
  40. * Zstd can use dictionaries to improve compression ratio of small data.
  41. * Traditionally small files don't compress well because there is very little
  42. * repetition in a single sample, since it is small. But, if you are compressing
  43. * many similar files, like a bunch of JSON records that share the same
  44. * structure, you can train a dictionary ahead of time on some samples of
  45. * these files. Then, zstd can use the dictionary to find repetitions that are
  46. * present across samples. This can vastly improve compression ratio.
  47. *
  48. * When is a dictionary useful?
  49. * ----------------------------
  50. *
  51. * Dictionaries are useful when compressing many small files that are similar.
  52. * The larger a file is, the less benefit a dictionary will have. Generally,
  53. * we don't expect dictionary compression to be effective past 100KB. And the
  54. * smaller a file is, the more we would expect the dictionary to help.
  55. *
  56. * How do I use a dictionary?
  57. * --------------------------
  58. *
  59. * Simply pass the dictionary to the zstd compressor with
  60. * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
  61. * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
  62. * more advanced functions that allow selecting some options, see zstd.h for
  63. * complete documentation.
  64. *
  65. * What is a zstd dictionary?
  66. * --------------------------
  67. *
  68. * A zstd dictionary has two pieces: Its header, and its content. The header
  69. * contains a magic number, the dictionary ID, and entropy tables. These
  70. * entropy tables allow zstd to save on header costs in the compressed file,
  71. * which really matters for small data. The content is just bytes, which are
  72. * repeated content that is common across many samples.
  73. *
  74. * What is a raw content dictionary?
  75. * ---------------------------------
  76. *
  77. * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
  78. * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
  79. * content dictionary.
  80. *
  81. * How do I train a dictionary?
  82. * ----------------------------
  83. *
  84. * Gather samples from your use case. These samples should be similar to each
  85. * other. If you have several use cases, you could try to train one dictionary
  86. * per use case.
  87. *
  88. * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
  89. * dictionary. There are a few advanced versions of this function, but this
  90. * is a great starting point. If you want to further tune your dictionary
  91. * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
  92. * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
  93. *
  94. * If the dictionary training function fails, that is likely because you
  95. * either passed too few samples, or a dictionary would not be effective
  96. * for your data. Look at the messages that the dictionary trainer printed:
  97. * if they don't say too few samples, then a dictionary would not be effective.
  98. *
  99. * How large should my dictionary be?
  100. * ----------------------------------
  101. *
  102. * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
  103. * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
  104. * dictionary larger than that. But, most use cases can get away with a
  105. * smaller dictionary. The advanced dictionary builders can automatically
  106. * shrink the dictionary for you, and select the smallest size that
  107. * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
  108. * A smaller dictionary can save memory, and potentially speed up
  109. * compression.
  110. *
  111. * How many samples should I provide to the dictionary builder?
  112. * ------------------------------------------------------------
  113. *
  114. * We generally recommend passing ~100x the size of the dictionary
  115. * in samples. A few thousand should suffice. Having too few samples
  116. * can hurt the dictionary's effectiveness. Having more samples will
  117. * only improve the dictionary's effectiveness. But having too many
  118. * samples can slow down the dictionary builder.
  119. *
  120. * How do I determine if a dictionary will be effective?
  121. * -----------------------------------------------------
  122. *
  123. * Simply train a dictionary and try it out. You can use zstd's built-in
  124. * benchmarking tool to test the dictionary effectiveness.
  125. *
  126. * # Benchmark levels 1-3 without a dictionary
  127. * zstd -b1e3 -r /path/to/my/files
  128. * # Benchmark levels 1-3 with a dictionary
  129. * zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
  130. *
  131. * When should I retrain a dictionary?
  132. * -----------------------------------
  133. *
  134. * You should retrain a dictionary when its effectiveness drops. Dictionary
  135. * effectiveness drops as the data you are compressing changes. Generally, we do
  136. * expect dictionaries to "decay" over time, as your data changes, but the rate
  137. * at which they decay depends on your use case. Internally, we regularly
  138. * retrain dictionaries, and if the new dictionary performs significantly
  139. * better than the old dictionary, we will ship the new dictionary.
  140. *
  141. * I have a raw content dictionary, how do I turn it into a zstd dictionary?
  142. * -------------------------------------------------------------------------
  143. *
  144. * If you have a raw content dictionary, e.g. by manually constructing it, or
  145. * using a third-party dictionary builder, you can turn it into a zstd
  146. * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
  147. * provide some samples of the data. It will add the zstd header to the
  148. * raw content, which contains a dictionary ID and entropy tables, which
  149. * will improve compression ratio, and allow zstd to write the dictionary ID
  150. * into the frame, if you so choose.
  151. *
  152. * Do I have to use zstd's dictionary builder?
  153. * -------------------------------------------
  154. *
  155. * No! You can construct dictionary content however you please, it is just
  156. * bytes. It will always be valid as a raw content dictionary. If you want
  157. * a zstd dictionary, which can improve compression ratio, use
  158. * `ZDICT_finalizeDictionary()`.
  159. *
  160. * What is the attack surface of a zstd dictionary?
  161. * ------------------------------------------------
  162. *
  163. * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
  164. * zstd should never crash, or access out-of-bounds memory no matter what
  165. * the dictionary is. However, if an attacker can control the dictionary
  166. * during decompression, they can cause zstd to generate arbitrary bytes,
  167. * just like if they controlled the compressed data.
  168. *
  169. ******************************************************************************/
  170. /*! ZDICT_trainFromBuffer():
  171. * Train a dictionary from an array of samples.
  172. * Redirects to ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
  173. * f=20, and accel=1.
  174. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
  175. * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
  176. * The resulting dictionary will be saved into `dictBuffer`.
  177. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  178. * or an error code, which can be tested with ZDICT_isError().
  179. * Note: Dictionary training will fail if there are not enough samples to construct a
  180. * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
  181. * If dictionary training fails, you should use zstd without a dictionary, as the dictionary
  182. * would have been ineffective anyway. If you believe your samples would benefit from a dictionary,
  183. * please open an issue with details, and we can look into it.
  184. * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
  185. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
  186. * It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
  187. * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
  188. * It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
  189. */
  190. ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
  191. const void* samplesBuffer,
  192. const size_t* samplesSizes, unsigned nbSamples);
  193. typedef struct {
  194. int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
  195. unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
  196. unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
  197. * NOTE: The zstd format reserves some dictionary IDs for future use.
  198. * You may use them in private settings, but be warned that they
  199. * may be used by zstd in a public dictionary registry in the future.
  200. * These dictionary IDs are:
  201. * - low range : <= 32767
  202. * - high range : >= (2^31)
  203. */
  204. } ZDICT_params_t;
  205. /*! ZDICT_finalizeDictionary():
  206. * Given a custom content as a basis for dictionary, and a set of samples,
  207. * finalize dictionary by adding headers and statistics according to the zstd
  208. * dictionary format.
  209. *
  210. * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
  211. * supplied with an array of sizes `samplesSizes`, providing the size of each
  212. * sample in order. The samples are used to construct the statistics, so they
  213. * should be representative of what you will compress with this dictionary.
  214. *
  215. * The compression level can be set in `parameters`. You should pass the
  216. * compression level you expect to use in production. The statistics for each
  217. * compression level differ, so tuning the dictionary for the compression level
  218. * can help quite a bit.
  219. *
  220. * You can set an explicit dictionary ID in `parameters`, or allow us to pick
  221. * a random dictionary ID for you, but we can't guarantee no collisions.
  222. *
  223. * The dstDictBuffer and the dictContent may overlap, and the content will be
  224. * appended to the end of the header. If the header + the content doesn't fit in
  225. * maxDictSize the beginning of the content is truncated to make room, since it
  226. * is presumed that the most profitable content is at the end of the dictionary,
  227. * since that is the cheapest to reference.
  228. *
  229. * `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
  230. *
  231. * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
  232. * or an error code, which can be tested by ZDICT_isError().
  233. * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
  234. * instructed to, using notificationLevel>0.
  235. * NOTE: This function currently may fail in several edge cases including:
  236. * * Not enough samples
  237. * * Samples are incompressible
  238. * * Samples are all exactly the same
  239. */
  240. ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
  241. const void* dictContent, size_t dictContentSize,
  242. const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
  243. ZDICT_params_t parameters);
  244. /*====== Helper functions ======*/
  245. ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
  246. ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /**< returns dict header size; returns a ZSTD error code on failure */
  247. ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); /**< tests whether a size_t result returned by a ZDICT_* function is an error code */
  248. ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); /**< presumably returns a readable name for `errorCode` - confirm against the implementation */
  249. #ifdef ZDICT_STATIC_LINKING_ONLY
  250. /* ====================================================================================
  251. * The definitions in this section are considered experimental.
  252. * They should never be used with a dynamic library, as they may change in the future.
  253. * They are provided for advanced usages.
  254. * Use them only in association with static linking.
  255. * ==================================================================================== */
  256. #define ZDICT_DICTSIZE_MIN 256 /* presumably the smallest supported dictionary size, in bytes - confirm against the implementation */
  257. /* Deprecated: scheduled for removal in v1.6.0 */
  258. #define ZDICT_CONTENTSIZE_MIN 128
  259. /*! ZDICT_cover_params_t:
  260. * k and d are the only required parameters.
  261. * For others, value 0 means default.
  262. */
  263. typedef struct {
  264. unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
  265. unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
  266. unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
  267. unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
  268. double splitPoint; /* Fraction of samples used for training : Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing : 0 means default (1.0) : 1.0 means all samples are used for both training and testing */
  269. unsigned shrinkDict; /* Train dictionaries to shrink in size, starting from the minimum size, and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary : 0 means no shrinking and 1 means shrinking */
  270. unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking : a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
  271. ZDICT_params_t zParams; /* common dictionary parameters, see ZDICT_params_t */
  272. } ZDICT_cover_params_t;
  273. typedef struct {
  274. unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
  275. unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
  276. unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
  277. unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
  278. unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
  279. double splitPoint; /* Fraction of samples used for training : Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing : 0 means default (0.75) : 1.0 means all samples are used for both training and testing */
  280. unsigned accel; /* Acceleration level : constraint: 0 < accel <= 10 : higher means faster and less accurate : 0 means default (1) */
  281. unsigned shrinkDict; /* Train dictionaries to shrink in size, starting from the minimum size, and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary : 0 means no shrinking and 1 means shrinking */
  282. unsigned shrinkDictMaxRegression; /* Maximum regression allowed when shrinking : a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
  283. ZDICT_params_t zParams; /* common dictionary parameters, see ZDICT_params_t */
  284. } ZDICT_fastCover_params_t;
  285. /*! ZDICT_trainFromBuffer_cover():
  286. * Train a dictionary from an array of samples using the COVER algorithm.
  287. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
  288. * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
  289. * The resulting dictionary will be saved into `dictBuffer`.
  290. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  291. * or an error code, which can be tested with ZDICT_isError().
  292. * See ZDICT_trainFromBuffer() for details on failure modes.
  293. * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
  294. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
  295. * It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
  296. * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
  297. * It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
  298. */
  299. ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
  300. void *dictBuffer, size_t dictBufferCapacity,
  301. const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
  302. ZDICT_cover_params_t parameters);
  303. /*! ZDICT_optimizeTrainFromBuffer_cover():
  304. * The same requirements as for ZDICT_trainFromBuffer_cover() hold for all the parameters except `parameters`.
  305. * This function tries many parameter combinations and picks the best parameters.
  306. * `*parameters` is filled with the best parameters found,
  307. * dictionary constructed with those parameters is stored in `dictBuffer`.
  308. *
  309. * All of the parameters d, k, steps are optional.
  310. * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
  311. * If steps is zero it defaults to its default value.
  312. * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
  313. *
  314. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  315. * or an error code, which can be tested with ZDICT_isError().
  316. * On success `*parameters` contains the parameters selected.
  317. * See ZDICT_trainFromBuffer() for details on failure modes.
  318. * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
  319. */
  320. ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
  321. void* dictBuffer, size_t dictBufferCapacity,
  322. const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
  323. ZDICT_cover_params_t* parameters);
  324. /*! ZDICT_trainFromBuffer_fastCover():
  325. * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
  326. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
  327. * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
  328. * d and k are required.
  329. * All other parameters are optional, and will use default values if not provided.
  330. * The resulting dictionary will be saved into `dictBuffer`.
  331. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  332. * or an error code, which can be tested with ZDICT_isError().
  333. * See ZDICT_trainFromBuffer() for details on failure modes.
  334. * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
  335. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
  336. * It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
  337. * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
  338. * It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
  339. */
  340. ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
  341. size_t dictBufferCapacity, const void *samplesBuffer,
  342. const size_t *samplesSizes, unsigned nbSamples,
  343. ZDICT_fastCover_params_t parameters);
  344. /*! ZDICT_optimizeTrainFromBuffer_fastCover():
  345. * The same requirements as for ZDICT_trainFromBuffer_fastCover() hold for all the parameters except `parameters`.
  346. * This function tries many parameter combinations (specifically, k and d combinations)
  347. * and picks the best parameters. `*parameters` is filled with the best parameters found,
  348. * dictionary constructed with those parameters is stored in `dictBuffer`.
  349. * All of the parameters d, k, steps, f, and accel are optional.
  350. * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
  351. * If steps is zero it defaults to its default value.
  352. * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
  353. * If f is zero, default value of 20 is used.
  354. * If accel is zero, default value of 1 is used.
  355. *
  356. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  357. * or an error code, which can be tested with ZDICT_isError().
  358. * On success `*parameters` contains the parameters selected.
  359. * See ZDICT_trainFromBuffer() for details on failure modes.
  360. * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
  361. */
  362. ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
  363. size_t dictBufferCapacity, const void* samplesBuffer,
  364. const size_t* samplesSizes, unsigned nbSamples,
  365. ZDICT_fastCover_params_t* parameters);
  366. typedef struct {
  367. unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
  368. ZDICT_params_t zParams; /* common dictionary parameters, see ZDICT_params_t */
  369. } ZDICT_legacy_params_t;
  370. /*! ZDICT_trainFromBuffer_legacy():
  371. * Train a dictionary from an array of samples.
  372. * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
  373. * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
  374. * The resulting dictionary will be saved into `dictBuffer`.
  375. * `parameters` is optional and can be provided with values set to 0 to mean "default".
  376. * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
  377. * or an error code, which can be tested with ZDICT_isError().
  378. * See ZDICT_trainFromBuffer() for details on failure modes.
  379. * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
  380. * It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
  381. * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
  382. * It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
  383. * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
  384. */
  385. ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
  386. void* dictBuffer, size_t dictBufferCapacity,
  387. const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
  388. ZDICT_legacy_params_t parameters);
  389. /* Deprecation warnings */
  390. /* It is generally possible to disable deprecation warnings from the compiler,
  391. for example with -Wno-deprecated-declarations for gcc
  392. or _CRT_SECURE_NO_WARNINGS in Visual Studio.
  393. Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
  394. #ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
  395. # define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
  396. #else
  397. # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* major * 100 + minor, e.g. 405 for version 4.5 */
  398. # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
  399. # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
  400. # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) /* attribute form that carries a message */
  401. # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
  402. # elif (ZDICT_GCC_VERSION >= 301) /* attribute form without a message; `message` is dropped */
  403. # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
  404. # elif defined(_MSC_VER)
  405. # define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
  406. # else
  407. # pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
  408. # define ZDICT_DEPRECATED(message) ZDICTLIB_API
  409. # endif
  410. #endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
  411. ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") /* deprecated entry point; ZDICT_finalizeDictionary() is the named replacement */
  412. size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
  413. const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
  414. #endif /* ZDICT_STATIC_LINKING_ONLY */
  415. #if defined (__cplusplus)
  416. }
  417. #endif
  418. #endif /* DICTBUILDER_H_001 */