pyhash.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #ifndef Py_HASH_H
  2. #define Py_HASH_H
  3. #ifdef __cplusplus
  4. extern "C" {
  5. #endif
  6. /* Helpers for hash functions */
  7. #ifndef Py_LIMITED_API
  8. PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
  9. PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
  10. PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
  11. #endif
  12. /* Prime multiplier used in string and various other hashes. */
  13. #define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
  14. /* Parameters used for the numeric hash implementation. See notes for
  15. _Py_HashDouble in Python/pyhash.c. Numeric hashes are based on
  16. reduction modulo the prime 2**_PyHASH_BITS - 1. */
  17. #if SIZEOF_VOID_P >= 8
  18. # define _PyHASH_BITS 61
  19. #else
  20. # define _PyHASH_BITS 31
  21. #endif
  22. #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
  23. #define _PyHASH_INF 314159
  24. #define _PyHASH_NAN 0
  25. #define _PyHASH_IMAG _PyHASH_MULTIPLIER
  26. /* hash secret
  27. *
  28. * memory layout on 64 bit systems
  29. * cccccccc cccccccc cccccccc uc -- unsigned char[24]
  30. * pppppppp ssssssss ........ fnv -- two Py_hash_t
  31. * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t
  32. * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
  33. * ........ ........ eeeeeeee pyexpat XML hash salt
  34. *
  35. * memory layout on 32 bit systems
  36. * cccccccc cccccccc cccccccc uc
  37. * ppppssss ........ ........ fnv -- two Py_hash_t
  38. * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*)
  39. * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
  40. * ........ ........ eeee.... pyexpat XML hash salt
  41. *
  42. * (*) The siphash member may not be available on 32 bit platforms without
  43. * an unsigned int64 data type.
  44. */
  45. #ifndef Py_LIMITED_API
  46. typedef union {
  47. /* ensure 24 bytes */
  48. unsigned char uc[24];
  49. /* two Py_hash_t for FNV */
  50. struct {
  51. Py_hash_t prefix;
  52. Py_hash_t suffix;
  53. } fnv;
  54. /* two uint64 for SipHash24 */
  55. struct {
  56. uint64_t k0;
  57. uint64_t k1;
  58. } siphash;
  59. /* a different (!) Py_hash_t for small string optimization */
  60. struct {
  61. unsigned char padding[16];
  62. Py_hash_t suffix;
  63. } djbx33a;
  64. struct {
  65. unsigned char padding[16];
  66. Py_hash_t hashsalt;
  67. } expat;
  68. } _Py_HashSecret_t;
  69. PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
  70. #endif
  71. #ifdef Py_DEBUG
  72. PyAPI_DATA(int) _Py_HashSecret_Initialized;
  73. #endif
  74. /* hash function definition */
  75. #ifndef Py_LIMITED_API
  76. typedef struct {
  77. Py_hash_t (*const hash)(const void *, Py_ssize_t);
  78. const char *name;
  79. const int hash_bits;
  80. const int seed_bits;
  81. } PyHash_FuncDef;
  82. PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
  83. #endif
  84. /* cutoff for small string DJBX33A optimization in range [1, cutoff).
  85. *
  86. * About 50% of the strings in a typical Python application are smaller than
  87. * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
  88. * NEVER use DJBX33A for long strings!
  89. *
  90. * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
  91. * should use a smaller cutoff because it is easier to create colliding
  92. * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
  93. * provide a decent safety margin.
  94. */
  95. #ifndef Py_HASH_CUTOFF
  96. # define Py_HASH_CUTOFF 0
  97. #elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
  98. # error Py_HASH_CUTOFF must in range 0...7.
  99. #endif /* Py_HASH_CUTOFF */
  100. /* hash algorithm selection
  101. *
  102. * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
  103. * configure script.
  104. *
  105. * - FNV is available on all platforms and architectures.
  106. * - SIPHASH24 only works on platforms that don't require aligned memory for integers.
  107. * - With EXTERNAL embedders can provide an alternative implementation with::
  108. *
  109. * PyHash_FuncDef PyHash_Func = {...};
  110. *
  111. * XXX: Figure out __declspec() for extern PyHash_FuncDef.
  112. */
  113. #define Py_HASH_EXTERNAL 0
  114. #define Py_HASH_SIPHASH24 1
  115. #define Py_HASH_FNV 2
  116. #ifndef Py_HASH_ALGORITHM
  117. # ifndef HAVE_ALIGNED_REQUIRED
  118. # define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
  119. # else
  120. # define Py_HASH_ALGORITHM Py_HASH_FNV
  121. # endif /* uint64_t && uint32_t && aligned */
  122. #endif /* Py_HASH_ALGORITHM */
  123. #ifdef __cplusplus
  124. }
  125. #endif
  126. #endif /* !Py_HASH_H */