Snippets
  • Uploaded By: maroon
  • Author: maroon
  • Added: 8 months ago
  • Updated: Never
  • mIRC Version: used 6.35 & 7.52
  • Hits: 235
  • Size: 11.6KB
  • Downloads: 4
  • Review By: entropy

Hash functions: lookup3 & One-at-a-Time v1.0

Non-Crypto Hash Functions - Bob Jenkins' "lookup3" and 2 ways for "One at a Time" to handle unicode 128+ characters.
Not much to say that isn't in comments in the script.

  
  2    0  Login to Vote.


Source Code:
  1. ; $lookup3() $oat() and $oat2() are hashes designed to avoid collisions and avalanche the message bits across the hash
  2. ; If you need a 1-way crypto hash, try using a hash from the SHA2 family, such as $sha256()
  3. ;
  4. ; Bob Jenkins' non-crypto hashlittle2 by maroon, from http://burtleburtle.net/bob/c/lookup3.c (little means little-endian)
  5. ; This hash is designed for languages supporting circular rotate, so is slightly cumbersome in mSL
  6. ; Appears intended for long strings or needing hashes with 64 bits, but has slightly better qualities than $oat().
  7. ; $OAT should be faster than $LOOKUP3, but benchmark test shows that in mSL it's actually slower for a 12-byte string
  8. ; If speed is important and 32-bit hash is long enough, consider Bob Jenkins' "one at a time" hash in $oat() or $oat2()
  9. ; Syntax: $lookup3(text string [[[,bits][h] ,pc] ,pb] )
  10. ; Syntax: $lookup3(&binvar , b[bits][h] [[,pc] ,pb] )
  11. ; $2 is for optional parameters for the hash output
  12. ; bits = 1-64 (default=32 when not used) length of hash returned
  13. ; b = $1 is a &binary variable not a string beginning with ampersand
  14. ; h = output should be hexadecimal not numeric. Output isn't padded with 0's unless enough leftmost bits of hash are 0.
  15. ; //bset -t &v 1 abc | echo -a $lookup3(&v,b40) same as $lookup3(abc,40) $base($lookup3(abc,40),10,16) $lookup3(&v,40bh) $lookup3(&v,32bh)
  16. ; $3 = *pc and/or $4 = *pb. Both would be 32bit numbers 0-4294967295 to 'key' or 'seed' the hash.
  17. ; If you have only 1 value, best to use it as pc in $3 not pb in $4
  18. ; //echo -a $lookup3(abc,64h) $lookup3(abc,h) $lookup3(abc,h,12345678) $lookup3(abc,h,12345678,901234567)
  19. ;
  20. ; hash lengths above 32 obtained by returning all/part of %b as bits 33-64.
  21. ; bits 33-53 returns ((lowest N-32 bits of %b) multiplied by 2^32) plus %c
  22. ; mIRC's math is inaccurate above 2^53, so it uses my $bigint_mul() and $bigint_add() snippets to calculate N=54-64 bits.
  23. ; alternatively, you can request the hash as hexadecimal, which doesn't require math above 2^32
  24. ;
  25. ; Instead of extra code to handle lengths not exact multiple of 12, padded the input with 12 0x00's after obtaining the length.
  26. ; Note that initial values are altered by $1's length, preventing additional 0x00's at end of string producing an identical hash
  27.  
  28. alias lookup3 {
  29. if (b isin $2) { if (!$bvar($1,0)) return | var %bits $remove($2,b) | bcopy -tc &lookup3_k 1 $1 1 -1 }
  30. else { var %bits $2 | bset -tc &lookup3_k 1 $1 }
  31. ;a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
  32. ;c += *pb; (don't need mod2^32 here because next is addition followed by mod2^32)
  33. var %len = $bvar(&lookup3_k,0) , %i = 1 , %a = $calc(3735928559 + %len + $3) , %b = %a , %c = %a + 0 $+ $4
  34. bset &lookup3_k $calc(1+%len) 0 0 0 0 0 0 0 0 0 0 0 0
  35.  
  36. while (%len > 12) {
  37. ; a += k[i]; b+=k[i+1]; c+=k[i+2]; (tbd: might not need all mod2^32's) (for bigendian hash, change .long's to .nlong's)
  38. var %a = %a + $bvar(&lookup3_k,%i).long | var %a = %a % 4294967296 | inc %i 4
  39. var %b = %b + $bvar(&lookup3_k,%i).long | var %b = %b % 4294967296 | inc %i 4
  40. var %c = %c + $bvar(&lookup3_k,%i).long | var %c = %c % 4294967296 | inc %i 4
  41. ;mix
  42. ;a -= c; a ^= rot(c, 4); c += b;
  43. var %a = %a - %c | if (%a < 0) inc %a 4294967296 | var %t $calc((%c % 268435456) * 16) + $int($calc(%c / 268435456)) | var %a = $xor(%a,%t) | var %c = %c + %b | var %c = %c % 4294967296
  44. ;b -= a; b ^= rot(a, 6); a += c;
  45. var %b = %b - %a | if (%b < 0) inc %b 4294967296 | var %t $calc((%a % 67108864) * 64) + $int($calc(%a / 67108864)) | var %b = $xor(%b,%t) | var %a = %a + %c | var %a = %a % 4294967296
  46. ;c -= b; c ^= rot(b, 8); b += a;
  47. var %c = %c - %b | if (%c < 0) inc %c 4294967296 | var %t $calc((%b % 16777216) * 256) + $int($calc(%b / 16777216)) | var %c = $xor(%c,%t) | var %b = %b + %a | var %b = %b % 4294967296
  48. ;a -= c; a ^= rot(c,16); c += b;
  49. var %a = %a - %c | if (%a < 0) inc %a 4294967296 | var %t $calc((%c % 65536) * 65536) + $int($calc(%c / 65536)) | var %a = $xor(%a,%t) | var %c = %c + %b | var %c = %c % 4294967296
  50. ;b -= a; b ^= rot(a,19); a += c;
  51. var %b = %b - %a | if (%b < 0) inc %b 4294967296 | var %t $calc((%a % 8192) * 524288) + $int($calc(%a / 8192)) | var %b = $xor(%b,%t) | var %a = %a + %c | var %a = %a % 4294967296
  52. ;c -= b; c ^= rot(b, 4); b += a;
  53. var %c = %c - %b | if (%c < 0) inc %c 4294967296 | var %t $calc((%b % 268435456) * 16) + $int($calc(%b / 268435456)) | var %c = $xor(%c,%t) | var %b = %b + %a | var %b = %b % 4294967296
  54. dec %len 12
  55. }
  56.  
  57. ; a += k[i]; b+=k[i+1]; c+=k[i+2];
  58. var %a = %a + $bvar(&lookup3_k,%i).long | var %a = %a % 4294967296 | inc %i 4
  59. var %b = %b + $bvar(&lookup3_k,%i).long | var %b = %b % 4294967296 | inc %i 4
  60. var %c = %c + $bvar(&lookup3_k,%i).long | var %c = %c % 4294967296
  61. ;final
  62. ;c ^= b; c -= rot(b,14);
  63. var %c $xor(%c,%b) | var %t $calc((%b % 262144) * 16384) + $int($calc(%b / 262144)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296
  64. ;a ^= c; a -= rot(c,11);
  65. var %a $xor(%a,%c) | var %t $calc((%c % 2097152) * 2048) + $int($calc(%c / 2097152)) | var %a = %a - %t | if (%a < 0) inc %a 4294967296
  66. ;b ^= a; b -= rot(a,25);
  67. var %b $xor(%b,%a) | var %t $calc((%a % 128) * 33554432) + $int($calc(%a / 128)) | var %b = %b - %t | if (%b < 0) inc %b 4294967296
  68. ;c ^= b; c -= rot(b,16);
  69. var %c $xor(%c,%b) | var %t $calc((%b % 65536) * 65536) + $int($calc(%b / 65536)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296
  70. ;a ^= c; a -= rot(c,4);
  71. var %a $xor(%a,%c) | var %t $calc((%c % 268435456) * 16) + $int($calc(%c / 268435456)) | var %a = %a - %t | if (%a < 0) inc %a 4294967296
  72. ;b ^= a; b -= rot(a,14);
  73. var %b $xor(%b,%a) | var %t $calc((%a % 262144) * 16384) + $int($calc(%a / 262144)) | var %b = %b - %t | if (%b < 0) inc %b 4294967296
  74. ;c ^= b; c -= rot(b,24);
  75. var %c $xor(%c,%b) | var %t $calc((%b % 256) * 16777216) + $int($calc(%b / 256)) | var %c = %c - %t | if (%c < 0) inc %c 4294967296
  76.  
  77. if (h isin %bits) {
  78. var %bits $remove(%bits,h) | if ((!%bits) || (%bits == 32)) return $base(%c,10,16,8)
  79. var %p $ceil($calc(%bits /4))
  80. if (%bits isnum 1-32) return $base($and(%c, $calc(2^%bits -1)),10,16,%p)
  81. dec %p 8 | dec %bits 32
  82. if (%bits isnum 1-32) return $base($and(%b, $calc(2^%bits -1)),10,16,%p) $+ $base(%c,10,16,8)
  83. }
  84. else {
  85. if ((!%bits) || (%bits == 32)) return %c
  86. if (%bits isnum 1-32) return $and(%c, $calc(2^%bits -1))
  87. var %i = $and(%b, $calc(2^ (%bits -32) -1))
  88. if (%bits isnum 33-53) { return $calc(%i * 4294967296 + %c) }
  89. if (%bits isnum 54-64) { return $bigint_add($bigint_mul(%i,4294967296),%c) }
  90. }
  91. }
  92.  
  93. alias benchlookup3long { var %bench $ticks | var %x 1 | var %a $str(a,100) , %j 1000 | while (%j) { noop $lookup3(%a) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms }
  94. alias bench_oat_long { var %bench $ticks | var %x 1 | var %a $str(a,100) , %j 1000 | while (%j) { noop $oat(%a) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms }
  95. alias benchlookup3short { var %bench $ticks | var %x 1 | var %j 1000 | while (%j) { noop $lookup3(abcabcabcabc) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms }
  96. alias bench_oat_short { var %bench $ticks | var %x 1 | var %j 1000 | while (%j) { noop $oat(abcabcabcabc) | dec %j } | echo -ag time $calc($ticks - %bench) $+ ms }
  97.  
  98. ; Bob Jenkins' non-crypto "One at a time" hash by maroon, as described at https://en.wikipedia.org/wiki/Jenkins_hash_function
  99. ; It hashes a string of arbitrary length to a 1-32 bit hash.
  100. ; One usage includes reducing memory by storing hash of entries for a dictionary that's too bulky to keep in memory.
  101. ; If $oat(string) isn't in the array of hash values, then string isn't in disk dictionary.
  102. ; (Though obviously the presence of $oat(string) in the array isn't a guarantee that the string *is* in the
  103. ; disk dictionary, but it's a way to filter strings to reduce disk accesses.)
  104. ; Syntax: $oat(text string [[,bits],initial_val])
  105. ; Syntax: $oat(&binvar ,[bits] b [,initial_val]) Requires enough free memory to make a clone of the &binvar
  106. ; $1 is the string being hashed,
  107. ; $2 is for optional parameters for the hash output
  108. ; bits = 1-32 (default=32 when not used) length of hash returned
  109. ; b = $1 is a &binary variable not a string beginning with ampersand
  110. ; $3 is optional initial value for hash constant, for 'keying' or 'seeding' the hash
  111. ;
  112. ; $oat2(text string [[,bits],initial_val])
  113. ; Similar to $oat() except does not accept binary strings, because it uses unicode characters above 127 as
  114. ; numbers in the rang 128-65535 instead of multiple bytes.
  115. ; //bset -t &v 1 $chr(10004) | echo -a $bvar(&v,1-) vs $asc($chr(10004))
  116. ; Otherwise, $oat2() returns identical hashes to $oat() for 7-bit strings
  117. ; (mIRC 6.35 doesn't recognize codepoints 256+, and doesn't handle codepoints 128-255 the same without changing 'bset -t' into 'bset -ta')
  118. ;
  119. ; Hashes stored as decimal use up to 10 digits. As hex, hash can be stored as 8 digits max, i.e. $base(%h,10,16,8).
  120. ; As base36, can shorten to 7 digits, or 6 digits if limited to 31 bits (or 12 digits for 62 bits)
  121. ; //bset -t &v 1 abc | echo -a $oat(abc) $oat(&v,b) $oat(&v) $base($oat(abc,32),10,16,8) $base($oat(&v,b31),10,36)
  122. ; As Mime-64, if a 24-bit hash is enough, can store 24 bits as 4 encoded text characters, but the decimal value
  123. ; would need to be translated to a binary format which allows it to be stored in a binary variable as 4 bytes.
  124. ; //bset &a 1 $regsubex($base(4123456789,10,16,8),/(\S)(\S)/g,$base(\1\2,16,10) $chr(32)) | echo -a $bvar(&a,1-) | noop $encode(&a,bm) | echo -a $left($bvar(&a,1-).text,4)
  125. ; You can determine how many digits your hash takes when translated to BASE-36 by changing the first number of
  126. ; //echo -a $calc( 62 * $log(2) / $log(36) ) ... then round up any decimals
  127. ; Because a string of 1-or-more 0x00's always has a hash of zero, if that is of concern to you,
  128. ; either use a $3 'seed' value or alter the hash to initialize '%h = %len' instead of '%h = $3'.
  129. ; (extra 0x00's do affect the hash when the string is not entirely 0x00's)
  130.  
  131. alias oat {
  132. if (b isin $2) { if (!$bvar($1,0)) return | var %bits $remove($2,b) | bcopy -tc &oat_key 1 $1 1 -1 }
  133. else { var %bits $2 | bset -tc &oat_key 1 $1 }
  134. var %len = $bvar(&oat_key,0) , %i = 0 , %h = $3
  135. while (%i < %len) {
  136. !inc %i
  137. var %h = $calc(1025 * (%h + $bvar(&oat_key,%i)) % 4294967296)
  138. var %h = $xor(%h, $calc(%h / 64))
  139. }
  140. var %h = $calc((%h * 9) % 4294967296)
  141. var %h = $calc(($xor(%h, $calc(%h / 2048)) * 32769) % 4294967296)
  142. if (%bits !isnum 1-31) return %h | else return $and(%h, $calc(2^ %bits - 1))
  143. }
  144.  
  145. alias oat2 {
  146. var %len = $len($1) , %i = 0 , %h = $3
  147. while (%i < %len) {
  148. !inc %i
  149. var %h = $calc(1025 * (%h + $asc($mid($1,%i,1))) % 4294967296)
  150. var %h = $xor(%h, $calc(%h / 64))
  151. }
  152. var %h = $calc((%h * 9) % 4294967296)
  153. var %h = $calc(($xor(%h, $calc(%h / 2048)) * 32769) % 4294967296)
  154. if ($2 !isnum 1-31) return %h | else return $and(%h, $calc(2^ $2 - 1))
  155. }
  156.  


Comments
No Comments.

Login to Comment.