• Main Page
  • Related Pages
  • Modules
  • Namespaces
  • Data Structures
  • Files
  • Directories
  • File List
  • Globals

hashes.h

00001 /*
00002  * $Id$
00003  *
00004  * Copyright (C) 2006 iptelorg GmbH 
00005  *
00006  * Permission to use, copy, modify, and distribute this software for any
00007  * purpose with or without fee is hereby granted, provided that the above
00008  * copyright notice and this permission notice appear in all copies.
00009  *
00010  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
00011  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
00012  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
00013  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
00014  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
00015  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
00016  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00017  */
00018 /*
00019  * History:
00020  * --------
00021  *  2006-02-02  created by andrei
00022  *  2006-11-24  added numeric string optimized hash function (andrei)
00023  *  2006-12-13  split into hashes.h (more generic) and str_hash.h (andrei)
00024  *  2007-02-22  added case insensitive versions (andrei)
00025  */
00026 
00027 
00028 #ifndef _hashes_h
00029 #define _hashes_h
00030 
00031 #include "str.h"
00032 
00033 
00034 
00035 /* internal use: hash update
00036  * params: char* s   - string start,
00037  *         char* end - end
00038  *         char* p,  and unsigned v temporary vars (used)
00039  *         unsigned h - result
00040  * h should be initialized (e.g. set it to 0), the result in h */
00041 #define hash_update_str(s, end, p, v, h) \
00042         do{ \
00043                 for ((p)=(s); (p)<=((end)-4); (p)+=4){ \
00044                         (v)=(*(p)<<24)+((p)[1]<<16)+((p)[2]<<8)+(p)[3]; \
00045                         (h)+=(v)^((v)>>3); \
00046                 } \
00047                 switch((end)-(p)){\
00048                         case 3: \
00049                                 (v)=(*(p)<<16)+((p)[1]<<8)+(p)[2]; break; \
00050                         case 2: \
00051                                 (v)=(*(p)<<8)+p[1]; break; \
00052                         case 1: \
00053                                 (v)=*p; break; \
00054                         default: \
00055                                 (v)=0; break; \
00056                 } \
00057                 (h)+=(v)^((v)>>3); \
00058         }while(0)
00059 
00060 /* like hash_update_str, but case insensitive 
00061  * params: char* s   - string start,
00062  *         char* end - end
00063  *         char* p,  and unsigned v temporary vars (used)
00064  *         unsigned h - result
00065  * h should be initialized (e.g. set it to 0), the result in h */
00066 #define hash_update_case_str(s, end, p, v, h) \
00067         do{ \
00068                 for ((p)=(s); (p)<=((end)-4); (p)+=4){ \
00069                         (v)=((*(p)<<24)+((p)[1]<<16)+((p)[2]<<8)+(p)[3])|0x20202020; \
00070                         (h)+=(v)^((v)>>3); \
00071                 } \
00072                 (v)=0; \
00073                 for (;(p)<(end); (p)++){ (v)<<=8; (v)+=*(p)|0x20;} \
00074                 (h)+=(v)^((v)>>3); \
00075         }while(0)
00076 
00077 
00078 /* internal use: call it to adjust the h from hash_update_str */
00079 #define hash_finish(h) (((h)+((h)>>11))+(((h)>>13)+((h)>>23)))
00080 
00081 
00082 
00083 /* "raw" 2 strings hash
00084  * returns an unsigned int (which you can use modulo table_size as hash value)
00085  */
00086 inline static unsigned int get_hash2_raw(const str* key1, const str* key2)
00087 {
00088         char* p;
00089         register unsigned v;
00090         register unsigned h;
00091         
00092         h=0;
00093         
00094         hash_update_str(key1->s, key1->s+key1->len, p, v, h);
00095         hash_update_str(key2->s, key2->s+key2->len, p, v, h);
00096         return hash_finish(h);
00097 }
00098 
00099 
00100 
00101 /* "raw" 1 string hash
00102  * returns an unsigned int (which you can use modulo table_size as hash value)
00103  */
00104 inline static unsigned int get_hash1_raw(const char* s, int len)
00105 {
00106         const char* p;
00107         register unsigned v;
00108         register unsigned h;
00109         
00110         h=0;
00111         
00112         hash_update_str(s, s+len, p, v, h);
00113         return hash_finish(h);
00114 }
00115 
00116 
00117 
00118 /* a little slower than hash_* , but better distribution for 
00119  * numbers and about the same for strings */
00120 #define hash_update_str2(s, end, p, v, h) \
00121         do{ \
00122                 for ((p)=(s); (p)<=((end)-4); (p)+=4){ \
00123                         (v)=(*(p)*16777213)+((p)[1]*65537)+((p)[2]*257)+(p)[3]; \
00124                         (h)=16777259*(h)+((v)^((v)<<17)); \
00125                 } \
00126                 (v)=0; \
00127                 for (;(p)<(end); (p)++){ (v)*=251; (v)+=*(p);} \
00128                 (h)=16777259*(h)+((v)^((v)<<17)); \
00129         }while(0)
00130 
00131 /*  like hash_update_str2 but case insensitive */
00132 #define hash_update_case_str2(s, end, p, v, h) \
00133         do{ \
00134                 for ((p)=(s); (p)<=((end)-4); (p)+=4){ \
00135                         (v)=((*(p)|0x20)*16777213)+(((p)[1]|0x20)*65537)+\
00136                                 (((p)[2]|0x20)*257)+((p)[3]|0x20); \
00137                         (h)=16777259*(h)+((v)^((v)<<17)); \
00138                 } \
00139                 (v)=0; \
00140                 for (;(p)<(end); (p)++){ (v)*=251; (v)+=*(p)|0x20;} \
00141                 (h)=16777259*(h)+((v)^((v)<<17)); \
00142         }while(0)
00143 
00144 /* internal use: call it to adjust the h from hash_update_str */
00145 #define hash_finish2(h) (((h)+((h)>>7))+(((h)>>13)+((h)>>23)))
00146 
00147 
00148 
00149 /* a little slower than get_hash1_raw() , but better distribution for 
00150  * numbers and about the same for strings */
00151 inline static unsigned int get_hash1_raw2(const char* s, int len)
00152 {
00153         const char* p;
00154         register unsigned v;
00155         register unsigned h;
00156         
00157         h=0;
00158         
00159         hash_update_str2(s, s+len, p, v, h);
00160         return hash_finish2(h);
00161 }
00162 
00163 
00164 
00165 /* "raw" 2 strings hash optimized for numeric strings (see above)
00166  * returns an unsigned int (which you can use modulo table_size as hash value)
00167  */
00168 inline static unsigned int get_hash2_raw2(const str* key1, const str* key2)
00169 {
00170         char* p;
00171         register unsigned v;
00172         register unsigned h;
00173         
00174         h=0;
00175         
00176         hash_update_str2(key1->s, key1->s+key1->len, p, v, h);
00177         hash_update_str2(key2->s, key2->s+key2->len, p, v, h);
00178         return hash_finish2(h);
00179 }
00180 
00181 
00182 
00183 /* "raw" 2 strings case insensitive hash (like get_hash2_raw but case 
00184  * insensitive)
00185  * returns an unsigned int (which you can use modulo table_size as hash value)
00186  */
00187 inline static unsigned int get_hash2_case_raw(const str* key1, const str* key2)
00188 {
00189         char* p;
00190         register unsigned v;
00191         register unsigned h;
00192         
00193         h=0;
00194         
00195         hash_update_case_str(key1->s, key1->s+key1->len, p, v, h);
00196         hash_update_case_str(key2->s, key2->s+key2->len, p, v, h);
00197         return hash_finish(h);
00198 }
00199 
00200 
00201 
00202 /* "raw" 1 string case insensitive hash
00203  * returns an unsigned int (which you can use modulo table_size as hash value)
00204  */
00205 inline static unsigned int get_hash1_case_raw(const char* s, int len)
00206 {
00207         const char* p;
00208         register unsigned v;
00209         register unsigned h;
00210         
00211         h=0;
00212         
00213         hash_update_case_str(s, s+len, p, v, h);
00214         return hash_finish(h);
00215 }
00216 
00217 
00218 /* same as get_hash1_raw2, but case insensitive and slower
00219  * returns an unsigned int (which you can use modulo table_size as hash value)
00220  */
00221 inline static unsigned int get_hash1_case_raw2(const char* s, int len)
00222 {
00223         const char* p;
00224         register unsigned v;
00225         register unsigned h;
00226         
00227         h=0;
00228         
00229         hash_update_case_str2(s, s+len, p, v, h);
00230         return hash_finish2(h);
00231 }
00232 
00233 
00234 
00235 /* "raw" 2 strings hash optimized for numeric strings (see above)
00236  * same as get_hash2_raw2 but case insensitive and slower
00237  * returns an unsigned int (which you can use modulo table_size as hash value)
00238  */
00239 inline static unsigned int get_hash2_case_raw2(const str* key1,
00240                                                                                            const str* key2)
00241 {
00242         char* p;
00243         register unsigned v;
00244         register unsigned h;
00245         
00246         h=0;
00247         
00248         hash_update_case_str2(key1->s, key1->s+key1->len, p, v, h);
00249         hash_update_case_str2(key2->s, key2->s+key2->len, p, v, h);
00250         return hash_finish2(h);
00251 }
00252 
00253 
00254 /*
00255  * generic hashing - from the intial origins of ser
00256  */
00257 #define ch_h_inc h+=v^(v>>3)
00258 #define ch_icase(_c) (((_c)>='A'&&(_c)<='Z')?((_c)|0x20):(_c))
00259 
00260 /*
00261  * case sensitive hashing
00262  * - s1 - str to hash
00263  * - s2 - optional - continue hashing over s2
00264  * - size - optional - size of hash table (must be power of 1); if set (!=0),
00265  *   instead of hash id, returned value is slot index
00266  * return computed hash id or hash table slot index
00267  */
00268 static inline unsigned int core_hash(const str *s1, const str *s2,
00269                 const unsigned int size)
00270 {
00271         char *p, *end;
00272         register unsigned v;
00273         register unsigned h;
00274 
00275         h=0;
00276 
00277         end=s1->s+s1->len;
00278         for ( p=s1->s ; p<=(end-4) ; p+=4 ){
00279                 v=(*p<<24)+(p[1]<<16)+(p[2]<<8)+p[3];
00280                 ch_h_inc;
00281         }
00282         v=0;
00283         for (; p<end ; p++){ v<<=8; v+=*p;}
00284         ch_h_inc;
00285 
00286         if (s2) {
00287                 end=s2->s+s2->len;
00288                 for (p=s2->s; p<=(end-4); p+=4){
00289                         v=(*p<<24)+(p[1]<<16)+(p[2]<<8)+p[3];
00290                         ch_h_inc;
00291                 }
00292                 v=0;
00293                 for (; p<end ; p++){ v<<=8; v+=*p;}
00294                 ch_h_inc;
00295         }
00296         h=((h)+(h>>11))+((h>>13)+(h>>23));
00297         return size?((h)&(size-1)):h;
00298 }
00299 
00300 
00301 /*
00302  * case insensitive hashing
00303  * - s1 - str to hash
00304  * - s2 - optional - continue hashing over s2
00305  * - size - optional - size of hash table (must be power of 1); if set (!=0),
00306  *   instead of hash id, returned value is slot index
00307  * return computed hash id or hash table slot index
00308  */
00309 static inline unsigned int core_case_hash( str *s1, str *s2,
00310                 unsigned int size)
00311 {
00312         char *p, *end;
00313         register unsigned v;
00314         register unsigned h;
00315 
00316         h=0;
00317 
00318         end=s1->s+s1->len;
00319         for ( p=s1->s ; p<=(end-4) ; p+=4 ){
00320                 v=(ch_icase(*p)<<24)+(ch_icase(p[1])<<16)+(ch_icase(p[2])<<8)
00321                         + ch_icase(p[3]);
00322                 ch_h_inc;
00323         }
00324         v=0;
00325         for (; p<end ; p++){ v<<=8; v+=ch_icase(*p);}
00326         ch_h_inc;
00327 
00328         if (s2) {
00329                 end=s2->s+s2->len;
00330                 for (p=s2->s; p<=(end-4); p+=4){
00331                         v=(ch_icase(*p)<<24)+(ch_icase(p[1])<<16)+(ch_icase(p[2])<<8)
00332                                 + ch_icase(p[3]);
00333                         ch_h_inc;
00334                 }
00335                 v=0;
00336                 for (; p<end ; p++){ v<<=8; v+=ch_icase(*p);}
00337                 ch_h_inc;
00338         }
00339         h=((h)+(h>>11))+((h>>13)+(h>>23));
00340         return size?((h)&(size-1)):h;
00341 }
00342 
00343 
00344 #endif

Generated on Tue May 22 2012 13:10:07 for SIP Router by  doxygen 1.7.1