NEST  2.6.0,not_revisioned_source_dir@0
hashtable-common.h
Go to the documentation of this file.
1 // Copyright (c) 2010, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // ---
31 //
32 // Provides classes shared by both sparse and dense hashtable.
33 //
34 // sh_hashtable_settings has parameters for growing and shrinking
35 // a hashtable. It also packages zero-size functor (ie. hasher).
36 //
37 // Other functions and classes provide common code for serializing
38 // and deserializing hashtables to a stream (such as a FILE*).
39 
40 #ifndef UTIL_GTL_HASHTABLE_COMMON_H_
41 #define UTIL_GTL_HASHTABLE_COMMON_H_
42 
43 #include <sparseconfig.h>
44 #include <assert.h>
45 #include <stdio.h>
46 #include <stddef.h> // for size_t
47 #include <iosfwd>
48 #include <stdexcept> // For length_error
49 
50 /* If we're not using GNU C, elide __attribute__ */
51 #ifndef __GNUC__
52 # define __attribute__(x) /*NOTHING*/
53 #endif
54 
55 _START_GOOGLE_NAMESPACE_
56 
57 template <bool> struct SparsehashCompileAssert { };
58 #define SPARSEHASH_COMPILE_ASSERT(expr, msg) \
59  typedef SparsehashCompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] \
60  __attribute__((unused))
61 
62 namespace sparsehash_internal {
63 
64 // Adaptor methods for reading/writing data from an INPUT or OUPTUT
65 // variable passed to serialize() or unserialize(). For now we
66 // have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note
67 // they are pointers, unlike typical use), or else a pointer to
68 // something that supports a Read()/Write() method.
69 //
70 // For technical reasons, we implement read_data/write_data in two
71 // stages. The actual work is done in *_data_internal, which takes
72 // the stream argument twice: once as a template type, and once with
73 // normal type information. (We only use the second version.) We do
74 // this because of how C++ picks what function overload to use. If we
75 // implemented this the naive way:
76 // bool read_data(istream* is, const void* data, size_t length);
77 // template<typename T> read_data(T* fp, const void* data, size_t length);
78 // C++ would prefer the second version for every stream type except
79 // istream. However, we want C++ to prefer the first version for
80 // streams that are *subclasses* of istream, such as istringstream.
81 // This is not possible given the way template types are resolved. So
82 // we split the stream argument in two, one of which is templated and
83 // one of which is not. The specialized functions (like the istream
84 // version above) ignore the template arg and use the second, 'type'
85 // arg, getting subclass matching as normal. The 'catch-all'
86 // functions (the second version above) use the template arg to deduce
87 // the type, and use a second, void* arg to achieve the desired
88 // 'catch-all' semantics.
89 
90 // ----- low-level I/O for FILE* ----
91 
92 template<typename Ignored>
93 inline bool read_data_internal(Ignored*, FILE* fp,
94  void* data, size_t length) {
95  return fread(data, length, 1, fp) == 1;
96 }
97 
98 template<typename Ignored>
99 inline bool write_data_internal(Ignored*, FILE* fp,
100  const void* data, size_t length) {
101  return fwrite(data, length, 1, fp) == 1;
102 }
103 
104 // ----- low-level I/O for iostream ----
105 
106 // We want the caller to be responsible for #including <iostream>, not
107 // us, because iostream is a big header! According to the standard,
108 // it's only legal to delay the instantiation the way we want to if
109 // the istream/ostream is a template type. So we jump through hoops.
110 template<typename ISTREAM>
111 inline bool read_data_internal_for_istream(ISTREAM* fp,
112  void* data, size_t length) {
113  return fp->read(reinterpret_cast<char*>(data), length).good();
114 }
115 template<typename Ignored>
116 inline bool read_data_internal(Ignored*, std::istream* fp,
117  void* data, size_t length) {
118  return read_data_internal_for_istream(fp, data, length);
119 }
120 
121 template<typename OSTREAM>
122 inline bool write_data_internal_for_ostream(OSTREAM* fp,
123  const void* data, size_t length) {
124  return fp->write(reinterpret_cast<const char*>(data), length).good();
125 }
126 template<typename Ignored>
127 inline bool write_data_internal(Ignored*, std::ostream* fp,
128  const void* data, size_t length) {
129  return write_data_internal_for_ostream(fp, data, length);
130 }
131 
132 // ----- low-level I/O for custom streams ----
133 
134 // The INPUT type needs to support a Read() method that takes a
135 // buffer and a length and returns the number of bytes read.
136 template <typename INPUT>
137 inline bool read_data_internal(INPUT* fp, void*,
138  void* data, size_t length) {
139  return static_cast<size_t>(fp->Read(data, length)) == length;
140 }
141 
142 // The OUTPUT type needs to support a Write() operation that takes
143 // a buffer and a length and returns the number of bytes written.
144 template <typename OUTPUT>
145 inline bool write_data_internal(OUTPUT* fp, void*,
146  const void* data, size_t length) {
147  return static_cast<size_t>(fp->Write(data, length)) == length;
148 }
149 
150 // ----- low-level I/O: the public API ----
151 
152 template <typename INPUT>
153 inline bool read_data(INPUT* fp, void* data, size_t length) {
154  return read_data_internal(fp, fp, data, length);
155 }
156 
157 template <typename OUTPUT>
158 inline bool write_data(OUTPUT* fp, const void* data, size_t length) {
159  return write_data_internal(fp, fp, data, length);
160 }
161 
162 // Uses read_data() and write_data() to read/write an integer.
163 // length is the number of bytes to read/write (which may differ
164 // from sizeof(IntType), allowing us to save on a 32-bit system
165 // and load on a 64-bit system). Excess bytes are taken to be 0.
166 // INPUT and OUTPUT must match legal inputs to read/write_data (above).
167 template <typename INPUT, typename IntType>
168 bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) {
169  *value = 0;
170  unsigned char byte;
171  // We require IntType to be unsigned or else the shifting gets all screwy.
172  SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
173  serializing_int_requires_an_unsigned_type);
174  for (size_t i = 0; i < length; ++i) {
175  if (!read_data(fp, &byte, sizeof(byte))) return false;
176  *value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8);
177  }
178  return true;
179 }
180 
181 template <typename OUTPUT, typename IntType>
182 bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) {
183  unsigned char byte;
184  // We require IntType to be unsigned or else the shifting gets all screwy.
185  SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
186  serializing_int_requires_an_unsigned_type);
187  for (size_t i = 0; i < length; ++i) {
188  byte = (sizeof(value) <= length-1 - i)
189  ? 0 : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255);
190  if (!write_data(fp, &byte, sizeof(byte))) return false;
191  }
192  return true;
193 }
194 
195 // If your keys and values are simple enough, you can pass this
196 // serializer to serialize()/unserialize(). "Simple enough" means
197 // value_type is a POD type that contains no pointers. Note,
198 // however, we don't try to normalize endianness.
199 // This is the type used for NopointerSerializer.
200 template <typename value_type> struct pod_serializer {
201  template <typename INPUT>
202  bool operator()(INPUT* fp, value_type* value) const {
203  return read_data(fp, value, sizeof(*value));
204  }
205 
206  template <typename OUTPUT>
207  bool operator()(OUTPUT* fp, const value_type& value) const {
208  return write_data(fp, &value, sizeof(value));
209  }
210 };
211 
212 
213 // Settings contains parameters for growing and shrinking the table.
214 // It also packages zero-size functor (ie. hasher).
215 //
216 // It does some munging of the hash value in cases where we think
217 // (fear) the original hash function might not be very good. In
218 // particular, the default hash of pointers is the identity hash,
219 // so probably all the low bits are 0. We identify when we think
220 // we're hashing a pointer, and chop off the low bits. Note this
221 // isn't perfect: even when the key is a pointer, we can't tell
222 // for sure that the hash is the identity hash. If it's not, this
223 // is needless work (and possibly, though not likely, harmful).
224 
225 template<typename Key, typename HashFunc,
226  typename SizeType, int HT_MIN_BUCKETS>
227 class sh_hashtable_settings : public HashFunc {
228  public:
229  typedef Key key_type;
230  typedef HashFunc hasher;
231  typedef SizeType size_type;
232 
233  public:
235  const float ht_occupancy_flt,
236  const float ht_empty_flt)
237  : hasher(hf),
240  consider_shrink_(false),
241  use_empty_(false),
242  use_deleted_(false),
243  num_ht_copies_(0) {
244  set_enlarge_factor(ht_occupancy_flt);
245  set_shrink_factor(ht_empty_flt);
246  }
247 
248  size_type hash(const key_type& v) const {
249  // We munge the hash value when we don't trust hasher::operator().
250  return hash_munger<Key>::MungedHash(hasher::operator()(v));
251  }
252 
253  float enlarge_factor() const {
254  return enlarge_factor_;
255  }
256  void set_enlarge_factor(float f) {
257  enlarge_factor_ = f;
258  }
259  float shrink_factor() const {
260  return shrink_factor_;
261  }
262  void set_shrink_factor(float f) {
263  shrink_factor_ = f;
264  }
265 
267  return enlarge_threshold_;
268  }
270  enlarge_threshold_ = t;
271  }
273  return shrink_threshold_;
274  }
276  shrink_threshold_ = t;
277  }
278 
280  return static_cast<size_type>(x * enlarge_factor_);
281  }
283  return static_cast<size_type>(x * shrink_factor_);
284  }
285 
286  bool consider_shrink() const {
287  return consider_shrink_;
288  }
289  void set_consider_shrink(bool t) {
290  consider_shrink_ = t;
291  }
292 
293  bool use_empty() const {
294  return use_empty_;
295  }
296  void set_use_empty(bool t) {
297  use_empty_ = t;
298  }
299 
300  bool use_deleted() const {
301  return use_deleted_;
302  }
303  void set_use_deleted(bool t) {
304  use_deleted_ = t;
305  }
306 
308  return static_cast<size_type>(num_ht_copies_);
309  }
311  ++num_ht_copies_;
312  }
313 
314  // Reset the enlarge and shrink thresholds
315  void reset_thresholds(size_type num_buckets) {
316  set_enlarge_threshold(enlarge_size(num_buckets));
317  set_shrink_threshold(shrink_size(num_buckets));
318  // whatever caused us to reset already considered
319  set_consider_shrink(false);
320  }
321 
322  // Caller is resposible for calling reset_threshold right after
323  // set_resizing_parameters.
324  void set_resizing_parameters(float shrink, float grow) {
325  assert(shrink >= 0.0);
326  assert(grow <= 1.0);
327  if (shrink > grow/2.0f)
328  shrink = grow / 2.0f; // otherwise we thrash hashtable size
329  set_shrink_factor(shrink);
330  set_enlarge_factor(grow);
331  }
332 
333  // This is the smallest size a hashtable can be without being too crowded
334  // If you like, you can give a min #buckets as well as a min #elts
335  size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
336  float enlarge = enlarge_factor();
337  size_type sz = HT_MIN_BUCKETS; // min buckets allowed
338  while ( sz < min_buckets_wanted ||
339  num_elts >= static_cast<size_type>(sz * enlarge) ) {
340  // This just prevents overflowing size_type, since sz can exceed
341  // max_size() here.
342  if (static_cast<size_type>(sz * 2) < sz) {
343  throw std::length_error("resize overflow"); // protect against overflow
344  }
345  sz *= 2;
346  }
347  return sz;
348  }
349 
350  private:
351  template<class HashKey> class hash_munger {
352  public:
353  static size_t MungedHash(size_t hash) {
354  return hash;
355  }
356  };
357  // This matches when the hashtable key is a pointer.
358  template<class HashKey> class hash_munger<HashKey*> {
359  public:
360  static size_t MungedHash(size_t hash) {
361  // TODO(csilvers): consider rotating instead:
362  // static const int shift = (sizeof(void *) == 4) ? 2 : 3;
363  // return (hash << (sizeof(hash) * 8) - shift)) | (hash >> shift);
364  // This matters if we ever change sparse/dense_hash_* to compare
365  // hashes before comparing actual values. It's speedy on x86.
366  return hash / sizeof(void*); // get rid of known-0 bits
367  }
368  };
369 
370  size_type enlarge_threshold_; // table.size() * enlarge_factor
371  size_type shrink_threshold_; // table.size() * shrink_factor
372  float enlarge_factor_; // how full before resize
373  float shrink_factor_; // how empty before resize
374  // consider_shrink=true if we should try to shrink before next insert
376  bool use_empty_; // used only by densehashtable, not sparsehashtable
377  bool use_deleted_; // false until delkey has been set
378  // num_ht_copies is a counter incremented every Copy/Move
379  unsigned int num_ht_copies_;
380 };
381 
382 } // namespace sparsehash_internal
383 
384 #undef SPARSEHASH_COMPILE_ASSERT
385 _END_GOOGLE_NAMESPACE_
386 
387 #endif // UTIL_GTL_HASHTABLE_COMMON_H_
bool write_data(OUTPUT *fp, const void *data, size_t length)
Definition: hashtable-common.h:158
void set_use_deleted(bool t)
Definition: hashtable-common.h:303
size_type enlarge_size(size_type x) const
Definition: hashtable-common.h:279
float shrink_factor() const
Definition: hashtable-common.h:259
Definition: hashtable-common.h:200
void set_use_empty(bool t)
Definition: hashtable-common.h:296
bool read_data(INPUT *fp, void *data, size_t length)
Definition: hashtable-common.h:153
void set_enlarge_threshold(size_type t)
Definition: hashtable-common.h:269
bool consider_shrink() const
Definition: hashtable-common.h:286
bool write_data_internal_for_ostream(OSTREAM *fp, const void *data, size_t length)
Definition: hashtable-common.h:122
void reset_thresholds(size_type num_buckets)
Definition: hashtable-common.h:315
static size_t MungedHash(size_t hash)
Definition: hashtable-common.h:353
bool use_empty() const
Definition: hashtable-common.h:293
size_type shrink_threshold_
Definition: hashtable-common.h:371
size_type shrink_threshold() const
Definition: hashtable-common.h:272
assert(pNet!=0)
void set_resizing_parameters(float shrink, float grow)
Definition: hashtable-common.h:324
float shrink_factor_
Definition: hashtable-common.h:373
size_type min_buckets(size_type num_elts, size_type min_buckets_wanted)
Definition: hashtable-common.h:335
bool write_data_internal(Ignored *, FILE *fp, const void *data, size_t length)
Definition: hashtable-common.h:99
bool consider_shrink_
Definition: hashtable-common.h:375
size_type num_ht_copies() const
Definition: hashtable-common.h:307
bool write_bigendian_number(OUTPUT *fp, IntType value, size_t length)
Definition: hashtable-common.h:182
bool use_deleted() const
Definition: hashtable-common.h:300
sh_hashtable_settings(const hasher &hf, const float ht_occupancy_flt, const float ht_empty_flt)
Definition: hashtable-common.h:234
float enlarge_factor_
Definition: hashtable-common.h:372
void set_enlarge_factor(float f)
Definition: hashtable-common.h:256
bool read_data_internal(Ignored *, FILE *fp, void *data, size_t length)
Definition: hashtable-common.h:93
float enlarge_factor() const
Definition: hashtable-common.h:253
bool operator()(OUTPUT *fp, const value_type &value) const
Definition: hashtable-common.h:207
const Name x("x")
current scaling factor of the synaptic weight [0...1] (Tsodyks2_connection)
Definition: nest_names.h:356
size_type hash(const key_type &v) const
Definition: hashtable-common.h:248
static size_t MungedHash(size_t hash)
Definition: hashtable-common.h:360
Key key_type
Definition: hashtable-common.h:229
void set_shrink_factor(float f)
Definition: hashtable-common.h:262
bool read_bigendian_number(INPUT *fp, IntType *value, size_t length)
Definition: hashtable-common.h:168
HashFunc hasher
Definition: hashtable-common.h:230
void set_consider_shrink(bool t)
Definition: hashtable-common.h:289
size_type shrink_size(size_type x) const
Definition: hashtable-common.h:282
bool use_deleted_
Definition: hashtable-common.h:377
bool operator()(INPUT *fp, value_type *value) const
Definition: hashtable-common.h:202
Definition: hashtable-common.h:227
size_type enlarge_threshold_
Definition: hashtable-common.h:370
void set_shrink_threshold(size_type t)
Definition: hashtable-common.h:275
bool read_data_internal_for_istream(ISTREAM *fp, void *data, size_t length)
Definition: hashtable-common.h:111
Definition: hashtable-common.h:57
size_type enlarge_threshold() const
Definition: hashtable-common.h:266
bool use_empty_
Definition: hashtable-common.h:376
unsigned int num_ht_copies_
Definition: hashtable-common.h:379
void inc_num_ht_copies()
Definition: hashtable-common.h:310
SizeType size_type
Definition: hashtable-common.h:231