init
commit 26b5169bb2

6 changed files with 555 additions and 0 deletions

src/map.c | 316 (new file)

@@ -0,0 +1,316 @@
//
// map.c
//
// Created by Mashpoe on 1/15/21.
//

#include "map.h"
#include <stdlib.h>
#include <string.h>

#define HASHMAP_DEFAULT_CAPACITY 20
#define HASHMAP_MAX_LOAD 0.75f
#define HASHMAP_RESIZE_FACTOR 2

hashmap *hashmap_create(void) {
  hashmap *m = malloc(sizeof(hashmap));
  if (m == NULL) {
    return NULL;
  }

  m->capacity = HASHMAP_DEFAULT_CAPACITY;
  m->count = 0;
  m->tombstone_count = 0;

  m->buckets = calloc(HASHMAP_DEFAULT_CAPACITY, sizeof(struct bucket));
  if (m->buckets == NULL) {
    free(m);
    return NULL;
  }

  m->first = NULL;

  // this prevents branching in hashmap_set.
  // m->first will be treated as the "next" pointer in an imaginary bucket.
  // when the first item is added, m->first will be set to the correct address.
  m->last = (struct bucket *)&m->first;
  return m;
}

void hashmap_free(hashmap *m) {
  free(m->buckets);
  free(m);
}

// puts an old bucket into a resized hashmap
static struct bucket *resize_entry(hashmap *m, struct bucket *old_entry) {
  uint32_t index = old_entry->hash % m->capacity;
  for (;;) {
    struct bucket *entry = &m->buckets[index];

    if (entry->key == NULL) {
      *entry = *old_entry; // copy data from old entry
      return entry;
    }

    index = (index + 1) % m->capacity;
  }
}

static int hashmap_resize(hashmap *m) {
  int old_capacity = m->capacity;
  struct bucket *old_buckets = m->buckets;

  m->capacity *= HASHMAP_RESIZE_FACTOR;
  // initializes all bucket fields to null
  m->buckets = calloc(m->capacity, sizeof(struct bucket));
  if (m->buckets == NULL) {
    m->capacity = old_capacity;
    m->buckets = old_buckets;
    return -1;
  }

  // same trick; avoids branching
  m->last = (struct bucket *)&m->first;

  m->count -= m->tombstone_count;
  m->tombstone_count = 0;

  // assumes that an empty map won't be resized
  do {
    // skip entry if it's a "tombstone"
    struct bucket *current = m->last->next;
    if (current->key == NULL) {
      m->last->next = current->next;
      // skip to loop condition
      continue;
    }

    m->last->next = resize_entry(m, m->last->next);
    m->last = m->last->next;
  } while (m->last->next != NULL);

  free(old_buckets);
  return 0;
}

#define HASHMAP_HASH_INIT 2166136261u

// FNV-1a-style hash function, processing the key 8 bytes at a time
static inline uint32_t hash_data(const unsigned char *data, size_t size) {
  size_t nblocks = size / 8;
  uint64_t hash = HASHMAP_HASH_INIT;
  for (size_t i = 0; i < nblocks; ++i) {
    hash ^= (uint64_t)data[0] << 0 | (uint64_t)data[1] << 8 |
            (uint64_t)data[2] << 16 | (uint64_t)data[3] << 24 |
            (uint64_t)data[4] << 32 | (uint64_t)data[5] << 40 |
            (uint64_t)data[6] << 48 | (uint64_t)data[7] << 56;
    hash *= 0xbf58476d1ce4e5b9;
    data += 8;
  }

  uint64_t last = size & 0xff;
  switch (size % 8) {
  case 7:
    last |= (uint64_t)data[6] << 56; /* fallthrough */
  case 6:
    last |= (uint64_t)data[5] << 48; /* fallthrough */
  case 5:
    last |= (uint64_t)data[4] << 40; /* fallthrough */
  case 4:
    last |= (uint64_t)data[3] << 32; /* fallthrough */
  case 3:
    last |= (uint64_t)data[2] << 24; /* fallthrough */
  case 2:
    last |= (uint64_t)data[1] << 16; /* fallthrough */
  case 1:
    last |= (uint64_t)data[0] << 8;
    hash ^= last;
    hash *= 0xd6e8feb86659fd93;
  }

  // compress to a 32-bit result.
  // also serves as a finalizer.
  return (uint32_t)(hash ^ hash >> 32);
}

static struct bucket *find_entry(hashmap *m, const void *key, size_t ksize,
                                 uint32_t hash) {
  uint32_t index = hash % m->capacity;

  for (;;) {
    struct bucket *entry = &m->buckets[index];

    // kind of a thicc condition;
    // I didn't want this to span multiple if statements or functions.
    // an empty bucket has a NULL key and a zero value; a NULL key with a
    // nonzero value is a "tombstone" and must be probed past, not returned.
    if ((entry->key == NULL && entry->value == 0) ||
        // compare sizes, then hashes, then key data as a last resort.
        (entry->key != NULL && entry->ksize == ksize && entry->hash == hash &&
         memcmp(entry->key, key, ksize) == 0)) {
      // return the entry if a match or an empty bucket is found
      return entry;
    }

    // printf("collision\n");
    index = (index + 1) % m->capacity;
  }
}

int hashmap_set(hashmap *m, const void *key, size_t ksize, uintptr_t val) {
  if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
    if (hashmap_resize(m) == -1)
      return -1;
  }

  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);
  if (entry->key == NULL) {
    m->last->next = entry;
    m->last = entry;
    entry->next = NULL;

    ++m->count;

    entry->key = key;
    entry->ksize = ksize;
    entry->hash = hash;
  }
  entry->value = val;
  return 0;
}

int hashmap_get_set(hashmap *m, const void *key, size_t ksize,
                    uintptr_t *out_in) {
  if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
    if (hashmap_resize(m) == -1)
      return -1;
  }

  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);
  if (entry->key == NULL) {
    m->last->next = entry;
    m->last = entry;
    entry->next = NULL;

    ++m->count;

    entry->value = *out_in;
    entry->key = key;
    entry->ksize = ksize;
    entry->hash = hash;

    return 0;
  }
  *out_in = entry->value;
  return 1;
}

int hashmap_set_free(hashmap *m, const void *key, size_t ksize, uintptr_t val,
                     hashmap_callback c, void *usr) {
  if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
    if (hashmap_resize(m) == -1)
      return -1;
  }

  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);
  if (entry->key == NULL) {
    m->last->next = entry;
    m->last = entry;
    entry->next = NULL;

    ++m->count;

    entry->key = key;
    entry->ksize = ksize;
    entry->hash = hash;
    entry->value = val;
    // there was no overwrite, exit the function.
    return 0;
  }
  // allow the callback to free entry data.
  // use old key and value so the callback can free them.
  // the old key and value will be overwritten after this call.
  int error = c(entry->key, ksize, entry->value, usr);

  // overwrite the old key pointer in case the callback frees it.
  entry->key = key;
  entry->value = val;
  return error;
}

int hashmap_get(hashmap *m, const void *key, size_t ksize, uintptr_t *out_val) {
  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);

  // if there is no match, the output value will just be 0
  *out_val = entry->value;

  return entry->key != NULL ? 1 : 0;
}

// doesn't "remove" the element per se, but it will be ignored.
// the element will eventually be removed when the map is resized.
void hashmap_remove(hashmap *m, const void *key, size_t ksize) {
  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);

  if (entry->key != NULL) {
    // "tombstone" entry is signified by a NULL key with a nonzero value
    // element removal is optional because of the overhead of tombstone checks
    entry->key = NULL;
    entry->value = 0xDEAD; // I mean, it's a tombstone...

    ++m->tombstone_count;
  }
}

void hashmap_remove_free(hashmap *m, const void *key, size_t ksize,
                         hashmap_callback c, void *usr) {
  uint32_t hash = hash_data(key, ksize);
  struct bucket *entry = find_entry(m, key, ksize, hash);

  if (entry->key != NULL) {
    c(entry->key, entry->ksize, entry->value, usr);

    // "tombstone" entry is signified by a NULL key with a nonzero value
    // element removal is optional because of the overhead of tombstone checks
    entry->key = NULL;
    entry->value = 0xDEAD; // I mean, it's a tombstone...

    ++m->tombstone_count;
  }
}

int hashmap_size(hashmap *m) { return m->count - m->tombstone_count; }

int hashmap_iterate(hashmap *m, hashmap_callback c, void *user_ptr) {
  // loop through the linked list of valid entries
  // this way we can skip over empty buckets
  struct bucket *current = m->first;
  int error = 0;

  while (current != NULL) {
    // "tombstone" check
    if (current->key != NULL)
      error = c(current->key, current->ksize, current->value, user_ptr);
    if (error == -1)
      break;

    current = current->next;
  }
  return error;
}
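
Not part of the commit itself: a minimal usage sketch of the API above, assuming the declarations in map.h (which is not shown in this diff) match the calls in map.c, in particular that hashmap_callback is an int-returning function taking (const void *key, size_t ksize, uintptr_t value, void *usr).

// usage sketch (assumed API, inferred from map.c; map.h is not shown here)
#include "map.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int print_entry(const void *key, size_t ksize, uintptr_t value,
                       void *usr) {
  (void)usr;
  printf("%.*s -> %lu\n", (int)ksize, (const char *)key, (unsigned long)value);
  return 0; // returning -1 would stop hashmap_iterate early
}

int main(void) {
  hashmap *m = hashmap_create();
  if (m == NULL)
    return 1;

  // keys are arbitrary byte ranges; the map stores the key pointer rather
  // than copying the bytes, so the keys must outlive the map (string
  // literals do here).
  hashmap_set(m, "alpha", 5, 1);
  hashmap_set(m, "beta", 4, 2);

  uintptr_t value;
  if (hashmap_get(m, "alpha", 5, &value))
    printf("alpha = %lu\n", (unsigned long)value);

  hashmap_remove(m, "beta", 4); // leaves a tombstone until the next resize
  hashmap_iterate(m, print_entry, NULL);

  hashmap_free(m);
  return 0;
}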