[Algorithm](EN) Hash table implementation in C/C++

Implement hash table in C/C++

Environment and Prerequisite

C++
Understanding of hash function and linked list

Hash Function

What is Hash Function?

Hash Function: Hash function is any function that can be used to map data of arbitrary size onto data of a fixed size. Sometimes hash function result could be same. In this case we call this as Collision.(H(s1) = H(s2))
In below picture, blue things on left are keys and each key goes into hash function and result into right side hashe values. Later on in hash table values on right side will be used as hash table array index.
Consider H() as hash function, then H(Jonh Smith) = 02

Hash Collision

Case which different key results in same hash value.
Consider H() as hash function and s1 and s2 as different string, then H(s1) = H(s2)
Solution for collision: Chaining or Open Addressing

Example

Below sample is simple hash function which get string and return integer value;
On internet there are many hash function implementations. Shift operation and prime number is widely used in hash function.

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

Hash Table

Data structure which save pair data key and value
Like below usage, it is a data structure use Lisa Smith as key and save 521-8976 as value.
There are already hash table implementations in each language like map in C++ and dictionary in python.
Theoretically, accessing time complexity is O(c). Hash table use more memory but take advantage of accessing time.
Use Chaining or Open Addressing for collision

Implementation

In this post, I use Chaining for collision.
Use Singly Linked List for Chaining

Common

Hash table implementation using linked list
Node is for data with key and value

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_TABLE 5 // size of table
#define MAX_KEY 8 // include null
#define MAX_DATA 12 // num of datas for hash table
#define DELETE_COUNT 6 // num of datas for deletion
#define FIND_COUNT 8 // num of datas for finding

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // hash table
char keys[MAX_DATA][MAX_KEY]; // keys
int values[MAX_DATA]; // values

Init Function

Make key-value pairs using random function.

void init() {

	// hash table initiation
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// srand and seed for random function
	srand(time(NULL));

	// init values
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// init keys
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

String Function

Compare and copy function.

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

Hash Function

Just use same function as above
Use shift operatoin and prime number
We need to divide it by MAX_TABLE for hash table array.

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

Add

Simpel linked list hash add implementation
If there is same key in hash table, then replace its value.

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = hash(key);

	// insert if first is NULL
	if (tb[index] == NULL) {
		tb[index] = new_node;
	}
	// traverse list one by one
	// change duplicated value if not then add it to front

	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// if key is duplicated, then replace its value
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}

		// add to front if it is not duplicated
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

Find Value

Use linked list for finding.
If there is matching key, then save it to val and return true.
If there is no matching key, then return false

bool find(const char * key, int * val) {

	int index = hash(key);

	Node * cur = tb[index];

	// Find key by traversing list one by one
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

Deletion

Delete node if key is matched.
Just check the first of list. It makes easy for implementation.

bool destroy(const char * key) {

	int index = hash(key);

	// check first of list
	if (tb[index] == NULL) {
		return false;
	}

	// check first element
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// others
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

Print

Print all key-value pairs while traversing all table and lists.

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

Main

Test add, find and destroy functions.

int main() {

	char tmp_key[MAX_KEY];
	init();

	// Add

	printf("Add to hash table\n");
	for (int i = 0; i < MAX_DATA; ++i) {
		add(keys[i], values[i]);
	}

	print_hash();


	printf("\n");

	// Delete

	printf("Deleted keys: ");
	for (int i = 0; i < DELETE_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		printf("%s ", tmp_key);
		destroy(tmp_key);
	}
	printf("\n");

	print_hash();


	printf("\n");

	// Find

	int val;
	printf("Found: ");
	for (int i = 0; i < FIND_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		if (find(tmp_key, &val)) {
			printf("{ %s, %d } ", tmp_key, val);
		}
	}
	printf("\n");

	return 0;
}

Code

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_TABLE 5 // size of table
#define MAX_KEY 8 // include null
#define MAX_DATA 12 // num of datas for hash table
#define DELETE_COUNT 6 // num of datas for deletion
#define FIND_COUNT 8 // num of datas for finding

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // hash table
char keys[MAX_DATA][MAX_KEY]; // keys
int values[MAX_DATA]; // values

void init() {

	// hash table initiation
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// srand and seed for random function
	srand(time(NULL));

	// init values
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// init keys
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = hash(key);

	// insert if first is NULL
	if (tb[index] == NULL) {
		tb[index] = new_node;
	}
	// traverse list one by one
	// change duplicated value if not then add it to front

	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// if key is duplicated, then replace its value
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}

		// add to front if it is not duplicated
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

bool find(const char * key, int * val) {

	int index = hash(key);

	Node * cur = tb[index];

	// Find key by traversing list one by one
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

bool destroy(const char * key) {

	int index = hash(key);

	// check first of list
	if (tb[index] == NULL) {
		return false;
	}

	// check first element
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// others
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

int main() {

	char tmp_key[MAX_KEY];
	init();

	// Add

	printf("Add to hash table\n");
	for (int i = 0; i < MAX_DATA; ++i) {
		add(keys[i], values[i]);
	}

	print_hash();


	printf("\n");

	// Delete

	printf("Deleted keys: ");
	for (int i = 0; i < DELETE_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		printf("%s ", tmp_key);
		destroy(tmp_key);
	}
	printf("\n");

	print_hash();


	printf("\n");

	// Find

	int val;
	printf("Found: ");
	for (int i = 0; i < FIND_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		if (find(tmp_key, &val)) {
			printf("{ %s, %d } ", tmp_key, val);
		}
	}
	printf("\n");

	return 0;
}

Verification

Modify hash function name(hash->my_hash)

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <map>
#include <string>

#define TOTAL_TEST_CASE 100

#define MAX_TABLE 300 // size of table
#define MAX_KEY 8 // include null
#define MAX_DATA 2000 // num of datas for hash table
#define DELETE_COUNT 1000 // num of datas for deletion

using namespace std;

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // hash table
char keys[MAX_DATA][MAX_KEY]; // keys
int values[MAX_DATA]; // values

void init() {

	// hash table initiation
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// srand and seed for random function
	srand(time(NULL));

	// init values
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// init keys
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

int my_hash(const char * str) {
	int hash = 401;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = my_hash(key);

	// insert if first is NULL
	if (tb[index] == NULL) {
		tb[index] = new_node;
	}
	// traverse list one by one
	// change duplicated value if not then add it to front
	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// if key is duplicated, then replace its value
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}
		// add to front if it is not duplicated
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

bool find(const char * key, int * val) {

	int index = my_hash(key);

	Node * cur = tb[index];

	// Find key by traversing list one by one
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

bool destroy(const char * key) {

	int index = my_hash(key);

	// check first of list
	if (tb[index] == NULL) {
		return false;
	}

	// check first element
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// others
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

int main() {

	int test_case = 1;
	int correct = 0;


	for (test_case = 1; test_case <= TOTAL_TEST_CASE; ++test_case) {

		init();

		bool is_equal = true;

		map<string, int> m;
		map<string, int>::iterator it;


		for (int i = 0; i < MAX_DATA; ++i) {
			add(keys[i], values[i]);
		}
		for (int i = 0; i < MAX_DATA; ++i) {
			if (m.count(keys[i]) == 0) {
				m.insert(make_pair(keys[i], values[i]));
			}
			else {
				m[keys[i]] = values[i];
			}

		}

		for (int i = 0; i < MAX_DATA; ++i) {

			int tmp;
			find(keys[i], &tmp);

			if (m[keys[i]] != tmp) {
				is_equal = false;
			}
		}

		char tmp_key[MAX_KEY];
		for (int i = 0; i < DELETE_COUNT; ++i) {
			my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
			destroy(tmp_key);
			m.erase(tmp_key);
		}

		for (int i = 0; i < MAX_DATA; ++i) {

			int tmp = -1;

			if (find(keys[i], &tmp) == false && m.count(keys[i]) == 0) {
				continue;
			}

			if (find(keys[i], &tmp) == true && m.count(keys[i]) == 1 && m[keys[i]] == tmp) {
				continue;
			}
			else {
				is_equal = false;
			}

		}

		if (is_equal) correct++;

	}

	printf("Total: %d / %d\n", correct, TOTAL_TEST_CASE);

	return 0;
}

Categories

Tags

Posts

About

Home

[Algorithm](EN) Hash table implementation in C/C++

April 06, 2019

[Algorithm](EN) Hash table implementation in C/C++

Environment and Prerequisite

Hash Function

What is Hash Function?

Hash Collision

Example

Hash Table

Implementation

Common

Init Function

String Function

Hash Function

Add

Find Value

Deletion

Print

Main

Code

Verification

Reference

[CI/CD] GitHub Actions actions/github-script에서 별도 파일을 실행하기

[CI/CD](EN) Usage of running a separate file in GitHub Actions actions/github-script

[MySQL] 데이터베이스 조작 시 TRANSACTION 사용하기