[Algorithm](EN) Trie Concept and Basic Problem

Implement trie and solve basic problem in C++.


Environment and Prerequisite

  • C++


What is Trie?

About Trie

  • Trie: Trie is a tree shape data structure for efficiently searching and storing strings.
  • From root to leaf nodes are strings which stored in trie data structure. We can distinguish end of string by adding terminal variable to each node.
  • By searching DFS method in above picture, we can find that to, tea, ted, ten, A, i, in, inn strings are stored in data structure.


Example of Trie

  • Orange color nodes are inserted strings.
  • be, bee, can, cat, cd are stored in trie.


Purpose of Trie

Purpose

  • The reason why we use trie is in time complexity of string searching. It is much more efficient than linear searching. However there is advantage that searching is efficient but also drawbacks that it saves all children pointer in each node so trie takes many spaces.
  • It can be used in searching keyword autocomplete, searching in dictionary and spelling checking. Reference is in References

Time Complexity

  • Consider L as longest string and M as total number of strings.
  • Creation time complexity: O(M*L), There are total M strings and each string only takes at most L time so it takes O(M*L). Insertion itself takes O(L).
  • Searching time complexity: O(L), It takes at most O(L) of time. Because it is tree structure.


Trie Implementation

Common

  • Only consider capital alphabets
#include <cstdio>
#include <cstring>

#define ALPHABETS 26

// Convert char to array index
// All are base on capital
int char_to_index(char c){
	return c - 'A';
}


Trie Code

  • is_terminal: It represents end of string.
  • void insert(const char * key): Add new string to trie structure.
  • Trie* find(const char * key): Check if trie has key(passed string argument) as prefix. If it has key as prefix then return end of key prefix node.
  • bool string_exist(const char * key): Check if trie has key(passed string argument). If it has key then return true. If not then return false.
struct Trie{

	bool is_terminal; // this represents end of string
	Trie * children[ALPHABETS];

	// Constructor
	Trie(): is_terminal(false){
		memset(children, 0, sizeof(children));
	}

	// Delete all children
	~Trie(){
		for(int i = 0; i < ALPHABETS; ++i){
			if(children[i])
				delete children[i];
		}
	}

	void insert(const char * key){
		if(*key == '\0'){
			is_terminal = true;
		}
		else{
			int index = char_to_index(*key);

			if(children[index] == 0)
				children[index] = new Trie();
			children[index]->insert(key + 1);
		}
	}

	Trie* find(const char * key){
		if(*key == 0){
			return this;
		}

		int index = char_to_index(*key);
		if(children[index] == 0){
			return NULL;
		}

		return children[index]->find(key+1);
	}

	bool string_exist(const char * key){
		if(*key == 0 && is_terminal){
			return true;
		}

		int index = char_to_index(*key);
		if(children[index] == 0){
			return false;
		}
		return children[index]->string_exist(key + 1);
	}

};


Main Function

  • Add check code in main function.
  • Main function checks whether trie works well.
int main (){

	Trie * root = new Trie();

	const char * words[5] = {"be", "bee", "can", "cat", "cd"};

	for(int i = 0; i < 5; ++i){
		printf("insert: %s\n", words[i]);
		root->insert(words[i]);
	}

	printf("\n");

	printf("%s: be\n", root->find("be") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: can\n", root->find("can") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: a\n", root->find("a") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: cat\n", root->find("cat") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: c\n", root->find("c") != 0 ? "Prefix Exist" : "Prefix Not Exist");

	printf("\n");

	printf("%s: c\n", root->string_exist("c") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: be\n", root->string_exist("be") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: bee\n", root->string_exist("bee") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: candy\n", root->string_exist("candy") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: cd\n", root->string_exist("cd") != 0 ? "String Exist" : "String Not Exist");

	delete root;

	return 0;
}


All codes

#include <cstdio>
#include <cstring>

#define ALPHABETS 26

// Convert char to array index
// All are base on capital
int char_to_index(char c){
	return c - 'A';
}

struct Trie{

	bool is_terminal; // this represents end of string
	Trie * children[ALPHABETS];

	// Constructor
	Trie(): is_terminal(false){
		memset(children, 0, sizeof(children));
	}

	// Delete all children
	~Trie(){
		for(int i = 0; i < ALPHABETS; ++i){
			if(children[i])
				delete children[i];
		}
	}

	void insert(const char * key){
		if(*key == '\0'){
			is_terminal = true;
		}
		else{
			int index = char_to_index(*key);

			if(children[index] == 0)
				children[index] = new Trie();
			children[index]->insert(key + 1);
		}
	}

	Trie* find(const char * key){
		if(*key == 0){
			return this;
		}

		int index = char_to_index(*key);
		if(children[index] == 0){
			return NULL;
		}

		return children[index]->find(key+1);
	}

	bool string_exist(const char * key){
		if(*key == 0 && is_terminal){
			return true;
		}

		int index = char_to_index(*key);
		if(children[index] == 0){
			return false;
		}
		return children[index]->string_exist(key + 1);
	}

};

int main (){

	Trie * root = new Trie();

	const char * words[5] = {"be", "bee", "can", "cat", "cd"};

	for(int i = 0; i < 5; ++i){
		printf("insert: %s\n", words[i]);
		root->insert(words[i]);
	}

	printf("\n");

	printf("%s: be\n", root->find("be") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: can\n", root->find("can") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: a\n", root->find("a") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: cat\n", root->find("cat") != 0 ? "Prefix Exist" : "Prefix Not Exist");
	printf("%s: c\n", root->find("c") != 0 ? "Prefix Exist" : "Prefix Not Exist");

	printf("\n");

	printf("%s: c\n", root->string_exist("c") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: be\n", root->string_exist("be") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: bee\n", root->string_exist("bee") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: candy\n", root->string_exist("candy") != 0 ? "String Exist" : "String Not Exist");
	printf("%s: cd\n", root->string_exist("cd") != 0 ? "String Exist" : "String Not Exist");

	delete root;

	return 0;
}


Trie Basic Problem

Baekjoon Online Judge 5052

  • Check if phone number list is in consistency. Print YES if it is in consistency if not then print NO.
  • In consistency means that any number is not prefix to other numbers.
  • Link: https://www.acmicpc.net/problem/5052


Solution and Method

  • It is string processing problem and also it contains prefix processing so using trie data structure is solution.
  • Solution 1: Insert all phone numbers to trie and search all phone numbers. If there is terminal node(is_terminal value is true) before end of the phone number, then return false.
  • Solution 2: Check inserted phone number is prefix to other numbers when insert into trie.


Solution Codes

Solution 1

  • Insert all phone numbers to trie and search all phone numbers. If there is terminal node(is_terminal value is true) before end of the phone number, then return false.
#include <cstdio>
#include <cstring>

int char_to_index(char c){
	return c - '0';
}

struct Trie {

	bool is_terminal;
	Trie * next[10];
	Trie(): is_terminal(false) {
		memset(next, 0, sizeof(next));
	}

	~Trie() {
		for (int i = 0; i < 10; ++i) {
			if (next[i]) {
				delete next[i];
			}
		}
	}

	void insert(const char * key) {

		if (*key == '\0') {
			is_terminal = true;
		}
		else {
			int index = char_to_index(*key);
			if (next[index] == 0)
				next[index] = new Trie();
			next[index]->insert(key + 1);
		}
	}

	bool find(const char * key) {

		if (*key == '\0') return true;

		// Return false if there is already terminal node before string is the end.
		if (is_terminal) return false;

		int index = char_to_index(*key);
		return next[index]->find(key + 1);
	}
};

int main() {

	int t;
	char phone_books[10000][11];
	scanf("%d", &t);

	while (t--) {
		int n;
		scanf("%d", &n);

		Trie * root = new Trie();
		for (int i = 0; i < n; ++i) {
			scanf("%s", &phone_books[i]);
			root->insert(phone_books[i]);
		}

		bool is_valid = true;
		for (int i = 0; i < n; ++i) {

			if (root->find(phone_books[i]) == false) {
				is_valid = false;
				break;
			}
		}
        delete root;
		printf("%s\n", is_valid ? "YES" : "NO");
	}
	return 0;
}

Solution 2

  • Check inserted phone number is prefix to other numbers when insert into trie.
#include <cstdio>
#include <cstring>

int char_to_index(char c){
	return c - '0';
}

struct Trie {

	bool is_terminal;
	Trie * next[10];

	Trie(): is_terminal(false) {
		memset(next, 0, sizeof(next));
	}

	~Trie() {
		for (int i = 0; i < 10; ++i) {
			if (next[i]) {
				delete next[i];
			}
		}
	}

	bool insert(const char * key) {

		if (*key == '\0') {
			is_terminal = true;
			return true;
		}
		else {
			// If it is already terminal node then there is a prefix match
			if(is_terminal == true) return false;

			int index = char_to_index(*key);

			if (next[index] == 0)
				next[index] = new Trie();

			// If next node is already exists and current's next is end of string then there is a prefix match
			else if(next[index] != 0 && *(key + 1) == '\0'){
				return false;
			}
			return next[index]->insert(key + 1);
		}
	}
};

int main() {

	int t;
	char phone_books[10000][11];
	scanf("%d", &t);

	while (t--) {
		int n;
		scanf("%d", &n);

		Trie * root = new Trie();

		bool ret = true;

		for (int i = 0; i < n; ++i) {
			scanf("%s", &phone_books[i]);

			if (ret == false) continue;

			ret = root->insert(phone_books[i]);

		}

		delete root;
		printf("%s\n", ret ? "YES" : "NO");
	}
	return 0;
}


References