December 26, 2018TWpower Reading time ~12 minutes

[Algorithm] C/C++에서 해시 테이블(Hash Table) 구현하기

업데이트(2019.02.04): 중복 및 map 관련 검증 에러 수정

업데이트(2019.01.19): 코드 수정 및 검증 코드 추가

C++에서 해시 테이블(Hash Table)을 구현해보자

환경

C++
연결 리스트와 해시 함수에 대한 이해(제일 하단 참고자료 참고)

해시 함수(Hash Function)

해시 함수(Hash Function)란?

Hash Function: 해시 함수는 임의의 길이의 문자열을 받아서 고정 문자열로 바꾸어주는 함수이다. 이 때 함수를 구현하는 방법에 따라서 해당 서로 다른 임의의 문자열이 같은 고정 문자열로 되기도 하며 이러한 부분을 충돌이라고 한다.(H(s1) = H(s2))
아래 사진의 경우에는 좌측에 파란 색들이 key이며 각 key값들이 해시 함수의 결과를 오른쪽 우측에 숫자로 바뀌었음을 보여준다. 나중에 해시 테이블에서는 이 key을 해시한 결과를 배열의 인덱스로 사용한다.
해시 함수를 H()라고 했을 때 H(Jonh Smith) = 02

해시 충돌(Hash Collision)

서로 다른 문자열을 해시한 결과가 동일한 경우
해시 함수를 H()라고 했을 때 서로 다른 문자열 s1과 s2에 대해서 H(s1) = H(s2)인 경우
해시 테이블을 구현할 때 해시 충돌이 일어나게 되면 Chaining 혹은 Open Addressing을 통해서 해결해야한다.

예시

아래는 아주 간단한 해시 함수의 예제로 문자열을 받아서 정수로 반환한다.
검색해보면 다양한 구현법이 존재하며 Shift연산과 소수를 이용해서 하면 더 해시 충돌을 막을 수 있다고 한다.(나중에 증명 같은건 찾아봐야지…)

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

해시 테이블(Hash Table)

key와 value로 된 쌍을 저장하는 자료구조이다.
아래 사진처럼 Lisa Smith라는 key로 521-8976의 value를 저장할 수 있도록 설계된 자료구조입니다.
C++에서는 map 그리고 python에서는 dictionary를 통해서 보다 쉽게 이용할 수 있습니다.(여기서는 한번 구현해 보는것에 의의!)
성능이 좋을 때는 O(c)에 접근을 할 수 있기 때문에 공간을 소비해서 접근속도를 늘리고 싶을 때 이용한다. 물론, O(c)도 해시 테이블의 용량이 엄청 크고 해시 함수도 충돌이 일어나지 않는다는 가정이 있을 때 가능하다.
해시 테이블을 구현할 때 해시 충돌이 일어나게 되면 Chaining 혹은 Open Addressing을 통해서 해결해야한다.

구현

여기서는 충돌일 있을때 Chaining을 이용했으며 Singly Linked List(단순 연결 리스트)를 이용해서 작성하였다.

공통

key에 따른 데이터들은 Chaining을 이용했기 때문레 List로 해시테이블을 구현
데이터를 저장하는 Node 선언
#define에 나와있는 값을 조절해서 테이블의 크기나 해시 테이블에 집어넣을 데이터의 수를 조절할 수 있습니다.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_TABLE 5 // 테이블 크기
#define MAX_KEY 8 // include null
#define MAX_DATA 12 // 해시테이블에 넣을 데이터의 수
#define DELETE_COUNT 6 // 삭제할 데이터의 수
#define FIND_COUNT 8 // 찾을 데이터의 수

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // 해시 테이블(해당 인덱스에 리스트로 작성)
char keys[MAX_DATA][MAX_KEY]; // 문자열 key들
int values[MAX_DATA]; // key에 대응하는 값들

초기화 함수

랜덤 함수를 이용해서 key-value쌍들을 생성합니다.

void init() {

	// 해시테이블 초기화
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// 랜덤함수를 위한 srand와 seed
	srand(time(NULL));

	// key에 대응하는 값들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// 문자열 key들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

문자열 함수

복사와 비교 함수 직접 작성(따로 사용해도 무방!)

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

해시 함수

위에 소개했던 내용과 동일하게 Shift연산과 소수를 이용함
해시 테이블의 인덱스로 활용하기 때문에 반드시 MAX_TABLE의 나머지를 반환해야함

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

추가

일반적인 추가와 같으며 중복된 key가 있을 경우에는 값을 바꿉니다.

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = hash(key);

	if (tb[index] == NULL) {
		tb[index] = new_node;
	}

	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// key가 중복이면 값을 바꾸기
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}

		// 중복이 아니면 앞에다가 추가
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

값 찾기

리스트를 이용하면 어렵지 않게 찾을 수 있습니다. 쭉 리스트를 따라가면서 순회하고 값을 찾으면 됩니다.
값이 있으면 val에 저장하고 true를 반환하고 아니면 false를 반홥합니다.

bool find(const char * key, int * val) {

	int index = hash(key);

	Node * cur = tb[index];

	// 하나하나 찾아가면서 확인
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

삭제

삭제도 리스트를 이용하면 쉽게 할 수 있으며 첫번째 삭제만 주의해주면 됩니다.

bool destroy(const char * key) {

	int index = hash(key);

	// 처음이 비어있는지 확인
	if (tb[index] == NULL) {
		return false;
	}

	// 첫번째
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// 나머지의 경우
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

출력

테이블의 배열을 모두 돌고 각각의 원소를 순회하면서 key-value쌍을 출렵합니다.

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

메인

추가, 삭제 그리고 찾는 과정을 코드로 작성하여 테스트 하였습니다.

int main() {

	char tmp_key[MAX_KEY];
	init();

	// Add

	printf("Add to hash table\n");
	for (int i = 0; i < MAX_DATA; ++i) {
		add(keys[i], values[i]);
	}

	print_hash();


	printf("\n");

	// Delete

	printf("Deleted keys: ");
	for (int i = 0; i < DELETE_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		printf("%s ", tmp_key);
		destroy(tmp_key);
	}
	printf("\n");

	print_hash();


	printf("\n");

	// Find

	int val;
	printf("Found: ");
	for (int i = 0; i < FIND_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		if (find(tmp_key, &val)) {
			printf("{ %s, %d } ", tmp_key, val);
		}
	}
	printf("\n");

	return 0;
}

코드

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_TABLE 5 // 테이블 크기
#define MAX_KEY 8 // include null
#define MAX_DATA 12 // 해시테이블에 넣을 데이터의 수
#define DELETE_COUNT 6 // 삭제할 데이터의 수
#define FIND_COUNT 8 // 찾을 데이터의 수

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // 해시 테이블(해당 인덱스에 리스트로 작성)
char keys[MAX_DATA][MAX_KEY]; // 문자열 key들
int values[MAX_DATA]; // key에 대응하는 값들

void init() {

	// 해시테이블 초기화
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// 랜덤함수를 위한 srand와 seed
	srand(time(NULL));

	// key에 대응하는 값들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// 문자열 key들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

int hash(const char * str) {
	int hash = 401;
	int c;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = hash(key);

	if (tb[index] == NULL) {
		tb[index] = new_node;
	}

	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// key가 중복이면 값을 바꾸기
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}

		// 중복이 아니면 앞에다가 추가
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

bool find(const char * key, int * val) {

	int index = hash(key);

	Node * cur = tb[index];

	// 하나하나 찾아가면서 확인
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

bool destroy(const char * key) {

	int index = hash(key);

	// 처음이 비어있는지 확인
	if (tb[index] == NULL) {
		return false;
	}

	// 첫번째
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// 나머지의 경우
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

int main() {

	char tmp_key[MAX_KEY];
	init();

	// Add

	printf("Add to hash table\n");
	for (int i = 0; i < MAX_DATA; ++i) {
		add(keys[i], values[i]);
	}

	print_hash();


	printf("\n");

	// Delete

	printf("Deleted keys: ");
	for (int i = 0; i < DELETE_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		printf("%s ", tmp_key);
		destroy(tmp_key);
	}
	printf("\n");

	print_hash();


	printf("\n");

	// Find

	int val;
	printf("Found: ");
	for (int i = 0; i < FIND_COUNT; ++i) {
		my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
		if (find(tmp_key, &val)) {
			printf("{ %s, %d } ", tmp_key, val);
		}
	}
	printf("\n");

	return 0;
}

검증

hash 함수명이 충돌나서 변경(hash->my_hash)

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <map>
#include <string>

#define TOTAL_TEST_CASE 100

#define MAX_TABLE 300 // 테이블 크기
#define MAX_KEY 8 // include null
#define MAX_DATA 2000 // 해시테이블에 넣을 데이터의 수
#define DELETE_COUNT 1000 // 삭제할 데이터의 수

using namespace std;

struct Node {
	char key[MAX_KEY];
	int value;
	Node * next;
};

Node * tb[MAX_TABLE]; // 해시 테이블(해당 인덱스에 리스트로 작성)
char keys[MAX_DATA][MAX_KEY]; // 문자열 key들
int values[MAX_DATA]; // key에 대응하는 값들

void init() {

	// 해시테이블 초기화
	for (int i = 0; i < MAX_TABLE; ++i) {
		Node * cur = tb[i];
		Node * tmp;
		while (cur != NULL) {
			tmp = cur;
			cur = cur->next;
			free(tmp);
		}
		tb[i] = NULL;
	}

	// 랜덤함수를 위한 srand와 seed
	srand(time(NULL));

	// key에 대응하는 값들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		values[i] = rand() % 100 + 1;
	}

	// 문자열 key들 초기화
	for (int i = 0; i < MAX_DATA; ++i) {
		for (int j = 0; j < MAX_KEY - 1; ++j) {
			keys[i][j] = rand() % 26 + 97; // ASCII 97 ~ 122
		}
		keys[i][MAX_KEY - 1] = '\0';
	}

}

void my_str_cpy(char * dest, const char * src) {

	while (*src != '\0') {
		*dest = *src;
		dest++; src++;
	}
	*dest = '\0';

}

int my_str_cmp(const char * str1, const char * str2) {

	while (*str1 != '\0' && (*str1 == *str2)) {
		str1++;
		str2++;
	}
	return *str1 - *str2;

}

int my_hash(const char * str) {
	int hash = 401;

	while (*str != '\0') {
		hash = ((hash << 4) + (int)(*str)) % MAX_TABLE;
		str++;
	}

	return hash % MAX_TABLE;
}

void add(const char * key, int value) {

	Node * new_node = (Node *)malloc(sizeof(Node));
	my_str_cpy(new_node->key, key);
	new_node->value = value;
	new_node->next = NULL;

	int index = my_hash(key);

	// 처음이 비어있으면 할당
	if (tb[index] == NULL) {
		tb[index] = new_node;
	}
	// 아니면 하나하나 찾아서 중복이면 치환 아니면 제일 뒤에 추가
	else {

		Node * cur = tb[index];

		while (cur != NULL) {

			// key가 중복이면 값을 바꾸기
			if (my_str_cmp(cur->key, key) == 0) {
				cur->value = value;
				return;
			}

			cur = cur->next;
		}
		// 중복이 아니면 앞에다가 추가
		new_node->next = tb[index];
		tb[index] = new_node;
	}
}

bool find(const char * key, int * val) {

	int index = my_hash(key);

	Node * cur = tb[index];

	// 하나하나 찾아가면서 확인
	while (cur != NULL) {
		if (my_str_cmp(cur->key, key) == 0) {
			*val = cur->value;
			return true;
		}
		cur = cur->next;
	}

	return false;

}

bool destroy(const char * key) {

	int index = my_hash(key);

	// 처음이 비어있는지 확인
	if (tb[index] == NULL) {
		return false;
	}

	// 첫번째
	if (my_str_cmp(tb[index]->key, key) == 0) {
		Node * first = tb[index];
		tb[index] = tb[index]->next;
		free(first);
		return true;
	}

	// 나머지의 경우
	else {

		Node * cur = tb[index]->next;
		Node * prev = tb[index];

		while (cur != NULL && my_str_cmp(cur->key, key) != 0) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

void print_hash() {

	for (int i = 0; i < MAX_TABLE; ++i) {
		if (tb[i] != NULL) {

			printf("index: %d\n", i);

			Node * cur = tb[i];

			while (cur != NULL) {
				printf("{ %s, %d }, ", cur->key, cur->value);
				cur = cur->next;
			}
			printf("\n");
		}
	}

}

int main() {

	int test_case = 1;
	int correct = 0;


	for (test_case = 1; test_case <= TOTAL_TEST_CASE; ++test_case) {

		init();

		bool is_equal = true;

		map<string, int> m;
		map<string, int>::iterator it;


		for (int i = 0; i < MAX_DATA; ++i) {
			add(keys[i], values[i]);
		}
		for (int i = 0; i < MAX_DATA; ++i) {
			if (m.count(keys[i]) == 0) {
				m.insert(make_pair(keys[i], values[i]));
			}
			else {
				m[keys[i]] = values[i];
			}

		}

		for (int i = 0; i < MAX_DATA; ++i) {

			int tmp;
			find(keys[i], &tmp);

			if (m[keys[i]] != tmp) {
				is_equal = false;
			}
		}

		char tmp_key[MAX_KEY];
		for (int i = 0; i < DELETE_COUNT; ++i) {
			my_str_cpy(tmp_key, keys[rand() % MAX_DATA]);
			destroy(tmp_key);
			m.erase(tmp_key);
		}

		for (int i = 0; i < MAX_DATA; ++i) {

			int tmp = -1;

			if (find(keys[i], &tmp) == false && m.count(keys[i]) == 0) {
				continue;
			}

			if (find(keys[i], &tmp) == true && m.count(keys[i]) == 1 && m[keys[i]] == tmp) {
				continue;
			}
			else {
				is_equal = false;
			}

		}

		if (is_equal) correct++;

	}

	printf("Total: %d / %d\n", correct, TOTAL_TEST_CASE);

	return 0;
}

참고자료

December 23, 2018TWpower Reading time ~1 minute

[Linux] scp를 이용해 로컬과 원격에 파일 전송하기

업데이트(2018.03.16): 옵션 및 설명 추가

Linux 기반 운영체제에서 scp 명령어를 사용해서 로컬-원격 사이에 파일을 주고 받아보자.

환경 및 선수조건

Linux
Bash shell(/bin/bash)

scp 명령어

기본

scp: secure copy (remote file copy program)의 줄임말로 ssh를 이용해 네트워크로 연결된 호스트간에 파일을 주고 받는 명령어입니다.
로컬 -> 리모트 (보내기), 리모트 -> 로컬 (가져오기)와 리모트 -> 리모트 (다른 호스트끼리 전송) 로 복사가 모두 가능합니다.
ssh를 이용하기 때문에 password를 입력하거나 ssh 키파일과 같은 identity file을 이용해 파일 송수신이 가능합니다.

기본 사용 문법

manual page에 있는 자료

scp [options ...] [source] [target]

기본 형태

# Local -> Remote
scp 목적파일명(경로) 유저명@IP주소:목적디렉토리

# Remote -> Local
scp 유저명@IP주소:파일디렉토리 목적파일명(경로)

# Remote(source) -> Remote(target)
scp 유저명@IP주소:파일디렉토리 유저명@IP주소:파일디렉토리

옵션

-r: 재귀적으로 모든 폴더들을 복사합니다. 폴더를 복사할 때 사용하는 옵션으로 이때 전송하고자 하는 대상은 폴더로 지정하면 됩니다. 아래에 예제를 참고하시면 됩니다. symbolic link가 있는 경우에는 target에 symbolic link를 생성하지 않고 symbolic link가 가리키는 파일 혹은 폴더를 복사합니다.
-P: ssh 포트를 지정하는 옵션
-i: ssh 키파일과 같은 identity file의 경로를 지정하는 옵션
-v: verbose 모드로 상세내용을 보며 디버깅을 할 때 사용합니다.
-p: 파일의 수정 시간과 권한을 유지합니다.

예제

로컬 -> 리모트

패스워드 사용하는 경우

scp ~/test.txt twpower@[IP주소]:/home/twpower

-i 옵션
identity file을 지정해서 사용할 때

scp -i ~/.ssh/twpower-private-server ~/test.txt twpower@[IP주소]:/home/twpower

-r 옵션
폴더를 복사하는 경우

scp -r ~/test_folder/ twpower@[IP주소]:/home/twpower

-P 옵션

scp -P 22 ~/test.txt twpower@[IP주소]:/home/twpower

리모트 -> 로컬

패스워드 사용하는 경우

scp twpower@[IP주소]:/home/twpower/test.txt /Users/taewoo

-i 옵션
identity file을 지정해서 사용할 때

scp -i ~/.ssh/twpower-private-server twpower@[IP주소]:/home/twpower/test.txt /Users/taewoo

-r 옵션
폴더를 복사하는 경우

scp -r twpower@[IP주소]:/home/twpower/test_folder /Users/taewoo

-P 옵션

scp -P 22 twpower@[IP주소]:/home/twpower/test.txt /Users/taewoo

참고자료

December 20, 2018TWpower Reading time ~5 minutes

[Algorithm] C/C++에서 힙(Heap) 구현하기

업데이트(2019.01.19): 코드 수정 및 검증 코드 추가

C++에서 힙(Heap)을 구현해보자

환경

Heap이란?

개념

Heap이란 완전 이진 트리의 일종으로 부모 노드와 자식 노드간에 항상 대소관계가 성립하는 자료구조를 의미합니다.
아래의 사진을 보면 부모 노드가 자식 노드보다 항상 크며 이러한 구조를 최대힙(Max Heap)이라고 하고 그 반대를 최소힙(Min Heap)이라고 합니다.
이 때 부모와 자식 노드의 대소관계만 중요하며 형제 노드들간에는 관계가 없습니다.
이렇게 아래처럼 구현하고 위와 같은 속성(부모와 자식간에 대소관계)을 띄기 때문에 최상단 부모인 루트에는 항상 가장 크거나 작은 값이 오며 이를 이용해서 그 유명한 우선 순위 큐(Priority Queue)의 구현이 가능합니다.

구현

공통

완전 이진 트리는 배열로 구현합니다.
구현을 쉽게 하기 위해 배열을 사용할 때 인덱스는 1부터 사용합니다.
특정 노드의 배열 인덱스가 current라고 한다면 부모 노드는 current / 2를 통해 찾아갈 수 있고 자식 노드는 current * 2(좌측 자식 노드) 또는 current * 2 + 1(우측 자식 노드)을 통해서 찾아갈 수 있습니다.
현재 노드 인덱스: current
부모 노드 인덱스: current / 2
자식 노드들 인덱스 (순서 대로 좌우): current * 2, current * 2 + 1

삽입

완전 이진 트리 가장 끝에 원소를 추가하고 추가한 부분의 원소와 해당 원소의 부모 노드와 크기를 비교하고 바꿔가면서 위치를 찾습니다.

void push(int data) {

	// 힙의 가장 끝에 데이터 추가
	heap[++heap_count] = data;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 올리는 부분
	int child = heap_count;
	int parent = child / 2;
	while (child > 1 && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);
		child = parent;
		parent = child / 2;
	}

}

삭제

힙의 가장 첫번째 원소를 반환하고 첫번째 위치에 힙의 가장 끝 원소를 대입합니다.
첫번째 원소를 자식과 계속 비교하고 값을 바꿔가면서 힙을 재정렬합니다.
이 때, 자식은 왼쪽과 오른쪽 2개가 생기기 때문에 최대힙을 구현하는 경우에는 더 큰 자식과 값을 바꿉니다.(최소힙일 때는 그 반대!)

int pop() {

	// 힙의 가장 첫번째 원소를 반환
	// 힙의 가장 앞만 보고 있다!
	int result = heap[1];

	// 첫번째 원소를 힙의 가장 끝에 원소와 바꾸고
	// 가장 끝은 이제 쓰지 않을 예정이니 heap_count를 내려준다.
	swap(&heap[1], &heap[heap_count]);
	heap_count--;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 내리는 부분
	int parent = 1;
	int child = parent * 2;
	if (child + 1 <= heap_count) {
		child = (heap[child] > heap[child + 1]) ? child : child + 1;
	}

	while (child <= heap_count && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);

		parent = child;
		child = child * 2;
		if (child + 1 <= heap_count) {
			child = (heap[child] > heap[child + 1]) ? child : child + 1;
		}
	}

	return result;
}

코드

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define HEAP_SIZE 256
#define ARRAY_SIZE 10

// Max Heap 구현
// 구현을 쉽게 하기 위해서 값들은 모두 양수라고 가정!
// 배열에서 값 0은 비어있음을 의미한다고 가정하자!
// heap은 배열의 인덱스가 1부터 시작합니다!

int heap[HEAP_SIZE]; // max heap
int heap_count = 0; // heap의 원소의 갯수를 나타냄과 동시에 배열의 가장 끝 원소를 나타내며 heap의 끝을 나타내기도 합니다.

void swap(int * a, int * b) {
	int tmp = *a; *a = *b; *b = tmp;
}

void init() {
	heap_count = 0;
}

int size() {
	return heap_count;
}

void push(int data) {

	// 힙의 가장 끝에 데이터 추가
	heap[++heap_count] = data;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 올리는 부분
	int child = heap_count;
	int parent = child / 2;
	while (child > 1 && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);
		child = parent;
		parent = child / 2;
	}

}

int pop() {

	// 힙의 가장 첫번째 원소를 반환
	// 힙의 가장 앞만 보고 있다!
	int result = heap[1];

	// 첫번째 원소를 힙의 가장 끝에 원소와 바꾸고
	// 가장 끝은 이제 쓰지 않을 예정이니 heap_count를 내려준다.
	swap(&heap[1], &heap[heap_count]);
	heap_count--;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 내리는 부분
	int parent = 1;
	int child = parent * 2;
	if (child + 1 <= heap_count) {
		child = (heap[child] > heap[child + 1]) ? child : child + 1;
	}

	while (child <= heap_count && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);

		parent = child;
		child = child * 2;
		if (child + 1 <= heap_count) {
			child = (heap[child] > heap[child + 1]) ? child : child + 1;
		}
	}

	return result;
}

int main() {

	int arr[ARRAY_SIZE];

	// 랜덤함수를 위한 srand와 seed
	srand(time(NULL));

	// 배열 초기화
	for (int i = 0; i < ARRAY_SIZE; i++) {
		// 1 ~ 50까지의 난수 생성
		arr[i] = rand() % 50 + 1;
	}

	// 삽입
	for (int i = 0; i < ARRAY_SIZE; i++) {
		push(arr[i]);
	}

	// pop 하면서 값들 하나씩 확인
	// Max Heap이기 때문에 값들이 내림차순으로 정렬됨 -> Heap Sort
	for (int i = 0; i < ARRAY_SIZE; i++) {
		printf("%d ", pop());
	}
	printf("\n");

	return 0;
}

검증

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <iostream>
#include <functional>

#define HEAP_SIZE 256
#define TOTAL_TEST_CASE 100
#define TEST_ARRAY_SIZE 20

using namespace std;

int heap[HEAP_SIZE];
int heap_count = 0;

void swap(int * a, int * b) {
	int tmp = *a; *a = *b; *b = tmp;
}

void init() {
	heap_count = 0;
}

int size() {
	return heap_count;
}

void push(int data) {

	// 힙의 가장 끝에 데이터 추가
	heap[++heap_count] = data;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 올리는 부분
	int child = heap_count;
	int parent = child / 2;
	while (child > 1 && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);
		child = parent;
		parent = child / 2;
	}

}

int pop() {

	// 힙의 가장 첫번째 원소를 반환
	// 힙의 가장 앞만 보고 있다!
	int result = heap[1];

	// 첫번째 원소를 힙의 가장 끝에 원소와 바꾸고
	// 가장 끝은 이제 쓰지 않을 예정이니 heap_count를 내려준다.
	swap(&heap[1], &heap[heap_count]);
	heap_count--;

	// 아래의 과정은 child를 parent와 비교하면서 알맞은 위치로 하나씩 내리는 부분
	int parent = 1;
	int child = parent * 2;
	if (child + 1 <= heap_count) {
		child = (heap[child] > heap[child + 1]) ? child : child + 1;
	}

	while (child <= heap_count && heap[parent] < heap[child]) {
		swap(&heap[parent], &heap[child]);

		parent = child;
		child = child * 2;
		if (child + 1 <= heap_count) {
			child = (heap[child] > heap[child + 1]) ? child : child + 1;
		}
	}

	return result;
}

int main() {

	int arr[TEST_ARRAY_SIZE]; // heap에 이용할 자료들
	int ans_arr[TEST_ARRAY_SIZE]; // 검증에 사용할 자료들

	srand(time(NULL));

	int test_case;
	int correct = 0;

	for (test_case = 1; test_case <= TOTAL_TEST_CASE; ++test_case) {

		// 같은지 확인하는 변수
		bool is_equal = true;

		// 자료들 입력
		for (int i = 0; i < TEST_ARRAY_SIZE; i++) {
			arr[i] = rand() % 2000 + 1; // 1 ~ 2000 사이의 난수 생성
			ans_arr[i] = arr[i];
		}

		// heap 정렬
		for (int i = 0; i < TEST_ARRAY_SIZE; i++) {
			push(arr[i]);
		}
		for (int i = 0; i < TEST_ARRAY_SIZE; i++) {
			arr[i] = pop();
		}

		// 정답 생성
		// greater<int>()는 내림차순을 위해서 사용
		// sort(ans_arr, ans_arr+TEST_ARRAY_SIZE)만 사용하면 오름차순이 됨
		sort(ans_arr, ans_arr + TEST_ARRAY_SIZE, greater<int>());

		// 정답과 힙정렬 결과 비교
		for (int i = 0; i < TEST_ARRAY_SIZE; i++) {
			if (ans_arr[i] != arr[i]) {
				is_equal = false;
				break;
			}
		}

		if (is_equal) correct++;

	}

	printf("Total: %d / %d\n", correct, TOTAL_TEST_CASE);

	return 0;
}

참고자료

December 16, 2018TWpower Reading time ~5 minutes

[Algorithm] C/C++에서 단순 연결 리스트(Singly Linked List) 구현하기

업데이트(2019.02.03): 코드 일부 수정 및 개선

C++에서 단순 열결 리스트(Singly Linked List)를 구현해보자

환경

Singly Linked List란?

개념

아래처럼 하나의 노드에 필요한 정보를 담고 다음에 해당하는 노드를 가리키고 있는 자료구조로 포인터를 이용해 자료들을 선형으로 연결할 자료구조이다.
배열과 비교했을 때 추가 및 삭제가 쉽다는 장점이 있지만 접근할 때 O(n)만큼 걸린다는 단점이 있습니다.

구현

공통

구현을 편리하게 하기위해 전역으로 하나의 리스트만 만들었습니다.
큰 틀은 아래와 같으면 추가, 삭제 그리고 순회의 경우에 필요한 부분들을 수정하면 됩니다.
list는 리스트의 처음을 가리킵니다.

#include <stdio.h>
#include <stdlib.h>

// Node
struct Node {
	int data;
	Node * next;
};

// Global list
Node * list;

추가/삭제 공통

추가와 삭제의 경우에 첫번째 노드인지 아닌지를 고려해주면 구현이 편합니다.
리스트의 중간에서 자료의 추가나 제거가 이루어질 때 현재 노드를 가리키는 cur와 이전 노드를 가리키는 prev가 있으면 추가 및 삭제가 편합니다.
삭제의 경우에 삭제하고자 하는 노드가 있으면 true를 반환하고 없다면 false를 반환합니다.
함수 ascending_order_add의 경우에는 오름차순으로 리스트를 생성하는 예제 함수입니다.

추가

아래처럼 추가하려는 노드의 위치를 찾은 후에 새 노드의 next를 다음 노드를 가리키고 이전 노드가 추가하려는 노드를 가리키게 합니다.
코드는 크게 3가지 방식이 있습니다.
add: 새로 추가하는 방법
ascending_order_add: 중간에 추가하는 방법
add_unique: 중복된 값 없이 추가하는 방법

// Add - one by one
void add(int key) {

	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	// Check first element
	if (list == NULL) {
		list = new_node;
	}
	else {
		// Add new node to head
		new_node->next = list;
		list = new_node;
	}
}

// Add - add ascending order
void ascending_order_add(int key) {

	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	if (list == NULL) {
		list = new_node;
	}
	else {

		Node * cur = list;
		Node * prev = NULL;

		// If first element is larger than key
		if (cur->data > new_node->data) {
			new_node->next = cur;
			list = new_node;
		}
		// Other cases
		else {
			while (cur != NULL && cur->data < new_node->data) {
				prev = cur;
				cur = cur->next;
			}
			// Add in middle
			if (cur != NULL) {
				new_node->next = cur;
				prev->next = new_node;
			}
			// Add to end
			else {
				prev->next = new_node;
			}
		}
	}
}

// Add - add only unique value
void add_unique(int key) {
	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	if (list == NULL) {
		list = new_node;
	}
	else {
		Node * cur = list;

		// Duplication check
		while (cur != NULL) {
			if (cur->data == key) {
				return;
			}
			cur = cur->next;
		}

		new_node->next = list;
		list = new_node;
	}
}

삭제

아래처럼 삭제하려는 노드의 위치를 찾은 후에 이전노드가 삭제하려는 노드의 다음 노드를 가리키도록 하면 됩니다.
똑같이 처음을 주의해주고 쉽게 구현하기 위해 cur과 prev을 이용합니다.

// Remove
bool remove(int key) {

	if (list == NULL) {
		return false;
	}

	if (list->data == key) {
		Node * cur = list;
		list = list->next;
		free(cur);
		return true;
	}
	else {
		Node * cur = list->next;
		Node * prev = list;
		while (cur != NULL && cur->data != key) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

코드

#include <stdio.h>
#include <stdlib.h>

// Node
struct Node {
	int data;
	Node * next;
};

// Global list
Node * list;

// Init
void init() {

	if (list == NULL) {
		return;
	}
	else {
		Node * cur;
		cur = list;

		while (cur != NULL) {
			list = cur->next;
			free(cur);
			cur = list;
		}
	}
}

// Add - one by one
void add(int key) {

	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	// Check first element
	if (list == NULL) {
		list = new_node;
	}
	else {
		// Add new node to head
		new_node->next = list;
		list = new_node;
	}
}

// Add - add ascending order
void ascending_order_add(int key) {

	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	if (list == NULL) {
		list = new_node;
	}
	else {

		Node * cur = list;
		Node * prev = NULL;

		// If first element is larger than key
		if (cur->data > new_node->data) {
			new_node->next = cur;
			list = new_node;
		}
		// Other cases
		else {
			while (cur != NULL && cur->data < new_node->data) {
				prev = cur;
				cur = cur->next;
			}
			// Add in middle
			if (cur != NULL) {
				new_node->next = cur;
				prev->next = new_node;
			}
			// Add to end
			else {
				prev->next = new_node;
			}
		}
	}
}

// Add - add only unique value
void add_unique(int key) {
	Node * new_node = (Node*)malloc(sizeof(Node));
	new_node->data = key;
	new_node->next = NULL;

	if (list == NULL) {
		list = new_node;
	}
	else {
		Node * cur = list;

		// Duplication check
		while (cur != NULL) {
			if (cur->data == key) {
				return;
			}
			cur = cur->next;
		}

		new_node->next = list;
		list = new_node;
	}
}

// Remove
bool remove(int key) {

	if (list == NULL) {
		return false;
	}

	if (list->data == key) {
		Node * cur = list;
		list = list->next;
		free(cur);
		return true;
	}
	else {
		Node * cur = list->next;
		Node * prev = list;
		while (cur != NULL && cur->data != key) {
			prev = cur;
			cur = cur->next;
		}

		if (cur == NULL) return false;

		prev->next = cur->next;
		free(cur);
		return true;
	}
}

// Traverse
void traverse() {

	Node * cur = list;
	while (cur != NULL) {
		printf("%d ", cur->data);
		cur = cur->next;
	}
	printf("\n");

}

int main() {

	int arr[9] = { 2, 4, 6, 8, 1, 3, 5, 7, 9 };
	int arr_duplicated[18] = { 8, 1, 3, 2, 4, 6, 8, 1, 3, 5, 7, 9, 2, 4, 6, 5, 7, 9 };
	int arr_rmv[4] = { 2, 9, 8, 100 };


	// Add to list 1
	init();
	for (int i = 0; i < sizeof(arr) / sizeof(arr[0]); ++i) {
		add(arr[i]);
	}
	printf("After add(normal): ");
	traverse();


	// Add to list 2
	init();
	for (int i = 0; i < sizeof(arr) / sizeof(arr[0]); ++i) {
		ascending_order_add(arr[i]);
	}
	printf("After add(ascending): ");
	traverse();


	// Add to list 3
	init();
	for (int i = 0; i < sizeof(arr_duplicated) / sizeof(arr_duplicated[0]); ++i) {
		add_unique(arr_duplicated[i]);
	}
	printf("After add(unique): ");
	traverse();


	// Remove specific values in list
	for (int i = 0; i < sizeof(arr_rmv) / sizeof(arr_rmv[0]); ++i) {
		remove(arr_rmv[i]);
	}
	printf("After remove: ");
	traverse();

	return 0;

}

실행결과

After add(normal): 9 7 5 3 1 8 6 4 2
After add(ascending): 1 2 3 4 5 6 7 8 9
After add(unique): 9 7 5 6 4 2 3 1 8
After remove: 7 5 6 4 3 1

참고자료

https://en.wikipedia.org/wiki/Linked_list

December 13, 2018TWpower Reading time ~1 minute

[Linux] tee를 이용해 출력을 화면과 파일에 동시에 출력하기

Linux 기반 운영체제에서 tee 명령어를 사용해 화면과 파일에 동시에 출력해보자

환경 및 선수조건

Linux
Bash shell(/bin/bash)

tee 명령어

tee 명령어는 다음 아래 사진처럼 명령어의 출력 결과를 파일과 화면에 동시에 출력할 수 있도록 해주는 명령어입니다.
stdin을 받아서 stdout과 하나 이상의 파일에 그 입력을 출력하는겁니다.

기본 사용법

[ -a ]: 덮어쓰기 말고 해당 파일에 추가해서 입력합니다.
[ -i ]: interrupt를 무시하는 옵션
[ File ... ]: 파일들 이름입니다.

tee [ -a ] [ -i ] [ File ... ]

예제 - 명령어의 결과를 파일과 stdout으로 출력하기

$ echo test | tee tee-test-file.txt
test
$ cat tee-test-file.txt
test

예제 - shell에서 파일 생성하기

$ tee tee-test-file.txt << EOF
> Multi line test
> 1
> 2
> 3
> End!
> EOF
Multi line test
1
2
3
End!
$ cat tee-test-file.txt
Multi line test
1
2
3
End!

Categories

Tags

Posts

About

Home

Welcome to TWpower's Tech Blog

환경

해시 함수(Hash Function)

해시 함수(Hash Function)란?

해시 충돌(Hash Collision)

예시

해시 테이블(Hash Table)

구현

공통

초기화 함수

문자열 함수

해시 함수

추가

값 찾기

삭제

출력

메인

코드

검증

참고자료

환경 및 선수조건

scp 명령어

기본

기본 사용 문법

옵션

예제

로컬 -> 리모트

리모트 -> 로컬

참고자료

환경

Heap이란?

개념

구현

공통

삽입

삭제

코드

검증

참고자료

환경

Singly Linked List란?

개념

구현

공통

추가/삭제 공통

추가

삭제

코드

실행결과

참고자료

환경 및 선수조건

tee 명령어

기본 사용법

예제 - 명령어의 결과를 파일과 stdout으로 출력하기

예제 - shell에서 파일 생성하기

참고자료