最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列

程序员文章站 2022-03-31 19:08:00

...

最近在刷算法导论，第六章是堆排序，本文主要讲使用c++实现最大堆、最小堆以及最大优先级队列和最小优先级队列，还有堆排序。

对可以分为最大堆和最小堆。本文所使用的的堆是基于数组的完全二叉树实现。

最大堆

每个树节点的子树中的任意元素的值都不大于该节点的元素值。

最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列上图为算法导论中使用数组表示的最大堆示意图
实现最大堆首先要实现基于二叉树的寻找节点父节点左右子节点的方法。
由于在c++中数组的索引跟在算法导论的伪代码索引有所不同（数组是从0开始书中伪代码是从1开始计数）。

inline size_t get_parent(size_t i)
	{
		try
		{
			size_t index;
			if (i > 1)
				index = floor(i / 2) - 1;
			else if (i == 1)
				index = 0;
			
			assert(index < heap_size && index >= 0);
			return index;
		}
		catch(const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}	
	}

	inline size_t get_left(size_t i)
	{
		try
		{
			size_t index = 2 * i + 1;
			assert(index >= 0 && index < heap_size);
			return index;	
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	inline size_t get_right(size_t i)
	{
		try
		{
			size_t index = 2 * i + 2;
			assert(index >= 0 && index < heap_size);
			return index;
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

上面是实现寻找数组中节点的父节点、左右子节点的代码
因为是完全二叉树，所以满足这种索引计算关系，
注意这个跟算法导论的伪代码不完全相同！！！
下图为算法导论中的伪代码，借此说明原理
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列在这个max_heap的模板类中，数据成员是

array<T, 1024> data_array = { 0 };
size_t heap_size = 0;

一个是array数组模板一个变量用来表示堆中元素数量。
首先应该做的是初始化堆，即输入一个数组，将堆中的二叉树节点的元素初始化，如下代码：输入n是数组的大小

void init_data(T* input,int n)
	{
		for (int i = 0; i < n; i++)
		{
			data_array.at(i) = *(input+i);
			heap_size++;
		}
	}

最大堆的实现最核心的部分是对初始化了的数组进行调整以使之满足最大堆性质。

在介绍该成员函数之前，还需要一个辅助函数，即判断一个节点是内部节点还是叶节点，代码如下：

	int leaf_class(size_t i,int n)
	{
		if ((2 * i) < n && (2 * i + 1) < n)
			return 2;
		if ((2 * i) < n+1 && (2 * i + 1) == n)
			return 1;
		else
			return 0;
	}

上述代码将节点分成三种类型，返回整数2类型是拥有左右两个节点的类型。返回整数1是只有左节点没有右节点的类型；返回0是叶节点。

有了这个工具函数就可以实现维持最大堆性质的max_heap()成员函数了

void max_heapify(size_t i,int n)
	{
		assert(i < n+1);

		if (leaf_class(i,n) == 2)
		{
			size_t l = get_left(i);
			size_t r = get_right(i);
			size_t largest;

			if (l<n && data_array.at(l)>data_array.at(i))
			{
				largest = l;
			}

			else
			{
				largest = i;
			}

			if (r<n+1 && data_array.at(r)>data_array.at(largest))
			{
				largest = r;
			}

			if (largest != i)
			{
				T tmp;

				tmp = data_array.at(largest);
				data_array.at(largest) = data_array.at(i);
				data_array.at(i) = tmp;

				max_heapify(largest,n);
			}
		}

		else if (leaf_class(i,n) == 1)
		{
			size_t l = get_left(i);
			if (data_array.at(l) > data_array.at(i))
			{
				T tmp = data_array.at(i);

				data_array.at(i) = data_array.at(l);
				data_array.at(l) = tmp;
			}
		}
	}

算法导论的伪代码如下：
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列注意真实代码和伪代码的下标的不同哦
基本思想是如果一个节点的子节点比该节点大，就交换该节点和子节点中最大的。然后再递归的调用被交换过数据的子节点，这样就可以保证从上往下都是满足最大堆的性质了。但是仅仅这个函数还没有完成将一个初始化了的数组调整为最大堆的工作
需要下面的成员函数

void build_max_heap()
	{
		for (int i = floor(heap_size / 2) + 1; i >= 0; i--)
			max_heapify(i,get_heap_size()-1);
	}

原理很简单，只要将完全二叉树的倒数第二层的最后一个元素开始逐步向前每个元素调整满足最大堆的性质就可以了，毕竟最外层的元素都是叶子结点。这样就完成了最大堆的实现过程，完整代码如下：

template <typename T>
class max_heap
{
public:
	inline size_t get_parent(size_t i)
	{
		try
		{
			size_t index;
			if (i > 1)
				index = floor(i / 2) - 1;
			else if (i == 1)
				index = 0;
			
			assert(index < heap_size && index >= 0);
			return index;
		}
		catch(const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}	
	}

	inline size_t get_left(size_t i)
	{
		try
		{
			size_t index = 2 * i + 1;
			assert(index >= 0 && index < heap_size);
			return index;	
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	inline size_t get_right(size_t i)
	{
		try
		{
			size_t index = 2 * i + 2;
			assert(index >= 0 && index < heap_size);
			return index;
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	void max_heapify(size_t i,int n)
	{
		assert(i < n+1);

		if (leaf_class(i,n) == 2)
		{
			size_t l = get_left(i);
			size_t r = get_right(i);
			size_t largest;

			if (l<n && data_array.at(l)>data_array.at(i))
			{
				largest = l;
			}

			else
			{
				largest = i;
			}

			if (r<n+1 && data_array.at(r)>data_array.at(largest))
			{
				largest = r;
			}

			if (largest != i)
			{
				T tmp;

				tmp = data_array.at(largest);
				data_array.at(largest) = data_array.at(i);
				data_array.at(i) = tmp;

				max_heapify(largest,n);
			}
		}

		else if (leaf_class(i,n) == 1)
		{
			size_t l = get_left(i);
			if (data_array.at(l) > data_array.at(i))
			{
				T tmp = data_array.at(i);

				data_array.at(i) = data_array.at(l);
				data_array.at(l) = tmp;
			}
		}
	}

	size_t get_heap_size()
	{
		return heap_size;
	}

	int leaf_class(size_t i,int n)
	{
		if ((2 * i) < n && (2 * i + 1) < n)
			return 2;
		if ((2 * i) < n+1 && (2 * i + 1) == n)
			return 1;
		else
			return 0;
	}

	void build_max_heap()
	{
		for (int i = floor(heap_size / 2) + 1; i >= 0; i--)
			max_heapify(i,get_heap_size()-1);
	}
	
	void init_data(T* input,int n)
	{
		for (int i = 0; i < n; i++)
		{
			data_array.at(i) = *(input+i);
			heap_size++;
		}
	}

	void print_heap()
	{
		for (int i = 0; i < heap_size; i++)
			cout << data_array.at(i) << endl;
	}

	array<T, 1024> data_array = { 0 };
	size_t heap_size = 0;
	
private:

protected:
	
};

堆排序

设计出来最大堆的数据结构，实现堆排序就很简单了。因为最大堆的根节点就是数组中最大的元素，只要将最大的元素与数组最后的元素交换，在对根节点使用最大堆化的转换函数即可。唯一需要注意的是，因为我们已经将最大的元素放置在数组的最后面，所以需要创造一个新的变量，初始值是数组的长度，没调用一次就取出一个最大的元素，该变量减1，进而来实现在原有的数组上进行排序。代码实现中这个指示变量是n。
代码如下

template <typename T>
void heap_sort(max_heap<T> input,int size)
{
	int n = input.get_heap_size() - 1;
	for (size_t i = size-1; i > 0; i--)
	{
		T tmp = input.data_array.at(0);
		input.data_array.at(0) = input.data_array.at(i);
		input.data_array.at(i) = tmp;
		n--;
		input.max_heapify(0, n);
	}

	cout << endl;
	for (int i = 0; i < input.get_heap_size(); i++)
	{
		cout << input.data_array.at(i) << endl;
	}
}

算法导论中的伪代码如下：
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列

最大优先级队列

基于最大堆就可以实现最大优先级队列
返回队列中的最大值，就是数组第一个的元素，堆的根节点。
代码实现如下

T maximum()
	{
		if ((this->heap_size) == 0)
			assert("the max heap is empty!!!");
		else
			return this->data_array[0];
	}

提取最大的元素，方法是将最大堆的根节点元素与最后的元素交换，然后将最大堆的元素个数减1，对交换之后的根节点进行最大堆性质调整即可。
算法导论伪代码如下：
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列代码实现如下：

T heap_extract_max()
	{
		if (this->heap_size == 0)
			assert("the queue is empty!!1");
		else
		{
			T max = this->data_array.at(0);
			this->data_array.at(0) = this->data_array.at(this->heap_size - 1);
			this->heap_size--;
			this->max_heapify(0, this->heap_size - 1);
			return max;
		}
	}

还有一个功能函数是实现插入的基础：
即将节点i的元素值提升为key，然后从新排列最大堆的元素组合。这么实现的现实意义是将个别任务的优先级做调整，
算法导论伪代码如下
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列实现代码如下：

void heap_increase_key(int i,T key)
	{
		assert(0 <= i && i < this->heap_size);
		if (key < this->data_array.at(i))
			assert("new key is smaller than current key!");
		else
		{
			this->data_array.at(i) = key;
			while (i > 0 && this->data_array.at(this->get_parent(i)) < this->data_array.at(i))
			{
				std::swap(this->data_array.at(i), this->data_array.at(this->get_parent(i)));
				i = this->get_parent(i);
			}
		}
	}

核心思想就是提升完后跟父节点比大小，要是比父节点大，就交换，接着往上比。

最后一个功能就是优先级队列的插入元素
其实可以理解为在数组最后面元素的下一位加入待调整的元素，然后不断跟父节点比较，一级一级往上提就可以了。

书中伪代码如下：
最大堆、堆排序以及基于最大堆实现的最大优先级队列和基于最小堆实现的最小优先级队列代码实现如下：

void insert(T key)
	{
		this->data_array.at(this->get_heap_size()) = key;
		this->heap_size++;
		heap_increase_key(this->get_heap_size() - 1, key);
	}

最大优先级队列的完整实现如下：

template<typename T>
class max_priority_queue:public max_heap<T> 
{
public:
	T maximum()
	{
		if ((this->heap_size) == 0)
			assert("the max heap is empty!!!");
		else
			return this->data_array[this->heap_size-1];
	}

	T heap_extract_max()
	{
		if (this->heap_size == 0)
			assert("the queue is empty!!1");
		else
		{
			T max = this->data_array.at(0);
			this->data_array.at(0) = this->data_array.at(this->heap_size - 1);
			this->heap_size--;
			this->max_heapify(0, this->heap_size - 1);
			return max;
		}
	}

	void heap_increase_key(int i,T key)
	{
		assert(0 <= i && i < this->heap_size);
		if (key < this->data_array.at(i))
			assert("new key is smaller than current key!");
		else
		{
			this->data_array.at(i) = key;
			while (i > 0 && this->data_array.at(this->get_parent(i)) < this->data_array.at(i))
			{
				std::swap(this->data_array.at(i), this->data_array.at(this->get_parent(i)));
				i = this->get_parent(i);
			}
		}
	}

	void insert(T key)
	{
		this->data_array.at(this->get_heap_size()) = key;
		this->heap_size++;
		heap_increase_key(this->get_heap_size() - 1, key);
	}
};

测试代码：

void test_max_heap()
{
	max_heap<int> heap;
	
	int data[10] = { 4,1,3,2,16,9,10,14,8,7 };
	heap.init_data(data,10);
	heap.build_max_heap();
	heap.print_heap();
	heap_sort<int>(heap, 10);
}

void test_max_priority_queue()
{
	max_priority_queue<int> queue;

	int data[10] = { 4,1,3,2,16,9,10,14,8,7 };
	queue.init_data(data, 10);
	queue.build_max_heap();
	queue.print_heap();
	cout << endl;

	/*queue.heap_extract_max();
	queue.print_heap();*/

	/*queue.heap_increase_key(9, 60);
	queue.print_heap();*/

	queue.insert(100);
	queue.print_heap();
}

最小堆和最小优先队列

这个实现起来实际上跟最大堆和最大优先级队列反着来就可以，毕竟都是通过比较的方法实现的。

template <typename T>
class min_heap
{
public:
	inline size_t get_parent(size_t i)
	{
		try
		{
			size_t index;
			if (i > 1)
				index = floor(i / 2) - 1;
			else if (i == 1)
				index = 0;

			assert(index < heap_size && index >= 0);
			return index;
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	inline size_t get_left(size_t i)
	{
		try
		{
			size_t index = 2 * i + 1;
			assert(index >= 0 && index < heap_size);
			return index;
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	inline size_t get_right(size_t i)
	{
		try
		{
			size_t index = 2 * i + 2;
			assert(index >= 0 && index < heap_size);
			return index;
		}
		catch (const std::exception& e)
		{
			std::cout << e.what() << '\n';
			throw;
		}
	}

	void min_heapify(size_t i, int n)
	{
		assert(i < n + 1);

		if (leaf_class(i, n) == 2)
		{
			size_t l = get_left(i);
			size_t r = get_right(i);
			size_t smallest;

			if (l<n && data_array.at(l)<data_array.at(i))
			{
				smallest = l;
			}

			else
			{
				smallest = i;
			}

			if (r<n + 1 && data_array.at(r)<data_array.at(smallest))
			{
				smallest = r;
			}

			if (smallest != i)
			{
				T tmp;

				tmp = data_array.at(smallest);
				data_array.at(smallest) = data_array.at(i);
				data_array.at(i) = tmp;

				min_heapify(smallest, n);
			}
		}

		else if (leaf_class(i, n) == 1)
		{
			size_t l = get_left(i);
			if (data_array.at(l) < data_array.at(i))
			{
				T tmp = data_array.at(i);

				data_array.at(i) = data_array.at(l);
				data_array.at(l) = tmp;
			}
		}
	}

	size_t get_heap_size()
	{
		return heap_size;
	}

	int leaf_class(size_t i, int n)
	{
		if ((2 * i) < n && (2 * i + 1) < n)
			return 2;
		if ((2 * i) < n + 1 && (2 * i + 1) == n)
			return 1;
		else
			return 0;
	}

	void build_min_heap()
	{
		for (int i = floor(heap_size / 2) + 1; i >= 0; i--)
			min_heapify(i, get_heap_size() - 1);
	}

	void init_data(T* input, int n)
	{
		for (int i = 0; i < n; i++)
		{
			data_array.at(i) = *(input + i);
			heap_size++;
		}
	}

	void print_heap()
	{
		for (int i = 0; i < heap_size; i++)
			cout << data_array.at(i) << endl;
	}

	array<T, 1024> data_array = { 0 };
	size_t heap_size = 0;

private:

protected:

};


template<typename T>
class min_priority_queue :public min_heap<T>
{
public:
	T minimum()
	{
		if ((this->heap_size) == 0)
			assert("the max heap is empty!!!");
		else
			return this->data_array[0];
	}

	T heap_extract_min()
	{
		if (this->heap_size == 0)
			assert("the queue is empty!!1");
		else
		{
			T min = this->data_array.at(0);
			this->data_array.at(0) = this->data_array.at(this->heap_size - 1);
			this->heap_size--;
			this->min_heapify(0, this->heap_size - 1);
			return min;
		}
	}

	void heap_increase_key(int i, T key)
	{
		assert(0 <= i && i < this->heap_size);
		if (key > this->data_array.at(i))
			assert("new key is larger than current key!");
		else
		{
			this->data_array.at(i) = key;
			while (i > 0 && this->data_array.at(this->get_parent(i)) > this->data_array.at(i))
			{
				std::swap(this->data_array.at(i), this->data_array.at(this->get_parent(i)));
				i = this->get_parent(i);
			}
		}
	}

	void insert(T key)
	{
		this->data_array.at(this->get_heap_size()) = key;
		this->heap_size++;
		heap_increase_key(this->get_heap_size() - 1, key);
	}

};

测试代码如下：

void test_min_heap()
{
	min_heap<int> heap;

	int data[10] = { 4,1,3,2,16,9,10,14,8,7 };
	heap.init_data(data, 10);
	heap.build_min_heap();
	heap.print_heap();
	//heap_sort<int>(heap, 10);
}

void test_min_priority_queue()
{
	min_priority_queue<int> queue;

	int data[10] = { 4,1,3,2,16,9,10,14,8,7 };
	queue.init_data(data, 10);
	queue.build_min_heap();
	queue.print_heap();
	cout << endl;

	/*queue.heap_extract_min();
	queue.print_heap();*/
	/*queue.heap_increase_key(1, 0);
	queue.print_heap();*/

	queue.insert(0);
	queue.print_heap();
}

相关标签： c++ 最大堆最大优先级队列堆排序算法导论

上一篇：用最大堆实现最大优先队列(Python)

下一篇：堆与最大堆