C 数据结构堆
引言 - 数据结构堆
堆结构都很耳熟, 从堆排序到优先级队列, 我们总会看见它的身影. 相关的资料太多了,
堆 - https://zh.wikipedia.org/wiki/%e5%a0%86%e7%a9%8d
无数漂亮的图片接二连三, 但目前没搜到一个工程中可以舒服用的代码库. 本文由此痛点而来.
写一篇奇妙数据结构堆的终结代码. 耳熟终究比不过手热 ->---
对于 heap 接口思考, 我是这样设计
#ifndef _h_heap #define _h_heap // // cmp_f - 比较行为 > 0 or = 0 or < 0 // : int add_cmp(const void * now, const void * node) // typedef int (* cmp_f)(); // // node_f - 销毁行为 // : void list_die(void * node) // typedef void (* node_f)(void * node); // // head_t 堆的类型结构 // typedef struct heap * heap_t; // // heap_create - 创建符合规则的堆 // fcmp : 比较行为, 规则 fcmp() <= 0 // return : 返回创建好的堆对象 // extern heap_t heap_create(cmp_f fcmp); // // heap_delete - 销毁堆 // h : 堆对象 // fdie : 销毁行为, 默认 null // return : void // extern void heap_delete(heap_t h, node_f fdie); // // heap_insert - 堆插入数据 // h : 堆对象 // node : 操作对象 // return : void // extern void heap_insert(heap_t h, void * node); // // heap_remove - 堆删除数据 // h : 堆对象 // arg : 操作参数 // fcmp : 比较行为, 规则 fcmp() == 0 // return : 找到的堆节点 // extern void * heap_remove(heap_t h, void * arg, cmp_f fcmp); // // heap_top - 查看堆顶结点数据 // h : 堆对象 // return : 堆顶节点 // extern void * heap_top(heap_t h); // // heap_top - 摘掉堆顶结点数据 // h : 堆对象 // return : 返回堆顶节点 // extern void * heap_pop(heap_t h); #endif//_h_heap
heap_t 是不完全类型实体指针, 其中 struct heap 是这样设计
#include "heap.h" #include <stdlib.h> #include <assert.h> #define uint_heap (1<<5u) struct heap { cmp_f fcmp; // 比较行为 unsigned len; // heap 长度 unsigned cap; // heap 容量 void ** data; // 数据节点数组 }; // heap_expand - 添加节点扩容 inline void heap_expand(struct heap * h) { if (h->len >= h->cap) { h->data = realloc(h->data, h->cap<<=1); assert(h->data); } }
从中可以看出当前堆结构是可以保存 void * 数据. 其中通过 heap::fcmp 比较行为来调整关系.
有了堆的数据结构定义, 那么堆的创建和销毁业务代码就被无脑的确定了 ~
// // heap_create - 创建符合规则的堆 // fcmp : 比较行为, 规则 fcmp() <= 0 // return : 返回创建好的堆对象 // inline heap_t heap_create(cmp_f fcmp) { struct heap * h = malloc(sizeof(struct heap)); assert(h && fcmp); h->fcmp = fcmp; h->len = 0; h->cap = uint_heap; h->data = malloc(sizeof(void *) * uint_heap); assert(h->data && uint_heap > 0); return h; } // // heap_delete - 销毁堆 // h : 堆对象 // fdie : 销毁行为, 默认 null // return : void // void heap_delete(heap_t h, node_f fdie) { if (null == h || h->data == null) return; if (fdie && h->len > 0) for (unsigned i = 0; i < h->len; ++i) fdie(h->data[i]); free(h->data); h->data = null; h->len = 0; free(h); }
随后将迎接这个终结者堆的全貌. 此刻读者可以先喝口水 : )
前言 - 写一段终结代码
堆结构中最核心两处就是向下调整和向上调整过程代码
// down - 堆节点下沉, 从上到下沉一遍 static void down(cmp_f fcmp, void * data[], unsigned len, unsigned x) { void * m = data[x]; for (unsigned i = x * 2 + 1; i < len; i = x * 2 + 1) { if (i + 1 < len && fcmp(data[i+1], data[i]) < 0) ++i; if (fcmp(m, data[i]) <= 0) break; data[x] = data[i]; x = i; } data[x] = m; } // up - 堆节点上浮, 从下到上浮一遍 static void up(cmp_f fcmp, void * node, void * data[], unsigned x) { while (x > 0) { void * m = data[(x-1)>>1]; if (fcmp(m, node) <= 0) break; data[x] = m; x = (x-1)>>1; } data[x] = node; }
如何理解其中奥妙呢. 可以这么看, 索引 i 节点的左子树索引为 2i+1, 右子树树索引为 2i+2 = (2i+1)+1.
相反的索引为 i 节点的父亲节点就是 (i-1)/2 = (i-1)>>1. 这就是堆节点调整的无上奥妙. 随后的代码就
很轻松出手了
// // heap_insert - 堆插入数据 // h : 堆对象 // node : 操作对象 // return : void // inline void heap_insert(heap_t h, void * node) { heap_expand(h); up(h->fcmp, node, h->data, h->len++); } // // heap_top - 查看堆顶结点数据 // h : 堆对象 // return : 堆顶节点 // inline void * heap_top(heap_t h) { return h->len <= 0 ? null : *h->data; } // // heap_top - 摘掉堆顶结点数据 // h : 堆对象 // return : 返回堆顶节点 // inline void * heap_pop(heap_t h) { void * node = heap_top(h); if (node && --h->len > 0) { // 尾巴节点一定比小堆顶节点大, 那么要下沉 h->data[0] = h->data[h->len]; down(h->fcmp, h->data, h->len, 0); } return node; }
看完上面代码可以再回看下 down 和 up 代码布局. 是不是堆节点调整全部技巧已经了然于胸 ~
随后我们介绍堆删除任意节点大致算法思路
1' 循环遍历, 找到要删除节点
2' 如果删除后堆空, 或者删除的是最后节点, 那直接搞定
3' 最后节点复制到待删除节点位置处
4' 最后节点和待删除节点权值相等, 不调整节点关系
5' 最后节点比待删除节点权值大, 向下调整节点关系(基于小顶堆设计)
6' 最后节点比待删除节点权值小, 向上调整节点关系
从上可以看出堆删除节点算法复杂度是 o(n) + o(logn) = o(n). 请欣赏具体代码
// // heap_remove - 堆删除数据 // h : 堆对象 // arg : 操作参数 // fcmp : 比较行为, 规则 fcmp() == 0 // return : 找到的堆节点 // void * heap_remove(heap_t h, void * arg, cmp_f fcmp) { if (h == null || h->len <= 0) return null; // 开始查找这个节点 unsigned i = 0; fcmp = fcmp ? fcmp : h->fcmp; do { void * node = h->data[i]; if (fcmp(arg, node) == 0) { if (--h->len > 0 && h->len != i) { // 尾巴节点和待删除节点比较 int ret = h->fcmp(h->data[h->len], node); // 小顶堆, 新的值比老的值小, 那么上浮 if (ret < 0) up(h->fcmp, h->data[h->len], h->data, i); else if (ret > 0) { // 小顶堆, 新的值比老的值大, 那么下沉 h->data[i] = h->data[h->len]; down(h->fcmp, h->data, h->len, i); } } return node; } } while (++i < h->len); return null; }
到这堆数据结构基本代码都已经搞定. 开始写写测试用例跑跑
#include "heap.h" #include <stdio.h> struct node { int value; }; static inline int node_cmp(const struct node * l, const struct node * r) { return l->value - r->value; } static void heap_print(heap_t h) { struct heap { cmp_f fcmp; // 比较行为 unsigned len; // heap 长度 unsigned cap; // heap 容量 void ** data; // 数据节点数组 } * x = (struct heap *)h; // 数据打印for (unsigned i = 0; i < x->len; ++i) { struct node * node = x->data[i]; printf("%d ", node->value); } putchar('\n'); } int main() { heap_t h = heap_create(node_cmp); struct node a[] = { { 53 }, { 17 }, { 78 }, { 9 }, { 45 }, { 65 }, { 87 }, { 23} }; for (int i = 0; i < sizeof a / sizeof *a; ++i) heap_insert(h, a + i); heap_print(h); // 数据打印 struct node * node; while ((node = heap_pop(h))) { printf("%d ", node->value); } putchar('\n'); // 重新插入数据 for (int i = 0; i < sizeof a / sizeof *a; ++i) heap_insert(h, a + i); // 删除操作 - 下沉 heap_remove(h, &(struct node){ 17 }, null); heap_print(h); // 插入操作 heap_insert(h, &(struct node){ 17 }); heap_print(h); // 删除操作 - 上浮 heap_remove(h, &(struct node){ 78 }, null); heap_print(h); heap_delete(h, null); return 0; }
最终运行结果如下
后续堆相关代码变化, 可以参照 - https://github.com/wangzhione/structc/blob/master/structc/struct/heap.c
说到引用 github 想起一个 git 好用配置安利给大家 ~ 从此 git ll 就活了.
git config --global color.diff auto
git config --global color.status auto git config --global color.branch auto git config --global color.interactive auto git config --global alias.ll "log --graph --all --pretty=format:'%cred%h %creset -%c(yellow)%d%creset %s %cgreen(%cr) %c(bold blue)<%an>%creset' --abbrev-commit --date=relative"
奇妙数据结构堆, 终结在这里, 后面内容可以忽略. 期待下次再扯了 ~
正文 - 顺带赠送个点心
其实到这本不该再说什么. 单纯看上面就足够了. 但不知道有没有朋友觉得你总是说 c 数据结构. 效
果好吗? 对技术提升效果明显吗? 这里不妨利用我们对 c 理解, 来分析一个业务代码. 感受下一通百通.
我试着用 go 中数据结构源码举例子. 重点看下 go 源码包中 "container/list" 链表用法(比较简单)
package main
import (
"container/list"
"fmt"
)
func main() {
// 构造链表对象
pers := list.new()
// persion 普通人对象
type persion struct {
name string
age int
}
// 链表对象数据填充
pers.pushback(&persion{"wang", 27})
pers.pushfront(&persion{"zhi", 27})
// 开始遍历处理
for e := pers.front(); e != nil; e = e.next() {
per, ok := e.value.(*persion)
if !ok {
panic(fmt.sprint("persion list faild", e.value))
}
fmt.println(per)
}
for e := pers.front(); e != nil; {
next := e.next()
pers.remove(e)
e = next
}
fmt.println(pers.len())
}
运行结果是
$ go run list-demo.go &{zhi 27} &{wang 27} 0
通过上面演示 demo, 大致知道了 list 包用法. 随后开始着手解析 "container/list" 源码
// copyright 2009 the go authors. all rights reserved.
// use of this source code is governed by a bsd-style
// license that can be found in the license file.
// package list implements a doubly linked list.
//
// to iterate over a list (where l is a *list):
// for e := l.front(); e != nil; e = e.next() {
// // do something with e.value
// }
//
package list
// element is an element of a linked list.
type element struct {
// next and previous pointers in the doubly-linked list of elements.
// to simplify the implementation, internally a list l is implemented
// as a ring, such that &l.root is both the next element of the last
// list element (l.back()) and the previous element of the first list
// element (l.front()).
next, prev *element
// the list to which this element belongs.
list *list
// the value stored with this element.
value interface{}
}
// next returns the next list element or nil.
func (e *element) next() *element {
if p := e.next; e.list != nil && p != &e.list.root {
return p
}
return nil
}
// prev returns the previous list element or nil.
func (e *element) prev() *element {
if p := e.prev; e.list != nil && p != &e.list.root {
return p
}
return nil
}
// list represents a doubly linked list.
// the zero value for list is an empty list ready to use.
type list struct {
root element // sentinel list element, only &root, root.prev, and root.next are used
len int // current list length excluding (this) sentinel element
}
// init initializes or clears list l.
func (l *list) init() *list {
l.root.next = &l.root
l.root.prev = &l.root
l.len = 0
return l
}
// new returns an initialized list.
func new() *list { return new(list).init() }
// len returns the number of elements of list l.
// the complexity is o(1).
func (l *list) len() int { return l.len }
// front returns the first element of list l or nil if the list is empty.
func (l *list) front() *element {
if l.len == 0 {
return nil
}
return l.root.next
}
// back returns the last element of list l or nil if the list is empty.
func (l *list) back() *element {
if l.len == 0 {
return nil
}
return l.root.prev
}
// lazyinit lazily initializes a zero list value.
func (l *list) lazyinit() {
if l.root.next == nil {
l.init()
}
}
// insert inserts e after at, increments l.len, and returns e.
func (l *list) insert(e, at *element) *element {
n := at.next
at.next = e
e.prev = at
e.next = n
n.prev = e
e.list = l
l.len++
return e
}
// insertvalue is a convenience wrapper for insert(&element{value: v}, at).
func (l *list) insertvalue(v interface{}, at *element) *element {
return l.insert(&element{value: v}, at)
}
// remove removes e from its list, decrements l.len, and returns e.
func (l *list) remove(e *element) *element {
e.prev.next = e.next
e.next.prev = e.prev
e.next = nil // avoid memory leaks
e.prev = nil // avoid memory leaks
e.list = nil
l.len--
return e
}
// remove removes e from l if e is an element of list l.
// it returns the element value e.value.
// the element must not be nil.
func (l *list) remove(e *element) interface{} {
if e.list == l {
// if e.list == l, l must have been initialized when e was inserted
// in l or l == nil (e is a zero element) and l.remove will crash
l.remove(e)
}
return e.value
}
// pushfront inserts a new element e with value v at the front of list l and returns e.
func (l *list) pushfront(v interface{}) *element {
l.lazyinit()
return l.insertvalue(v, &l.root)
}
// pushback inserts a new element e with value v at the back of list l and returns e.
func (l *list) pushback(v interface{}) *element {
l.lazyinit()
return l.insertvalue(v, l.root.prev)
}
// insertbefore inserts a new element e with value v immediately before mark and returns e.
// if mark is not an element of l, the list is not modified.
// the mark must not be nil.
func (l *list) insertbefore(v interface{}, mark *element) *element {
if mark.list != l {
return nil
}
// see comment in list.remove about initialization of l
return l.insertvalue(v, mark.prev)
}
// insertafter inserts a new element e with value v immediately after mark and returns e.
// if mark is not an element of l, the list is not modified.
// the mark must not be nil.
func (l *list) insertafter(v interface{}, mark *element) *element {
if mark.list != l {
return nil
}
// see comment in list.remove about initialization of l
return l.insertvalue(v, mark)
}
// movetofront moves element e to the front of list l.
// if e is not an element of l, the list is not modified.
// the element must not be nil.
func (l *list) movetofront(e *element) {
if e.list != l || l.root.next == e {
return
}
// see comment in list.remove about initialization of l
l.insert(l.remove(e), &l.root)
}
// movetoback moves element e to the back of list l.
// if e is not an element of l, the list is not modified.
// the element must not be nil.
func (l *list) movetoback(e *element) {
if e.list != l || l.root.prev == e {
return
}
// see comment in list.remove about initialization of l
l.insert(l.remove(e), l.root.prev)
}
// movebefore moves element e to its new position before mark.
// if e or mark is not an element of l, or e == mark, the list is not modified.
// the element and mark must not be nil.
func (l *list) movebefore(e, mark *element) {
if e.list != l || e == mark || mark.list != l {
return
}
l.insert(l.remove(e), mark.prev)
}
// moveafter moves element e to its new position after mark.
// if e or mark is not an element of l, or e == mark, the list is not modified.
// the element and mark must not be nil.
func (l *list) moveafter(e, mark *element) {
if e.list != l || e == mark || mark.list != l {
return
}
l.insert(l.remove(e), mark)
}
// pushbacklist inserts a copy of an other list at the back of list l.
// the lists l and other may be the same. they must not be nil.
func (l *list) pushbacklist(other *list) {
l.lazyinit()
for i, e := other.len(), other.front(); i > 0; i, e = i-1, e.next() {
l.insertvalue(e.value, l.root.prev)
}
}
// pushfrontlist inserts a copy of an other list at the front of list l.
// the lists l and other may be the same. they must not be nil.
func (l *list) pushfrontlist(other *list) {
l.lazyinit()
for i, e := other.len(), other.back(); i > 0; i, e = i-1, e.prev() {
l.insertvalue(e.value, &l.root)
}
}
list 包中最核心的数据结构无外乎 element 和 list 互相引用的结构
// element is an element of a linked list.
type element struct {
// next and previous pointers in the doubly-linked list of elements.
// to simplify the implementation, internally a list l is implemented
// as a ring, such that &l.root is both the next element of the last
// list element (l.back()) and the previous element of the first list
// element (l.front()).
next, prev *element
// the list to which this element belongs.
list *list
// the value stored with this element.
value interface{}
}
// next returns the next list element or nil.
func (e *element) next() *element {
if p := e.next; e.list != nil && p != &e.list.root {
return p
}
return nil
}
// prev returns the previous list element or nil.
func (e *element) prev() *element {
if p := e.prev; e.list != nil && p != &e.list.root {
return p
}
return nil
}
// list represents a doubly linked list.
// the zero value for list is an empty list ready to use.
type list struct {
root element // sentinel list element, only &root, root.prev, and root.next are used
len int // current list length excluding (this) sentinel element
}
它是一个特殊循环双向链表. 特殊在 element::list 指向头节点.
随着我们对 list 内存布局理解后, 后面的业务代码实现起来就很一般了. 例如这里
// pushbacklist inserts a copy of an other list at the back of list l.
// the lists l and other may be the same. they must not be nil.
func (l *list) pushbacklist(other *list) {
l.lazyinit()
for i, e := other.len(), other.front(); i > 0; i, e = i-1, e.next() {
l.insertvalue(e.value, l.root.prev)
}
}
其实可以实现的更贴合 list 库总体的风格, 性能还更好
// pushbacklist inserts a copy of an other list at the back of list l.
// the lists l and other may be the same. they must not be nil.
func (l *list) pushbacklist(other *list) {
l.lazyinit()
for e := other.front(); e != nil; e = e.next() {
l.insertvalue(e.value, l.root.prev)
}
}
是不是发现上层代码理解起来心智负担不大. 不过 go 中 slice list map 都不是线程安全的.
特殊场景需要自行加锁. 这里不过多扯. 以后有机会会详细分析 go 中锁源码实现. 最后通过
上面 list 包真实现一个 lru cache
package cache
import (
"container/list"
"sync"
)
// entry 存储实体内容
type entry struct {
key interface{}
value interface{}
}
// cache lru 缓存实现
type cache struct {
// x 保证 lru 访问安全
m sync.mutex
// max 表示缓存容量的最大值, 0 表示无限缓存
max uint
// list 循环双向链表
list *list.list
// pond 缓存的池子
pond map[interface{}]*list.element
}
// new 新建一个 lru 缓存对象
func new(max uint) *cache {
return &cache{
max: max,
list: list.new(),
pond: make(map[interface{}]*list.element),
}
}
// remove 通过 *list.element 删除
func (c *cache) remove(e *list.element) {
n, ok := c.list.remove(e).(*entry)
if ok {
delete(c.pond, n.key)
}
}
// set 添加缓存
func (c *cache) set(key, value interface{}) {
c.m.lock()
defer c.m.unlock()
if e, ok := c.pond[key]; ok {
if value == nil {
// set key nil <=> remove key
c.remove(e)
} else {
e.value = value
c.list.movetofront(e)
}
return
}
// 如果是首次添加
c.pond[key] = c.list.pushfront(&entry{key, value})
// 如果超出池子缓存开始清理
if c.max != 0 && uint(c.list.len()) > c.max {
c.remove(c.list.back())
}
}
// get 获取缓存
func (c *cache) get(key interface{}) (interface{}, bool) {
c.m.lock()
defer c.m.unlock()
if e, ok := c.pond[key]; ok {
c.list.movetofront(e)
return e.value.(*entry).value, true
}
return nil, false
}
用起来很容易
c := cache.new(1)
c.set("123", "123")
c.set("234", "234")
fmt.println(c.get("123"))
fmt.println(c.get("234"))
是不是离开了 c, 整个世界也很简单. 没啥设计模式, 有的是性能还可以, 也能用.
希望能帮到有心人 ~
后记 - 那个打开的大门
- https://music.163.com/#/song?id=426027293
每个男人心里都有一块净土, 只不过生活所逼硬生生的, 藏在心底最深处 . ... ..