B-Tree 设计与实现总结--《算法导论》
总结自《introduction to algorithm》第3版,第18章的B-Tree。实现是用的java代码
B-Tree其实是一种多路平衡树,主要是用在对辅存中的数据做增删改查,所以更大的时间消耗其实是在读写辅存。不过为了实现的简便我在实现中并不考虑辅存的读写。
定义
节点 的属性
- 关键字的个数
- 每个关键字缝隙() 之间有一个孩子节点,即共有 个孩子(除叶子节点外)
- x.isleaf 标识是否是叶子
- 关键字有严格的递增顺序
此外,除了叶子节点外,每个节点的关键字都有一个上界和下界,我们称 为B-Tree 的度,则,
接下来我们来证明B-tree 的高度
节点的实现,我这里是将每个节点底层用两个数组来实现,一个entry,另外一个是childs,且大小固定为
class TreeNode<K ,V>{
Item<K,V>[] elememts;//关键字
TreeNode<K,V>[] childs;//孩子节点
int size;
boolean isLeaf;
@SuppressWarnings("unchecked")
public TreeNode(){
elememts = new Item[(degree <<1) -1];
childs = new TreeNode[degree <<1];
size =0;
isLeaf = true;
}
public boolean isLeaf() {
return isLeaf;
}
public final Item<K, V> last() {
return elememts[size-1];
}
public final Item<K, V> first() {
return elememts[0];
}
public final TreeNode<K, V> lastChild() {
return childs[size];
}
public final TreeNode<K, V> firstChild() {
return childs[0];
}
}
B-Tree 的高度
B-Tree 的根至少有 个关键字,且至少每个节点的度数至少是 因此
B-Tree get 查
这是B-Tree的查询实现
public V get(K key) {
TreeNode<K, V> cl = this.root;
while (!cl.isLeaf) {//非叶子节点
int idx = lowerBound(key, cl);// 找到严格大于key的item,返回值与 Arrays.binSearch类似
if(idx>=0)return cl.elememts[idx].getValue();
else cl = cl.childs[idx];//递归访问孩子
}
int idx = lowerBound(key, cl);
if(idx<0)return null;
else return cl.elememts[idx].value;
}
复杂度就是lowerBound查询每个节点的复杂度和树的深度
B-tree insert 增
插入一个关键字
插入关键字的时候,不能像二叉树搜索树一样直接创建一个新的叶子节点插入(这样会破坏B-Tree的性质),而是插入到一个已经创建好的叶子节点上,这样也会出现一个问题:
当节点是满的的时候 插入会破坏B-Tree的性质,因此我们引入一个新的操作,split
, 将当前节点分裂成两个子节点,每个子节点各自 个,中间一个元素提升到父亲,当人正阳可能会引起新的分裂。我们递归的处理就好了,不过由于从根往下搜索的时候就能确定哪些节点是会分裂的,因此我们下搜的时候就将其分裂开来。
插入主代码
public boolean put(K key, V value) {
if(root == null){
root = new TreeNode<>();
addItem(new Item<>(key, value), root, 0);
return true;
}
TreeNode<K, V> r = this.root;
if(isFull(r)){//r.size == 2*deg -1, 分裂树高+1
TreeNode<K, V> newRoot = new TreeNode<>();
this.root = newRoot;
addChild(r, newRoot, 0);
splitChild(newRoot,0);
return putNonFull(newRoot,new Item<>(key, value));
}else return putNonFull(r, new Item<>(key, value));
}
private boolean putNonFull(TreeNode<K, V> node, Item<K, V> entry) {
int idx = lowerBound(entry, node);
if(idx>=0){//修改value,return false
node.elememts[idx].value = entry.value;
return false;
}
idx = -idx -1;
if(node.isLeaf){//叶子节点直接插入
addItem(entry, node, idx);
}else{
TreeNode<K, V> lc = node.childs[idx];
if(isFull(lc)){//分裂
splitChild(node, idx);
if(keyCmp.compare(entry, node.elememts[idx]) > 0)lc = node.childs[idx+1];
putNonFull(lc, entry);
}else putNonFull(lc, entry);
}
return true;
}
private void splitChild(TreeNode<K, V> par, int idx){
par.isLeaf =false;
TreeNode<K, V> rc = new TreeNode<>();
TreeNode<K, V> lc = par.childs[idx];
assert isFull(lc); // lc.size == 2*degree - 1
int sz =0;
int deg = this.degree;
while (sz < deg-1) {
rc.elememts[sz] = lc.elememts[sz+deg];
sz++;
}
rc.size = sz;
if(!lc.isLeaf){
rc.isLeaf = lc.isLeaf;
while (sz>=0) {
rc.childs[sz] = lc.childs[sz+deg];
--sz;
}
}
lc.size = deg -1;
addItem(lc.elememts[deg-1], par, idx);
addChild(rc, par, idx+1);
}
删除
与插入对应,删除的时候也要引入一个操作,就是connect 链接,因为删除的时候我们保证每个节点至少有 个儿子(而不是B-tree 的t-1) 个。
算法导论中讨论了删除的时候的几种情况,但是没有提供伪代码,我这里按照它的思想实现了
可将代码对照着上面的图式看看
/*
* 删除对应的节点,应用算法导论中的算法
* key: 删除节点对应的key
* return : false: 如果节点不存在,反之删除并且return true;
*/
public boolean remove(K key) {
TreeNode<K, V> cl = this.root;
if(cl.size==0){//高度减一,根被删除,树高降低的唯一方式
this.root = cl.childs[0];
cl = this.root;
}
TreeNode<K, V> par = null;
int degLowerBound = this.degree;
while (!cl.isLeaf) {
checkInvirantDelete(cl);
int keyIdx = lowerBound(key, cl);
if(keyIdx>=0){//key 在节点cl中
TreeNode<K, V>lc = cl.childs[keyIdx],rc = cl.childs[keyIdx+1];
if(lc.size >=degLowerBound){// case 2a递归删除 key的前驱,将前驱存在cl中,
Item<K, V> pre = lc.last();
cl.elememts[keyIdx] = pre;
key = pre.key;
cl = lc;//递归删除左子树上的前驱节点;
}else if (rc.size>=degLowerBound) {
Item<K, V> sus = rc.first();
cl.elememts[keyIdx] = sus;
key = sus.key;
cl = rc;
}else{// 两颗子树都只有 deg-1个,合并 lc,keyItem,rc;
cl = connect(cl, keyIdx);
}
}else{//case 3, 不在cl中
keyIdx = - keyIdx -1;//lower bound
TreeNode<K, V> now = cl.childs[keyIdx];
if(now.size < degLowerBound){//仅有deg-1个关键字
TreeNode<K, V> brother;
if(keyIdx-1>=0 && (brother = cl.childs[keyIdx-1]).size>=degLowerBound){
TreeNode<K, V> adCl = brother.lastChild();// 移到now
addItem(cl.elememts[keyIdx], now, 0);
addChild(adCl, now, 0);
//将brother[last]移到 cl
cl.elememts[keyIdx] = deleteItemAndSon(brother, brother.size-1);
}else if(keyIdx+1 <=cl.size &&(brother = cl.childs[keyIdx+1]).size>=degLowerBound){
TreeNode<K, V> adCl = brother.firstChild();
addItem(cl.elememts[keyIdx], now, degLowerBound-1);
addChild(adCl, par, degLowerBound);
cl.elememts[keyIdx] = deleteItemAndSon(brother, 0);
}else {
//链接右兄弟
now = connect(cl, keyIdx);
}
}
cl = now;
}
}
//cl为叶子节点
int keyIdx = lowerBound(key, cl);
if(keyIdx<0)return false;
//删除叶子节点上的 key
for(int bound = cl.size-1;keyIdx<bound; ++keyIdx)cl.elememts[keyIdx] = cl.elememts[keyIdx+1];
--cl.size;
return true;
}
/*
* 链接node.childs[idx] + node.ele[idx]+node.childs[idx+1]
* node : 父亲节点
* idx: 链接的元素位置
* return: node.elements[idx]
*/
private TreeNode<K, V> connect(TreeNode<K, V> node,int idx) {
TreeNode<K, V> lc = node.childs[idx],rc = node.childs[idx+1];
assert lc.size == this.degree-1 && rc.size == this.degree-1;
int deg = this.degree;
Item<K, V> ret = deleteItemAndSon(node, idx);
lc.elememts[deg-1] = ret;
for(int i=0 ;i<deg-1 ; ++i){
lc.elememts[i+deg] = rc.elememts[i];
lc.childs[i+deg] = rc.childs[i];
}
lc.childs[deg*2 -1] = rc.childs[deg-1];
lc.size = 2*deg-1;
return lc;
}
/*
* 删除node[idx] 对应位置的Item和son
* node: 待删除节点
* idx: 下标
*/
private Item<K, V> deleteItemAndSon(TreeNode<K, V> node,int idx) {
Item<K, V> ret = node.elememts[idx];
int n = node.size;
for(int i=idx ; i < n-1 ; ++i){
node.elememts[i] = node.elememts[i+1];
node.childs[i+1] = node.childs[i+2];
}
--node.size;
return ret;
}
完整代码
package dataStructure;
/*
* 根据算法导论对B-Tree 的简单实现,主要在于应用算法,而不是考试实际的使用,因此为了简单默认key是可比较的,并且没有纳入java collection Framework
* 主要实现以下算法:
* delete
* search
* insert
* get
* connect
* 即增删改查
* author : zouzhitao
*/
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Queue;
public class BTree<K extends Comparable<K>,V> {// 简化代码实现,默认 key是可以比较的
private static final int DEFAULT_DEGREE = 3;// 默认b-tree的度数
private final int degree;// b-tree 的度
private TreeNode<K, V> root;
private final Comparator<Item<K, V>> keyCmp = new Comparator<BTree.Item<K,V>>() {
@Override
public int compare(Item<K, V> o1, Item<K, V> o2) {
return o1.key.compareTo(o2.key);
}
};
public BTree(int degree) {
super();
this.degree = degree;
}
public BTree() {
this.degree = DEFAULT_DEGREE;
}
public boolean put(K key, V value) {
if(root == null){
root = new TreeNode<>();
addItem(new Item<>(key, value), root, 0);
return true;
}
TreeNode<K, V> r = this.root;
if(isFull(r)){//r.size == 2*deg -1, 分裂树高+1
TreeNode<K, V> newRoot = new TreeNode<>();
this.root = newRoot;
addChild(r, newRoot, 0);
splitChild(newRoot,0);
return putNonFull(newRoot,new Item<>(key, value));
}else return putNonFull(r, new Item<>(key, value));
}
/*
* 插入时候保证每个节点的size至少
*/
private final void checkInvirantDelete(TreeNode<K, V> node) {
boolean ok = node == this.root ||(node.size >=degree);
assert ok: "不满足度数大于等于this.degree";
}
/*
* 删除对应的节点,应用算法导论中的算法
* key: 删除节点对应的key
* return : false: 如果节点不存在,反之删除并且return true;
*/
public boolean remove(K key) {
TreeNode<K, V> cl = this.root;
if(cl.size==0){//高度减一,根被删除,树高降低的唯一方式
this.root = cl.childs[0];
cl = this.root;
}
TreeNode<K, V> par = null;
int degLowerBound = this.degree;
while (!cl.isLeaf) {
checkInvirantDelete(cl);
int keyIdx = lowerBound(key, cl);
if(keyIdx>=0){//key 在节点cl中
TreeNode<K, V>lc = cl.childs[keyIdx],rc = cl.childs[keyIdx+1];
if(lc.size >=degLowerBound){// case 2a递归删除 key的后继
Item<K, V> pre = lc.last();
cl.elememts[keyIdx] = pre;
key = pre.key;
cl = lc;//递归删除左子树上的前驱节点;
}else if (rc.size>=degLowerBound) {
Item<K, V> sus = rc.first();
cl.elememts[keyIdx] = sus;
key = sus.key;
cl = rc;
}else{// 两颗子树都只有 deg-1个,合并 lc,keyItem,rc;
cl = connect(cl, keyIdx);
}
}else{//case 3, 不在cl中
keyIdx = - keyIdx -1;//lower bound
TreeNode<K, V> now = cl.childs[keyIdx];
if(now.size < degLowerBound){//仅有deg-1个关键字
TreeNode<K, V> brother;
if(keyIdx-1>=0 && (brother = cl.childs[keyIdx-1]).size>=degLowerBound){
TreeNode<K, V> adCl = brother.lastChild();// 移到now
addItem(cl.elememts[keyIdx], now, 0);
addChild(adCl, now, 0);
//将brother[last]移到 cl
cl.elememts[keyIdx] = deleteItemAndSon(brother, brother.size-1);
}else if(keyIdx+1 <=cl.size &&(brother = cl.childs[keyIdx+1]).size>=degLowerBound){
TreeNode<K, V> adCl = brother.firstChild();
addItem(cl.elememts[keyIdx], now, degLowerBound-1);
addChild(adCl, par, degLowerBound);
cl.elememts[keyIdx] = deleteItemAndSon(brother, 0);
}else {
//链接右兄弟
now = connect(cl, keyIdx);
}
}
cl = now;
}
}
//cl为叶子节点
int keyIdx = lowerBound(key, cl);
if(keyIdx<0)return false;
//删除叶子节点上的 key
for(int bound = cl.size-1;keyIdx<bound; ++keyIdx)cl.elememts[keyIdx] = cl.elememts[keyIdx+1];
--cl.size;
return true;
}
/*
* 链接node.childs[idx] + node.ele[idx]+node.childs[idx+1]
* node : 父亲节点
* idx: 链接的元素位置
* return: node.elements[idx]
*/
private TreeNode<K, V> connect(TreeNode<K, V> node,int idx) {
TreeNode<K, V> lc = node.childs[idx],rc = node.childs[idx+1];
assert lc.size == this.degree-1 && rc.size == this.degree-1;
int deg = this.degree;
Item<K, V> ret = deleteItemAndSon(node, idx);
lc.elememts[deg-1] = ret;
for(int i=0 ;i<deg-1 ; ++i){
lc.elememts[i+deg] = rc.elememts[i];
lc.childs[i+deg] = rc.childs[i];
}
lc.childs[deg*2 -1] = rc.childs[deg-1];
lc.size = 2*deg-1;
return lc;
}
/*
* 删除node[idx] 对应位置的Item和son
* node: 待删除节点
* idx: 下标
*/
private Item<K, V> deleteItemAndSon(TreeNode<K, V> node,int idx) {
Item<K, V> ret = node.elememts[idx];
int n = node.size;
for(int i=idx ; i < n-1 ; ++i){
node.elememts[i] = node.elememts[i+1];
node.childs[i+1] = node.childs[i+2];
}
--node.size;
return ret;
}
private boolean putNonFull(TreeNode<K, V> node, Item<K, V> entry) {
int idx = lowerBound(entry, node);
if(idx>=0){//修改value,return false
node.elememts[idx].value = entry.value;
return false;
}
idx = -idx -1;
if(node.isLeaf){//叶子节点直接插入
addItem(entry, node, idx);
}else{
TreeNode<K, V> lc = node.childs[idx];
if(isFull(lc)){//分裂
splitChild(node, idx);
if(keyCmp.compare(entry, node.elememts[idx]) > 0)lc = node.childs[idx+1];
putNonFull(lc, entry);
}else putNonFull(lc, entry);
}
return true;
}
/*
* 搜索key对应的value
* return : return key对应的value 或者null,没有搜索到
*/
public V get(K key) {
TreeNode<K, V> cl = this.root;
while (!cl.isLeaf) {
int idx = lowerBound(key, cl);
if(idx>=0)return cl.elememts[idx].getValue();
else cl = cl.childs[idx];
}
int idx = lowerBound(key, cl);
if(idx<0)return null;
else return cl.elememts[idx].value;
}
public String toString() {
Queue<TreeNode<K, V>> Q= new ArrayDeque<>();
StringBuilder ret = new StringBuilder("root ");
Q.add(this.root);
int nodecnt = 0;
while (!Q.isEmpty()) {
TreeNode<K, V> vis = Q.remove();
ret.append((nodecnt++)+" [ ");
for(int i=0 ; i< vis.size ; ++i){
Item<K, V> entry = vis.elememts[i];
ret.append("( "+entry.key+ " = " + entry.value+" ),");
}
ret.append("]\n");
if(!vis.isLeaf)for(int i=0 ; i<= vis.size ;++i)Q.add(vis.childs[i]);
}
return ret.toString();
}
private void splitChild(TreeNode<K, V> par, int idx){
par.isLeaf =false;
TreeNode<K, V> rc = new TreeNode<>();
TreeNode<K, V> lc = par.childs[idx];
assert isFull(lc); // lc.size == 2*degree - 1
int sz =0;
int deg = this.degree;
while (sz < deg-1) {
rc.elememts[sz] = lc.elememts[sz+deg];
sz++;
}
rc.size = sz;
if(!lc.isLeaf){
rc.isLeaf = lc.isLeaf;
while (sz>=0) {
rc.childs[sz] = lc.childs[sz+deg];
--sz;
}
}
lc.size = deg -1;
addItem(lc.elememts[deg-1], par, idx);
addChild(rc, par, idx+1);
}
private void addItem(Item<K,V> entry,TreeNode<K, V> now, int idx) {
for(int i = now.size-1 ; i >=idx ; --i)
now.elememts[i+1] = now.elememts[i];
now.elememts[idx] = entry;
now.size++;
}
/*
* 二分搜索第一个大于key的位置,如果key相等,返回-1;
*/
private final int lowerBound(Item<K, V> key, TreeNode<K, V> now) {
return Arrays.binarySearch(now.elememts,0,now.size,key, keyCmp);
}
private int lowerBound(K key, TreeNode<K, V> node) {
Item<K, V> tmp = new Item<>(key);
return lowerBound(tmp, node);
}
private void addChild(TreeNode<K, V> child, TreeNode<K, V> par, int idx) {
assert idx <= par.size && par.size <= 2*degree-1;
for(int i= par.size ; i >idx ; --i)
par.childs[i] = par.childs[i-1];
par.childs[idx] = child;
}
private boolean isFull(final TreeNode<K,V> now) {
return now.size == this.degree * 2 -1;
}
static class Item<K,V> implements Map.Entry<K, V>{
K key;
V value;
public Item(K key, V value) {
super();
this.key = key;
this.value = value;
}
public Item(K key) {
this.key = key;
}
@Override
public K getKey() {
return key;
}
@Override
public V getValue() {
return value;
}
@Override
public V setValue(V value) {
V oldValue = value;
this.value = value;
return oldValue;
}
public boolean equals(Object o) {
if(!(o instanceof Item<?, ?>))
return false;
Item<?, ?> tmp = (Item<?, ?>)o;
return valueEqual(this.key, tmp.key) && valueEqual(this.value,tmp.value);
}
}
class TreeNode<K ,V>{
Item<K,V>[] elememts;
TreeNode<K,V>[] childs;
int size;
boolean isLeaf;
@SuppressWarnings("unchecked")
public TreeNode(){
elememts = new Item[(degree <<1) -1];
childs = new TreeNode[degree <<1];
size =0;
isLeaf = true;
}
public boolean isLeaf() {
return isLeaf;
}
public final Item<K, V> last() {
return elememts[size-1];
}
public final Item<K, V> first() {
return elememts[0];
}
public final TreeNode<K, V> lastChild() {
return childs[size];
}
public final TreeNode<K, V> firstChild() {
return childs[0];
}
}
private static final boolean valueEqual(Object o1,Object o2) {
return o1==null? o2==null : o1.equals(o2);
}
public static void main(String[] args) {
BTree<Character, Integer> map = new BTree<Character, Integer>();
String keys = "ACDEGJKMNOPRSTUVXYZ";
for(int i=0 ; i< keys.length() ; ++i){
map.put(keys.charAt(i), i);
}
System.out.println("init btree\n\n"+map);
map.put(keys.charAt(0), 3);
System.out.println("after call put('A',3)\n\n"+map);
map.remove('A');
System.out.println("delete A\n\n"+map);
map.remove('B');
System.out.println("affter call remove('B')\n"+map);
}
}
上一篇: MySQL普通索引和唯一索引的区别
下一篇: 设计模式之一:六大设计原则