本文使用区块链智能合约语言Solidity实现一棵简单的B树,包括树的构建、元素插入方法和查询方法。B树的实现难点在于结点分裂的判断、分裂操作本身以及元素的移动等。用智能合约实现的难点在于Solidity语言中没有“指针”这一概念,这增加了实现依赖指针的树状数据结构的难度。可以使用mapping来存储结点,并在每个结点中用数组存放其孩子结点在mapping中的键,从而用“mapping+数组下标”来代替指针。
// One B-tree node. Nodes live in a mapping and refer to each other by
// mapping key instead of by pointer.
struct node{
uint[] keys; // keys stored in this node
uint[] child; // mapping keys (indices) of this node's children
bool leaf; // true when this node is a leaf
uint8 n; // number of keys currently stored in this node
}
在结构体node当中,keys存放数据的键,child存放此结点各孩子结点在mapping中的下标,leaf表示此结点是否为叶子结点,n为此结点中已经存放的key的数目。
// Index (mapping key) of the root node.
uint root = 0;
// Minimum degree of the B-tree.
// NOTE(review): the full contract listing later in this article declares
// t as a constant equal to 3 - confirm which value is intended.
uint8 t=2;
// Storage of all tree nodes, addressed by node index.
mapping(uint=>node) tree;
同时,为了实现方便,还需要存储B树根结点的下标root,并单独声明B树的最小度数t。tree是存放所有树结点的mapping(结点下标=>结点)。
Properties of B-Tree
- All leaves are at same level.
- A B-Tree is defined by the term minimum degree ‘t’. The value of t depends upon disk block size.
- Every node except root must contain at least t-1 keys. Root may contain minimum 1 key.
- All nodes (including root) may contain at most 2t – 1 keys.
- Number of children of a node is equal to the number of keys in it plus 1.
- All keys of a node are sorted in increasing order. The child between two keys k1 and k2 contains all keys in the range from k1 to k2.
- B-Tree grows and shrinks from root which is unlike Binary Search Tree. Binary Search Trees grow downward and also shrink from downward.
- Like other balanced search trees, the time complexity of search, insert and delete is O(log n).
Insertion
1) Initialize x as root.
2) While x is not leaf, do following
..a) Find the child of x that is going to be traversed next. Let the child be y.
..b) If y is not full, change x to point to y.
..c) If y is full, split it and change x to point to one of the two parts of y. If k is smaller than the mid key in y, then set x as the first part of y, else the second part of y. When we split y, we move a key from y to its parent x.
3) The loop in step 2 stops when x is leaf. x must have space for 1 extra key as we have been splitting all nodes in advance. So simply insert k to x.
Note that the algorithm follows the Cormen book. It is actually a proactive insertion algorithm where before going down to a node, we split it if it is full. The advantage of splitting before is, we never traverse a node twice. If we don’t split a node before going down to it and split it only if new key is inserted (reactive), we may end up traversing all nodes again from leaf to root. This happens in cases when all nodes on the path from root to leaf are full. So when we come to the leaf node, we split it and move a key up. Moving a key up will cause a split in parent node (because parent was already full). This cascading effect never happens in this proactive insertion algorithm. There is a disadvantage of this proactive insertion though, we may do unnecessary splits.
Let us understand the algorithm with an example tree of minimum degree ‘t’ as 3 and a sequence of integers 10, 20, 30, 40, 50, 60, 70, 80 and 90 in an initially empty B-Tree.
Initially root is NULL. Let us first insert 10.
Let us now insert 20, 30, 40 and 50. They all will be inserted in root because maximum number of keys a node can accommodate is 2*t – 1 which is 5.
Let us now insert 60. Since root node is full, it will first split into two, then 60 will be inserted into the appropriate child.
Let us now insert 70 and 80. These new keys will be inserted into the appropriate leaf without any split.
Let us now insert 90. This insertion will cause a split. The middle key will go up to the parent.
pragma solidity ^0.4.11;
// A B-tree of uint keys (minimum degree t) stored in contract storage.
//
// Solidity has no pointers, so every node lives in the `tree` mapping and
// nodes refer to each other by mapping key (node id). Id 0 is never
// allocated: it means "no node", and `root == 0` means the tree is empty.
// Duplicate keys are allowed.
contract btree{
    uint root = 0; // id of the root node; 0 while the tree is empty
    uint8 constant t=3; // minimum degree

    struct node{
        uint[2*t-1] keys; // keys in non-decreasing order; only keys[0..n-1] are valid
        uint[2*t] child; // ids of the children; only child[0..n] are valid
        bool leaf; // true when this node has no children
        uint8 n; // number of keys currently stored in this node
    }

    mapping(uint=>node) tree; // storage of tree nodes, addressed by node id
    uint nodeCount = 0; // number of nodes allocated so far; also the last id handed out

    // Allocates a fresh node id and returns it. Ids come from a counter
    // rather than from the inserted key, so a new node can never overwrite
    // an existing one and the id 0 is never produced. The storage behind a
    // fresh id is all zero, so only `leaf` has to be written here.
    function newnode(bool isleaf) internal returns(uint id){
        nodeCount++;
        id = nodeCount;
        tree[id].leaf = isleaf;
    }

    // Returns the inclusive range [i, j] of child indices of node `id`
    // whose subtrees may contain keys in [lb, ub].
    //   i: first index with keys[i] >= lb (or n when there is none)
    //   j: first index >= i with keys[j] >  ub (or n when there is none)
    // The upper bound uses >= so that, when duplicates of a separator key
    // equal to ub were placed in the child to its right, that child is
    // still visited.
    function ssindex(uint id,uint lb,uint ub)internal constant returns(uint i,uint j){
        node storage tt = tree[id];
        i=0;
        while(i<tt.n && lb>tt.keys[i]){
            i++;
        }
        j=i;
        while(j<tt.n && ub>=tt.keys[j]){
            j++;
        }
    }

    // Returns the sum of all stored keys k with lb <= k <= ub
    // (0 for an empty tree). The sum uses plain uint arithmetic.
    function range(uint lb, uint ub)constant public returns(uint){
        if(root == 0){
            return 0;
        }
        return _range(root,lb,ub);
    }

    // Sums the keys within [lb, ub] in the subtree rooted at node `id`.
    function _range(uint id,uint lb, uint ub) internal constant returns(uint){
        node storage tt = tree[id];
        uint r = 0;
        uint i;
        // A node without keys contributes nothing; this also keeps the
        // keys[n-1] access below from underflowing.
        if(tt.n == 0){
            return 0;
        }
        // Only scan this node's keys when [lb, ub] overlaps its key span.
        if(ub>=tt.keys[0] && lb<=tt.keys[tt.n-1]){
            for(i=0;i<tt.n;i++){
                if(tt.keys[i]>=lb && tt.keys[i]<=ub){
                    r += tt.keys[i];
                }
            }
        }
        if(!tt.leaf){
            uint lbi;
            uint ubi;
            (lbi,ubi) = ssindex(id,lb,ub);
            for(i=lbi;i<=ubi;i++){
                r += _range(tt.child[i],lb,ub);
            }
        }
        return r;
    }

    // Searches key k in the subtree rooted at node `id`.
    // Returns k when it is found and 0 otherwise.
    function _search(uint id,uint k)internal constant returns(uint ){
        node storage tt = tree[id];
        if(tt.n == 0){
            return 0;
        }
        // Find the first key that is >= k.
        uint i=0;
        while(i<tt.n && k>tt.keys[i]){
            i++;
        }
        // keys[i] is only meaningful while i < n: slots at and beyond n hold
        // zeroes or stale values left behind by a split, and i can reach
        // the array length when the node is full.
        if(i<tt.n && tt.keys[i]==k){
            return k;
        }
        if(tt.leaf){
            return 0;
        }
        return _search(tt.child[i],k);
    }

    // Returns k when k is stored in the tree and 0 otherwise. A result of 0
    // for k == 0 therefore does not tell whether 0 is stored.
    function search(uint k)public constant returns(uint){
        if(root == 0){
            return 0;
        }
        return _search(root,k);
    }

    // Inserts every element of l, in order.
    function insert_list(uint[] l)public{
        for(uint i=0;i<l.length;i++){
            insert(l[i]);
        }
    }

    // Inserts key k into the tree, splitting full nodes on the way down
    // so that the leaf finally reached always has room for k.
    function insert(uint k) public{
        // Empty tree: the first key becomes a single-key leaf root.
        if(root == 0){
            root = newnode(true);
            tree[root].keys[0]=k;
            tree[root].n=1;
            return;
        }
        if(tree[root].n==(2*t-1)){
            // The root is full: grow the tree by one level. A new internal
            // node s adopts the old root as its first child, which is then
            // split in two.
            uint s = newnode(false);
            tree[s].child[0] = root;
            splitchild(k,s,0);
            // s now has one key and two children; pick the child for k.
            uint i=0;
            if(tree[s].keys[0]<k){
                i++;
            }
            insertnonfull(tree[s].child[i],k);
            root = s;
        }
        else{
            insertnonfull(root,k);
        }
    }

    // Splits the full child at index i of node `id`. The child keeps its
    // first t-1 keys, a new right sibling receives the last t-1 keys (and
    // the last t children), and the middle key moves up into `id`.
    // Node `id` must not be full. Parameter k is unused; it is kept so the
    // signature stays unchanged.
    function splitchild(uint k,uint id,uint i)internal{
        uint y = tree[id].child[i];
        uint z = newnode(tree[y].leaf);
        node storage parent = tree[id];
        node storage left = tree[y];
        node storage right = tree[z];
        uint j;

        // Move the last t-1 keys of the full child to the new sibling.
        right.n = t-1;
        for(j=0;j<t-1;j++){
            right.keys[j] = left.keys[j+t];
        }
        // Move the last t children as well when the child is internal.
        if(!left.leaf){
            for(j=0;j<t;j++){
                right.child[j] = left.child[j+t];
            }
        }
        left.n = t-1;

        // Make room for the new child at index i+1 and link it.
        for(j=parent.n+1;j>i+1;j--){
            parent.child[j] = parent.child[j-1];
        }
        parent.child[i+1]=z;

        // Make room for the middle key at index i and move it up.
        for(j=parent.n;j>i;j--){
            parent.keys[j] = parent.keys[j-1];
        }
        parent.keys[i] = left.keys[t-1];
        parent.n = parent.n + 1;
    }

    // Inserts key k into the subtree rooted at node `id`.
    // The node must not be full when this function is called.
    function insertnonfull(uint id,uint k)internal{
        node storage tt = tree[id];
        // i counts the keys not yet known to be greater than k, so i-1 is
        // the key being examined and no unsigned index ever goes below 0.
        uint i = tt.n;
        if(tt.leaf){
            // Shift every key greater than k one slot to the right and
            // drop k into the gap.
            while(i>0 && tt.keys[i-1]>k){
                tt.keys[i] = tt.keys[i-1];
                i--;
            }
            tt.keys[i] = k;
            tt.n = tt.n+1;
        }
        else{
            // Find the child that must receive k.
            while(i>0 && tt.keys[i-1]>k){
                i--;
            }
            if(tree[tt.child[i]].n==(2*t-1)){
                // The child is full: split it first. Its middle key is now
                // keys[i]; k belongs to the right half when it is larger.
                splitchild(k,id,i);
                if(tt.keys[i]<k){
                    i++;
                }
            }
            insertnonfull(tt.child[i],k);
        }
    }
}