Version 2(在 Version 1 基础上修改)
只适用于离散型输入属性,且预测 Label 为二分类的情况
——2017.11.4
主函数
% ID3 attribute selection: compute the information gain of every discrete
% attribute with respect to a binary label, then pick the attribute with
% the largest gain.
% Workspace inputs (defined before this script runs):
%   key_value : (num_samples x num_attributes) matrix of discrete attributes
%   label     : (num_samples x 1) binary label vector
size_kvs = size(key_value);
label_class = map_reduce(label);                 % [label value, count], sorted by value
% NOTE(review): row 2 of label_class is the larger label value; treating it
% as the "positive" class matches the original code — confirm with the data.
positive_num = label_class(2, 2);
negative_num = label_class(1, 2);
% Entropy of the full label distribution, H(label)
entropy = Entropy(size_kvs(1), positive_num, negative_num);
ID3 = zeros(size_kvs(2), 1);                     % information gain per attribute
for id = 1:size_kvs(2)
    kv = key_value(:, id);
    KVS = map_reduce(kv);
    % Split this attribute column by label class via logical indexing
    % (replaces the original O(n^2) element-by-element array growth).
    positive_kv = kv(label == label_class(2, 1));
    negative_kv = kv(label == label_class(1, 1));
    positive_KVS = map_reduce(positive_kv);
    negative_KVS = map_reduce(negative_kv);
    entropy_conditional = Entropy_Conditional(KVS, size_kvs(1), positive_KVS, negative_KVS);
    % Information gain (not joint entropy): H(label) - H(label | attribute)
    ID3(id) = entropy - entropy_conditional;
end
% Select the attribute id with the largest information gain
[value, id] = max(ID3)
伪map_reduce函数
function KVS = map_reduce(k_v)
% Build a frequency table for a column of discrete values.
%   k_v : column vector of discrete values
%   KVS : [distinct value, occurrence count], rows sorted by value
distinct_vals = unique(k_v, 'rows');          % sorted distinct values
% Every element of k_v equals one of the (sorted) distinct values, so
% histc's bin counts are exact per-value occurrence counts.
occurrences = histc(k_v, distinct_vals);
KVS = [distinct_vals, occurrences];
end
计算熵值
function entropy = Entropy(num, num_p, num_n)
% Binary entropy of a sample set split into num_p positives and num_n
% negatives out of num total samples:
%   H = -p*log2(p) - q*log2(q),  p = num_p/num, q = num_n/num
% Terms with zero probability contribute 0 (lim x->0 of x*log2(x) = 0).
% Returns 0 for an empty set (num == 0) instead of NaN from 0/0.
entropy = 0;
if num == 0
    return;
end
p = num_p / num;
q = num_n / num;
if p > 0
    entropy = entropy - p * log2(p);
end
if q > 0
    entropy = entropy - q * log2(q);
end
end
计算条件熵
function entropy = Entropy_Conditional(KVS, num, p_KVS, n_KVS)
% Conditional entropy H(label | attribute) for a binary label:
%   sum over attribute values v of  P(v) * H(label | v)
%   KVS   : [value, count] table for the whole attribute column
%   num   : total number of samples
%   p_KVS : [value, count] table for the positive-label subset
%   n_KVS : [value, count] table for the negative-label subset
% A value absent from p_KVS / n_KVS contributes a count of 0.
n_values = size(KVS, 1);
pos_counts = zeros(n_values, 1);
neg_counts = zeros(n_values, 1);
% Align the per-class tables with the rows of KVS.
for row = 1:n_values
    for k = 1:size(p_KVS, 1)
        if p_KVS(k, 1) == KVS(row, 1)
            pos_counts(row) = p_KVS(k, 2);
        end
    end
    for k = 1:size(n_KVS, 1)
        if n_KVS(k, 1) == KVS(row, 1)
            neg_counts(row) = n_KVS(k, 2);
        end
    end
end
% Weighted sum of per-value entropies.
entropy = 0;
for row = 1:n_values
    subset_size = KVS(row, 2);
    entropy = entropy + (subset_size / num) * Entropy(subset_size, pos_counts(row), neg_counts(row));
end
end