数据挖掘决策树java
max = Gain[i];
return_atrribute = atrribute;
}
i++;
}
return return_atrribute;
}
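// node: the node at which the decision tree is being built
// deData: the data set
// flags: indicates which rows of the data set are needed when building the tree at this node
// attributes: the set of attributes that have not yet been used for classification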
this.element = e;
}
/**
 * Returns the string currently held in this object's {@code value} field.
 *
 * @return the stored value, exactly as it was last assigned
 */
public String getValue() {
    return value;
}
/**
 * Overwrites this object's {@code value} field.
 *
 * @param v the new value; it is stored as-is, without any validation
 */
public void setValue(String v) {
    value = v;
}
public LinkedHashSet<TreeNode> getChilds() {
for(int i = 0; i < deData.length; i++) {
if(flags[i] == true) {
if(classMap.containsKey(deData[i][classIndex])) {
int count = classMap.get(deData[i][classIndex]);
}
System.out.println();
for(int i = 0; i < class_count; i++) {
System.out.print(class_vector[i] + " ");
}
System.out.println();
*/
//计算InforD
double InfoD = 0.0;
//Gain数组存放当前结点未分类属性的Gain值
double Gain[] = new double[atrributes.size()];
//每条数据中归类的下标,为每条数据的最后一个值
int class_index = deData[0].length - 1;
//属性名,该结点在该属性上进行分类
}
}
//对结点进行赋值,该结点为叶结点
node.setElement(mostClass);
node.setChilds(null);
System.out.println("yezhi:" + node.getElement() + ":" + node.getValue());
return;
}
//如果待分类数据全都属于一个类
String return_atrribute = null;
//计算每个未分类属性的Gain值
int count = 0; //计算到第几个属性
for(String atrribute:atrributes) {
//该属性有多少个值,该属性有多少个分类
int values_count, class_count;
class_count = classes.size();
int values_vector[] = new int[values_count * class_count];
int class_vector[] = new int[class_count];
for(int i = 0; i < deData.length; i++) {
if(flags[i] == true) {
class_name = deData[i][class_index];
classSet.add(class_name);
}
}
//则该结点为叶结点,设置有关值,然后返回
if(classSet.size() == 1) {
node.setElement(class_name);
//如果待分类属性已空
if(attributes.isEmpty() == true) {
//从数据集中选择多数类,遍历符合条件的所有数据
HashMap<String,Integer> classMap = new HashMap<String,Integer>();
int classIndex = deData[0].length - 1;
int class_index = deData[0].length - 1;
String class_name = null;
HashSet<String> classSet = new HashSet<String>();
for(int i = 0; i < deData.length; i++) {
}
for(int j = 0; j < class_count; j++) {
if(values_vector[i*class_count+j] != 0) {
double k = values_vector[i*class_count+j];
middle = middle - Math.log(k/attr_count) / Math.log(2.0) * k / attr_count;
while(it.hasNext()) {
String strClass = (String)it.next();
if(classMap.get(strClass) > mostCount) {
mostClass = strClass;
mostCount = classMap.get(strClass);
for(int i = 0; i < deData.length; i++) {
if(flags[i] == true) {
values.add(deData[i][index]);
classes.add(deData[i][class_index]);
}
}
values_count = values.size();
if(flags[i] == true) {
int j = 0;
for(String v:values) {
if(deData[i][index].equals(v)) {
break;
} else {
j++;
}
}
int k = 0;
for(String c:classes) {
if(deData[i][class_index].equals(c)) {
}
}
InfoA += middle * attr_count / class_total;
}
Gain[count] = InfoD - InfoA;
count++;
}
double max = 0.0;
int i = 0;
for(String atrribute:atrributes) {
if(Gain[i] > max) {
double class_total = 0.0;
for(int i = 0; i < class_vector.length; i++){
class_total += class_vector[i];
}
for(int i = 0; i < class_vector.length; i++){
if(class_vector[i] == 0) {
break;
} else {
k++;
}
}
values_vector[j*class_count+k]++;
class_vector[k]++;
}
}
/* //输出各项统计值
for(int i = 0; i < values_count * class_count; i++) {
System.out.print(values_vector[i] + " ");
return this.childs;
}
/**
 * Replaces this object's {@code childs} field with the given set.
 *
 * The reference is stored directly (no copy is made), and no check is
 * performed on the argument.
 *
 * @param childs the set to store in the {@code childs} field
 */
public void setChilds(LinkedHashSet<TreeNode> childs) {
// "this." is required here because the parameter shadows the field of the same name.
this.childs = childs;
}
}
//决策树类
class DecisionTree {
TreeNode root; //决策树的树根结点
public DecisionTree() {
continue;
} else {
double d = Math.log(class_vector[i]/class_total) / Math.log(2.0) * class_vector[i] / class_total;
InfoD = InfoD - d;
}
}
//计算InfoA
double InfoA = 0.0;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Iterator;
//选自csdn博客
//决策树的树结点类
class TreeNode {
String element; //该值为数据的属性名称
}
public TreeNode(String value) {