当前位置:文档之家› 数据挖掘决策树算法Java实现

数据挖掘决策树算法Java实现


Gain[count] = InfoD - InfoA; count++; }
double max = 0.0; int i = 0; for(String atrribute:atrributes) {
if(Gain[i] > max) { max = Gain[i]; return_atrribute = atrribute;
for(int j = 0; j < class_count; j++) { attr_count += values_vector[i*class_count+j];
}
for(int j = 0; j < class_count; j++) { if(values_vector[i*class_count+j] != 0) { double k =
/* //输出各项统计值 for(int i = 0; i < values_count * class_count; i++) {
System.out.print(values_vector[i] + " "); }
System.out.println(); for(int i = 0; i < class_count; i++) {
return;
//如果待分类数据全都属于一个类 int class_index = deData[0].length - 1; String class_name = null; HashSet<String> classSet = new HashSet<String>(); for(int i = 0; i < deData.length; i++) {
if(flags[i] == true) {
if(classMap.containsKey(deData[i][classIndex])) { int count =
classMap.get(deData[i][classIndex]); classMap.put(deData[i][classIndex],
//计算每个未分类属性的 Gain值 int count = 0; //计算到第几个属性 for(String atrribute:atrributes) {
//该属性有多少个值,该属性有多少个分类 int values_count, class_count; //属性值对应的下标 int index = attrIndexMap.get(atrribute);
//Gain数组存放当前结点未分类属性的Gain值 double Gain[] = new double[atrributes.size()]; //每条数据中归类的下标,为每条数据的最后一个值 int class_index = deData[0].length - 1; //属性名,该结点在该属性上进行分类 String return_atrribute = null;
//存放属性的各个值和分类值 LinkedHashSet<String> values = new LinkedHashSet<String>(); LinkedHashSet<String> classes = new LinkedHashSet<String>();
for(int i = 0; i < deData.length; i++) { if(flags[i] == true) { values.add(deData[i][index]); classes.add(deData[i][class_index]); }
values_vector[i*class_count+j]; middle = middle - Math.log(k/attr_count)
/ Math.log(2.0) * k / attr_count; }
}
InfoA += middle * attr_count / class_total; }
//决策树的树结点类 class TreeNode {
String element; //该值为数据的属性名称 String value; //上一个分裂属性在此结点的值 LinkedHashSet<TreeNode> childs; //结点的子结点,以有顺序的链式哈希集存储
public TreeNode() { this.element = null; this.value = null; this.childs = null;
count+1); } else { classMap.put(deData[i][classInValue()); }
//选择多数类 String mostClass = null; int mostCount = 0; Iterator<String> it = classMap.keySet().iterator(); while(it.hasNext()) {
for(int i = 0; i < deData.length; i++) { if(flags[i] == true) { int j = 0; for(String v:values) { if(deData[i][index].equals(v)) { break; } else { j++; } }
String strClass = (String)it.next(); if(classMap.get(strClass) > mostCount) {
mostClass = strClass; mostCount = classMap.get(strClass); } } //对结点进行赋值,该结点为叶结点 node.setElement(mostClass); node.setChilds(null); System.out.println("yezhi:" + node.getElement() + ":" +
}
// Replaces this node's set of child nodes with the given set.
// The leaf-building code in this file calls setChilds(null) to mark a node as having no children.
// NOTE(review): the second '}' on the closing line presumably ends the enclosing TreeNode class - confirm against the unscrambled file.
public void setChilds(LinkedHashSet<TreeNode> childs) { this.childs = childs;
} }
//决策树类 class DecisionTree {
TreeNode root; //决策树的树根结点
public DecisionTree() { root = new TreeNode();
int k = 0; for(String c:classes) {
if(deData[i][class_index].equals(c)) { break;
} else { k++;
} }
values_vector[j*class_count+k]++; class_vector[k]++; } }
}
public String selectAtrribute(TreeNode node,String[][] deData, boolean flags[],
LinkedHashSet<String> atrributes, HashMap<String,Integer> attrIndexMap) {
}
/**
 * Creates a node that only records the incoming branch value.
 *
 * <p>The element name and the child set are left unset ({@code null}) and are
 * expected to be filled in later through the setters.
 *
 * @param value value of the previous split attribute that leads to this node
 */
public TreeNode(String value) {
    this.value = value;
    this.element = null;
    this.childs = null;
}
/**
 * Returns the element stored in this node.
 *
 * @return the attribute name held by this node (the leaf-building code stores
 *         the majority class label here instead), or {@code null} if unset
 */
public String getElement() {
    return element;
}
public void setElement(String e) { this.element = e;
class_total += class_vector[i]; } for(int i = 0; i < class_vector.length; i++){
if(class_vector[i] == 0) { continue;
} else { double d = Math.log(class_vector[i]/class_total)
}
/**
 * Returns the branch value recorded for this node.
 *
 * @return the value of the previous split attribute at this node, or
 *         {@code null} if none was set
 */
public String getValue() {
    return value;
}
/**
 * Stores the branch value for this node.
 *
 * @param v value of the previous split attribute at this node
 */
public void setValue(String v) {
    value = v;
}
public LinkedHashSet<TreeNode> getChilds() { return this.childs;
if(flags[i] == true) { class_name = deData[i][class_index]; classSet.add(class_name);
} } //则该结点为叶结点,设置有关值,然后返回 if(classSet.size() == 1) {
} values_count = values.size(); class_count = classes.size();
int values_vector[] = new int[values_count * class_count]; int class_vector[] = new int[class_count];
} i++; }
return return_atrribute; }
//node:在当前结点构造决策树 //deData:数据集 //flags:指示在当前结点构造决策树时哪些数据是需要的 //attributes:未分类的属性集 //attrIndexMap:属性与对应数据下标 public void buildDecisionTree(TreeNode node, String[][] deData, boolean flags[],
相关主题