当前位置:文档之家› 汉字字频统计

汉字字频统计

//计算频率
for (int j = 0; j < chlist.size(); j++) {
// System.out.println("list.size:" + chlist.size());
//判断该字符是否在字表里
int i = 0;
for (i = 0; i < chtable.size(); i++) {
//在字表里,统计重复次数并跳出循环
char c = ' ';
Object ob = chtable.get(i);
if ((tempint >= '\u4e00' && tempint <= '\u9fa5')
|| (tempint >= '\uf900' && tempint <= '\ufa2d')) {
char tempchar = (char) tempint;
// System.out.println(tempchar);
e.printStackTrace();
}
return chlist;
}
/**
*该函数用于从文件中读取中文字符,并返回它出现的次数
*
* @param filename
* @return
*/
public static ArrayList readFromFile(String filename, ArrayList chtable) {
//排序
ArrayList chlist = sort(chtable, numlist);
//计算汉字总数
for (int i = 0; i < numlist.size(); i++) {
sum = sum + (Integer) numlist.get(i);
}
System.out.println("--------------------显示结果-------------------");
sheet.addCell(lsh);
jxl.write.Number nsh = new jxl.write.Number(1, 101, sh);
sheet.addCell(nsh);
//写入字频总和
for(int i=0;i<freal.size();i++)
{
if((Float)freal.get(i) != 0f)
import java.awt.List;
import java.io.*;
import jxl.*;
import jxl.write.*;
import java.text.DecimalFormat;
import java.util.ArrayList;
public class statistics {
freal.add(fre6);
freal.add(fre7);
ArrayList nal = new ArrayList();
nal.add(1);
nal.add(20);
nal.add(100);
nal.add(600);
nal.add(2000);
nal.add(3000);
nal.add(6000);
File file = new File(filename);
Reader reader = null;
ArrayList numlist = new ArrayList();
//初始化字符出现的次数集合
for (int i = 0; i < chtable.size(); i++) {
numlist.add(0);
float fre7 = freqSum(freqlist, 6000);
ArrayList freal = new ArrayList();
freal.add(fre1);
freal.add(fre2);
freal.add(fre3);
freal.add(fre4);
freal.add(fre5);
int listi = (Integer) numlist.get(i);
int listj = (Integer) numlist.get(j);
if (listi < listj) {
numlist.set(i, listj);
numlist.set(j, listi);
char chi = (Character) chlist.get(i);
sheet.addCell(label1);
Label label2 = new Label(1, 0, "频率");
sheet.addCell(label2);
for(int i=0;i<100;i++)
{
//中文字符
Label label = new Label(0, i+1, chlist.get(i).toString());
char tempchar = (char) tempint;
// System.out.println(tempchar);
// System.out.println("list.size:" + chlist.size());
//判断该字符是否出现过
int i = 0;
for (i = 0; i < chlist.size(); i++) {
sheet.addCell(label);
//出现的频率
jxl.write.Number number = new jxl.write.Number(1, i+1, (Float)freqlist.get(i));
sheet.addCell(number);
}
//写入熵值
Label lsh = new Label(0, 101, "熵值");
ArrayList chlist = new ArrayList();
File file = new File(filename);
Reader reader = null;
try {
//一次读一个字符
reader = new InputStreamReader(new FileInputStream(file));
*/
public static ArrayList sort(ArrayList chtable, ArrayList numlist) {
ArrayList chlist = chtable;
for (int i = 0; i < numlist.size(); i++) {
for (int j = i + 1; j < numlist.size(); j++) {
sheet.addCell(nfreq);
}
}
//写入数据
book.write();
//并关闭文件
book.close();
} catch (Exception e) {
System.out.println(e);
}
}
public static ArrayList readFromTable(String filename) {
//生成工作表,参数0表示这是第一页
WritableSheet sheet = book.createSheet(sum+"字", 0);
/*
*生成一个保存数字的单元格必须使用Number的完整包路径,否则有语法歧义
*/
//表头
Label label1 = new Label(0, 0, "字符");
char chj = (Character) chlist.get(j);
chlist.set(i, chj);
chlist.set(j, chi);
}
}
}
return chlist;
}
/**
*该函数用来计算各个汉字出现的频率,并且显示出指定个数的结果
*
* @param chlist
* @param numlist
// System.out.println("重复!");
break;
}
}
//字符从未出现过
if (i == chlist.size()) {
// System.out.println("新字符!");
chlist.add(tempchar);
}
}
}
reader.close();
} catch (Exception e) {
public static void main(String[] args) {
//读字表
ArrayList chtable = readFromTable("CHTable.txt");
System.out.println("字表大小为:" + chtable.size());
//读文件
ArrayList numlist = readFromFile("10.txt", chtable);
//返回指定个数的汉字频率统计结果
ArrayList freqlist = frequency(chlist, numlist, sum, 100);
相关主题