当前位置:文档之家› POI读取word转换html

POI读取word转换html

86.
87.//当前字符
88.char currentChar = cr.text().charAt(0);
89.
90.//判断是否为回车符
91.if(currentChar == ENTER_ASCII)
92.tempString +="<br/>";
93.//判断是否为空格符
94.else if(currentChar == SPACE_ASCII)
44.
45./**
46.*读取每个文字样式
47.*
48.* @param fileName
49.* @throws Exception
50.*/
51.public void getWordAndStyle(String fileName) throws Exception {
52.
53.FileInputStream in = new FileInputStream(new File(fileName));
101.
102.String fontStyle ="<span style='font-family:"+ cr.getFontName() +";font-size:"+ cr.getFontSize() /2+"pt;";
103.
104.if(cr.isBold())
105.fontStyle +="font-weight:bold;";
11.import java.io.IOException;
12.import java.io.OutputStream;
13.import java.io.OutputStreamWriter;
14.
15.import org.apache.poi.hwpf.HWPFDocument;
16.import org.apache.poi.hwpf.model.PicturesTable;
54.
55.HWPFDocument doc = new HWPFDocument(in);
56.
57.//取得文档中字符的总数
58.int length = doc.characterLength();
59.
60.//创建图片容器
61.PicturesTable pTable = doc.getPicturesTable();
17.import org.apache.poi.hwpf.usermodel.CharacterRun;
18.import org.apache.poi.hwpf.usermodel.Picture;
19.import org.apache.poi.hwpf.usermodel.Range;
20.
21./**
74.
75.if(pTable.hasPicture(cr)) {
76.
77.//读写图片
78.this.readPicture(pTable, cr);
79.
80.}else{
81.
82.Range range2 = new Range(i +1, i +2, doc);
83.
84.//第二个字符
85.CharacterRun cr2 = range2.getCharacterRun(0);
POI读取word转换html
文章分类:Java编程
关于 Apache POI 读取 Word 文档的资料比较少,所以只能自己慢慢摸索。这篇文章属于比较基础的入门内容,主要针对读取 Word 中的图片以及文字的各种样式。如有不足之处,请各位多多指教!
Java代码
1./**
2.*
3.*/
4.package com.util;
95.tempString +="&nbsp;";
96.//判断是否为水平制表符
97.else if(currentChar == TABULATION_ASCII)
98.tempString +=" &nbsp;&nbsp;&nbsp;";
99.//比较前后2个字符是否具有相同的格式
100.boolean flag = compareCharStyle(cr, cr2);
106.if(cr.isItalic())
107.fontStyle +="font-style:italic;";
108.
109.if(flag && i != length -2)
110.tempString += currentChar;
35.*/
36.private static final short SPACE_ASCII =32;
37.
38./**
39.*水平制表符ASCII码
40.*/
41.private static final short TABULATION_ASCII =9;
42.
43.private String htmlText ="";
5.
6.import java.io.BufferedWriter;
7.import java.io.File;
8.import java.io.FileInputStream;
9.import java.io.FileNotFoundException;
10.import java.io.FileOutputStream;
22.*
23.* @author 张廷 下午10:36:40
24.*
25.*/
26.public class WordToHtml {
27.
28./**
29.*回车符ASCII码
30.*/
31.private static final short ENTER_ASCII =13;
32.
33./**
34.*空格符ASCII码
68.
69.for(int i =0; i < length -1; i++) {
70.//整篇文章的字符通过一个个字符的来判断,range为得到文档的范围
71.Range range = new Range(i, i +1, doc);
72.
73.CharacterRun cr = range.getCharacterRun(0);
62.
63.htmlText ="<html><head><title>"+ doc.getSummaryInformation().getTitle() +"</title></head><body>";
64.
65.//创建临时字符串,好加以判断一串字符是否存在相同格式
66.
67.String tempString ="";
相关主题