当前位置:文档之家› word域代码转换html丢失解决办法

word域代码转换html丢失解决办法

Word 转 html 时存在域代码丢失的问题。

Aspose ,jacob,poi都无法解决在使用jocob转换成html时域代码会被<!--[if supportFields]><!—[end if]-->包裹可以统一提取出来转换成latex ,latex转换成图片,解决word域代码丢失问题private void processFormula(List<Node> nodes) throws UnsupportedEncodingException{for(int i = nodes.size()-1;i>=0;i--){Node node =nodes.get(i);if(node instanceof Element){Element e = (Element)node;processFormula(e.childNodes());}else if(node instanceof Comment){String commentText = node.toString();if(commentText.contains("<!--[if supportFields]>")){Comment comment = (Comment)node;String latex = EqFormulaLatexUtil.getLatex(comment);if(StringUtils.isNotBlank(latex)){String latexTemp = newString(Base64.encodeBase64(latex.getBytes()));latex = URLEncoder.encode(latex, "utf-8");latex = latex.replace("+", "%20");String url = LATEXURL+latex;node.after("<img encode='encode'data-latex='"+latexTemp+"' src='"+url+"'/>");}}}}}/****/package .tlsys.rawpaper2x.utils;import java.io.File;import java.io.IOException;import .URLEncoder;import java.util.ArrayList;. import java.util.List;import org.jsoup.Jsoup;import ment;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import org.springframework.util.CollectionUtils;import mon.util.StringUtils;/*** @author feiwang8** 2016年8月24日下午3:45:05*/public class EqFormulaLatexUtil {/*** 左括号替代符号*/public static final String LEFTBRACKETRE = "く";/*** 右括号替代符号*/public static final String RIGHTBRACKETRE = "ぐ";/*** 左括号*/public static final String LEFTBRACKET = "(";/*** 右括号*/public static final String RIGHTBRACKET = ")";/*** 双正则*/public static final String REGEXPART_F="^((\\([\\s\\S]*\\)|[^\\(\\)])+),((\\([\\s\\S]*\\)|[^\\(\\)])+)$";/*** 单正则*/public static final String REGEXPART="((\\([\\s\\S]*\\)|[^\\(\\)])+)";/*** 逗号替代符号*/public static final String COMMARE = "ぃ";/*** 逗号*/public static final String COMMA = ",";/*** 更加comment 获取latex* @param comment html Comment* @return latex*/public static String getLatex(Comment comment){ return getLatex(comment.toString());}/*** 根据comment 获取latex* 
@param comment html Comment* @return latex*/public static String getLatex(String comment){ String html = comment.replace("<!--[if supportFields]>", "").replace("<![endif]-->", "").replace((char) 10 + "", "").replace((char) 13 + "", "");Document doc = Jsoup.parse(html);Elements sups = doc.select("sup");Elements subs = doc.select("sub");//纠正上标for(Element sup:sups){String text = sup.text();sup.tagName("span");sup.text("\\s("+text+", )");}//纠正下标for(Element sub:subs){String text = sub.text();sub.tagName("span");sub.text("\\s( ,"+text+")");}String eqtext = doc.body().text();eqtext = mergeSubSup(eqtext);// 无法被StringUtil 判断为空白的空白eqtext = eqtext.replace((char) 8203 + "", "");eqtext = eqtext.replace((char) 160 + "", " ").replace("\\ ", "");eqtext = eqtext.trim();// System.out.println(eqtext+"-------------->");if(eqtext.startsWith("eq")){eqtext = eqtext.replaceFirst("eq", "").trim();eqtext = eqtext.replace("\\"+LEFTBRACKET, "\\"+LEFTBRACKETRE).replace("\\"+RIGHTBRACKET,"\\"+RIGHTBRACKETRE).replace("\\"+COMMA, "\\"+COMMARE);String latex = parserElements(eqtext);return latex.replace("\\"+LEFTBRACKETRE, "\\"+LEFTBRACKET).replace("\\"+RIGHTBRACKETRE,"\\"+RIGHTBRACKET).replace("\\"+COMMARE, "\\"+COMMA);}return "";}private static String mergeSubSup(String latex){//这里需要合并上下标例如:eq \i\su(\s(i, )\s(=, )\s(1, ),\s( ,3),x)//--->eq \i\su(\s(i=1, ),\s( ,3),x)//TODOreturn latex;}/**** @param eqtext* @return*/private static String parserElements(String eqtext){String latex = "";if(eqtext.contains("\\")&&eqtext.contains(LEFTBRACKET)&&eqtext.contains(RIGHTBR ACKET)){String preText = eqtext.substring(0,eqtext.indexOf("\\"));int startIndex = eqtext.indexOf("\\");int endIndex= getNextLeftBra(startIndex,'(',eqtext);int nextBra = getNextBraIndex(endIndex+1,eqtext);String name = eqtext.substring(startIndex,endIndex);String text = eqtext.substring(endIndex+1,nextBra);String suffText= eqtext.substring(nextBra+1,eqtext.length());latex = parserElements(preText) + 
parserToLatex(name.trim(),text)+parserElements(suffText);}else{latex = eqtext;}return latex;}private static int getNextLeftBra(int start,char sym,String eqtext){ for(int i =start;i<eqtext.length();i++ ){char leftbra = eqtext.charAt(i);if(leftbra == sym){return i;}}return eqtext.length();}/*** 获取配对括号的位置* @param text 文本* @return 位置*/private static int getNextBraIndex(int start,String text){int leftbra = 0;int rightbra = 0;for(int i = start-1 ; i<text.length();i++){char c = text.charAt(i);if(c == ')'){leftbra ++;}if(c =='('){rightbra++;}if(rightbra!=0&&leftbra==rightbra&&i>=start){return i;}}return 0;}private static String parserToLatex(String name,String text){ String latex = "";name = name.toLowerCase();//分式if(name.equals("\\f")){latex +=getFLatex(name, text);//根式}else if(name.equals("\\r")){latex +=getRLatex(name, text);//上下标}else if(name.startsWith("\\s")){latex+=getSLatex(name, text);//\a矩阵\al左对齐;\ac居中;\ar右对齐;\con元素排成n 列;\vsn行间增加n 磅;\hsn列间增加n磅}else if(name.startsWith("\\a")){latex += getALatex(name, text);}else if(name.startsWith("\\b")){latex+=getBLatex(name, text);//平移}else if(name.startsWith("\\d")){latex+=getDLatex(name, text);//积分}else if(name.startsWith("\\i")){latex+=getILatex(name, text);//列表}else if(name.startsWith("\\l")){latex+=getLLatex(name, text);//重叠}else if(name.startsWith("\\o")){latex+=getOLatex(name, text);//框}else if(name.startsWith("\\x")){latex+=getXLatex(name, text);//空白}else if(StringUtils.isBlank(name)){latex+=parserElements(text);}else{System.err.println("error parserToLatex");}return latex;}/*** 根式多次根式* @param name name* @param text text* @return latex*/private static String getRLatex(String name,String text){String latex = "";List<String> args = getArgs(text);if(args.size()==2){latex +=" \\sqrt[";latex +=parserElements(args.get(0));latex +="]{";latex +=parserElements(args.get(1));latex +="}";}else if(args.size()==1){latex +=" \\sqrt{";latex +=parserElements(args.get(0));latex +="}";}else{System.err.println("error getRLatex");}return 
latex;}/*** 分式* @param name name* @param text text* @return latex*/private static String getFLatex(String name,String text){String latex = "";List<String> args = getArgs(text);if(args.size()==2){latex +=" \\frac{";latex +=parserElements(args.get(0));latex +="}{";latex +=parserElements(args.get(1));latex +="}";}else{System.err.println("error getFLatex");}return latex;}/*** //\a矩阵\al左对齐;\ac居中;\ar右对齐;\con元素排成n 列;\vsn行间增加n 磅;\hsn列间增加n磅* @param name name* @param text text* @return latex*/private static String getALatex(String name,String text){String latex = "";List<String> args =getArgs(text);String n = name.replaceAll("^[\\S\\s]*\\\\co([0-9]*)[\\S\\s]*$", "$1");//列数int col = 1;if(n.matches("[0-9]*")) col = Integer.valueOf(n);for(int i = 0 ;i<args.size();i++){if(i!=0&&i%col==0){latex += " \\\\ ";}latex += parserElements(args.get(i));}return latex;}/*** 左括号使用字符\lc\;右括号使用字符\rc\;左右括号都使用字符\bc\* @param name name* @param text text* @return latex*/private static String getBLatex(String name,String text){String latex = "";String lc="";String rc="";String bc="";List<String> args =getArgs(text);name = name.replace(LEFTBRACKETRE,LEFTBRACKET).replace(RIGHTBRACKETRE, RIGHTBRACKET);if(name.contains("\\lc\\")||name.contains("\\rc\\")){lc = name.replaceAll("^[\\s\\S]*\\\\lc\\\\([\\s\\S])[\\s\\S]*$", "$1");rc = name.replaceAll("^[\\s\\S]*\\\\rc\\\\([\\s\\S])[\\s\\S]*$", "$1");lc = StringUtils.isBlank(lc)||lc.equals(name)?".":lc;rc = StringUtils.isBlank(rc)||rc.equals(name)?".":rc;//方程组处理if(lc.equals("{")&&rc.equals(".")){latex+="\\begin{cases}";for(int i = 0 ;i<args.size();i++){if(i!=0)latex += " \\\\ ";latex += parserElements(args.get(i));}latex+="\\end{cases}";//矩阵处理}else{lc = lc.replace("{", "\\{");rc = rc.replace("}", "\\}");latex+="\\left"+lc+"\\begin{matrix}";for(int i = 0 ;i<args.size();i++){if(i!=0)latex += " \\\\ ";latex += parserElements(args.get(i));}latex+="\\end{matrix}\\right"+rc;}}else if(name.contains("\\bc\\")){bc = 
name.replaceAll("^[\\s\\S]*\\\\bc\\\\([\\s\\S])[\\s\\S]*$", "$1");latex+="\\left"+bc+"\\begin{matrix}";for(int i = 0 ;i<args.size();i++){if(i!=0)latex += " \\\\ ";latex += parserElements(args.get(i));}latex+="\\end{matrix}\\right"+bc;}else{System.err.println("error getBLatex");}return latex;}/*** 平移\fon右边n磅;\ban左边n磅;\li为下一个字符前的空白添加下划线* @param name name* @param text text* @return latex*/private static String getDLatex(String name,String text){//TODOreturn parserElements(text);}/*** 积分* {EQ \i (a,b,3x+1 dx)}* \su 生成求和公式\pr 生成求积公式\in 积分限不在符号的上下,而在符号之右* \fc\c将符号c设置为固定高度的字符\vc\c 符号高度与第三个元素高度一致* @param name name* @param text text* @return latex*/private static String getILatex(String name,String text){String latex = "";String sym = "";List<String> args = getArgs(text);if(args.size()==3){//默认上下标在符号上下//∑if(name.contains("\\su")){sym = "\\sum";//上下标在右侧if(name.contains("\\in"))sym+="\\nolimits";//Π}else if(name.contains("\\pr")){sym = "\\prod";if(name.contains("\\in"))sym+="\\nolimits";//\int}else if(name.contains("\\fc\\")){sym = name.replace("^[\\s\\S]*\\fc\\([^\\ \\(]*)[\\s\\S]*$", "$1");}else{sym = "\\int";}latex+=sym+"_{"+parserElements(args.get(0))+"}^{"+parserElements(args.get(1))+"} "+parserElements(args.get(2));}else{System.err.println("error getILatex");}return latex;}/*** \l(): 使用任意个数的元素组成列表。

相关主题