// Export Word documents to HTML for online preview.
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
public class WordToHtml {
public static void main(String[] args) {
try {
wordToHtml("E:\me\2.docx", "E:\me\", "123.html");
// wordToHtml("E:\me\2.doc", "E:\me\", "12.html");
} catch (Exception e) {
e.printStackTrace();
}
}
/**
 * Converts a Word document to an HTML file.
 *
 * <p>Thin wrapper that forwards its three arguments, unchanged and in the
 * same order, to {@code convert2Html}.
 *
 * @param wordPath    path of the Word file to convert
 * @param htmlPath    path under which the HTML output is stored
 * @param newFilename name of the HTML file to produce
 * @throws Exception if the conversion fails
 */
public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws Exception {
    convert2Html(wordPath, htmlPath, newFilename);
}

/**
 * Writes {@code content} to the file at {@code path}, encoded as UTF-8.
 * An existing file at that path is overwritten.
 *
 * <p>I/O failures are propagated to the caller instead of being printed and
 * ignored, so a failed write can no longer be mistaken for success.
 *
 * @param content text to write
 * @param path    path of the file to create or overwrite
 * @throws Exception if the file cannot be opened or written
 */
public static void writeFile(String content, String path) throws Exception {
    FileOutputStream fos = new FileOutputStream(new File(path));
    try {
        // Name the charset explicitly: the platform default differs between
        // machines and can corrupt non-ASCII text in the generated HTML.
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
        bw.write(content);
        // Closing the writer flushes the buffered characters through to fos.
        bw.close();
    } finally {
        // Safety net for the failure path; closing an already-closed
        // FileOutputStream has no effect.
        fos.close();
    }
}

/**
 * Converts a Word document to HTML.
 * Supports .doc and .docx.
 * @param fileName Word file name
 * @param outPutFilePath storage path for the HTML
 * @param newFileName HTML file name
 * @throws Exception
 */
public static void convert2Html(String fileName, String outPutFilePath,String newFileName) throws Exception {
    String substring = fileName.substring(fileName.lastIndexOf(".")+1);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    /**
     * Word 2007 and Word 2003 files are built differently
     * (per the original author's note: the former is XML-based, the latter a DOM tree).
     * The file extensions differ too: .docx for the former, .doc for the latter.
     * Accordingly, Apache POI provides different implementation classes.
     */
    if("docx".equals(substring)){
// writeFile(new String("
对不起,.docx格式的word文档,暂时不能生成预览".getBytes("utf-8")), outPutFilePath+newFileName); //step 1 : load DOCX into XWPFDocument InputStream inputStream = new FileInputStream(new File(fileName)); XWPFDocument document = new XWPFDocument(inputStream); //step 2 : prepare XHTML options final String imageUrl = ""; XHTMLOptions options = XHTMLOptions.create(); options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl))); options.setIgnoreStylesIfUnused(false); options.setFragment(true); options.URIResolver(new IURIResolver() {
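// @Override is omitted: the original author reports a compile error when it is added (possibly a pre-Java-6 source level) - TODO confirm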
public String resolve(String uri) {
return imageUrl + uri;
}
});
//step 3 : convert XWPFDocument to XHTML XHTMLConverter.getInstance().convert(document, out, options); }else{ HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); wordToHtmlConverter.setPicturesManager( new PicturesManager() { public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches ) { return suggestedName; } } ); wordToHtmlConverter.processDocument(wordDocument); //save pictures List pics=wordDocument.getPicturesTable().getAllPictures(); if(pics!=null&&!pics.isEmpty()){ for(int i=0;i
}