千家信息网

word导出html实现在线预览

发表于:2024-11-16 作者:千家信息网编辑
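千家信息网最后更新 2024年11月16日。

需要的 Maven 依赖如下(经过编译验证,各依赖的版本必须相互配合一致):

<dependency><groupId>fr.opensagres.xdocreport</groupId><artifactId>org.apache.poi.xwpf.converter.core</artifactId><version>1.0.5</version></dependency>
<dependency><groupId>fr.opensagres.xdocreport</groupId><artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId><version>1.0.5</version></dependency>
<dependency><groupId>org.apache.commons</groupId><artifactId>commons-io</artifactId><version>1.3.2</version></dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>3.17</version></dependency>
<dependency><groupId>org.apache.commons</groupId><artifactId>commons-collections4</artifactId><version>4.0</version></dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>3.17</version></dependency>
<dependency><groupId>org.apache.xmlbeans</groupId><artifactId>xmlbeans</artifactId><version>2.6.0</version></dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.14</version></dependency>
<dependency><groupId>org.apache.commons</groupId><artifactId>commons-lang3</artifactId><version>3.4</version></dependency>

package com.zyhao.openec.excel.utils;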

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

public class WordToHtml {
public static void main(String[] args) {
try {
wordToHtml("E:\me\2.docx", "E:\me\", "123.html");
// wordToHtml("E:\me\2.doc", "E:\me\", "12.html");
} catch (Exception e) {
e.printStackTrace();
}
}

/**
 * Converts a Word document to an HTML file by delegating to {@code convert2Html}.
 *
 * @param wordPath    path of the source Word file
 * @param htmlPath    output location, passed through as {@code outPutFilePath}
 * @param newFilename name of the HTML file, passed through as {@code newFileName}
 * @throws Exception whatever {@code convert2Html} throws
 */
public static void wordToHtml(String wordPath, String htmlPath, String newFilename) throws Exception {
    convert2Html(wordPath, htmlPath, newFilename);
}

/**
 * Writes {@code content} to the file at {@code path}, encoded as UTF-8.
 *
 * <p>I/O failures (the file cannot be opened, the write fails, or the final
 * flush/close fails) are reported via {@code printStackTrace()} and are not
 * rethrown, so the caller is not interrupted by a failed write.
 *
 * @param content text to write
 * @param path    destination file path
 * @throws Exception declared for backward compatibility; I/O errors are handled internally
 */
public static void writeFile(String content, String path) throws Exception {
    // try-with-resources closes the writer and the underlying stream even when write() fails,
    // and routes a failed flush-on-close into the catch below instead of dropping it silently.
    try (FileOutputStream fos = new FileOutputStream(new File(path));
         BufferedWriter bw = new BufferedWriter(
                 // Explicit charset: the platform default varies between machines.
                 new OutputStreamWriter(fos, java.nio.charset.StandardCharsets.UTF_8))) {
        bw.write(content);
    } catch (IOException ioe) {
        // FileNotFoundException is a subclass of IOException, so one handler covers both.
        ioe.printStackTrace();
    }
}

/**
 * Converts a Word document to HTML.
 * Supports .doc and .docx.
 *
 * @param fileName       name of the Word file
 * @param outPutFilePath path where the HTML is stored
 * @param newFileName    name of the HTML file
 * @throws Exception
 */
public static void convert2Html(String fileName, String outPutFilePath, String newFileName)
        throws Exception {
    String substring = fileName.substring(fileName.lastIndexOf(".") + 1);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    /*
     * Word 2007 and Word 2003 documents are built differently:
     * the former is XML-based, the latter is a DOM tree.
     * The file extensions differ as well: .docx for the former, .doc for the latter.
     * Accordingly, Apache POI provides a different implementation class for each.
     */
    if ("docx".equals(substring)) {

// writeFile(new String(" 对不起,.docx格式的word文档,暂时不能生成预览".getBytes("utf-8")), outPutFilePath+newFileName);

        //step 1 : load DOCX into XWPFDocument        InputStream inputStream = new FileInputStream(new File(fileName));        XWPFDocument document = new XWPFDocument(inputStream);        //step 2 : prepare XHTML options        final String imageUrl = "";        XHTMLOptions options = XHTMLOptions.create();        options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl)));        options.setIgnoreStylesIfUnused(false);        options.setFragment(true);        options.URIResolver(new IURIResolver() {

// @Override 重写的方法,加上这个报错,你看看是啥问题
public String resolve(String uri) {
return imageUrl + uri;
}
});

        //step 3 : convert XWPFDocument to XHTML        XHTMLConverter.getInstance().convert(document, out, options);    }else{        HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(                    DocumentBuilderFactory.newInstance().newDocumentBuilder()                            .newDocument());         wordToHtmlConverter.setPicturesManager( new PicturesManager()             {                 public String savePicture( byte[] content,                         PictureType pictureType, String suggestedName,                         float widthInches, float heightInches )                 {                     return suggestedName;                 }             } );            wordToHtmlConverter.processDocument(wordDocument);            //save pictures            List pics=wordDocument.getPicturesTable().getAllPictures();          if(pics!=null&&!pics.isEmpty()){            for(int i=0;i

}

0