千家信息网

Java怎么实现Word/Pdf/TXT转html

发表于:2025-01-17 作者:千家信息网编辑
千家信息网最后更新 2025年01月17日,本篇内容介绍了"Java怎么实现Word/Pdf/TXT转html"的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!
千家信息网最后更新 2025年01月17日Java怎么实现Word/Pdf/TXT转html

本篇内容介绍了"Java怎么实现Word/Pdf/TXT转html"的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!

一:Java实现将word转换为html

1:引入依赖

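<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>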

2:代码demo

  1 package com.svse.controller;  2   3 import javax.xml.parsers.DocumentBuilderFactory;  4 import javax.xml.parsers.ParserConfigurationException;  5 import javax.xml.transform.OutputKeys;  6 import javax.xml.transform.Transformer;  7 import javax.xml.transform.TransformerException;  8 import javax.xml.transform.TransformerFactory;  9 import javax.xml.transform.dom.DOMSource; 10 import javax.xml.transform.stream.StreamResult; 11  12 import org.apache.poi.hwpf.HWPFDocument; 13 import org.apache.poi.hwpf.converter.PicturesManager; 14 import org.apache.poi.hwpf.converter.WordToHtmlConverter; 15 import org.apache.poi.hwpf.usermodel.PictureType; 16 import org.apache.poi.xwpf.converter.core.BasicURIResolver; 17 import org.apache.poi.xwpf.converter.core.FileImageExtractor; 18 import org.apache.poi.xwpf.converter.core.FileURIResolver; 19 import org.apache.poi.xwpf.converter.core.IURIResolver; 20 import org.apache.poi.xwpf.converter.core.IXWPFConverter; 21 import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; 22 import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; 23 import org.apache.poi.xwpf.usermodel.XWPFDocument; 24 /** 25  * word 转换成html 26  */ 27 public class TestWordToHtml { 28  29     public static  final String STORAGEPATH="C://works//files//"; 30     public static  final String IP="192.168.30.222"; 31     public static  final String PORT="8010"; 32     public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException { 33         TestWordToHtml wt=new TestWordToHtml(); 34         //wt.Word2003ToHtml("甲骨文考证.doc"); 35         wt.Word2007ToHtml("甲骨文考证.docx"); 36  37     } 38        39      /** 40      * 2003版本word转换成html 41      * @throws IOException 42      * @throws TransformerException 43      * @throws ParserConfigurationException 44      */ 45     public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException { 46         47         final String imagepath = 
STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片  图片会保存在此路径 48         final String strRanString=getRandomNum(); 49         String filepath =STORAGEPATH; 50         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html"; 51         final String file = filepath + fileName; 52         InputStream input = new FileInputStream(new File(file)); 53         HWPFDocument wordDocument = new HWPFDocument(input); 54         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); 55         //设置图片存放的位置 56         wordToHtmlConverter.setPicturesManager(new PicturesManager() { 57             public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { 58                 File imgPath = new File(imagepath); 59                 if(!imgPath.exists()){//图片目录不存在则创建 60                     imgPath.mkdirs(); 61                 } 62                  63                 File file = new File(imagepath +strRanString+suggestedName); 64                 try { 65                     OutputStream os = new FileOutputStream(file); 66                     os.write(content); 67                     os.close(); 68                 } catch (FileNotFoundException e) { 69                     e.printStackTrace(); 70                 } catch (IOException e) { 71                     e.printStackTrace(); 72                 } 73                  74                 return  "http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName; 75                // return imagepath +strRanString+suggestedName; 76             } 77         }); 78          79         //解析word文档 80         wordToHtmlConverter.processDocument(wordDocument); 81         Document htmlDocument = wordToHtmlConverter.getDocument(); 82          83         File htmlFile = new File(filepath +strRanString+htmlName); 84         OutputStream outStream = new 
FileOutputStream(htmlFile); 85          86  87         DOMSource domSource = new DOMSource(htmlDocument); 88         StreamResult streamResult = new StreamResult(outStream); 89  90         TransformerFactory factory = TransformerFactory.newInstance(); 91         Transformer serializer = factory.newTransformer(); 92         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); 93         serializer.setOutputProperty(OutputKeys.INDENT, "yes"); 94         serializer.setOutputProperty(OutputKeys.METHOD, "html"); 95          96         serializer.transform(domSource, streamResult); 97         outStream.close(); 98          99         System.out.println("生成html文件路径:"+ "http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);100     }101 102     /**103      * 2007版本word转换成html104      * @throws IOException105      */106     public void Word2007ToHtml(String fileName) throws IOException {107         108        final String strRanString=getRandomNum();109         110         String filepath = STORAGEPATH+strRanString;111         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";112         File f = new File(STORAGEPATH+fileName);  113         if (!f.exists()) {  114             System.out.println("Sorry File does not Exists!");  115         } else {  116             if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {  117                 try {118                     // 1) 加载word文档生成 XWPFDocument对象  119                     InputStream in = new FileInputStream(f);  120                     XWPFDocument document = new XWPFDocument(in);  121       122                     // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)  123                     File imageFolderFile = new File(filepath);  124                     XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));  125                     options.setExtractor(new FileImageExtractor(imageFolderFile));  126                     
options.URIResolver(new IURIResolver() {127                         public String resolve(String uri) {128                             //http://192.168.30.222:8010//uploadFile/....129                             return "http://"+IP+":"+PORT+"//uploadFile/"+strRanString +"/"+ uri;130                         }131                     });132                     133                     options.setIgnoreStylesIfUnused(false);  134                     options.setFragment(true);  135                       136                     // 3) 将 XWPFDocument转换成XHTML  137                     OutputStream out = new FileOutputStream(new File(filepath + htmlName));  138                     IXWPFConverter converter = XHTMLConverter.getInstance();139                     converter.convert(document,out, options);140                     //XHTMLConverter.getInstance().convert(document, out, options);  141                     System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);142                 } catch (Exception e) {143                     e.printStackTrace();144                 }145             146             } else {  147                 System.out.println("Enter only MS Office 2007+ files");  148             }  149         }  150     }  151 152      /**153      *功能说明:生成时间戳154      *创建人:zsq155      *创建时间:2019年12月7日 下午2:37:09156      *157      */158      public static String getRandomNum(){159          Date dt = new Date();160          SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");  161          String str=sdf.format(dt);162          return str;163      }164      165    }

二:Java实现将Pdf转换为html

1: 引入依赖

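<dependency>
    <groupId>net.sf.cssbox</groupId>
    <artifactId>pdf2dom</artifactId>
    <version>1.7</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.12</version>
</dependency>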

2:代码Demo

 1 public class PdfToHtml { 2  3   /* 4     pdf转换html 5      */ 6     public void pdfToHtmlTest(String inPdfPath,String outputHtmlPath)  { 7        // String outputPath = "C:\\works\\files\\ZSQ保密知识测试题库.html"; 8     9        //try() 写在()里面会自动关闭流10         try{11             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputHtmlPath)),"utf-8"));12             //加载PDF文档13             //PDDocument document = PDDocument.load(bytes);14             PDDocument document = PDDocument.load(new File(inPdfPath));15             PDFDomTree pdfDomTree = new PDFDomTree();16             pdfDomTree.writeText(document,out);17         } catch (Exception e) {18             e.printStackTrace();19         }20     }21 22     public static void main(String[] args) throws IOException {23         PdfToHtml ph=new PdfToHtml();24         String pdfPath="C:\\works\\files\\武研中心行政考勤制度.pdf";25         String outputPath="C:\\works\\files\\武研中心行政考勤制度.html";26         ph.pdfToHtmlTest(pdfPath,outputPath);27   }28 29 }

三:Java实现将TXT转换为html

 1  /* 2      * txt文档转html 3        filePath:txt原文件路径 4        htmlPosition:转化后生成的html路径 5     */ 6     public static void txtToHtml(String filePath, String htmlPosition) { 7         try { 8             //String encoding = "GBK"; 9             File file = new File(filePath);10             if (file.isFile() && file.exists()) { // 判断文件是否存在11                 InputStreamReader read = new InputStreamReader(new FileInputStream(file), "GBK");12                 // 考虑到编码格式13                 BufferedReader bufferedReader = new BufferedReader(read);14                 // 写文件15                 FileOutputStream fos = new FileOutputStream(new File(htmlPosition));16                 OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");17                 BufferedWriter bw = new BufferedWriter(osw);18                 String lineTxt = null;19                 while ((lineTxt = bufferedReader.readLine()) != null) {20                     bw.write("   "+lineTxt + "
");21 }22 bw.close();23 osw.close();24 fos.close();25 read.close();26 } else {27 System.out.println("找不到指定的文件");28 }29 } catch (Exception e) {30 System.out.println("读取文件内容出错");31 e.printStackTrace();32 }33 }

"Java怎么实现Word/Pdf/TXT转html"的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注网站,小编将为大家输出更多高质量的实用文章!

0