自动秒收录

word转化为pdf怎么转换 (word转html)


文章编号:487 / 更新时间:2023-11-08 11:32:59 / 浏览:
Java


  1. 添加jar

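    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-compress</artifactId>
        <version>1.19</version>
    </dependency>

    <dependency>
        <groupId>org.apache.xmlbeans</groupId>
        <artifactId>xmlbeans</artifactId>
        <version>3.1.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>4.1.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>4.1.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>4.1.2</version>
    </dependency>

    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
        <version>2.0.3</version>
    </dependency>

    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>fr.opensagres.poi.xwpf.converter.core</artifactId>
        <version>2.0.3</version>
    </dependency>

    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>

    <dependency>
        <groupId>cn.hutool</groupId>
        <artifactId>hutool-all</artifactId>
        <version>5.0.2</version>
    </dependency>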

2. 代码生成

package com.gccx.core.util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import java.util.HashMap;
import java.util.Map;
public class JsoupUtils {
    private static Map getHtmlCss(String html) {
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        String[] styles = doc.head().select("style").html().split("\r\n");
        Map css = new HashMap<>();
        for (String style : styles) {
            String[] kv = style.split("\\{|\\}");
            css.put(kv[0], kv[1]);
        return css;
    public static String changeHtmlCssLineStyle(String html) {
        Map css = getHtmlCss(html);
        org.jsoup.nodes.Document doc = Jsoup.parse(html);
        Element body = doc.body();
        for (String key : css.keySet()) {
            body.select(key).attr("style", css.get(key)).outerHtml();
        return body.html();
package com.gccx.core.util;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.URLUtil;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class WordToHtmlUtil {
     * logger
    private static final Logger logger = LoggerFactory.getLogger(WordToHtmlUtil.class);
     * 解析docx成html
     * @param file
     * @return
     * @throws IOException
    public static String Word2007ToHtml(MultipartFile file) throws IOException {
        if (file.isEmpty() || file.getSize() <= 0) {
            logger.error("Sorry File does not Exists!");
            return null;
        } else {
            if (file.getOriginalFilename().endsWith(".docx") || file.getOriginalFilename().endsWith(".DOCX")) {
                // 1) 加载word文档生成 XWPFDocument对象
                InputStream in = file.getInputStream();
                XWPFDocument document = new XWPFDocument(in);
                // 也可以使用字符数组流获取解析的内容
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                XHTMLConverter.getInstance().convert(document, baos, null);
                String content = baos.toString();
                baos.close();
                return content;
            } else {
                logger.error("Enter only MS Office 2007+ files");
                return null;
     * 解析doc文章成html 不存图片
     * @param file
     * @return
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
    public static String Word2003ToHtml(MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (file.isEmpty() || file.getSize() <= 0) {
            logger.error("Sorry File does not Exists!");
            return null;
        } else {
            if (file.getOriginalFilename().endsWith(".doc") || file.getOriginalFilename().endsWith(".DOC")) {
                InputStream input = file.getInputStream();
                HWPFDocument wordDocument = new HWPFDocument(input);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
                // 解析word文档
                wordToHtmlConverter.processDocument(wordDocument);
                Document htmlDocument = wordToHtmlConverter.getDocument();
                // 也可以使用字符数组流获取解析的内容
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                DOMSource domSource = new DOMSource(htmlDocument);
                StreamResult streamResult = new StreamResult(baos);
                TransformerFactory factory = TransformerFactory.newInstance();
                Transformer serializer = factory.newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(domSource, streamResult);
                // 也可以使用字符数组流获取解析的内容
                String content = new String(baos.toByteArray());
                baos.close();
                return content;
            } else {
                logger.error("Enter only MS Office 2003 files");
                return null;
     * 解析doc成html 并保存图片文件到本地
     * @param file
     * @return
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
    public static String Word2003ToHtmlAndSaveImage(String docsTempImages, MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (file.isEmpty() || file.getSize() <= 0) {
            logger.error("Sorry File does not Exists!");
            return null;
        } else {
            if (file.getOriginalFilename().endsWith(".doc") || file.getOriginalFilename().endsWith(".DOC")) {
                HWPFDocument wordDocument = new HWPFDocument(file.getInputStream());
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
                //设置图片存放的位置
                wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                    public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
                        File imgPath = new File(docsTempImages);
                        if (!imgPath.exists()) {//图片目录不存在则创建
                            imgPath.mkdirs();
                        File file = new File(docsTempImages + suggestedName);
                        try {
                            OutputStream os = new FileOutputStream(file);
                            os.write(content);
                            os.close();
                        } catch (FileNotFoundException e) {
                            e.printStackTrace();
                        } catch (IOException e) {
                            e.printStackTrace();
                        return docsTempImages + suggestedName;
                //解析word文档
                wordToHtmlConverter.processDocument(wordDocument);
                Document document = wordToHtmlConverter.getDocument();
                // 也可以使用字符数组流获取解析的内容
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                DOMSource domSource = new DOMSource(document);
                StreamResult streamResult = new StreamResult(baos);
                TransformerFactory factory = TransformerFactory.newInstance();
                Transformer serializer = factory.newTransformer();
//                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.ENCODING, "gb2312");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(domSource, streamResult);
                baos.close();
                // 也可以使用字符数组流获取解析的内容
                return new String(baos.toByteArray());
            } else {
                logger.error("Enter only MS Office 2003 files");
                return null;
     * 获取word中的图片名称和本地url(doc或docx)
     * 返回map<图片名称, 存储的图片url地址>
     * @param uploadPath     图片存放路径
     * @param docsTempImages 本地临时图片存放地址(这个工具类Word2003ToHtmlAndSaveImage的方法存到了系统临时文件夹里)
     * @param file
     * @return
     * @throws IOException
    public static Map getImageMaps(String uploadPath, String docsTempImages, MultipartFile file) throws IOException {
        //返回map
        HashMap map = new HashMap<>();
        if (file.getOriginalFilename().endsWith(".docx") || file.getOriginalFilename().endsWith(".DOCX")) {
            //获取存在word里的图片文件
            InputStream in = file.getInputStream();
            XWPFDocument document = new XWPFDocument(in);
            List paragraphs = document.getParagraphs();
            if (CollUtil.isNotEmpty(paragraphs)) {
                paragraphs.forEach(p -> {
                    List runs = p.getRuns();
                    if (CollUtil.isNotEmpty(runs)) {
                        runs.forEach(r -> {
                            List pictures = r.getEmbeddedPictures();
                            if (CollUtil.isNotEmpty(pictures)) {
                                pictures.forEach(c -> {
                                    //这里找到word中的图片的名字和数据
                                    XWPFPictureData pictureData = c.getPictureData();
                                    String fileName = pictureData.getFileName();
                                    byte[]>                                    //保存到本地获取url
                                    String localUrl = saveImageToLocalWithByte(fileName,>                                    map.put(pictureData.getFileName(), localUrl);
        } else if (file.getOriginalFilename().endsWith(".doc") || file.getOriginalFilename().endsWith(".DOC")) {
            try {
                File dir = new File(docsTempImages);
                //如果目录不为空遍历存储到项目中
                if (!FileUtil.isEmpty(dir)) {
                    Arrays.asList(FileUtil.ls(docsTempImages)).forEach(f -> {
                        String name = f.getName();
                        BufferedInputStream inputStream = FileUtil.getInputStream(f);
                        String localUrl = saveImageToLocalWithStream(name, inputStream, uploadPath);
                        map.put(name, localUrl);
            } finally {
                //删除临时文件夹
                FileUtil.del(docsTempImages);
        return map;
     * 保存图片到项目中,返回路径(byte[])
     * @param name       图片名字
     * @param>     * @param uploadPath 存储路径
     * @return
    private static String saveImageToLocalWithByte(String name, byte[]>        FileUtil.writeBytes(data, uploadPath + name);
        //自己项目的ip和端口,html图片地址要用,或者根据自己需求指定存到什么地方,自定义
        String ipAndPort = "";
        return URLUtil.normalize(ipAndPort + name);
     * 保存图片到项目中,返回路径(inputStream)
     * @param name        图片名字
     * @param inputStream 输入流
     * @param uploadPath  存储路径
     * @return
    private static String saveImageToLocalWithStream(String name, InputStream inputStream, String uploadPath) {
        savePic(uploadPath, inputStream, name);
        //自己项目的ip和端口,html图片地址要用,或者根据自己需求指定存到什么地方,自定义
        String ipAndPort = "";
        return URLUtil.normalize(ipAndPort + name);
     * 保存图片
     * @param path        存储路径
     * @param inputStream 输入流
     * @param fileName    文件名称
    private static void savePic(String path, InputStream inputStream, String fileName) {
        OutputStream os = null;
        try {
            // 2、保存到临时文件
            // 1K的数据缓冲
            byte[] bs = new byte[1024];
            // 读取到的数据长度
            int len;
            // 输出的文件流保存到本地文件
            File tempFile = new File(path);
            if (!tempFile.exists()) {
                tempFile.mkdirs();
            os = new FileOutputStream(tempFile.getPath() + File.separator + fileName);
            // 开始读取
            while ((len = inputStream.read(bs)) != -1) {
                os.write(bs, 0, len);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // 完毕,关闭所有链接
            try {
                os.close();
                inputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
    static FileItem createFileItem(String filePath) {
        FileItemFactory factory = new DiskFileItemFactory(16, null);
        String textFieldName = "textField";
        int num = filePath.lastIndexOf(".");
        String extFile = filePath.substring(num);
        String path = filePath.substring(0, num);
        path = path.replace("\\", "/");
        String[] fileNames = path.split("/");
        String fileName = fileNames[fileNames.length - 1];
        FileItem item = factory.createItem(textFieldName, "text/plain", true, fileName + extFile);
        File newfile = new File(filePath);
        int bytesRead = 0;
        byte[] buffer = new byte[8192];
        try {
            FileInputStream fis = new FileInputStream(newfile);
            OutputStream os = item.getOutputStream();
            while ((bytesRead = fis.read(buffer, 0, 8192)) != -1) {
                os.write(buffer, 0, bytesRead);
            os.close();
            fis.close();
        } catch (IOException e) {
            e.printStackTrace();
        return item;
    public static void main(String[] args) throws Exception {
        String path1 = "/Users/name/Downloads/全款合同.doc";
        String path2 = "/Users/name/Downloads/买卖合同.docx";
        MultipartFile file1 = new CommonsMultipartFile(createFileItem(path1));
        MultipartFile file2 = new CommonsMultipartFile(createFileItem(path2));
        System.out.println(WordToHtmlUtil.Word2003ToHtml(file1));
        // 生成2007
        WordToHtmlUtil.Word2007ToHtml(file2);
3. 页面调整
字体:font-family:SimSun;    font-family:Times New Roman
表格宽度:.t1{width:100%;}
表格td样式 去掉:width:1.1131945in;
body:



相关标签: Java

本文地址:https://www.0558.la/article/692448c225d83720b8c6.html

上一篇:解决docker下载镜像速度过慢解决docker启动...
下一篇:新注册的域名多久可以备案新注册的域名要如...

温馨提示

做上本站友情链接,在您站上点击一次,即可自动收录并自动排在本站第一位!
<a href="https://www.0558.la/" target="_blank">自动秒收录</a>