jinlin
2025-03-21 77d58298d00c11ade8862ca8acb0fdef5a45322e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package com.example.client.service;
 
 
/**
 * Placeholder for a PDF-to-Word conversion utility.
 *
 * <p>The entire class body is wrapped in block comments, so the class currently
 * declares no members and compiles as an empty class. The disabled code is kept
 * below for reference; it sketches the following flow:
 * <ol>
 *   <li>load a PDF from a hard-coded desktop path and, for every text fragment on
 *       every page, record the fragment's font name in a map keyed by the
 *       fragment text;</li>
 *   <li>save the PDF as a .docx file using the flow recognition mode;</li>
 *   <li>reopen the .docx, look up each paragraph's trimmed text in the map and,
 *       when a font name is found, apply it to every run of that paragraph;</li>
 *   <li>save the .docx again under the same path.</li>
 * </ol>
 *
 * <p>NOTE(review): the disabled code references types (Document, Page,
 * TextFragmentAbsorber, TextFragment, FontRepository, DocSaveOptions, Paragraph,
 * Run, NodeType, com.aspose.words.Document, Map, HashMap) for which no import
 * statements are visible in this file; they would have to be added before the
 * code can be re-enabled.
 */
public class Pdf2word {/*
 
    // Disabled entry point: converts one hard-coded PDF to .docx and then tries to
    // restore the original font names paragraph by paragraph.
    public static void main(String[] args) {
        try {
            // Fragment text -> font name as reported by the PDF.
            // NOTE(review): fragments with identical text overwrite each other here,
            // so only the font of the last such fragment is kept.
            Map<String, String> map = new HashMap<String, String>();
            // Load the PDF document
            Document pdfDocument = new Document("C:\\Users\\admin\\Desktop\\可靠性建模工具与仿真算法库接口(讨论稿).pdf");
            // Iterate over all pages
            for (Page page : pdfDocument.getPages()) {
                // Use TextFragmentAbsorber to find all text fragments
                TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber();
                page.accept(textFragmentAbsorber);
 
                // Get all TextFragments
                TextFragmentCollection textFragmentCollection = textFragmentAbsorber.getTextFragments();
                for (TextFragment textFragment : textFragmentCollection) {
                    // Get the font name of the current text fragment
                    System.out.println("Paragraph text: " + textFragment.getText());
                    String fontName = textFragment.getTextState().getFont().getFontName();
                    System.out.println("pdf Font: " + fontName);
                    map.put(textFragment.getText(), fontName);
                    // Look up and set the corresponding font
                    Font font = FontRepository.findFont(fontName);
                    if (font != null) {
                        textFragment.getTextState().setFont(font);
                    }
 
                }
            }
 
            // Convert the PDF to a Word document
            DocSaveOptions saveOptions = new DocSaveOptions();
            // Set the mode to flow layout
            saveOptions.setMode(DocSaveOptions.RecognitionMode.Flow);
 
 
            pdfDocument.save("C:\\Users\\admin\\Desktop\\33.docx", saveOptions);
 
 
            // NOTE(review): if loading fails, the exception is only printed and doc
            // stays null, so the paragraph loop below dereferences null.
            com.aspose.words.Document doc = null;
            try {
                doc = new com.aspose.words.Document("C:\\Users\\admin\\Desktop\\33.docx");
            } catch (Exception e) {
                e.printStackTrace();
            }
 
            // Iterate over all paragraphs in the document
            for (Paragraph para : (Iterable<Paragraph>) doc.getChildNodes(NodeType.PARAGRAPH, true)) {
 
                System.out.println("wordParagraph text: " + para.getText().trim());
                String fontName = null;
                // NOTE(review): the next three lines are an older lookup variant keyed by
                // text + "xj" + textFragmentNum; textFragmentNum is not declared anywhere in
                // the visible code. The comment markers around it close and reopen the
                // enclosing block comment so that everything stays disabled.
*//*                if (map.get(para.getText().trim()+"xj"+textFragmentNum)!=null){
                    fontName = map.get(para.getText().trim()+"xj"+textFragmentNum);
                }else *//*
                // First try the full trimmed paragraph text; otherwise fall back to the
                // substring extracted by getStringNotDi.
                if (map.get(para.getText().trim()) != null) {
                    fontName = map.get(para.getText().trim());
                } else {
                    String newText = getStringNotDi(para.getText().trim());
                    if (map.get(newText) != null) {
                        fontName = map.get(newText);
                    }
                }
                // Iterate over all Run nodes in the paragraph
                if (fontName != null) {
                    for (Run run : (Iterable<Run>) para.getChildNodes(NodeType.RUN, true)) {
                        run.getFont().setName(fontName);
                    }
                }
            }
            try {
                doc.save("C:\\Users\\admin\\Desktop\\33.docx");
            } catch (Exception e) {
                e.printStackTrace();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // Disabled helper: scans text for the first character accepted by isChinese and
    // returns the substring from there up to the index stored in endP.
    // NOTE(review): if no character is accepted, startP stays null; if the first
    // accepted character is the last character of text, endP stays null. Either way
    // the substring call would fail while unboxing null.
    // NOTE(review): when the accepted run reaches the end of text, endP holds the
    // index of the last accepted character and substring treats it as exclusive,
    // so that last character is left out of the result.
    public static String getStringNotDi(String text) {
        String newText = null;
        Integer startP = null;
        Integer endP = null;
        for (Integer i = 0; i < text.length(); i++) {
            if (startP == null) {
                if (isChinese(text.charAt(i))) {
                    startP = i;
                }
            } else {
                if (!isChinese(text.charAt(i))) {
                    endP = i;
                    break;
                }else{
                    endP = i;
                }
            }
        }
        newText=text.substring(startP, endP);
 
        return newText;
    }
 
    // Disabled helper: returns true when the character's Unicode block is one of the
    // CJK ideograph blocks listed below, CJK symbols and punctuation, half-width and
    // full-width forms, or general punctuation; false otherwise.
    // NOTE(review): despite the name, the last three blocks contain punctuation and
    // full-width Latin forms, not only Chinese characters.
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION) {
            return true;
        }
        return false;
    }*/
}