package com.example.client.service;
/**
 * Placeholder for a PDF-to-Word conversion prototype; the class currently has no live members.
 *
 * <p>The prototype is retained below as disabled code (line comments, not compiled) for
 * reference only. As written, it:
 * <ol>
 *   <li>opens a PDF from a hard-coded path and, for every text fragment on every page,
 *       prints the text and font name, records {@code text -> fontName} in a map, and
 *       re-applies the font returned by {@code FontRepository.findFont};</li>
 *   <li>saves the PDF as a .docx using {@code DocSaveOptions} with the {@code Flow}
 *       recognition mode;</li>
 *   <li>reopens the .docx as a {@code com.aspose.words.Document} and, for each paragraph
 *       whose trimmed text (or the substring returned by {@code getStringNotDi}) is a key
 *       in the map, sets that font name on every run, then saves the .docx in place.</li>
 * </ol>
 *
 * <p>NOTE(review): the unqualified {@code Document}, {@code Page}, {@code TextFragment},
 * {@code Font} and related types are presumably from Aspose.PDF -- confirm. No import
 * statements accompany this class, so the required imports must be added before any of
 * this code is re-enabled. Known defects are marked inline with NOTE(review).
 */
public class Pdf2word {

    // ------------------------------------------------------------------------------------
    // Disabled prototype. Everything below is comment text and is NOT compiled.
    // Line comments are used on purpose: block comments do not nest, so a nested
    // block-comment fragment inside the disabled region would end it early.
    // ------------------------------------------------------------------------------------
    //
    // public static void main(String[] args) {
    //     try {
    //         Map<String, String> map = new HashMap<String, String>();
    //
    //         // Load the PDF document.
    //         // NOTE(review): absolute, machine-specific path; take it from args before re-enabling.
    //         Document pdfDocument = new Document("C:\\Users\\admin\\Desktop\\可靠性建模工具与仿真算法库接口(讨论稿).pdf");
    //
    //         // Iterate over all pages.
    //         for (Page page : pdfDocument.getPages()) {
    //             // Use TextFragmentAbsorber to find all text fragments.
    //             TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber();
    //             page.accept(textFragmentAbsorber);
    //
    //             // Get all TextFragments.
    //             TextFragmentCollection textFragmentCollection = textFragmentAbsorber.getTextFragments();
    //             for (TextFragment textFragment : textFragmentCollection) {
    //                 // Get the font name of the current text fragment.
    //                 System.out.println("Paragraph text: " + textFragment.getText());
    //                 String fontName = textFragment.getTextState().getFont().getFontName();
    //                 System.out.println("pdf Font: " + fontName);
    //
    //                 // NOTE(review): the map is keyed by fragment text, so fragments with
    //                 // identical text overwrite each other; only the last font name survives.
    //                 map.put(textFragment.getText(), fontName);
    //
    //                 // Look up and set the corresponding font.
    //                 Font font = FontRepository.findFont(fontName);
    //                 if (font != null) {
    //                     textFragment.getTextState().setFont(font);
    //                 }
    //             }
    //         }
    //
    //         // Convert the PDF to a Word document.
    //         DocSaveOptions saveOptions = new DocSaveOptions();
    //         // Set the mode to flow layout.
    //         saveOptions.setMode(DocSaveOptions.RecognitionMode.Flow);
    //
    //         pdfDocument.save("C:\\Users\\admin\\Desktop\\33.docx", saveOptions);
    //
    //         com.aspose.words.Document doc = null;
    //         try {
    //             doc = new com.aspose.words.Document("C:\\Users\\admin\\Desktop\\33.docx");
    //         } catch (Exception e) {
    //             // NOTE(review): the failure is only printed; doc stays null and the loop
    //             // below then dereferences it.
    //             e.printStackTrace();
    //         }
    //
    //         // Iterate over all paragraphs in the document.
    //         for (Paragraph para : (Iterable<Paragraph>) doc.getChildNodes(NodeType.PARAGRAPH, true)) {
    //             System.out.println("wordParagraph text: " + para.getText().trim());
    //             String fontName = null;
    //
    //             // Previously disabled alternative lookup (it references textFragmentNum,
    //             // which is not declared anywhere in this prototype):
    //             // if (map.get(para.getText().trim()+"xj"+textFragmentNum)!=null){
    //             //     fontName = map.get(para.getText().trim()+"xj"+textFragmentNum);
    //             // }else
    //
    //             if (map.get(para.getText().trim()) != null) {
    //                 fontName = map.get(para.getText().trim());
    //             } else {
    //                 String newText = getStringNotDi(para.getText().trim());
    //                 if (map.get(newText) != null) {
    //                     fontName = map.get(newText);
    //                 }
    //             }
    //
    //             // Iterate over all Run nodes in the paragraph.
    //             if (fontName != null) {
    //                 for (Run run : (Iterable<Run>) para.getChildNodes(NodeType.RUN, true)) {
    //                     run.getFont().setName(fontName);
    //                 }
    //             }
    //         }
    //
    //         try {
    //             doc.save("C:\\Users\\admin\\Desktop\\33.docx");
    //         } catch (Exception e) {
    //             e.printStackTrace();
    //         }
    //     } catch (Exception e) {
    //         e.printStackTrace();
    //     }
    // }
    //
    // // Returns the first run of consecutive characters accepted by isChinese(char).
    // // NOTE(review): defects to fix before re-enabling:
    // //   - no accepted character in text: startP stays null and substring(startP, endP)
    // //     fails on unboxing null;
    // //   - the run is a single character at the very end of text: endP stays null,
    // //     same failure;
    // //   - the run extends to the end of text: endP is the index of the last character,
    // //     so substring drops that character (exclusive end bound).
    // public static String getStringNotDi(String text) {
    //     String newText = null;
    //     Integer startP = null;
    //     Integer endP = null;
    //     for (Integer i = 0; i < text.length(); i++) {
    //         if (startP == null) {
    //             if (isChinese(text.charAt(i))) {
    //                 startP = i;
    //             }
    //         } else {
    //             if (!isChinese(text.charAt(i))) {
    //                 endP = i;
    //                 break;
    //             } else {
    //                 endP = i;
    //             }
    //         }
    //     }
    //     newText = text.substring(startP, endP);
    //
    //     return newText;
    // }
    //
    // // Returns true when c belongs to one of the listed Unicode blocks. Besides the CJK
    // // ideograph blocks this also accepts CJK symbols/punctuation, half-/full-width
    // // forms and general punctuation.
    // // NOTE(review): a char is a single UTF-16 code unit, so the EXTENSION_B comparison
    // // can never match here; a code-point based check would be needed for that block.
    // public static boolean isChinese(char c) {
    //     Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
    //     if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
    //             || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
    //             || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
    //             || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
    //             || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
    //             || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
    //             || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION) {
    //         return true;
    //     }
    //     return false;
    // }

}