使用poi对docx文件指定内容标注并高亮
package com.ruoyi;import org.apache.poi.POIXMLDocumentPart;import org.apache.poi.openxml4j.exceptions.InvalidFormatException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxm
·
package com.ruoyi;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.xwpf.usermodel.TextSegement;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTEmptyImpl;
import org.w3c.dom.Node;
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
import javax.xml.namespace.QName;
import java.io.*;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
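/**
* Demo that searches a Word (OOXML) document for a keyword with Apache POI,
* highlights every occurrence and attaches a review comment to it.
* @program: ruoyi
* @create: 2021-10-08 11:33
* @author: sxl
* @description: highlight and comment on specific text inside a docx file using POI
**/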
public class MDocXTest {
/**
 * Demo entry point: loads the source document, highlights every occurrence of the
 * keyword, attaches a comment to each occurrence and writes the result to a new file.
 *
 * @param args unused
 * @throws Exception if the document cannot be read, modified or written
 */
public static void main(String[] args) throws Exception {
    final String keyword = "聚会";
    // NOTE(review): XWPFDocument reads OOXML (.docx) content only; confirm that this
    // ".doc" file really is an OOXML package, otherwise loading will fail.
    try (InputStream is = new FileInputStream(new File("D:\\核工业口述史(编辑稿)0423_妇女出版社.doc"));
         XWPFDocument doc = new XWPFDocument(is)) {
        XWPFCommentsDocument commentsDocument = createCommentsDocument(doc);
        for (XWPFParagraph p : doc.getParagraphs()) {
            if (!p.getText().contains(keyword)) {
                continue;
            }
            List<TextSegement> segments = searchText(p, keyword, new PositionInParagraph());
            // Segment run indices refer to the paragraph as it was when searched. Every
            // run inserted while handling an earlier match shifts the later ones, so the
            // real number of inserted runs is tracked instead of assuming two per match.
            int insertedRuns = 0;
            for (TextSegement segment : segments) {
                List<XWPFRun> runs = p.getRuns();
                int runCountBefore = runs.size();
                int beginRunIndex;
                int endRunIndex;
                if (segment != null) {
                    beginRunIndex = segment.getBeginRun() + insertedRuns;
                    endRunIndex = segment.getEndRun() + insertedRuns;
                } else {
                    // No precise position available: annotate the whole paragraph.
                    beginRunIndex = 0;
                    endRunIndex = runs.size() - 1;
                }
                XWPFRun begin;
                XWPFRun end;
                if (beginRunIndex == endRunIndex) {
                    XWPFRun run = runs.get(beginRunIndex);
                    // getText(0) may be null, so compare from the constant side.
                    if (keyword.equals(run.getText(0))) {
                        highLight(p, run);
                        begin = end = run;
                    } else {
                        begin = end = containsMatch(p, keyword, run, beginRunIndex);
                    }
                } else {
                    begin = runs.get(beginRunIndex);
                    end = runs.get(endRunIndex);
                    // Part of the keyword located at the tail of the first run.
                    String beginText = getBeginString(begin.text(), keyword);
                    // Part of the keyword located at the head of the last run.
                    String endText = getEndString(end.text(), keyword);
                    // Runs strictly between first and last belong entirely to the match.
                    for (int i = beginRunIndex + 1; i < endRunIndex; i++) {
                        highLight(p, runs.get(i));
                    }
                    begin = beginMatch(p, beginText, begin, beginRunIndex);
                    end = endMatch(p, endText, end, endRunIndex);
                }
                addComment(commentsDocument, p, "sxl", "批注内容", begin, end);
                insertedRuns += p.getRuns().size() - runCountBefore;
            }
        }
        try (FileOutputStream out = new FileOutputStream(new File("D:\\test2.docx"))) {
            doc.write(out);
        }
    }
}
/**
 * Splits a run that contains {@code content} somewhere inside its text into three
 * consecutive runs (text before, the match itself, text after) and highlights the match.
 *
 * <p>Only the first occurrence is split off; everything after it, including further
 * occurrences, is kept in the trailing run. Two runs are always inserted when the
 * content is found, even if the trailing text is empty.
 *
 * @param p        paragraph owning the run
 * @param content  literal text to isolate (not a regular expression)
 * @param run      run whose text contains {@code content}
 * @param runIndex index of {@code run} in the paragraph's run list
 * @return the newly inserted, highlighted run holding {@code content}; if the run text
 *         does not contain {@code content}, the original run is highlighted and returned
 */
private static XWPFRun containsMatch(XWPFParagraph p, String content, XWPFRun run, int runIndex) {
    String runText = run.text();
    int matchStart = runText.indexOf(content);
    if (matchStart < 0) {
        // Nothing to split: highlight the whole run instead of inventing text.
        highLight(p, run);
        return run;
    }
    String before = runText.substring(0, matchStart);
    String after = runText.substring(matchStart + content.length());
    run.setText(before, 0);
    XWPFRun matchRun = p.insertNewRun(runIndex + 1);
    matchRun.setText(content);
    highLight(p, matchRun);
    p.insertNewRun(runIndex + 2).setText(after);
    return matchRun;
}
/**
 * Marks a run with a yellow highlight, reusing the run's existing highlight element
 * when one is already set.
 *
 * @param p   paragraph owning the run
 * @param run run to highlight
 */
private static void highLight(XWPFParagraph p, XWPFRun run) {
    CTRPr runProperties = getRunCTRPr(p, run);
    CTHighlight marker;
    if (runProperties.isSetHighlight()) {
        marker = runProperties.getHighlight();
    } else {
        marker = runProperties.addNewHighlight();
    }
    marker.setVal(STHighlightColor.YELLOW);
}
/**
 * Isolates the fragment of a match that sits at the end of the first run of a
 * multi-run match and highlights it.
 *
 * <p>If the run consists solely of {@code content} it is highlighted in place.
 * Otherwise only the last occurrence of {@code content} is removed from the run and
 * re-inserted as a new highlighted run right after it; earlier occurrences of the
 * same characters in the run are left untouched.
 *
 * @param p             paragraph owning the run
 * @param content       leading fragment of the searched text found at the end of the run
 * @param run           first run of the match
 * @param beginRunIndex index of {@code run} in the paragraph's run list
 * @return the highlighted run that holds {@code content}
 */
private static XWPFRun beginMatch(XWPFParagraph p, String content, XWPFRun run, int beginRunIndex) {
    String runText = run.text();
    if (runText.equals(content)) {
        highLight(p, run);
        return run;
    }
    int cut = runText.lastIndexOf(content);
    String remaining = cut < 0
            ? runText
            : runText.substring(0, cut) + runText.substring(cut + content.length());
    run.setText(remaining, 0);
    XWPFRun newRun = p.insertNewRun(beginRunIndex + 1);
    newRun.setText(content);
    highLight(p, newRun);
    return newRun;
}
/**
* 设置指定段落高亮
*/
/* private void highLightParagraph(XWPFParagraph p, String content) {
TextSegement segment = p.searchText(content, new PositionInParagraph());
int beginRunIndex = segment.getBeginRun();
int endRunIndex = segment.getEndRun();
List<XWPFRun> runs = p.getRuns();
if (beginRunIndex == endRunIndex) {
XWPFRun run = runs.get(beginRunIndex);
String runText = run.getText(0);
if (runText.equals(content)) {
highLight(p, run);
} else {
containsMatch(p, content, run, beginRunIndex);
}
} else {
begin = runs.get(beginRunIndex);
end = runs.get(endRunIndex);
// 获得第一个run标签,匹配内容
String beginText = getBeginString(begin.text(), content);
// 获得最后一个run标签,匹配内容
String endText = getEndString(end.text(), content);
// 高亮中间的文本
for (int i = beginRunIndex + 1; i < endRunIndex; i++) {
XWPFRun run = runs.get(i);
highLight(p, run);
}
}
}*/
/**
 * Isolates the fragment of a match that sits at the start of the last run of a
 * multi-run match and highlights it.
 *
 * <p>If the run consists solely of {@code content} it is highlighted in place.
 * Otherwise only the first occurrence of {@code content} is removed from the run and
 * re-inserted as a new highlighted run directly in front of it.
 *
 * @param p           paragraph owning the run
 * @param content     trailing fragment of the searched text found at the start of the run
 * @param run         last run of the match
 * @param endRunIndex index {@code run} had before the match was processed; used only as
 *                    a fallback when the run can no longer be located in the paragraph
 * @return the highlighted run that holds {@code content}
 */
private static XWPFRun endMatch(XWPFParagraph p, String content, XWPFRun run, int endRunIndex) {
    String runText = run.text();
    if (runText.equals(content)) {
        highLight(p, run);
        return run;
    }
    int cut = runText.indexOf(content);
    String remaining = cut < 0
            ? runText
            : runText.substring(0, cut) + runText.substring(cut + content.length());
    run.setText(remaining, 0);
    // Runs inserted earlier (e.g. for the first fragment of the match) shift this run,
    // so look up its current position instead of trusting the stale index.
    int insertAt = p.getRuns().indexOf(run);
    if (insertAt < 0) {
        insertAt = endRunIndex;
    }
    XWPFRun newRun = p.insertNewRun(insertAt);
    newRun.setText(content);
    highLight(p, newRun);
    return newRun;
}
/**
 * Adds a comment to the comments part and anchors it to the range spanned by
 * {@code begin} .. {@code end} inside {@code paragraph}.
 *
 * <p>Each comment receives an id that is larger than every id already present in the
 * comments part, so several comments can coexist in one document.
 *
 * @param commentsDocument comments part that stores the comment body
 * @param paragraph        paragraph containing the commented runs
 * @param author           comment author; its first character is used as initials
 * @param content          comment text
 * @param begin            first run of the commented range
 * @param end              last run of the commented range
 */
private static void addComment(XWPFCommentsDocument commentsDocument, XWPFParagraph paragraph, String author, String content, XWPFRun begin, XWPFRun end) {
    CTComments comments = commentsDocument.getComments();
    // Pick an id that is not used by any existing comment.
    BigInteger commentId = BigInteger.ONE;
    for (CTComment existing : comments.getCommentList()) {
        BigInteger existingId = existing.getId();
        if (existingId != null && existingId.compareTo(commentId) >= 0) {
            commentId = existingId.add(BigInteger.ONE);
        }
    }
    CTComment ctComment = comments.addNewComment();
    ctComment.setAuthor(author);
    ctComment.setInitials(author.isEmpty() ? author : author.substring(0, 1));
    ctComment.setDate(new GregorianCalendar(Locale.CHINA));
    ctComment.addNewP().addNewR().addNewT().setStringValue(content);
    ctComment.setId(commentId);
    CTMarkupRange rangeStart = paragraph.getCTP().addNewCommentRangeStart();
    rangeStart.setId(commentId);
    // The range start is moved to become the first child of the begin run's node.
    // NOTE(review): the marker ends up inside the run element rather than beside it;
    // confirm consumers accept this placement.
    begin.getCTR().getDomNode().insertBefore(rangeStart.getDomNode(), begin.getCTR().getDomNode().getFirstChild());
    CTMarkup reference = paragraph.getCTP().addNewR().addNewCommentReference();
    reference.setId(commentId);
    CTMarkupRange rangeEnd = paragraph.getCTP().addNewCommentRangeEnd();
    rangeEnd.setId(commentId);
    // Node that follows the end of the commented range.
    Node next = end.getCTR().getDomNode().getNextSibling();
    if (next != null) {
        // Both markers are moved into the following node, range end first.
        // When there is no following node they stay where they were appended,
        // at the end of the paragraph.
        next.insertBefore(reference.getDomNode(), next.getFirstChild());
        next.insertBefore(rangeEnd.getDomNode(), next.getFirstChild());
    }
}
/**
 * Creates the {@code /word/comments.xml} part in the document's package and registers
 * it as a relation of the main document.
 *
 * <p>The relationship id starts from the number of relation parts plus one and is
 * advanced until it is not used by any existing relationship of the document part,
 * because relationships that are not document parts (for example external links)
 * are not counted by {@code getRelationParts()}.
 *
 * @param document document that receives the comments part
 * @return wrapper around the newly created comments part
 * @throws InvalidFormatException if the part name is invalid or the relationships
 *                                of the document part cannot be read
 */
private static XWPFCommentsDocument createCommentsDocument(XWPFDocument document) throws InvalidFormatException {
    OPCPackage opcPackage = document.getPackage();
    PackagePartName partName = PackagingURIHelper.createPartName("/word/comments.xml");
    PackagePart part = opcPackage.createPart(partName, "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml");
    XWPFCommentsDocument commentsDocument = new XWPFCommentsDocument(part);
    int candidate = document.getRelationParts().size() + 1;
    while (document.getPackagePart().getRelationships().getRelationshipByID("rId" + candidate) != null) {
        candidate++;
    }
    String rId = "rId" + candidate;
    document.addRelation(rId, XWPFRelation.COMMENT, commentsDocument);
    return commentsDocument;
}
/**
 * Document part backing {@code /word/comments.xml}. It keeps the comments in memory
 * and serializes them into its package part when the document is committed.
 */
private static class XWPFCommentsDocument extends POIXMLDocumentPart {
    private final CTComments comments;

    /**
     * Wraps the given package part and starts with an empty comments element.
     *
     * @param part package part that will receive the serialized comments
     */
    private XWPFCommentsDocument(PackagePart part) {
        super(part);
        comments = CommentsDocument.Factory.newInstance().addNewComments();
    }

    /**
     * @return the mutable comments container held by this part
     */
    private CTComments getComments() {
        return comments;
    }

    /**
     * Writes the comments as the content of this part, using {@code comments} in the
     * WordprocessingML namespace as the document element.
     *
     * @throws IOException if the part's output stream cannot be written
     */
    @Override
    protected void commit() throws IOException {
        XmlOptions xmlOptions = new XmlOptions(DEFAULT_XML_OPTIONS);
        xmlOptions.setSaveSyntheticDocumentElement(new QName(CTComments.type.getName().getNamespaceURI(), "comments"));
        PackagePart part = getPackagePart();
        // Close the stream even when saving fails.
        try (OutputStream out = part.getOutputStream()) {
            comments.save(out, xmlOptions);
        }
    }
}
/**
 * Returns the run-properties element of a run, creating it when missing.
 *
 * @param p    paragraph used as a fallback owner when the run has no underlying CTR
 * @param pRun run whose properties are requested
 * @return the existing or newly created run properties; when the run has no CTR,
 *         the properties of a new run appended to the paragraph
 */
public static CTRPr getRunCTRPr(XWPFParagraph p, XWPFRun pRun) {
    if (pRun.getCTR() == null) {
        return p.getCTP().addNewR().addNewRPr();
    }
    CTRPr existing = pRun.getCTR().getRPr();
    return existing != null ? existing : pRun.getCTR().addNewRPr();
}
/**
 * Finds occurrences of {@code searched} in the text elements of a paragraph's runs.
 * A match may span several text elements and several runs.
 *
 * <p>Differences from a plain single-hit search, all visible in the loop below:
 * <ul>
 *   <li>every non-overlapping occurrence is reported, including directly adjacent ones;</li>
 *   <li>after a broken partial match the current character is re-tested as the
 *       possible start of a new match;</li>
 *   <li>run properties and {@code CTEmptyImpl} children do not interrupt a match,
 *       while any other non-text child does;</li>
 *   <li>proofing-error children encountered inside a run are removed.</li>
 * </ul>
 *
 * @param paragraph paragraph to search
 * @param searched  literal text to look for; {@code null} or empty yields no segments
 * @param startPos  run / text / character position at which the search starts
 * @return the matched segments in document order, possibly empty
 */
public static List<TextSegement> searchText(XWPFParagraph paragraph, String searched, PositionInParagraph startPos) {
    List<TextSegement> segList = new ArrayList<>();
    if (searched == null || searched.isEmpty()) {
        return segList;
    }
    int startRun = startPos.getRun(),
            startText = startPos.getText(),
            startChar = startPos.getChar();
    // Start of the current candidate; kept across runs so that a match spanning
    // several runs still reports where it really began.
    int beginRunPos = 0, beginTextPos = 0, beginCharPos = 0;
    // Number of characters of 'searched' matched so far.
    int candCharPos = 0;
    CTR[] rArray = paragraph.getCTP().getRArray();
    for (int runPos = startRun; runPos < rArray.length; runPos++) {
        int textPos = 0, charPos = 0;
        CTR ctRun = rArray[runPos];
        XmlCursor c = ctRun.newCursor();
        c.selectPath("./*");
        try {
            while (c.toNextSelection()) {
                XmlObject o = c.getObject();
                if (o instanceof CTText) {
                    if (textPos >= startText) {
                        String candidate = ((CTText) o).getStringValue();
                        if (runPos == startRun) {
                            charPos = startChar;
                        } else {
                            charPos = 0;
                        }
                        for (; charPos < candidate.length(); charPos++) {
                            char current = candidate.charAt(charPos);
                            if (candCharPos > 0 && current != searched.charAt(candCharPos)) {
                                // Partial match broken; fall through so this character
                                // can still open a new match.
                                candCharPos = 0;
                            }
                            if (current != searched.charAt(candCharPos)) {
                                continue;
                            }
                            if (candCharPos == 0) {
                                beginTextPos = textPos;
                                beginCharPos = charPos;
                                beginRunPos = runPos;
                            }
                            if (candCharPos + 1 < searched.length()) {
                                candCharPos++;
                            } else {
                                TextSegement segment = new TextSegement();
                                segment.setBeginRun(beginRunPos);
                                segment.setBeginText(beginTextPos);
                                segment.setBeginChar(beginCharPos);
                                segment.setEndRun(runPos);
                                segment.setEndText(textPos);
                                segment.setEndChar(charPos);
                                segList.add(segment);
                                // Start fresh so the next occurrence is matched from its
                                // first character.
                                candCharPos = 0;
                            }
                        }
                    }
                    textPos++;
                } else if (o instanceof CTProofErr) {
                    c.removeXml();
                } else if (o instanceof CTRPr || o instanceof CTEmptyImpl) {
                    // Neither element interrupts a running match.
                } else {
                    candCharPos = 0;
                }
            }
        } finally {
            c.dispose();
        }
    }
    return segList;
}
/**
 * Returns the longest suffix of {@code begin} that is also a prefix of {@code content},
 * i.e. the part of the searched text that sits at the end of the first run of a match.
 *
 * @param begin   text of the run in which the match starts
 * @param content full searched text
 * @return the longest suffix of {@code begin} that {@code content} starts with, or an
 *         empty string when no non-empty suffix qualifies
 */
public static String getBeginString(String begin, String content) {
    // Walk the start index forward; the string itself is never shortened, so the
    // loop bound stays stable and every suffix is examined.
    for (int start = 0; start < begin.length(); start++) {
        String suffix = begin.substring(start);
        if (content.startsWith(suffix)) {
            return suffix;
        }
    }
    return "";
}
/**
 * Returns the longest prefix of {@code end} that is also a suffix of {@code content},
 * i.e. the part of the searched text that sits at the start of the last run of a match.
 *
 * @param end     text of the run in which the match ends
 * @param content full searched text
 * @return the longest prefix of {@code end} that {@code content} ends with, or an
 *         empty string when no non-empty prefix qualifies
 */
public static String getEndString(String end, String content) {
    for (int length = end.length(); length > 0; length--) {
        String prefix = end.substring(0, length);
        if (content.endsWith(prefix)) {
            return prefix;
        }
    }
    return "";
}
}
更多推荐
已为社区贡献1条内容
所有评论(0)