本文介绍怎样用iText读取PDF内容以及内容的位置,并替换指定内容,输出新的PDF文件。
主要用到的功能:
1.读取PDF内容,以及内容的位置
2.复制PDF
3.修改PDF,在PDF指定位置输出内容
示例代码使用的jar包:
<dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.13</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>27.0.1-jre</version> </dependency>
/**
 * Bean recording one piece of content found in a PDF: the page it is on,
 * its x/y coordinates and the text itself.
 */
public class WordItem {

    /** Page number the content is on. */
    private Integer pageNum;

    /** X coordinate of the content. */
    private Float x;

    /** Y coordinate of the content. */
    private Float y;

    /** The text content. */
    private String content;

    // ---- getters ----

    /** @return the page number the content is on */
    public Integer getPageNum() {
        return this.pageNum;
    }

    /** @return the x coordinate */
    public Float getX() {
        return this.x;
    }

    /** @return the y coordinate */
    public Float getY() {
        return this.y;
    }

    /** @return the text content */
    public String getContent() {
        return this.content;
    }

    // ---- setters ----

    /** @param pageNum the page number the content is on */
    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    /** @param x the x coordinate */
    public void setX(Float x) {
        this.x = x;
    }

    /** @param y the y coordinate */
    public void setY(Float y) {
        this.y = y;
    }

    /** @param content the text content */
    public void setContent(String content) {
        this.content = content;
    }
}
public class ReplaceWordItem extends WordItem{ //须要替换的关键字,替换后的内容和位置 //文档中搜索的关键字 private String key; //写入的内容 private String value; //偏移的位置 1:上 2:右 3:下面 4:左边 private int site = 2; //偏移的量 private float size = 30; public ReplaceWordItem(String key, String value, float size) { super(); this.key = key; this.value = value; this.size = size; } public ReplaceWordItem() { super(); } public ReplaceWordItem(String key, String value, int site, float size) { super(); this.key = key; this.value = value; this.site = site; this.size = size; } public String getKey() { return key; } public void setKey(String key) { this.key = key; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } public int getSite() { return site; } public void setSite(int site) { this.site = site; } public float getSize() { return size; } public void setSize(float size) { this.size = size; } }
public class KeyWordPositionListener implements RenderListener{ //用来解析PDF内容的类 //页面上全部的词 private List<WordItem> allItems = new ArrayList<WordItem>(); /** * 第几页 */ private Integer pageNumber; private WordItem prevItem = new WordItem(); @Override public void beginTextBlock() { // TODO Auto-generated method stub } @Override public void renderText(TextRenderInfo renderInfo) { //读取PDF时,有些肉眼看上去是一行的字,可能会被解析为多个,致使找不到知足条件的关键字,这里作了简单的处理 //即若是一些词是连续的,先后没有空白字符串,即认为是一个词 String content = renderInfo.getText().trim(); Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange(); System.out.println("content=" + content + " x="+textRectangle.getX() + " y="+textRectangle.getY()); WordItem item = null; boolean newFlag = false; if(Strings.isNullOrEmpty(prevItem.getContent())) { item = new WordItem(); newFlag = true; }else { if(allItems.size() == 0) { item = new WordItem(); newFlag = true; }else { item = allItems.get(allItems.size()-1); } } //内容会断开,如代理机构名称 变成 代理机构 名称 2个部分???????????? //关键字相关信息 if(!content.equals("")) { if(newFlag) { item.setPageNum(pageNumber); item.setContent(content); item.setX((float)textRectangle.getX()); item.setY((float)textRectangle.getY()); allItems.add(item); //先保存全部的项 }else { //以前有内容 item.setContent(item.getContent() + content); } } prevItem = new WordItem(); prevItem.setContent(content); } @Override public void endTextBlock() { // TODO Auto-generated method stub } @Override public void renderImage(ImageRenderInfo renderInfo) { // TODO Auto-generated method stub } public List<WordItem> getAllItems() { return allItems; } public void setAllItems(List<WordItem> allItems) { this.allItems = allItems; } public Integer getPageNumber() { return pageNumber; } public void setPageNumber(Integer pageNumber) { this.pageNumber = pageNumber; } }
public class SearchWord { /** * 从PDF中读取内容 * 内容与关键字比对,若是知足条件,则在匹配内容的指定位置,增长须要显示的内容(替换关键字,修改PDF) */ public static void main(String[] args) throws Exception { String path = "in.pdf"; String outPath = "out.pdf"; PdfReader reader = new PdfReader(path); PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(outPath)); //关键字,以及替换后的内容及位置 List<ReplaceWordItem> keyList = Lists.newArrayList( new ReplaceWordItem("邮编", "苏州21510000", 2), new ReplaceWordItem("代理机构名称", "苏州XXX事务所", 80) ); //找到的位置,匹配到的关键字 List<ReplaceWordItem> keyItemList = matchPage(reader, keyList); //修改PDF for(int i=0; i<keyItemList.size(); i++) { ReplaceWordItem keyItem = keyItemList.get(i); PdfContentByte overContent = stamper.getOverContent(keyItem.getPageNum()); overContent.beginText(); //字体和大小 BaseFont bf = BaseFont.createFont("C:/Windows/Fonts/simsun.ttc,1", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); overContent.setFontAndSize(bf, 10F); //位置 overContent.setTextMatrix(keyItem.getX() + keyItem.getSize() + 30, keyItem.getY()); //内容 overContent.showText(keyItem.getValue()); overContent.endText(); } stamper.close(); } /** * 知足关键字的位置 */ public static List<ReplaceWordItem> matchPage(PdfReader reader, List<ReplaceWordItem> keywordList) throws Exception { //文档里全部的内容 List<WordItem> allItemList = new ArrayList<>(); for(int page=1; page<=reader.getNumberOfPages(); page++){ KeyWordPositionListener renderListener = new KeyWordPositionListener(); renderListener.setPageNumber(page); PdfReaderContentParser parse = new PdfReaderContentParser(reader); parse.processContent(page, renderListener); Rectangle rectangle = reader.getPageSize(page); System.out.println(rectangle.getWidth() + " " + rectangle.getHeight() + " " + rectangle.getLeft() + " " + rectangle.getRight()); //PageSize.A4 210mm*297mm //Itext单位 Pt 1pt = 0.35mm //public static final Rectangle A4 = new RectangleReadOnly(595,842); allItemList.addAll(renderListener.getAllItems()); } for (WordItem wordItem : allItemList) { 
System.out.println("wordItem.getContent() " + wordItem.getContent()); } List<ReplaceWordItem> keyItemList = new ArrayList<>(); //那些知足关键字 for (ReplaceWordItem key : keywordList) { for (WordItem pageItem : allItemList) { if(Objects.equal(key.getKey(), pageItem.getContent())) { key.setPageNum(pageItem.getPageNum()); key.setX(pageItem.getX()); key.setY(pageItem.getY()); keyItemList.add(key); //找到第一个就结束 break; } } } return keyItemList; } }