itext替换pdf中的中文

时间 2019-12-08

标签 itext 替换 pdf 中文繁體版

原文链接

在网上找了一段时间，发现有两个版本：

1，使用aspose.pdf，这篇文章写得很清楚：https://blog.csdn.net/da_keng/article/details/65633498 。我稍微改了一下，使它能够一次替换多个字符串：

/**
 * Demo entry point: fills three placeholders of the sample contract template and
 * writes the result next to it.
 */
public static void main(String[] args) {
        // Placeholder -> replacement text.
        final Map<String, String> replacements = new HashMap<String, String>();
        replacements.put("[$合同编号$]", "ZR-20181009-00000164");
        replacements.put("[$乙方姓名$]", "TroubleA");
        replacements.put("[$签字日期$]", "2018/10/10 11:24:30");

        final String templatePdf = "E:\\CA\\入职合同模版样例.pdf";
        final String outputPdf = "E:\\CA\\out.pdf";
        test(templatePdf, outputPdf, replacements);
    }

    /**
     * Replaces every occurrence of each key of {@code map} in the PDF at {@code srcPath}
     * with the mapped value, marks the replaced fragments with a red background and saves
     * the document to {@code targetPath}.
     *
     * @param srcPath    path of the PDF to read
     * @param targetPath path the modified PDF is saved to
     * @param map        search text mapped to its replacement text
     */
    public static void test(String srcPath, String targetPath, Map<String, String> map){
        // ClassLoader resource names are '/'-separated and relative to the classpath root,
        // so the former "\\license.xml" is not a portable name.
        try (InputStream license = Main.class.getClassLoader().getResourceAsStream("license.xml")) {
            if (license == null) {
                System.err.println("license.xml not found on the classpath; continuing without applying a license");
            } else {
                new License().setLicense(license);
            }
        } catch (Exception e) {
            // Best effort, as before: a licensing problem is reported but does not abort the run.
            e.printStackTrace();
        }

        Document pdfDoc = new Document(srcPath);
        try {
            // The page collection does not depend on the key, so look it up (and report it) once.
            PageCollection pages = pdfDoc.getPages();
            System.out.println("文档总页码数："+pages.size());

            for (Map.Entry<String, String> entry : map.entrySet()) {
                System.out.println("Key = " + entry.getKey() + ", Value = " + entry.getValue());

                TextFragmentAbsorber textFragmentAbsorber = new TextFragmentAbsorber(entry.getKey());
                pages.accept(textFragmentAbsorber);
                int i = 0;
                for (TextFragment textFragment :(Iterable<TextFragment>) textFragmentAbsorber.getTextFragments()) {
                    textFragment.setText(entry.getValue());
                    // Red background so replaced fragments are easy to spot.
                    textFragment.getTextState().setBackgroundColor(com.aspose.pdf.Color.getRed());
                    System.out.println(++i);
                }
            }
            pdfDoc.save(targetPath);
        } finally {
            // Release the document's resources even when a replacement or the save fails.
            pdfDoc.close();
        }
    }

2，像这个链接中的大佬一样（https://blog.csdn.net/sishenkankan/article/details/53107195 ），使用具体的java代码去寻找x,y的位置，然后画一个白板覆盖，再重新写上需要替换的文字。这种做法有如下两个缺点：

a)当需要替换的文字不在同一个TextRenderInfo里面时，无法找到比较长的需要替换的文字

b)当pdf包含多页的时候，不方便具体控制替换到哪一页

因此我又百度了一个可以定位pdf中需要替换的文字位置的方法：https://blog.csdn.net/sdizoea/article/details/75105798 。像文中大佬一样直接定位，就解决了文字不在一个块中时查找的麻烦。贴一下全部的代码，一共有6个类：

package com.sinosoft.lis.utils;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;

import java.io.*;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Replaces text in a PDF by painting a filled rectangle over a region and writing the
 * replacement text on top of it.
 *
 * <p>Two workflows are offered. The instance workflow ({@code replaceText(...)} followed by
 * {@link #toPdf(String)} or {@link #toBytes()}) draws on page 1 only. The stand-alone
 * {@link #manipulatePdf1(String, String, Map)} looks up each placeholder with
 * {@code MatchItem.matchPage} and draws on whichever page the match was found.
 *
 * <p>{@link #toPdf(String)} and {@link #toBytes()} close the instance; it cannot be reused
 * afterwards.
 */
public class PdfReplacer {
    private static final Logger logger = LoggerFactory.getLogger(PdfReplacer.class);

    /** Classpath location of the font used by {@link #manipulatePdf1(String, String, Map)}. */
    private static final String SONG_FONT_RESOURCE = "/fonts/adobesongstd-light.otf";

    private int fontSize;
    /** Regions to paint over, keyed by alias. */
    private Map<String, ReplaceRegion> replaceRegionMap = new HashMap<String, ReplaceRegion>();
    /** Replacement text, keyed by the same alias as {@link #replaceRegionMap}. */
    private Map<String, String> replaceTextMap = new HashMap<String, String>();
    private ByteArrayOutputStream output;
    private PdfReader reader;
    private PdfStamper stamper;
    private PdfContentByte canvas;
    private Font font;

    /**
     * Creates an empty instance that is only usable for
     * {@link #manipulatePdf1(String, String, Map)}.
     */
    public PdfReplacer() {
    }

    /**
     * @param pdfBytes content of the PDF to modify
     */
    public PdfReplacer(byte[] pdfBytes) throws DocumentException, IOException {
        init(pdfBytes);
    }

    /**
     * @param fileName path of the PDF to modify
     */
    public PdfReplacer(String fileName) throws IOException, DocumentException {
        init(readFully(fileName));
    }

    /**
     * Reads a whole file into memory. The stream is closed by try-with-resources, so a
     * failed open surfaces as the original exception instead of a NullPointerException
     * from the cleanup code, and the read loop does not depend on {@code available()}.
     */
    private static byte[] readFully(String fileName) throws IOException {
        try (InputStream in = new FileInputStream(fileName)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    private void init(byte[] pdfBytes) throws DocumentException, IOException {
        logger.info("初始化开始");
        reader = new PdfReader(pdfBytes);
        output = new ByteArrayOutputStream();
        stamper = new PdfStamper(reader, output);
        canvas = stamper.getOverContent(1);
        setFont(10);
        logger.info("初始化成功");
    }

    /** Releases the reader and the output buffer and drops the collected replacements. */
    private void close() throws DocumentException, IOException {
        if (reader != null) {
            reader.close();
        }
        if (output != null) {
            output.close();
        }

        output = null;
        replaceRegionMap = null;
        replaceTextMap = null;
    }

    /**
     * Registers a region at explicit coordinates; {@code text} serves both as the region's
     * alias and as the text written into it.
     */
    public void replaceText(float x, float y, float w, float h, String text) {
        ReplaceRegion region = new ReplaceRegion(text);
        region.setH(h);
        region.setW(w);
        region.setX(x);
        region.setY(y);
        addReplaceRegion(region);
        this.replaceText(text, text);
    }

    /**
     * Registers {@code text} as the replacement for the region called {@code name}. When no
     * region with that alias has been added, its position is searched for in the document.
     */
    public void replaceText(String name, String text) {
        this.replaceTextMap.put(name, text);
    }

    /**
     * Paints all regions and writes the replacement texts, then closes the stamper so the
     * result is flushed to {@link #output}.
     *
     * @throws IOException
     * @throws DocumentException
     */
    private void process() throws DocumentException, IOException {
        if (canvas == null) {
            throw new IllegalStateException("PdfReplacer was created without a source PDF");
        }
        try {
            parseReplaceText();

            // Paint every region in one fill operation.
            canvas.saveState();
            canvas.setColorFill(BaseColor.RED);
            for (ReplaceRegion region : replaceRegionMap.values()) {
                canvas.rectangle(region.getX(), region.getY(), region.getW(), region.getH());
            }
            canvas.fill();
            canvas.restoreState();

            // Write the replacement texts on top of the painted regions.
            canvas.beginText();
            canvas.setFontAndSize(font.getBaseFont(), getFontSize());
            for (ReplaceRegion region : replaceRegionMap.values()) {
                String text = replaceTextMap.get(region.getAliasName());
                if (text == null) {
                    // Region added without replacement text: it is painted over only.
                    continue;
                }
                // +2 adjusts the text position relative to the painted background.
                canvas.setTextMatrix(region.getX(), region.getY() + 2);
                canvas.showText(text);
            }
            canvas.endText();
        } finally {
            if (stamper != null) {
                stamper.close();
            }
        }
    }

    /**
     * Looks up the position of every replacement that has no explicit region yet.
     */
    private void parseReplaceText() {
        PdfPositionParse parse = new PdfPositionParse(reader);
        boolean hasPending = false;
        for (String name : replaceTextMap.keySet()) {
            if (replaceRegionMap.get(name) == null) {
                parse.addFindText(name);
                hasPending = true;
            }
        }
        if (!hasPending) {
            // PdfPositionParse.parse() throws when it has nothing to look for, which used
            // to break every run where all regions were given explicitly.
            return;
        }

        try {
            for (Entry<String, ReplaceRegion> found : parse.parse().entrySet()) {
                if (found.getValue() != null) {
                    replaceRegionMap.put(found.getKey(), found.getValue());
                }
            }
        } catch (IOException e) {
            // Best effort: texts that could not be located are simply not replaced.
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Applies the replacements and writes the resulting PDF to {@code fileName}.
     * The instance is closed afterwards.
     *
     * @param fileName path of the file to create
     * @throws DocumentException
     * @throws IOException
     */
    public void toPdf(String fileName) throws DocumentException, IOException {
        try {
            process();
            try (FileOutputStream fileOutputStream = new FileOutputStream(fileName)) {
                fileOutputStream.write(output.toByteArray());
                fileOutputStream.flush();
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw e;
        } finally {
            close();
        }
        logger.info("文件生成成功");
    }

    /**
     * Applies the replacements and returns the resulting PDF as a byte array.
     * The instance is closed afterwards.
     *
     * @return content of the modified PDF
     * @throws DocumentException
     * @throws IOException
     */
    public byte[] toBytes() throws DocumentException, IOException {
        try {
            process();
            logger.info("二进制数据生成成功");
            return output.toByteArray();
        } finally {
            close();
        }
    }

    /**
     * Adds a region to paint over, keyed by its alias.
     *
     * @param replaceRegion region to add
     */
    public void addReplaceRegion(ReplaceRegion replaceRegion) {
        this.replaceRegionMap.put(replaceRegion.getAliasName(), replaceRegion);
    }

    /**
     * Returns the region registered under {@code aliasName}.
     *
     * @param aliasName alias of the region
     * @return the region, or {@code null} when none is registered under that alias
     */
    public ReplaceRegion getReplaceRegion(String aliasName) {
        return this.replaceRegionMap.get(aliasName);
    }

    public int getFontSize() {
        return fontSize;
    }

    /**
     * Sets the font size; when it differs from the current one, a bold STSong-Light font
     * of that size is created.
     *
     * @param fontSize font size
     * @throws DocumentException
     * @throws IOException
     */
    public void setFont(int fontSize) throws DocumentException, IOException {
        if (fontSize != this.fontSize) {
            this.fontSize = fontSize;
            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
            font = new Font(bf, this.fontSize, Font.BOLD);
        }
    }

    /**
     * Sets the font used for the replacement text.
     *
     * @throws NullPointerException when {@code font} is {@code null}
     */
    public void setFont(Font font) {
        if (font == null) {
            throw new NullPointerException("font is null");
        }
        this.font = font;
    }

    /**
     * Replaces every occurrence of each key of {@code replaceTextMap} in {@code src} and
     * writes the result to {@code dest}. Each match is covered by a fixed 85x13 red
     * rectangle and the replacement text is written over it in 12pt.
     *
     * @param src            path of the PDF to read
     * @param dest           path of the PDF to create
     * @param replaceTextMap placeholder mapped to its replacement text
     */
    public void manipulatePdf1(String src, String dest, Map<String, String> replaceTextMap) throws Exception {
        PdfReader pdfReader = new PdfReader(src);
        try (FileOutputStream destStream = new FileOutputStream(dest)) {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, destStream);
            try {
                // Loaded on the first match only, then reused for all further matches.
                BaseFont songFont = null;

                for (Entry<String, String> replacement : replaceTextMap.entrySet()) {
                    String key = replacement.getKey();
                    List<MatchItem> hits = MatchItem.matchPage(src, key);
                    logger.info(hits.toString());

                    for (MatchItem hit : hits) {
                        float x = hit.getX();
                        float y = hit.getY();
                        int pageNum = hit.getPageNum();
                        logger.info("字段：" + key + " 定位至 第 " + pageNum + " 页  x:" + x + "---y:" + y);

                        if (songFont == null) {
                            songFont = loadSongFont();
                        }
                        PdfContentByte pageCanvas = pdfStamper.getOverContent(pageNum);
                        pageCanvas.setFontAndSize(songFont, 12);

                        // Cover the placeholder.
                        pageCanvas.saveState();
                        pageCanvas.setColorFill(BaseColor.RED);
                        pageCanvas.rectangle(x, y + 1, 85, 13);
                        pageCanvas.fill();
                        pageCanvas.restoreState();

                        // Write the replacement over the covered area.
                        pageCanvas.beginText();
                        pageCanvas.setTextMatrix(x, y + 2);
                        pageCanvas.showText(replacement.getValue());
                        pageCanvas.endText();
                    }
                }
            } finally {
                pdfStamper.close();
            }
        } finally {
            pdfReader.close();
        }
    }

    /** Loads the font bundled under {@link #SONG_FONT_RESOURCE}. */
    private static BaseFont loadSongFont() throws DocumentException, IOException {
        java.net.URL fontUrl = PdfReplacer.class.getResource(SONG_FONT_RESOURCE);
        if (fontUrl == null) {
            throw new FileNotFoundException("Font resource " + SONG_FONT_RESOURCE + " not found on the classpath");
        }
        // An explicit charset replaces the deprecated platform-default decode(String).
        String fontPath = URLDecoder.decode(fontUrl.getFile(), "UTF-8");
        return BaseFont.createFont(fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
    }

    /**
     * Manual test entry point.
     */
    public static void main(String[] args) throws Exception {
        String src = "E:\\CA\\入职合同模版样例.pdf";
        String dest = "E:\\CA\\out.pdf";

        Map<String, String> replaceMap = new HashMap<>();
        replaceMap.put("[$合同编号$]", "ZR-20181009-00000164");
        replaceMap.put("[$乙方姓名$]", "TroubleA");
        replaceMap.put("[$签字日期$]", "2018/10/10 11:24:30");
        replaceMap.put("[$终止日期$]", "2018/10/10 end");

        // Make sure the output directory exists before the PDF is written.
        File outFile = new File(dest);
        File parentDir = outFile.getParentFile();
        if (parentDir != null && !parentDir.exists()) {
            parentDir.mkdirs();
        }
        new PdfReplacer().manipulatePdf1(src, dest, replaceMap);
    }
}

package com.sinosoft.lis.utils;

/**
 * A rectangular area of a PDF that is to be replaced, identified by an alias.
 * Width defaults to 12 and height to 2; x and y stay {@code null} until set.
 */
public class ReplaceRegion {

    /** Alias identifying this region. */
    private String aliasName;
    /** Horizontal position of the region. */
    private Float x;
    /** Vertical position of the region. */
    private Float y;
    /** Width of the region. */
    private Float w;
    /** Height of the region. */
    private Float h;
    // NOTE(review): the name suggests a page number or page count; the meaning is not
    // established by the visible code - confirm against callers.
    private int pageSize;

    /**
     * @param aliasName alias identifying the region
     */
    public ReplaceRegion(String aliasName) {
        this.aliasName = aliasName;
        this.w = 12f;
        this.h = 2f;
    }

    /** Returns the alias of this region. */
    public String getAliasName() {
        return this.aliasName;
    }

    public void setAliasName(String aliasName) {
        this.aliasName = aliasName;
    }

    public Float getX() {
        return this.x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    public Float getY() {
        return this.y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    public Float getW() {
        return this.w;
    }

    public void setW(Float w) {
        this.w = w;
    }

    public Float getH() {
        return this.h;
    }

    public void setH(Float h) {
        this.h = h;
    }

    public int getPageSize() {
        return this.pageSize;
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }
}

package com.sinosoft.lis.utils;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Looks up the position of given texts inside a PDF by running a
 * {@code PositionRenderListener} over the content of its pages.
 */
public class PdfPositionParse {

    private PdfReader reader;
    /** Texts whose position is to be looked up. */
    private List<String> findText = new ArrayList<String>();
    private PdfReaderContentParser parser;
    /** Whether {@link #parse()} closes the reader; false for a reader supplied by the caller. */
    private boolean needClose = true;

    /**
     * @param fileName path of the PDF to parse
     */
    public PdfPositionParse(String fileName) throws IOException {
        init(readFully(fileName));
    }

    /**
     * @param bytes content of the PDF to parse
     */
    public PdfPositionParse(byte[] bytes) throws IOException {
        init(bytes);
    }

    /**
     * Uses a reader owned by the caller; it is not closed when parsing ends.
     *
     * @param reader reader of the PDF to parse
     */
    public PdfPositionParse(PdfReader reader) {
        this.reader = reader;
        parser = new PdfReaderContentParser(reader);
        needClose = false;
    }

    /** Adds a text whose position is to be looked up. */
    public void addFindText(String text) {
        this.findText.add(text);
    }

    private void init(byte[] bytes) throws IOException {
        reader = new PdfReader(bytes);
        parser = new PdfReaderContentParser(reader);
    }

    /**
     * Reads a whole file into memory. The stream is closed by try-with-resources, so a
     * failed open surfaces as the original exception instead of a NullPointerException
     * from the cleanup code, and the read loop does not depend on {@code available()}.
     */
    private static byte[] readFully(String fileName) throws IOException {
        try (FileInputStream in = new FileInputStream(fileName)) {
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /**
     * Runs the listener over every page of the document and returns what it collected.
     * The reader is closed afterwards unless it was supplied by the caller.
     *
     * @return the listener's result, keyed by the searched text
     * @throws NullPointerException when no text to look up has been added
     * @throws IOException
     */
    public Map<String, ReplaceRegion> parse() throws IOException {
        try {
            if (this.findText.isEmpty()) {
                throw new NullPointerException("没有须要查找的文本");
            }
            PositionRenderListener listener = new PositionRenderListener(this.findText);
            // Walk all pages instead of a hard-coded page 6, which failed on shorter
            // documents and ignored every other page.
            // NOTE(review): the result carries no page number, so callers cannot tell
            // which page a region was found on.
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                parser.processContent(page, listener);
            }
            return listener.getResult();
        } finally {
            if (reader != null && needClose) {
                reader.close();
            }
        }
    }
}

package com.sinosoft.lis.utils;

import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Render listener that records the region (x, y, w, h) of each rendered text chunk
 * that equals one of the searched texts.
 *
 * <p>Matching is per chunk: a searched text that the PDF renders as several chunks is
 * not found.
 */
public class PositionRenderListener implements RenderListener{

    /** Texts to look for. */
    private List<String> findText;
    /** Region height used when the chunk's bounding box reports a height of zero. */
    private float defaultH;
    /** Correction applied to the region's y position for cases where the text is not fully covered. */
    private float fixHeight;
    /** Regions found so far, keyed by searched text. */
    private Map<String, ReplaceRegion> result = new HashMap<String, ReplaceRegion>();

    public PositionRenderListener(List<String> findText, float defaultH,float fixHeight) {
        this.findText = findText;
        this.defaultH = defaultH;
        this.fixHeight = fixHeight;
    }

    /** Uses a default height of 12 and a y correction of -1. */
    public PositionRenderListener(List<String> findText) {
        this(findText, 12, -1);
    }

    @Override
    public void beginTextBlock() {
        // nothing to do
    }

    @Override
    public void endTextBlock() {
        // nothing to do
    }

    @Override
    public void renderImage(ImageRenderInfo imageInfo) {
        // images are irrelevant for text lookup
    }

    /**
     * Records the region of the chunk when its text equals a searched text. This method
     * used to be empty, so no region was ever recorded. Only the first occurrence of each
     * searched text is kept.
     */
    @Override
    public void renderText(TextRenderInfo textInfo) {
        String text = textInfo.getText();
        if (text == null) {
            return;
        }
        for (String keyWord : findText) {
            if (!text.equals(keyWord) || result.get(keyWord) != null) {
                continue;
            }
            float height = (float) textInfo.getDescentLine().getBoundingRectange().getHeight();
            ReplaceRegion region = new ReplaceRegion(keyWord);
            region.setH(height == 0 ? defaultH : height);
            region.setW((float) textInfo.getDescentLine().getBoundingRectange().getWidth());
            region.setX((float) textInfo.getDescentLine().getBoundingRectange().getX());
            // NOTE(review): applying fixHeight as a shift of y is an interpretation of the
            // field's original comment - confirm the intended direction.
            region.setY((float) textInfo.getDescentLine().getBoundingRectange().getY() - fixHeight);
            result.put(keyWord, region);
        }
    }

    /**
     * Returns the regions found, with a {@code null} entry for every searched text that
     * was not found.
     */
    public Map<String, ReplaceRegion> getResult() {
        for (String key : findText) {
            if (!this.result.containsKey(key)) {
                this.result.put(key, null);
            }
        }
        return this.result;
    }
}

还有用来寻找替换位置（x，y）的两个类：

package com.sinosoft.lis.utils;

import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.util.ArrayList;
import java.util.List;

import static com.sinosoft.lis.utils.KeyWordPositionListener.findKeywordItems;

/**
 * A text chunk found on a PDF page together with its position and the size of its page;
 * also offers the static entry points for searching a keyword in a PDF.
 */
public class MatchItem {
    private Integer pageNum;
    private Float x;
    private Float y;
    private Float pageWidth;
    private Float pageHeight;
    private String content;

    public MatchItem() {
    }

    public MatchItem(String content) {
        this.content = content;
    }

    public Integer getPageNum() {
        return pageNum;
    }

    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    public Float getX() {
        return x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    public Float getY() {
        return y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    public Float getPageWidth() {
        return pageWidth;
    }

    public void setPageWidth(Float pageWidth) {
        this.pageWidth = pageWidth;
    }

    public Float getPageHeight() {
        return pageHeight;
    }

    public void setPageHeight(Float pageHeight) {
        this.pageHeight = pageHeight;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    @Override
    public String toString() {
        return "MatchItem [pageNum=" + pageNum + ", x=" + x + ", y=" + y
                + ", pageWidth=" + pageWidth + ", pageHeight=" + pageHeight
                + ", content=" + content + "]";
    }

    /**
     * Searches every page of the PDF at {@code fileName} for {@code keyword}.
     *
     * @param fileName path of the PDF to search
     * @param keyword  text to look for
     * @return the matches of all pages, in page order
     */
    public static List<MatchItem> matchPage(String fileName, String keyword) throws Exception {
        List<MatchItem> items = new ArrayList<MatchItem>();
        PdfReader reader = new PdfReader(fileName);
        try {
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                items.addAll(matchPage(reader, page, keyword));
            }
        } finally {
            // The reader is opened here, so it is released here even when a page fails to parse.
            reader.close();
        }
        return items;
    }

    /**
     * Searches a single page for {@code keyword}.
     *
     * @param reader     reader of the PDF; left open
     * @param pageNumber 1-based number of the page to search
     * @param keyword    text to look for
     * @return the matches found on that page
     */
    @SuppressWarnings("unchecked") // findKeywordItems only ever returns MatchItem elements
    public static List<MatchItem> matchPage(PdfReader reader, Integer pageNumber, String keyword) throws Exception {
        KeyWordPositionListener renderListener = new KeyWordPositionListener();
        renderListener.setKeyword(keyword);
        renderListener.setPageNumber(pageNumber);
        renderListener.setCurPageSize(reader.getPageSize(pageNumber));

        PdfReaderContentParser parse = new PdfReaderContentParser(reader);
        parse.processContent(pageNumber, renderListener);
        return findKeywordItems(renderListener, keyword);
    }

}

package com.sinosoft.lis.utils;

import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
import com.sinosoft.lis.comm.PubFun;

import java.util.ArrayList;
import java.util.List;

/**
 * Created by Administrator on 2018/10/9.
 */
public class KeyWordPositionListener implements RenderListener {
    private List<MatchItem> matches = new ArrayList<MatchItem>();
    private List<MatchItem> allItems = new ArrayList<MatchItem>();
    private Rectangle curPageSize;

    /**
     * 匹配的关键字
     */
    private String keyword;
    /**
     * 匹配的当前页
     */
    private Integer pageNumber;

    public void beginTextBlock() {
        //do nothing
    }

    public void renderText(TextRenderInfo renderInfo) {
        String content = renderInfo.getText();
        content = content.replace("<", "").replace("《", "").replace("(", "").replace("（", "").replace("\"", "").replace("'", "")
                .replace(">", "").replace("》", "").replace(")", "").replace("）", "").replace("、", "").replace(".", "")
                .replace("：", "").replace(":", "").replace(" ", "");
        Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange();
        MatchItem item = new MatchItem();
        item.setContent(content);
        item.setPageNum(pageNumber);
        item.setPageWidth(curPageSize.getWidth());
        item.setPageHeight(curPageSize.getHeight());
        item.setX((float)textRectangle.getX());
        item.setY((float)textRectangle.getY());
        if(!PubFun.isEmpty(content)){
            if(content.equalsIgnoreCase(keyword)) {
                matches.add(item);
            }
        }else{
            item.setContent("空字符串");
        }
        allItems.add(item);//先保存全部的项
    }

    public void endTextBlock() {
        //do nothing
    }

    public void renderImage(ImageRenderInfo renderInfo) {
        //do nothing
    }

    /**
     * 设置须要匹配的当前页
     * @param pageNumber
     */
    public void setPageNumber(Integer pageNumber) {
        this.pageNumber = pageNumber;
    }

    /**
     * 设置须要匹配的关键字，忽略大小写
     * @param keyword
     */
    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /**
     * 返回匹配的结果列表
     * @return
     */
    public List<MatchItem> getMatches() {
        return matches;
    }

    void setCurPageSize(Rectangle rect) {
        this.curPageSize = rect;
    }

    public List<MatchItem> getAllItems() {
        return allItems;
    }

    public void setAllItems(List<MatchItem> allItems) {
        this.allItems = allItems;
    }

    public static List findKeywordItems(KeyWordPositionListener renderListener,String keyword){
        //先判断本页中是否存在关键词
        List<MatchItem> allItems = renderListener.getAllItems();//全部块LIST
        StringBuffer sbtemp = new StringBuffer("");
        for(MatchItem item : allItems){//将一页中全部的块内容链接起来组成一个字符串。
            sbtemp.append(item.getContent());
        }
        if(sbtemp.toString().indexOf(keyword) == -1){//一页组成的字符串没有关键词，直接return
            return renderListener.getMatches();
        }
        //第一种状况：关键词与块内容彻底匹配的项
        List matches = renderListener.getMatches();
        //第二种状况：多个块内容拼成一个关键词，则一个一个来匹配，组装成一个关键词
        sbtemp = new StringBuffer("");
        List tempItems = new ArrayList();
        for(MatchItem item : allItems){
            //1，关键词中存在某块 2，拼装的连续的块=关键词  3，避开某个块彻底匹配关键词
            //关键词 中国移动  而块为 中 ，国，移动
            //关键词 中华人民  而块为中，华人民共和国  这种状况解决不了，也不容许存在
            if(keyword.indexOf(item.getContent()) != -1 && !keyword.equals(item.getContent())){
                tempItems.add(item);
                sbtemp.append(item.getContent());
                if(keyword.indexOf(sbtemp.toString()) == -1){//若是暂存的字符串和关键词 再也不匹配时
                    sbtemp = new StringBuffer(item.getContent());
                    tempItems.clear();
                    tempItems.add(item);
                }
                if(sbtemp.toString().equalsIgnoreCase(keyword)){//暂存的字符串正好匹配到关键词时
                    MatchItem tmpitem = getRightItem(tempItems, keyword);
                    if(tmpitem != null){
                        matches.add(tmpitem);//获得匹配的项
                    }
                    sbtemp = new StringBuffer("");//清空暂存的字符串
                    tempItems.clear();//清空暂存的LIST
                    continue;//继续查找
                }
            }else{//若是找不到则清空
                sbtemp = new StringBuffer("");
                tempItems.clear();
            }
        }
        //第三种状况：关键词存在块中
        for(MatchItem item : allItems){
            if(item.getContent().indexOf(keyword) != -1 && !keyword.equals(item.getContent())){
                matches.add(item);
            }
        }
        return matches;
    }

    public static MatchItem getRightItem(List<MatchItem> tempItems,String keyword){
        for(MatchItem item:tempItems){
            if(keyword.indexOf(item.getContent()) != -1 && !keyword.equals(item.getContent())){
                return item;
            }
        } return null;
    }

    public KeyWordPositionListener() {
    }
}

注：由于我的需求是替换文中的固定字段，而且字段长度由我这边控制，都是4个字，因此我固定了一个宽度85、高度13的红色矩形（对应代码中的 canvas.rectangle(x, y + 1, 85, 13)），在查找到文字的x，y位置之后，直接将该矩形覆盖到pdf中，然后再在矩形上重新写字