六月 28, 2017 11:01:56 下午 org.apache.pdfbox.contentstream.PDFStreamEngine operatorException
严重: Cannot read JPEG2000 image: Java Advanced Imaging (JAI) Image I/O Tools are not installed
原因:PDF 文件中包含的扫描件可能是 JPEG2000 格式的图片,PDFBox 在转换过程中需要 JAI Image I/O 的支持。
解决:在 Maven 的 pom.xml 中添加以下依赖:
<!-- JAI Image I/O core plus its JPEG2000 plugin: the log above reports that JPEG2000 images cannot be read because the JAI Image I/O Tools are not installed. -->
<dependency> <groupId>com.github.jai-imageio</groupId> <artifactId>jai-imageio-core</artifactId> <version>1.3.1</version> </dependency> <dependency> <groupId>com.github.jai-imageio</groupId> <artifactId>jai-imageio-jpeg2000</artifactId> <version>1.3.0</version> </dependency>
测试代码:
package com.demo.pdfreader;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

/**
 * PDFBox-based demo: renders every page of a PDF file to a PNG image.
 *
 * <p>Pages are written to {@code images/<pageNumber>.png} (1-based page numbers),
 * relative to the working directory.
 *
 * @author Administrator
 */
public class PdfBoxApp {

    /** PDF file to convert, resolved relative to the working directory. */
    private static final File file = new File("Eclipse插件开发(原书第3版).pdf");

    /** Directory that receives the rendered page images. */
    private static final File OUTPUT_DIR = new File("images");

    /** Scale factor passed to {@link PDFRenderer#renderImage(int, float)}. */
    private static final float RENDER_SCALE = 2.5f;

    /**
     * Loads the PDF, renders each page and saves it as a PNG.
     *
     * <p>A page that fails to render or save is reported and skipped; the
     * remaining pages are still processed.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Without the output directory every ImageIO.write call would fail.
        if (!OUTPUT_DIR.isDirectory() && !OUTPUT_DIR.mkdirs()) {
            System.err.println("Cannot create output directory: " + OUTPUT_DIR.getAbsolutePath());
            return;
        }

        // try-with-resources guarantees the document is closed, even on failure.
        try (PDDocument doc = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages(); // total number of pages

            // Process the document page by page.
            for (int i = 0; i < pageCount; i++) {
                BufferedImage image;
                try {
                    image = renderer.renderImage(i, RENDER_SCALE);
                } catch (IOException e) {
                    System.out.println(i + " parse error");
                    e.printStackTrace();
                    continue;
                }
                if (image == null) {
                    continue;
                }
                savePage(image, i + 1);
            }
        } catch (IOException e) {
            // Raised when the document cannot be loaded (or closed).
            e.printStackTrace();
        }
    }

    /** Writes one rendered page as a PNG into the output directory, reporting any failure. */
    private static void savePage(BufferedImage image, int pageNumber) {
        File target = new File(OUTPUT_DIR, pageNumber + ".png");
        try {
            // ImageIO.write returns false when no suitable writer is found.
            if (!ImageIO.write(image, "PNG", target)) {
                System.err.println("No PNG writer available, page " + pageNumber + " not saved");
            }
        } catch (IOException e) {
            System.err.println("Failed to save page " + pageNumber + " to " + target.getPath());
            e.printStackTrace();
        }
    }
}
<!-- Apache PDFBox: provides the org.apache.pdfbox classes (PDDocument, PDFRenderer) imported by the test code. -->
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.3</version> </dependency>