今天写了一段获取MIME类型的代码,对比使用org.apache.tika和net.sf.jmimemagic两种方式。
JDK版本是1.8。
<!-- Initial build file: tika-core and jmimemagic are both declared, with no exclusions. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>hui</groupId>
  <artifactId>TestWithMaven</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>TestWithMaven</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>joda-time</groupId>
      <artifactId>joda-time</artifactId>
      <version>2.9.2</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.13</version>
    </dependency>
    <dependency>
      <groupId>org.apache.ibatis</groupId>
      <artifactId>ibatis-core</artifactId>
      <version>3.0</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.4.0</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.38</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
    </dependency>
    <dependency>
      <groupId>org.hamcrest</groupId>
      <artifactId>hamcrest-core</artifactId>
      <version>1.3</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context-support</artifactId>
      <version>4.2.2.RELEASE</version>
    </dependency>
    <!-- MIME detection library #1 -->
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-core</artifactId>
      <version>1.13</version>
    </dependency>
    <!-- MIME detection library #2 -->
    <dependency>
      <groupId>net.sf.jmimemagic</groupId>
      <artifactId>jmimemagic</artifactId>
      <version>0.1.4</version>
    </dependency>
    <dependency>
      <groupId>xml-apis</groupId>
      <artifactId>xmlParserAPIs</artifactId>
      <version>2.0.2</version>
    </dependency>
  </dependencies>
</project>
package mime; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.FileNameMap; import java.net.URLConnection; import java.net.URLEncoder; import javax.activation.MimetypesFileTypeMap; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import net.sf.jmimemagic.Magic; import net.sf.jmimemagic.MagicException; import net.sf.jmimemagic.MagicMatch; import net.sf.jmimemagic.MagicMatchNotFoundException; import net.sf.jmimemagic.MagicParseException; public class FileUtils { public static String getMimeTypeByFileTypeMap(String path) { MimetypesFileTypeMap mimetypesFileTypeMap = new MimetypesFileTypeMap(); // 默认没有pdf的,若是传入pdf的,会默认application/octet-stream,也没有application/xml mimetypesFileTypeMap.addMimeTypes("application/pdf pdf"); File f = new File(path); return mimetypesFileTypeMap.getContentType(f); } public static String getMimeTypeByFileTypeMap2(String path) { String mimeType = null; int idx = path.lastIndexOf('.'); if (idx == -1) { mimeType = "application/octet-stream"; } else { String fileExtension = path.substring(idx).toLowerCase(); if (fileExtension.equals(".html")) { mimeType = "text/html"; } else if (fileExtension.equals(".css")) { mimeType = "text/css"; } else if (fileExtension.equals(".js")) { mimeType = "application/javascript"; } else if (fileExtension.equals(".gif")) { mimeType = "image/gif"; } else if (fileExtension.equals(".png")) { mimeType = "image/png"; } else if (fileExtension.equals(".txt")) { mimeType = "text/plain"; } else if (fileExtension.equals(".xml")) { mimeType = "application/xml"; } else if 
(fileExtension.equals(".json")) { mimeType = "application/json"; } else { MimetypesFileTypeMap mimeTypesMap = new MimetypesFileTypeMap(); mimeType = mimeTypesMap.getContentType(path); } } return mimeType; } public static String getMimeTypeBySpring(String path) { ConfigurableMimeFileTypeMap mimeMap = new ConfigurableMimeFileTypeMap(); // 没有application/xml String contentType = mimeMap.getContentType(path); return contentType; } public static String getMimeByFileNameMap(String fileUrl) { FileNameMap fileNameMap = URLConnection.getFileNameMap(); try { String mimeType = fileNameMap .getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8")); if (mimeType == null) { mimeType = "application/octet-stream"; } return mimeType; } catch (UnsupportedEncodingException e) { e.printStackTrace(); return ""; } } public static String getMimeByTika(String fileUrl) { String mimeType = null; try { ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); metadata.add(Metadata.CONTENT_ENCODING, "utf-8"); metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl); // Parser parser = new DefaultParser();获取不到MIME类型 Parser parser = new AutoDetectParser(); ParseContext context = new ParseContext(); context.set(Parser.class, parser); parser.parse(new FileInputStream(fileUrl), contenthandler, metadata, context); for (String name : metadata.names()) { System.out.println(name); } mimeType = metadata.get(Metadata.CONTENT_TYPE); } catch (IOException | TikaException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } return mimeType; } public static String getMimeByJMimeMagic(String fileUrl) { MagicMatch match; try { match = Magic.getMagicMatch(new File(fileUrl), true); return match.getMimeType(); } catch (MagicParseException | MagicMatchNotFoundException | MagicException e) { e.printStackTrace(); } return ""; } }
package mime;

/**
 * Command-line driver that runs every MIME detection method of
 * {@link FileUtils} against one sample file and prints the results.
 */
public class MIMETest {

    public static void main(String[] args) {
        // File under the src directory (other sample tried: "funds.properties").
        String fileName = "createPerson.sql";
        // To test a resource from the src directory instead: String path = getPath(fileName);

        // Absolute path. Other samples tried:
        //   E:/test/process.txt, E:/test/02.jpg, E:/Anheng/receiver-design.pdf,
        //   E:/api/dom4j.chm, E:/eclipse/ajax/pom.xml, E:/test/person.json,
        //   E:/test/file.java, E:/test/static.ftl, E:/test/rest.jerseySpring.war,
        //   E:/test/upload/myeclipse.exe
        String path = "E:/test/upload/myeclipse.ini";

        report("getMimeTypeByFileTypeMap", path, FileUtils.getMimeTypeByFileTypeMap(path));
        report("getMimeTypeByFileTypeMap2", path, FileUtils.getMimeTypeByFileTypeMap2(path));
        report("getMimeTypeBySpring", path, FileUtils.getMimeTypeBySpring(path));
        report("getMimeByFileNameMap", path, FileUtils.getMimeByFileNameMap(path));

        /*
         * Observations for Tika (it also validates that the path exists):
         *   .properties -> text/x-java-properties; the methods above only return application/octet-stream
         *   .sql        -> text/x-sql; the methods above only return application/octet-stream
         *   .json       -> application/json; the methods above return application/octet-stream
         *                  unless that type has been added explicitly
         *   .java       -> text/x-java-source; the first two methods return application/octet-stream,
         *                  the latter two return text/plain
         *   .ftl        -> text/html; the methods above return application/octet-stream
         *   .war        -> application/x-tika-java-web-archive; the methods above return application/octet-stream
         *   .exe        -> application/x-dosexec; the methods above return application/octet-stream
         *   .ini        -> text/x-ini; the methods above return application/octet-stream
         */
        report("getMimeByTika", path, FileUtils.getMimeByTika(path));
        report("getMimeByJMimeMagic", path, FileUtils.getMimeByJMimeMagic(path));
    }

    /** Prints one result line in the form "&lt;method&gt;: Mime Type of &lt;path&gt; is &lt;type&gt;". */
    private static void report(String method, String path, String mimeType) {
        System.out.println(method + ": Mime Type of " + path + " is " + mimeType);
    }

    /**
     * Builds the absolute path of a file located in {@code src/main/resources}
     * below the current working directory.
     */
    private static String getPath(String fileName) {
        String separator = System.getProperty("file.separator");
        StringBuilder location = new StringBuilder();
        location.append(System.getProperty("user.dir"));
        location.append(separator).append("src");
        location.append(separator).append("main");
        location.append(separator).append("resources");
        location.append(separator).append(fileName);
        return location.toString();
    }
}
本来只测试Tika,即不加入jmimemagic的依赖时,测试正常;后来加入jmimemagic依赖后,报错如下:
Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry
    at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580)
    at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69)
    at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218)
    at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341)
    at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51)
    at mime.FileUtils.getMimeByTika(FileUtils.java:103)
    at mime.MIMETest.main(MIMETest.java:48)
Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration
    at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126)
    at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64)
    at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93)
    at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170)
    at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577)
    ... 6 more
Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing
    at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source)
    at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source)
    at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source)
    at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source)
    at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source)
    at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119)
    ... 10 more
按照错误提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();处打断点,对比加入net.sf.jmimemagic依赖前后的情况来查找异常原因,发现了下面一个现象:
加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的子类是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,该类在jdk自带jar包
rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如图:
而加net.sf.jmimemagic后,javax.xml.parsers.SAXParserFactory的子类变成了
xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,
如图:
该类在setFeature()时抛出了异常。即类路径上有两个同名的实现类SAXParserFactoryImpl,导致冲突并报了异常。故我们将xercesImpl-2.2.4.0.jar排除掉即可,修改后的pom.xml如下所示:
<!-- Fixed build file: same dependencies, with xercesImpl excluded from jmimemagic. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>hui</groupId>
  <artifactId>TestWithMaven</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>TestWithMaven</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>joda-time</groupId>
      <artifactId>joda-time</artifactId>
      <version>2.9.2</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.13</version>
    </dependency>
    <dependency>
      <groupId>org.apache.ibatis</groupId>
      <artifactId>ibatis-core</artifactId>
      <version>3.0</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.4.0</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.38</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
    </dependency>
    <dependency>
      <groupId>org.hamcrest</groupId>
      <artifactId>hamcrest-core</artifactId>
      <version>1.3</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context-support</artifactId>
      <version>4.2.2.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-core</artifactId>
      <version>1.13</version>
    </dependency>
    <dependency>
      <groupId>net.sf.jmimemagic</groupId>
      <artifactId>jmimemagic</artifactId>
      <version>0.1.4</version>
      <!-- Exclude the transitive xercesImpl so that its SAXParserFactoryImpl no longer
           replaces the JDK's own implementation (Tika's setFeature call fails on it). -->
      <exclusions>
        <exclusion>
          <groupId>xerces</groupId>
          <artifactId>xercesImpl</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>xml-apis</groupId>
      <artifactId>xmlParserAPIs</artifactId>
      <version>2.0.2</version>
    </dependency>
  </dependencies>
</project>
至此,再运行,则各方法都不再抛异常。