24 January 2020

Apache Tika magic header

An easy way to identify file types from their content rather than their extension (similar to the Unix `file` command):


package cl.ejemplo.mimetypes;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
/**
 * Demonstrates content-based MIME type detection with Apache Tika.
 *
 * <p>Each sample file is loaded into memory and its bytes are handed to
 * {@code Tika.detect}. Output recorded by the author (the reported type
 * follows the file's content, not its extension):
 *
 * <pre>
 * java-duke-logo.jpg > image/jpeg
 * java-duke-logo.pdf > image/jpeg
 * apache-tika.pdf > application/pdf
 * apache-tika.jpg > application/pdf
 * </pre>
 *
 * @author German
 */
public class TestMagicHeader {

    /**
     * Detects and prints the MIME type of each sample file.
     *
     * <p>Files that cannot be read are skipped; {@link #lee(String)} has
     * already reported the failure by the time this method sees them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Tika tika = new Tika();
        // TODO: this should be a relative path rather than an absolute one
        String path = "/Users/devwebcl/development/files/";
        String[] files = {
            "java-duke-logo.jpg", // jpeg
            "java-duke-logo.pdf", // jpeg
            "apache-tika.pdf",    // pdf
            "apache-tika.jpg"};   // pdf
        for (String file : files) {
            byte[] content = lee(path + file);
            if (content == null) {
                // Reading failed; do not hand a null array to the detector.
                continue;
            }
            String mimeType = tika.detect(content);
            System.out.println(file + " --> " + mimeType);
        }
    }

    /**
     * Reads a whole file into a byte array.
     *
     * @param input path of the file to read
     * @return the file's bytes, or {@code null} if the file could not be
     *         opened, read, or closed (the exception is printed to standard output)
     */
    public static byte[] lee(String input) {
        File file = new File(input);
        try (FileInputStream fin = new FileInputStream(file)) {
            byte[] fileContent = new byte[(int) file.length()];
            int offset = 0;
            // A single read() may return fewer bytes than requested, so keep
            // reading until the buffer is full or the stream ends.
            while (offset < fileContent.length) {
                int count = fin.read(fileContent, offset, fileContent.length - offset);
                if (count < 0) {
                    break;
                }
                offset += count;
            }
            if (offset < fileContent.length) {
                // The stream ended before the reported length was reached;
                // return only the bytes actually read, without zero padding.
                byte[] actual = new byte[offset];
                System.arraycopy(fileContent, 0, actual, 0, offset);
                return actual;
            }
            return fileContent;
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            System.out.println(e);
        }
        return null;
    }
}

Blog Archive

Disclaimer

The views expressed on this blog are my own and do not necessarily reflect the views of Oracle.