You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by ju...@cox.net on 2003/04/24 08:59:25 UTC

Faster autodetect of mime-type upon "svn add"

The attached patch teaches svn_io_detect_mimetype to guess mime
types from suffixes of filenames.  This allows the user to say
"svn add foo.html bar.jpg" and svn:mime-type is set for these
files.  The original code in r5718 would leave foo.html typeless
and bar.jpg is given application/octet-stream.

It currently uses a table of suffixes and corresponding mime
types (built from the /etc/mime.types file that Debian GNU/Linux
ships as part of the mime-support package).  Although the patch
should be usable as it is, one improvement I would like to see is
to get this table out of the executable binary and instead read
it from an external file.  Further, maybe the user's configuration
file should be able to name which file to use as the table.  I can
surely implement that for Unix, but the problem I am having is
that I do not know how the Subversion project arranges this kind
of thing portably (I am new to touching Subversion code).  I do
not see a supplemental file that is read at runtime in the current
code to model this after (well, the Subversion configuration file
is such a file, but that is kind of special), so I do not know how
to make the resulting code portable across platforms, or how to
make the modified build process install that mime.types file in
the correct place.

Help, suggestions, and/or code takeovers are all welcome.


Index: subversion/libsvn_subr/io.c
===================================================================
--- subversion/libsvn_subr/io.c	(revision 5718)
+++ subversion/libsvn_subr/io.c	(working copy)
@@ -19,6 +19,7 @@
 
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <assert.h>
 
 #include <apr_lib.h>
@@ -1301,7 +1302,285 @@
   return SVN_NO_ERROR;
 }
 
+/* A built-in table to map filename suffixes to mime types.
+ * We might want to have an external file for this.
+ *
+ * The elements of the table *MUST* be sorted by suffix, since
+ * the code that uses this table uses bsearch(3) from <stdlib.h>.
+ * It should not have the usual NULL sentinel at the end.  The caller
+ * uses the size of the array to know exactly how big the table is.
+ */
+static struct mime_types_table {
+  char const *suffix;
+  char const *mime_type;
+} mime_types[] = {
+  {"323", "text/h323"},
+  {"ai", "application/postscript"},
+  {"aif", "audio/x-aiff"},
+  {"aifc", "audio/x-aiff"},
+  {"aiff", "audio/x-aiff"},
+  {"art", "image/x-jg"},
+  {"asc", "text/plain"},
+  {"asf", "video/x-ms-asf"},
+  {"asx", "video/x-ms-asf"},
+  {"au", "audio/basic"},
+  {"avi", "video/x-msvideo"},
+  {"bat", "application/x-msdos-program"},
+  {"bcpio", "application/x-bcpio"},
+  {"bin", "application/octet-stream"},
+  {"bmp", "image/x-ms-bmp"},
+  {"book", "application/x-maker"},
+  {"cat", "application/vnd.ms-pki.seccat"},
+  {"cdf", "application/x-cdf"},
+  {"cdr", "image/x-coreldraw"},
+  {"cdt", "image/x-coreldrawtemplate"},
+  {"cdy", "application/vnd.cinderella"},
+  {"chrt", "application/x-kchart"},
+  {"class", "application/x-java-vm"},
+  {"cls", "text/x-tex"},
+  {"com", "application/x-msdos-program"},
+  {"cpio", "application/x-cpio"},
+  {"cpt", "image/x-corelphotopaint"},
+  {"crl", "application/x-pkcs7-crl"},
+  {"crt", "application/x-x509-ca-cert"},
+  {"csm", "application/cu-seeme"},
+  {"css", "text/css"},
+  {"csv", "text/comma-separated-values"},
+  {"cu", "application/cu-seeme"},
+  {"dcr", "application/x-director"},
+  {"deb", "application/x-debian-package"},
+  {"dif", "video/x-dv"},
+  {"diff", "text/plain"},
+  {"dir", "application/x-director"},
+  {"djv", "image/x-djvu"},
+  {"djvu", "image/x-djvu"},
+  {"dl", "video/dl"},
+  {"dll", "application/x-msdos-program"},
+  {"dms", "application/x-dms"},
+  {"doc", "application/msword"},
+  {"dot", "application/msword"},
+  {"dv", "video/x-dv"},
+  {"dvi", "application/x-dvi"},
+  {"dxr", "application/x-director"},
+  {"eps", "application/postscript"},
+  {"etx", "text/x-setext"},
+  {"exe", "application/x-msdos-program"},
+  {"ez", "application/andrew-inset"},
+  {"fb", "application/x-maker"},
+  {"fbdoc", "application/x-maker"},
+  {"fig", "application/x-xfig"},
+  {"fli", "video/fli"},
+  {"fm", "application/x-maker"},
+  {"frame", "application/x-maker"},
+  {"frm", "application/x-maker"},
+  {"gcf", "application/x-graphing-calculator"},
+  {"gf", "application/x-tex-gf"},
+  {"gif", "image/gif"},
+  {"gl", "video/gl"},
+  {"gnumeric", "application/x-gnumeric"},
+  {"gsf", "application/x-font"},
+  {"gsm", "audio/x-gsm"},
+  {"gtar", "application/x-gtar"},
+  {"hdf", "application/x-hdf"},
+  {"hqx", "application/mac-binhex40"},
+  {"hta", "application/hta"},
+  {"htm", "text/html"},
+  {"html", "text/html"},
+  {"ica", "application/x-ica"},
+  {"ice", "x-conference/x-cooltalk"},
+  {"ief", "image/ief"},
+  {"iges", "model/iges"},
+  {"igs", "model/iges"},
+  {"iii", "application/x-iphone"},
+  {"ins", "application/x-internet-signup"},
+  {"isp", "application/x-internet-signup"},
+  {"jar", "application/x-java-archive"},
+  {"jng", "image/x-jng"},
+  {"jnlp", "application/x-java-jnlp-file"},
+  {"jpe", "image/jpeg"},
+  {"jpeg", "image/jpeg"},
+  {"jpg", "image/jpeg"},
+  {"js", "application/x-javascript"},
+  {"kar", "audio/midi"},
+  {"kil", "application/x-killustrator"},
+  {"kpr", "application/x-kpresenter"},
+  {"kpt", "application/x-kpresenter"},
+  {"ksp", "application/x-kspread"},
+  {"kwd", "application/x-kword"},
+  {"kwt", "application/x-kword"},
+  {"latex", "application/x-latex"},
+  {"lha", "application/x-lha"},
+  {"lsf", "video/x-la-asf"},
+  {"lsx", "video/x-la-asf"},
+  {"ltx", "text/x-tex"},
+  {"lzh", "application/x-lzh"},
+  {"lzx", "application/x-lzx"},
+  {"m3u", "audio/x-mpegurl"},
+  {"maker", "application/x-maker"},
+  {"man", "application/x-troff-man"},
+  {"mdb", "application/msaccess"},
+  {"me", "application/x-troff-me"},
+  {"mesh", "model/mesh"},
+  {"mid", "audio/midi"},
+  {"midi", "audio/midi"},
+  {"mif", "application/x-mif"},
+  {"mml", "text/mathml"},
+  {"mng", "video/x-mng"},
+  {"moc", "text/x-moc"},
+  {"mov", "video/quicktime"},
+  {"movie", "video/x-sgi-movie"},
+  {"mp2", "audio/mpeg"},
+  {"mp3", "audio/mpeg"},
+  {"mpe", "video/mpeg"},
+  {"mpeg", "video/mpeg"},
+  {"mpega", "audio/mpeg"},
+  {"mpg", "video/mpeg"},
+  {"mpga", "audio/mpeg"},
+  {"ms", "application/x-troff-ms"},
+  {"msh", "model/mesh"},
+  {"msi", "application/x-msi"},
+  {"mxu", "video/vnd.mpegurl"},
+  {"nb", "application/mathematica"},
+  {"nc", "application/x-netcdf"},
+  {"o", "application/x-object"},
+  {"oda", "application/oda"},
+  {"ogg", "application/x-ogg"},
+  {"old", "application/x-trash"},
+  {"oza", "application/x-oz-application"},
+  {"p7r", "application/x-pkcs7-certreqresp"},
+  {"pac", "application/x-ns-proxy-autoconfig"},
+  {"pat", "image/x-coreldrawpattern"},
+  {"pbm", "image/x-portable-bitmap"},
+  {"pcf", "application/x-font"},
+  {"pcf.Z", "application/x-font"},
+  {"pcx", "image/pcx"},
+  {"pdb", "chemical/x-pdb"},
+  {"pdf", "application/pdf"},
+  {"pfa", "application/x-font"},
+  {"pfb", "application/x-font"},
+  {"pgm", "image/x-portable-graymap"},
+  {"pgn", "application/x-chess-pgn"},
+  {"pgp", "application/pgp-signature"},
+  {"php", "application/x-httpd-php"},
+  {"php3", "application/x-httpd-php3"},
+  {"php3p", "application/x-httpd-php3-preprocessed"},
+  {"php4", "application/x-httpd-php4"},
+  {"phps", "application/x-httpd-php-source"},
+  {"pht", "application/x-httpd-php"},
+  {"phtml", "application/x-httpd-php"},
+  {"pk", "application/x-tex-pk"},
+  {"pl", "application/x-perl"},
+  {"pls", "audio/x-scpls"},
+  {"pm", "application/x-perl"},
+  {"png", "image/png"},
+  {"pnm", "image/x-portable-anymap"},
+  {"pot", "application/vnd.ms-powerpoint"},
+  {"ppm", "image/x-portable-pixmap"},
+  {"pps", "application/vnd.ms-powerpoint"},
+  {"ppt", "application/vnd.ms-powerpoint"},
+  {"prf", "application/pics-rules"},
+  {"ps", "application/postscript"},
+  {"psd", "image/x-photoshop"},
+  {"qt", "video/quicktime"},
+  {"qtl", "application/x-quicktimeplayer"},
+  {"ra", "audio/x-realaudio"},
+  {"ram", "audio/x-pn-realaudio"},
+  {"ras", "image/x-cmu-raster"},
+  {"rgb", "image/x-rgb"},
+  {"rm", "audio/x-pn-realaudio"},
+  {"roff", "application/x-troff"},
+  {"rpm", "audio/x-pn-realaudio-plugin"},
+  {"rtf", "text/rtf"},
+  {"rtx", "text/richtext"},
+  {"sct", "text/scriptlet"},
+  {"sd2", "audio/x-sd2"},
+  {"sda", "application/vnd.stardivision.draw"},
+  {"sdc", "application/vnd.stardivision.calc"},
+  {"sdd", "application/vnd.stardivision.impress"},
+  {"sdp", "application/vnd.stardivision.impress-packed"},
+  {"sds", "application/vnd.stardivision.chart"},
+  {"sdw", "application/vnd.stardivision.writer"},
+  {"ser", "application/x-java-serialized-object"},
+  {"sgl", "application/vnd.stardivision.writer-global"},
+  {"shar", "application/x-shar"},
+  {"sid", "audio/prs.sid"},
+  {"sik", "application/x-trash"},
+  {"silo", "model/mesh"},
+  {"sit", "application/x-stuffit"},
+  {"skd", "application/x-koan"},
+  {"skm", "application/x-koan"},
+  {"skp", "application/x-koan"},
+  {"skt", "application/x-koan"},
+  {"smd", "application/vnd.stardivision.mail"},
+  {"smf", "application/vnd.stardivision.math"},
+  {"smi", "application/smil"},
+  {"smil", "application/smil"},
+  {"snd", "audio/basic"},
+  {"spl", "application/x-futuresplash"},
+  {"src", "application/x-wais-source"},
+  {"stl", "application/vnd.ms-pki.stl"},
+  {"sty", "text/x-tex"},
+  {"sv4cpio", "application/x-sv4cpio"},
+  {"sv4crc", "application/x-sv4crc"},
+  {"svg", "image/svg+xml"},
+  {"svgz", "image/svg+xml"},
+  {"swf", "application/x-shockwave-flash"},
+  {"swfl", "application/x-shockwave-flash"},
+  {"t", "application/x-troff"},
+  {"tar", "application/x-tar"},
+  {"taz", "application/x-gtar"},
+  {"tex", "text/x-tex"},
+  {"texi", "application/x-texinfo"},
+  {"texinfo", "application/x-texinfo"},
+  {"text", "text/plain"},
+  {"tgz", "application/x-gtar"},
+  {"tif", "image/tiff"},
+  {"tiff", "image/tiff"},
+  {"tr", "application/x-troff"},
+  {"tsp", "application/dsptype"},
+  {"tsv", "text/tab-separated-values"},
+  {"txt", "text/plain"},
+  {"uls", "text/iuls"},
+  {"ustar", "application/x-ustar"},
+  {"vcd", "application/x-cdlink"},
+  {"vcf", "text/x-vcard"},
+  {"vcs", "text/x-vcalendar"},
+  {"vor", "application/vnd.stardivision.writer"},
+  {"vrm", "x-world/x-vrml"},
+  {"vrml", "x-world/x-vrml"},
+  {"wad", "application/x-doom"},
+  {"wav", "audio/x-wav"},
+  {"wbmp", "image/vnd.wap.wbmp"},
+  {"wbxml", "application/vnd.wap.wbxml"},
+  {"wk", "application/x-123"},
+  {"wml", "text/vnd.wap.wml"},
+  {"wmlc", "application/vnd.wap.wmlc"},
+  {"wmls", "text/vnd.wap.wmlscript"},
+  {"wmlsc", "application/vnd.wap.wmlscriptc"},
+  {"wp5", "application/wordperfect5.1"},
+  {"wrl", "x-world/x-vrml"},
+  {"wsc", "text/scriptlet"},
+  {"wz", "application/x-wingz"},
+  {"xbm", "image/x-xbitmap"},
+  {"xhtml", "text/html"},
+  {"xlb", "application/vnd.ms-excel"},
+  {"xls", "application/vnd.ms-excel"},
+  {"xml", "text/xml"},
+  {"xpm", "image/x-xpixmap"},
+  {"xsl", "text/xml"},
+  {"xwd", "image/x-xwindowdump"},
+  {"xyz", "chemical/x-xyz"},
+  {"zip", "application/zip"},
+};
 
+/* Bsearch callback to look up a given file suffix from the above table. */
+static int
+suffix_compare(const void *key_, const void *mime_types_table_elem_)
+{
+  return strcmp((const char *)key_, 
+		((struct mime_types_table*)mime_types_table_elem_)->suffix);
+}
+
 svn_error_t *
 svn_io_detect_mimetype (const char **mimetype,
                         const char *file,
@@ -1314,6 +1593,7 @@
   apr_status_t apr_err;
   unsigned char block[1024];
   apr_size_t amt_read = sizeof (block);
+  char const *suffix = strrchr(file, '.');
 
   /* Default return value is NULL. */
   *mimetype = NULL;
@@ -1326,6 +1606,19 @@
                               "Can't detect mimetype of non-file '%s'",
                               file);
 
+  /* The filename check can be done without opening it, so do that first. */
+  if (suffix) {
+    struct mime_types_table *t;
+    suffix++; /* skip over the '.' */
+    t = bsearch(suffix, mime_types,
+		sizeof(mime_types) / sizeof(mime_types[0]),
+		sizeof(mime_types[0]), suffix_compare);
+    if (t) {
+      *mimetype = t->mime_type;
+      return SVN_NO_ERROR;
+    }
+  }
+
   SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
 
   /* Read a block of data from FILE. */



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: Faster autodetect of mime-type upon "svn add"

Posted by Karl Fogel <kf...@newton.ch.collab.net>.
junkio@cox.net writes:
> The attached patch teaches svn_io_detect_mimetype to guess mime
> types from suffixes of filenames.  This allows the user to say
> "svn add foo.html bar.jpg" and svn:mime-type is set for these
> files.  The original code in r5718 would leave foo.html typeless
> and bar.jpg is given application/octet-stream.

You might want to also look at / annotate

   http://subversion.tigris.org/issues/show_bug.cgi?id=1233

which is a similar patch.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org