freebsd-ports/textproc/libtextcat/files/patch-src_constants.h
Thierry Thomas 3a00ffa77e Import patches (imported from OpenOffice.org 2.3 by Fedora, at least in
part). These patches, released under a BSD license, seem to improve the
accuracy of language detection, especially for languages that don't use a
Latin script.
2007-08-23 22:13:35 +00:00

45 lines
1.1 KiB
C

--- src/constants.h.orig Thu May 22 13:32:43 2003
+++ src/constants.h Thu Aug 23 22:47:07 2007
@@ -39,6 +39,8 @@
*/
#include <limits.h>
+#define _UTF8_
+
#define DESCRIPTION "out of place"
/* Reported matches are those fingerprints with a score less than best
@@ -59,14 +61,21 @@
/* Maximum number of n-grams in a fingerprint */
#define MAXNGRAMS 400
-/* Maximum size of an n-gram? */
-#define MAXNGRAMSIZE 5
+/* Maximum number of characters (symbols) in an n-gram */
+#define MAXNGRAMSYMBOL 5
+
+/* Maximum size of the string representing an n-gram (must be no smaller than the number of symbols) */
+#ifdef _UTF8_
+#define MAXNGRAMSIZE 20
+#else
+#define MAXNGRAMSIZE MAXNGRAMSYMBOL
+#endif
/* Which characters are not acceptable in n-grams? */
#define INVALID(c) (isspace((int)c) || isdigit((int)c))
/* Minimum size (in characters) for accepting a document */
-#define MINDOCSIZE 25
+#define MINDOCSIZE 6
/* Maximum penalty for missing an n-gram in fingerprint */
#define MAXOUTOFPLACE 400
@@ -75,5 +84,8 @@
#define TABLEPOW 13
#define MAXSCORE INT_MAX
+
+/* Where the fingerprint files are stored */
+#define DEFAULT_FINGERPRINTS_PATH ""
#endif