pkgsrc-wip/libtextcat/patches/patch-src_common.c
Francois Tigeot 770ef3524a Add Open/LibreOffice patches.
These patches add UTF-8 support and appear to improve language-detection
accuracy, especially for non-Latin scripts.
2011-02-17 13:19:17 +00:00

234 lines
6.1 KiB
C

$NetBSD: patch-src_common.c,v 1.1 2011/02/17 13:19:17 ftigeot Exp $
--- src/common.c.orig 2003-05-22 11:32:43 +0000
+++ src/common.c
@@ -3,23 +3,23 @@
*
* Copyright (c) 2003, WiseGuys Internet B.V.
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- *
+ *
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
- *
+ *
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
- *
+ *
* - Neither the name of the WiseGuys Internet B.V. nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -114,11 +114,11 @@ extern char* wg_strdup( const char *s )
wgmem_error( "Error while strduping %u bytes.\n", strlen(s) );
}
- return( result );
+ return( result );
}
-extern void* wg_realloc( void *ptr, size_t size )
-{
+extern void* wg_realloc( void *ptr, size_t size )
+{
void *result;
if (!size) {
@@ -131,7 +131,7 @@ extern void* wg_realloc( void *ptr, size
wgmem_error( "Error while reallocing %u bytes.\n", size );
}
- return( result );
+ return( result );
}
extern void wg_free( void *mem )
@@ -148,12 +148,12 @@ extern char *wg_getline( char *line, int
if ( fgets(line, size, fp) == NULL ) {
return NULL;
}
-
+
/** kill term null **/
if ( (p = strpbrk( line, "\n\r" )) ) {
*p = '\0';
- }
-
+ }
+
return line;
}
@@ -164,39 +164,39 @@ extern char *wg_getline( char *line, int
*
* ARGUMENTS:
* - result:
- *
+ *
* After the split, this array contains pointers to the start of each
* detected segment. Must be preallocated and at least as large as
* maxsegments. The pointers point into the dest buffer.
- *
- * - dest:
- *
+ *
+ * - dest:
+ *
* String into which result points as an index. Must be preallocated, and
* at least as big as src. You can use src as dest, but in that case src
* is overwritten!
- *
- * - src:
- *
+ *
+ * - src:
+ *
* The string to split. Sequences of whitespace are treated as separators, unless
* escaped. There are two ways to escape: by using single quotes (anything
* between single quotes is treated as one segment), or by using a backslash
* to escape the next character. The backslash escape works inside quotation
* as well.
- *
+ *
* Example:
- *
+ *
* "It\'s very\ easy 'to use WiseGuys\' wg_split()' function" is split into:
- *
+ *
* "It's"
* "very easy"
* "to use WiseGuys' wg_split()"
* "function"
- *
- * - maxsegments:
- *
+ *
+ * - maxsegments:
+ *
* The maximum number of segments. If the splitter runs out of segments,
* the remainder of the string is stored in the last segment.
- *
+ *
* RETURN VALUE:
* The number of segments found.
*/
@@ -218,12 +218,12 @@ unsigned int wg_split( char **result, ch
switch (state) {
case 0:
/*** Skip spaces ***/
- while ( isspace((int) *p) ) {
+ while ( isspace((unsigned char) *p) ) {
p++;
}
state = 1;
- case 1:
+ case 1:
/*** Start segment ***/
result[cnt] = w;
cnt++;
@@ -232,12 +232,12 @@ unsigned int wg_split( char **result, ch
case 2:
/*** Unquoted segment ***/
while (*p) {
- if ( isspace((int) *p) ) {
+ if ( isspace((unsigned char) *p) ) {
*w++ = '\0';
p++;
state = 0;
break;
- }
+ }
else if ( *p == '\'' ) {
/*** Start quotation ***/
p++;
@@ -292,17 +292,17 @@ unsigned int wg_split( char **result, ch
}
+#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
extern void wg_timerstart(wgtimer_t *t)
{
-#ifdef HAVE_GETTIMEOFDAY
gettimeofday( &(t->start), NULL );
-#endif
}
+#endif /* TL : no struct timeval under Win32 */
+#ifdef HAVE_GETTIMEOFDAY /* TL : no struct timeval under Win32 */
extern uint4 wg_timerstop(wgtimer_t *t)
{
-#ifdef HAVE_GETTIMEOFDAY
uint4 result;
gettimeofday( &(t->stop), NULL );
result = (t->stop.tv_sec - t->start.tv_sec) * 1000000 +
@@ -312,25 +312,23 @@ extern uint4 wg_timerstop(wgtimer_t *t)
t->start.tv_usec = t->stop.tv_usec;
return result;
-#else
- return 0;
-#endif
}
+#endif /* TL : no struct timeval under Win32 */
/**
* wg_strgmov -- a guarded strcpy() variation
- *
+ *
* copies src to dest (including terminating zero), and returns
* pointer to position of terminating zero in dest. The function is
* guaranteed not to write past destlimit. If the copy couldn't be
- * finished, the function returns NULL after restoring the first
- * character in dest for your convenience (since this is usually a zero).
+ * finished, the function returns NULL after restoring the first
+ * character in dest for your convenience (since this is usually a zero).
*/
char *wg_strgmov( char *dest, const char *src, const char *destlimit )
{
char tmp, *w;
-
+
if ( !dest || dest >= destlimit ) {
return NULL;
}
@@ -355,7 +353,7 @@ char *wg_strgmov( char *dest, const char
}
/*
- * wg_trim() -- remove whitespace surrounding a string.
+ * wg_trim() -- remove whitespace surrounding a string.
*
* Example: " bla bla bla " becomes "bla bla bla" after trimming.
*
@@ -373,12 +371,12 @@ char *wg_trim( char *dest, const char *s
char *lastnonspace = &dest[-1];
const char *p = src;
char *w = dest;
-
- while ( isspace((int)*p) ) {
+
+ while ( isspace((unsigned char)*p) ) {
p++;
}
while (*p) {
- if ( !isspace((int)*p) ) {
+ if ( !isspace((unsigned char)*p) ) {
lastnonspace = w;
}
*w++ = *p++;